<a href="https://colab.research.google.com/github/horti-stanford/cs230-milestone/blob/main/process_data_inkml_to_rgb_annotations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive at /cs230-root; the dataset path used later in this
# notebook lives under /cs230-root/MyDrive.
from google.colab import drive

drive.mount('/cs230-root')

Mounted at /cs230-root


In [3]:
# Goal: convert the InkML files in the train, valid, and test sets to RGB images
# of size 256 x 128.
# The InkML inks render to images of different sizes, so they are rasterized and
# resized to a common size for use in offline HME.
# Directory for data: /cs230-root/MyDrive/cs230-project/mathwriting-2024-excerpt
# Train input set:  train -> train-images
# Valid input set: valid -> valid-images
# Test input set: test -> test-images

# Convert the LaTeX annotations to strings. The strings must be tokenized before
# they can be used for training.
# Train labels: train -> train-labels
# Valid labels: valid  -> valid-labels
# Test labels: test -> test-labels

# Note: we first run this on the "excerpt" set, which has 100 test, train, and
# valid images with annotations (labels).


In [4]:
# Convert inkml to (256 x 128) images
# Modified code from https://github.com/google-research/google-research/blob/master/mathwriting/mathwriting_code_examples.ipynb



In [5]:
# Import packages
import json
import os
import pprint
import re

import numpy as np
import matplotlib.pyplot as pl
import matplotlib.patches as mpl_patches

from xml.etree import ElementTree

In [6]:
# Input: Filename
# Output: strokes, annotations

def read_inkml_file(filename: str):
  """Modified simple reader for MathWriting's InkML files.

  Args:
    filename: path to the InkML file to read.

  Returns:
    A `(strokes, annotations)` tuple. `strokes` is a list with one numpy array
    of shape (3, num_points) per non-empty <trace> element, whose rows are the
    x, y and t values of the points. `annotations` maps the `type` attribute of
    each <annotation> element to that element's text.

  Raises:
    ValueError: if a point does not consist of exactly three numbers.
  """
  # Let the XML parser open the file itself so that the encoding declared in
  # the XML prolog is honoured instead of the platform's default text encoding.
  root = ElementTree.parse(filename).getroot()
  strokes = []
  annotations = {}

  for element in root:
    tag_name = element.tag.removeprefix('{http://www.w3.org/2003/InkML}')
    if tag_name == 'annotation':
      annotations[element.attrib.get('type')] = element.text

    elif tag_name == 'trace':
      stroke_x, stroke_y, stroke_t = [], [], []
      # `element.text` is None for an empty <trace/>.
      for point in (element.text or '').split(','):
        # split() without an argument tolerates spaces after commas, repeated
        # spaces and line breaks inside the trace text.
        fields = point.split()
        if not fields:
          # Blank entry (e.g. trailing comma): nothing to record.
          continue
        x, y, t = fields
        stroke_x.append(float(x))
        stroke_y.append(float(y))
        stroke_t.append(float(t))
      # Skip traces without points; a (3, 0) array has no bounding box.
      if stroke_x:
        strokes.append(np.array((stroke_x, stroke_y, stroke_t)))

  return strokes, annotations

In [7]:
# Install the Cairo development libraries and the pycairo bindings; the
# rendering code below imports them as `cairo`.
!apt-get install libcairo2-dev libjpeg-dev libgif-dev
!pip install pycairo

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libjpeg-dev is already the newest version (8c-2ubuntu10).
libjpeg-dev set to manually installed.
libgif-dev is already the newest version (5.1.9-2ubuntu0.1).
libgif-dev set to manually installed.
The following additional packages will be installed:
  libblkid-dev libblkid1 libcairo-script-interpreter2 libffi-dev libglib2.0-dev libglib2.0-dev-bin
  libice-dev liblzo2-2 libmount-dev libmount1 libpixman-1-dev libselinux1-dev libsepol-dev
  libsm-dev libxcb-render0-dev libxcb-shm0-dev
Suggested packages:
  libcairo2-doc libgirepository1.0-dev libglib2.0-doc libgdk-pixbuf2.0-bin | libgdk-pixbuf2.0-dev
  libxml2-utils libice-doc cryptsetup-bin libsm-doc
The following NEW packages will be installed:
  libblkid-dev libcairo-script-interpreter2 libcairo2-dev libffi-dev libglib2.0-dev
  libglib2.0-dev-bin libice-dev liblzo2-2 libmount-dev libpixman-1-dev libselinux1-dev libsepol-dev
  libsm-dev libxc

In [8]:
import cairo
import math
import PIL
import PIL.Image


def cairo_to_pil(surface: cairo.ImageSurface) -> PIL.Image.Image:
  """Builds an RGB PIL image from the pixel data of an ARGB Cairo surface."""
  width, height = surface.get_width(), surface.get_height()
  row_stride = surface.get_stride()
  with surface.get_data() as pixel_buffer:
    # Copy the pixels out while the surface's buffer is still held open.
    raw_bytes = pixel_buffer.tobytes()
    image = PIL.Image.frombuffer(
        'RGB', (width, height), raw_bytes, 'raw', 'BGRX', row_stride
    )
  return image

def render_ink(
    strokes,
    annotations,
    margin = 0,
    stroke_width = 1.0,
    stroke_color = (0.0, 0.0, 0.0),
    background_color = (1.0, 1.0, 1.0),
) -> PIL.Image.Image:
  """Renders an ink as a PIL image using Cairo.

  The image size is chosen to fit the entire ink while having one pixel per
  InkML unit.

  Args:
    strokes: non-empty sequence of arrays, one per stroke, whose first two rows
      hold the x and y coordinates of the stroke's points.
    annotations: not used by the renderer; kept so existing callers that pass
      it keep working.
    margin: size of the blank margin around the image (pixels)
    stroke_width: width of each stroke (pixels)
    stroke_color: color to paint the strokes with
    background_color: color to fill the background with

  Returns:
    Rendered ink, as a PIL image.

  Raises:
    ValueError: if `strokes` is empty.
  """
  if len(strokes) == 0:
    raise ValueError('Cannot render an ink without strokes.')

  # Bounding box of the whole ink, taken over the x and y rows of every stroke.
  xmin, ymin = np.vstack([stroke[:2].min(axis=1) for stroke in strokes]).min(axis=0)
  xmax, ymax = np.vstack([stroke[:2].max(axis=1) for stroke in strokes]).max(axis=0)

  # Round the canvas size up rather than truncating so the points with the
  # largest coordinates still fall inside the surface, and keep at least one
  # pixel per axis so that a perfectly flat ink (a single point, or a
  # horizontal/vertical line) does not produce a zero-sized image.
  width = max(1, math.ceil(xmax - xmin + 2*margin))
  height = max(1, math.ceil(ymax - ymin + 2*margin))

  # Translation that moves the ink's top-left corner to (margin, margin).
  shift_x = - xmin + margin
  shift_y = - ymin + margin


  def apply_transform(ink_x: float, ink_y: float):
    return ink_x + shift_x, ink_y + shift_y

  # Create the canvas with the background color
  surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
  ctx = cairo.Context(surface)
  ctx.set_source_rgb(*background_color)
  ctx.paint()

  # Set pen parameters
  ctx.set_source_rgb(*stroke_color)
  ctx.set_line_width(stroke_width)
  ctx.set_line_cap(cairo.LineCap.ROUND)
  ctx.set_line_join(cairo.LineJoin.ROUND)

  for stroke in strokes:
    if len(stroke[0]) == 1:
      # For isolated points we just draw a filled disk with a diameter equal
      # to the line width.
      x, y = apply_transform(stroke[0, 0], stroke[1, 0])
      ctx.arc(x, y, stroke_width / 2, 0, 2 * math.pi)
      ctx.fill()

    else:
      # Start the path at the first point, then connect the remaining ones.
      ctx.move_to(*apply_transform(stroke[0,0], stroke[1,0]))

      for ink_x, ink_y in stroke[:2, 1:].T:
        ctx.line_to(*apply_transform(ink_x, ink_y))
      ctx.stroke()

  return cairo_to_pil(surface)


In [10]:
# Assumes these files from mathwriting-2024-excerpt are available
# Assumes folder train-images exists
import os
path ='/cs230-root/MyDrive/cs230-project/mathwriting-2024-excerpt/'


def process_dir(infldr, outfldr, lblfldr):
  """Rasterizes every InkML file of a folder and writes its image and label.

  Each file in `infldr` is read, rendered, resized to 256 x 128 and saved in
  `outfldr` as `<file name>.rgb`. The ink's `normalizedLabel` annotation is
  written to `lblfldr` as `<file name>.lbl`.

  Args:
    infldr: folder containing the InkML files to convert.
    outfldr: folder receiving the resized images; created if missing.
    lblfldr: folder receiving the label files; created if missing.

  Raises:
    KeyError: if an ink has no `normalizedLabel` annotation.
  """
  # Create the output folders up front so a fresh run does not fail on the
  # first save.
  os.makedirs(outfldr, exist_ok=True)
  os.makedirs(lblfldr, exist_ok=True)

  for name in os.listdir(infldr):
    in_path = os.path.join(infldr, name)
    if not os.path.isfile(in_path):
      # Subfolders cannot be read as InkML files.
      continue

    # Rasterize strokes, get annotations
    strokes, annotations = read_inkml_file(in_path)
    img = render_ink(strokes, annotations)
    # Resize input images to standard size
    resized = img.resize((256,128))
    # Write output to rgb file; the image library picks the file format from
    # the extension.
    resized.save(os.path.join(outfldr, name + ".rgb"))
    # Write LaTeX string for image as the label. The `with` block guarantees
    # the file is flushed and closed even if the write fails.
    with open(os.path.join(lblfldr, name + ".lbl"), "w") as label_file:
      label_file.write(annotations['normalizedLabel'])

In [15]:
# Process training data: read train/, write images to train-images/ and
# labels to train-labels
process_dir(path+'train/', path+'train-images/', path+'train-labels')

In [12]:
# Process dev (validation) data: read valid/, write images to valid-images/
# and labels to valid-labels
process_dir(path+'valid/', path+'valid-images/', path+'valid-labels')

In [14]:
# Process test data: read test/, write images to test-images/ and labels to
# test-labels
process_dir(path+'test/', path+'test-images/', path+'test-labels')