# Input cleaning notebook

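The tablet generates files that contain the raw ink data together with its best guess at the text that was written. To use the inks as training/validation/testing data, each one needs a correct label. This notebook shows the inks one at a time so the guessed label can be confirmed, corrected, or the ink skipped, and then saves the corrected set.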

In [None]:
import training

# Load the logged records from the tablet's log file. Later cells unpack each
# record as a (guess, ink) pair; the parsing itself lives in training.load_inks.
inks = training.load_inks('../handwriting.log')
# Show how many records were loaded, as a quick sanity check.
print(len(inks))

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

def _strokes_from_deltas(points):
    """Turn a flat sequence of offset-encoded points into a list of strokes.

    The first three components of each point are added to the running
    position (starting from the origin); a fourth component above 0.5 ends
    the current stroke. Empty strokes (e.g. the one opened after a final
    end-of-stroke marker) are dropped from the result.
    """
    strokes = [[]]
    position = [0, 0, 0]
    for point in points:
        # zip stops at the shorter operand, so only the first three
        # components of `point` are accumulated.
        position = [prev + delta for prev, delta in zip(position, point)]
        strokes[-1].append(position)
        if point[3] > 0.5:
            strokes.append([])
    return [stroke for stroke in strokes if stroke]


def show_batch(pairs, tensor=False):
    """Plot each (label, ink) pair on its own subplot, stacked vertically.

    Args:
        pairs: sequence of ``(label, ink)`` tuples. ``label`` is used as the
            subplot title. By default ``ink`` is a list of strokes, each a
            list of points whose first two components are plotted as x and y.
        tensor: when true, each ``ink`` is instead a flat sequence of points
            encoded as offsets from the previous point, with a fourth
            component above 0.5 marking the end of a stroke; it is decoded
            into strokes before plotting.

    Does nothing when ``pairs`` is empty.
    """
    if len(pairs) == 0:
        # plt.subplots rejects a grid with zero rows; nothing to draw anyway.
        return

    # squeeze=False always yields a 2-D array of axes, so a single pair needs
    # no special-casing.
    fig, axs = plt.subplots(
        len(pairs), 1, figsize=(10, len(pairs) * 2), squeeze=False
    )
    for ax, (label, ink) in zip(axs[:, 0], pairs):
        ax.set_title(label)
        # Flip the y axis (presumably tablet y grows downward -- confirm).
        ax.invert_yaxis()
        strokes = _strokes_from_deltas(ink) if tensor else ink
        for stroke in strokes:
            ax.plot([p[0] for p in stroke], [p[1] for p in stroke])
        # Equal scaling on both axes keeps the handwriting undistorted.
        ax.axis('equal')
    plt.show()

In [None]:
# Accumulator for the reviewed (label, ink) pairs. It lives in its own cell so
# that re-running the labelling cell does not reset it; re-run this cell to
# start over with an empty list.
corrected = []

In [None]:
from IPython.display import clear_output
# Bounds of the slice of `inks` reviewed in this session; change them to work
# through a different batch.
BATCH_START, BATCH_END = 350, 450
inks_to_correct = inks[BATCH_START:BATCH_END]

for guess, ink in inks_to_correct:
    # wait=True defers clearing until the next output arrives, which avoids
    # flicker between consecutive inks.
    clear_output(wait=True)
    show_batch([(guess, ink)])
    # Strip the reply so a stray space cannot turn "-" or an empty answer
    # into a bogus label.
    actual = input("label (Enter = keep guess, '-' = skip): ").strip()
    if actual == '-':
        continue  # drop this ink from the corrected set
    # An empty reply accepts the tablet's guess unchanged.
    corrected.append((actual or guess, ink))

In [None]:
# Number of inks kept so far (skipped inks are not counted).
len(corrected)

In [None]:
# Hand the reviewed (label, ink) pairs to training.save_inks for writing.
# NOTE(review): the output name is hard-coded; presumably it should change
# with each reviewed batch so earlier results are not overwritten -- confirm
# how save_inks treats an existing file.
training.save_inks(corrected, 'cleaned-inks-3.txt')