# Preparing data for SketchRNN and ml5.js

In [1]:
import json
from pprint import pprint
from rdp import rdp
import numpy as np

In [3]:
# File paths
simulation_large = "../data/simulation_large.txt"
simulation = "../data/simulation_large.json"
training = "../data/simulation_large_traininng.txt"
one_line = "../data/one_line.json"

In [4]:
# Utils
def map(value, leftMin, leftMax, rightMin, rightMax):
    # Figure out how 'wide' each range is
    leftSpan = leftMax - leftMin
    rightSpan = rightMax - rightMin

    # Convert the left range into a 0-1 range (float)
    valueScaled = float(value - leftMin) / float(leftSpan)

    # Convert the 0-1 range into a value in the right range.
    return float(rightMin + (valueScaled * rightSpan))

def rgb2hex(r,g,b):
    return f'{int(round(r)):02x}{int(round(g)):02x}{int(round(b)):02x}'

In [6]:
# Open the original file (not in JSON)
original_file = open(simulation_large) 
num_lines = sum(1 for line in open(simulation_large))
json_data = {
    "lines": [],
    "colors": []
}
lines_for_training = []

# every line path is store as one json line 
for i, line in enumerate(original_file):
    line = json.loads(line)
    name = "line_{}".format(i)
    #json_data["colors"].append(line[name]["colors"][0])
    r = map(line[name]["colors"][0][0], 0, 1, 0, 255)
    g = map(line[name]["colors"][0][1], 0, 1, 0, 255)
    b = map(line[name]["colors"][0][2], 0, 1, 0, 255)
    json_data['colors'].append(rgb2hex(r,g,b))
       
    # apply Ramer-Douglas-Peucker stroke simplification
    reduce_positions = rdp(line[name]["positions"], epsilon=0.1)
    positions = []
    x = 0
    y = 0

    for index, pos in enumerate(reduce_positions):
        dx = map(pos[0], -100, 100, 0, 100)
        dy = map(pos[1], -100, 100, 0, 100)
        dx = float("{0:.2f}".format(dx- x));
        dy = float("{0:.2f}".format(dy - y));
        z = 0
        if index == 0:
            z = 1
        
        if index != 0:
            positions.append([dx, dy, z])

        x = x + dx
        y = y + dy
    
    lines_for_training.append(np.array(positions, dtype='float16'))
    json_data['lines'].append(positions)
        
original_file.close()

# Save new file in json
with open(simulation, 'w') as outfile:
    json.dump(json_data, outfile) 


In [13]:
# Save data to train
split_perc = 0.06
total_amount = len(lines_for_training)

validation_amount = int(total_amount*split_perc)
test_amount = int(total_amount*split_perc)
train_amount = total_amount - test_amount - validation_amount
print(total_amount, validation_amount, test_amount, train_amount)

train_data = lines_for_training[0:900]
validation_data = lines_for_training[900:1050]
test_data = lines_for_training[1046:]
print(len(train_data), len(validation_data), len(test_data))

# save to sketch rnn file
filename = "../data/data_for_sketchRNN_trainingv2.npz"
np.savez_compressed(filename, train=train_data, valid=validation_data, test=test_data)

1196 71 71 1054
900 150 150


In [11]:
lines_for_training[0]

array([[-0.85,  0.17,  0.  ],
       [-0.33,  0.36,  0.  ],
       [-0.19,  0.72,  0.  ],
       [-0.2 ,  0.32,  0.  ],
       [-2.71,  1.54,  0.  ],
       [-1.07,  0.33,  0.  ],
       [-2.11,  0.27,  0.  ],
       [-1.5 , -0.07,  0.  ],
       [-2.92, -0.66,  0.  ],
       [-0.55, -0.3 ,  0.  ],
       [-0.58, -0.48,  0.  ],
       [-1.08, -0.26,  0.  ],
       [-0.33, -0.37,  0.  ],
       [-0.27, -1.97,  0.  ],
       [ 0.04, -1.  ,  0.  ],
       [ 0.33, -1.47,  0.  ],
       [ 0.15, -0.34,  0.  ],
       [ 0.64, -0.76,  0.  ],
       [ 0.54, -0.31,  0.  ],
       [ 7.52, -2.33,  0.  ],
       [ 5.23, -2.69,  0.  ],
       [ 0.79, -0.61,  0.  ],
       [ 0.51, -0.55,  0.  ],
       [ 0.41, -0.63,  0.  ],
       [ 0.37, -1.06,  0.  ],
       [-0.19, -1.86,  0.  ],
       [-0.19, -0.6 ,  0.  ],
       [-0.53, -0.84,  0.  ],
       [-0.29, -0.24,  0.  ],
       [-0.36, -0.08,  0.  ],
       [-0.46,  0.19,  0.  ],
       [-0.22,  0.43,  0.  ],
       [ 0.01,  0.25,  0.  ],
       [ 0