In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
def calculate_normalizing_scale_factor(strokes):
    """Return the standard deviation of the first two columns of all sketches.

    Every row of every array in ``strokes`` contributes its column-0 and
    column-1 values (the x/y offsets in sketch-rnn's stroke format) to one
    pooled sample; the result is ``np.std`` of that sample. This is the
    normalizing factor described in the appendix of the sketch-rnn paper.

    Args:
        strokes: sequence of 2-D arrays indexable as ``arr[row, col]``.

    Returns:
        The pooled standard deviation as a numpy float.
    """
    # Flatten (col 0, col 1) pairs row by row, sketch by sketch.
    pooled_offsets = [
        value
        for sketch in strokes
        for row in range(len(sketch))
        for value in (sketch[row, 0], sketch[row, 1])
    ]
    return np.std(np.array(pooled_offsets))

In [5]:
longest_sequence = 200  # sequences of strokes capped at 200 (avoid scribbles)


def remove_noise(noisy_data):
    """Filter, clamp and normalize a collection of stroke sequences.

    A sequence is kept only when it has more than 10 rows and at most
    ``longest_sequence`` rows. Kept sequences have every value clamped to
    [-1000, 1000], are converted to ``float32``, and have their first two
    columns divided by a scale factor. The scale factor is computed once,
    from the full unfiltered ``noisy_data``.

    Args:
        noisy_data: iterable of 2-D arrays (one per sketch).

    Returns:
        list of new ``float32`` arrays, in the original order.
    """
    # The factor is derived from all input sequences, including the ones
    # that are dropped below.
    scale_factor = calculate_normalizing_scale_factor(noisy_data)

    clean_data = []
    for seq in noisy_data:
        num_rows = seq.shape[0]
        if num_rows > longest_sequence or num_rows <= 10:
            continue  # too long (scribble) or too short

        clamped = np.maximum(np.minimum(seq, 1000), -1000)
        normalized = np.array(clamped, dtype=np.float32)
        normalized[:, 0:2] /= scale_factor  # normalize the strokes, so not too long
        clean_data.append(normalized)

    return clean_data

In [6]:
# Target image size: px, py = 28, 28
# Pixels = inches * dpi, so a 1x1 inch figure saved with dpi=28 gives 28x28 px.

def draw_sequence(seq_strokes, dataset_type, sketch_num):
    """Render one stroke sequence and save it as a PNG.

    The image is written to ``../Datasets/cat/{dataset_type}/{sketch_num}.png``
    from a 1x1 inch figure at dpi=28 (28x28 pixels). The target directory
    must already exist.

    Args:
        seq_strokes: iterable of rows ``(dx, dy, pen)``. ``dx``/``dy`` are
            offsets from the previous point (starting at the origin) and
            ``pen == 1`` marks the last point of a pen stroke, i.e. the pen
            is lifted *after* that point (sketch-rnn stroke-3 convention;
            NOTE(review): confirm the dataset follows it).
        dataset_type: name of the sub-directory to save into
            (e.g. "train", "test", "valid").
        sketch_num: value used as the output file stem.
    """
    fig = plt.figure(frameon=False)
    fig.set_size_inches(1, 1)
    try:
        ax = fig.gca()
        ax.xaxis.set_ticks_position('top')
        ax.invert_yaxis()  # image coordinates: y grows downwards
        ax.set(facecolor="white")

        # Current polyline; drawing starts from the origin.
        x_coords = [0]
        y_coords = [0]
        pos_x = 0
        pos_y = 0

        for stroke in seq_strokes:
            pos_x += stroke[0]
            pos_y += stroke[1]

            # The point reached by this offset always belongs to the stroke
            # in progress, including the point on which the pen is lifted.
            x_coords.append(pos_x)
            y_coords.append(pos_y)

            if stroke[2] == 1:
                # Pen lifted after this point: flush the finished polyline and
                # start an empty one, so the jump to the next stroke's first
                # point is not drawn.
                ax.plot(x_coords, y_coords, color="black")
                x_coords = []
                y_coords = []

        # Flush a trailing polyline that did not end with a pen lift.
        if len(x_coords) > 1:
            ax.plot(x_coords, y_coords, color="black")

        ax.axis('off')
        fig.savefig(F"../Datasets/cat/{dataset_type}/{sketch_num}.png", dpi=28, transparent=False)
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # rendering thousands of sketches in a loop exhausts memory.
        plt.close(fig)

In [None]:
# Load the npz stroke data, clean each split and render every sketch to PNG.

data_location = '../Datasets/cat.npz'

# The archive stores object arrays (one variable-length array per sketch), so
# NumPy >= 1.16.3 refuses to load it unless allow_pickle=True is given.
# Unpickling can execute arbitrary code: only load files from a trusted source.
with np.load(data_location, encoding='latin1', allow_pickle=True) as dataset:
    data_train = dataset['train']
    data_test = dataset['test']
    data_valid = dataset['valid']

# Train sketches with an index up to and including this value are NOT rendered.
# NOTE(review): presumably left over from resuming an interrupted run - confirm;
# set to -1 to render the whole training split.
train_resume_index = 18657

clean_train = remove_noise(data_train)
for i, sketch in enumerate(clean_train):
    if i > train_resume_index:
        draw_sequence(sketch, "train", i)

clean_test = remove_noise(data_test)
for i, sketch in enumerate(clean_test):
    draw_sequence(sketch, "test", i)

clean_valid = remove_noise(data_valid)
for i, sketch in enumerate(clean_valid):
    draw_sequence(sketch, "valid", i)


In [None]:
# import json
# from scipy import interpolate
# import pylab as pl
# f = open("../Datasets/full_simplified_cat.json")
# setting = json.load(f)

# for j in range(0,10):
#     for i in range(0,len(setting[j]['drawing'])):
#         x = setting[j]['drawing'][i][0]
#         y = setting[j]['drawing'][i][1]
#         f = interpolate.interp1d(x,y,kind="slinear")
#         pl.plot(x,y,'k')
#     ax = pl.gca()
#     ax.xaxis.set_ticks_position('top')
#     ax.invert_yaxis()
#     pl.axis('off')
#     pl.savefig("../Datasets/cat/%d.png"%j)
#     pl.close()