# The Quick, Draw! Dataset

The Quick Draw Dataset is a collection of 50 million drawings across 345 categories, contributed by players of the game Quick, Draw!. The drawings were captured as timestamped vectors, tagged with metadata including what the player was asked to draw and in which country the player was located. You can browse the recognized drawings on quickdraw.withgoogle.com/data or download the dataset from https://console.cloud.google.com/storage/browser/quickdraw_dataset/?pli=1.  

The architecture is ported from the tutorial <a href='https://www.tensorflow.org/versions/master/tutorials/recurrent_quickdraw'>Recurrent Neural Networks for Drawing Classification</a> (associated repo available <a href='https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw'>here</a>), and many of its details are reused here.  

<img src='https://github.com/googlecreativelab/quickdraw-dataset/raw/master/preview.jpg'/>

--- 

In [1]:
import keras 
keras.__version__

Using TensorFlow backend.


'2.0.8'

In [2]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imresize
import os

In [3]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ('retina') figures.
%config InlineBackend.figure_format = 'retina'

# Apply matplotlib's 'ggplot' style sheet to every plot in this notebook.
plt.style.use('ggplot')

In [4]:
from keras import layers
from keras import models
from keras import optimizers
from keras import callbacks

from keras.utils import plot_model

from keras import preprocessing
from keras.preprocessing import image

--- 

In [None]:
def plot_accuracy_loss(history):
    """Plot training/validation accuracy and loss curves for a training run.

    Two figures are drawn: accuracy on the current figure, then loss on a
    new one. Training values are drawn as blue dots, validation values as a
    blue line.

    Args:
        history: object exposing a ``history`` dict (as returned by
            ``model.fit``) with ``'loss'``, ``'val_loss'`` and the accuracy
            series under either ``'acc'``/``'val_acc'`` or
            ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: if a required series is missing from ``history.history``.
    """
    metrics = history.history

    # Older Keras releases (such as the 2.0.8 used here) log the 'accuracy'
    # metric under 'acc'; newer releases keep the name 'accuracy'.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 0 on the x-axis.
    epochs = range(len(acc))

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()

    # Start a second figure so the loss curves do not share the accuracy axes.
    plt.figure()

    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()

In [92]:
def batch(x, y, batch_size=8):
    """Reshape ``x`` into ``batch_size`` groups of 3-wide rows.

    Args:
        x: array-like exposing ``reshape``; its size must be divisible by
            ``batch_size * 3``.
        y: passed through untouched.
        batch_size: size of the leading axis of the reshaped array.

    Returns:
        Tuple ``(x_reshaped, y)`` where ``x_reshaped`` has shape
        ``(batch_size, -1, 3)``.
    """
    target_shape = (batch_size, -1, 3)
    reshaped = x.reshape(target_shape)
    return reshaped, y

In [93]:
def pad_stroke_sequence(x, max_len=80):
    """Left-pad or truncate every stroke sequence to exactly ``max_len`` rows.

    Sequences shorter than ``max_len`` are padded with zero rows at the
    *front*; longer sequences keep only their first ``max_len`` rows.

    Args:
        x: sequence (object ndarray or list) of per-drawing arrays, each of
            shape ``(n_i, 3)``. Empty entries yield an all-zero sequence.
        max_len: number of rows in every output sequence.

    Returns:
        ``float32`` ndarray of shape ``(len(x), max_len, 3)``.
    """
    # Start from zeros so that padding only requires copying the real rows.
    padded_x = np.zeros((len(x), max_len, 3), dtype=np.float32)

    for i, strokes in enumerate(x):
        strokes = np.asarray(strokes, dtype=np.float32)
        keep = min(strokes.shape[0], max_len)
        if keep == 0:
            # Nothing to copy (empty drawing or max_len == 0); row stays zero.
            continue
        # Right-align the kept rows so the zero padding precedes the data.
        padded_x[i, max_len - keep:] = strokes[:keep]

    return padded_x

--- 

In [100]:
def create_model(input_shape=(80, 3), num_conv=[48, 64, 96], conv_len=[5, 5, 3], dropout=0.3, batch_size=8, 
                 num_rnn_layers=3, num_rnn_nodes=128, num_classes=174):
    """Build the Conv1D -> bidirectional LSTM -> softmax classifier.

    Args:
        input_shape: shape of one input sequence, passed to the first Conv1D.
        num_conv: number of filters for each Conv1D layer, in order.
        conv_len: kernel size for each Conv1D layer; indexed in step with
            ``num_conv``.
        dropout: rate for the Dropout layers placed before every Conv1D except
            the first, and for the LSTMs' recurrent dropout.
        batch_size: accepted but not used by this function.
        num_rnn_layers: number of stacked bidirectional LSTM layers.
        num_rnn_nodes: units per LSTM direction.
        num_classes: width of the final softmax layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    network = models.Sequential()

    # Convolutional front end: linear (no activation) 'same'-padded Conv1D
    # layers, with dropout between consecutive convolutions.
    for idx, n_filters in enumerate(num_conv):
        if idx > 0:
            network.add(layers.Dropout(dropout, name="dropout_{}".format(idx)))

        conv_kwargs = dict(filters=n_filters,
                           kernel_size=conv_len[idx],
                           activation=None,
                           strides=1,
                           padding='same',
                           name='conv1d_{}'.format(idx))
        if idx == 0:
            # TODO: feasible to use a TimeDistributed wrapper here? https://keras.io/layers/wrappers/
            # Only the first layer of the model declares the input shape.
            conv_kwargs['input_shape'] = input_shape

        network.add(layers.Conv1D(**conv_kwargs))

    # Recurrent stack: each layer returns the full sequence.
    for idx in range(num_rnn_layers):
        recurrent = layers.LSTM(units=num_rnn_nodes,
                                return_sequences=True,
                                recurrent_dropout=dropout)
        network.add(layers.Bidirectional(recurrent, name="lstm_{}".format(idx)))

    # Classification head over the flattened sequence outputs.
    network.add(layers.Flatten())
    network.add(layers.Dense(num_classes, activation="softmax"))

    return network

In [105]:
def train(model, batch_size, epochs, train_x, train_y, valid_x, valid_y, max_seq_len=80, 
          load_previous_weights=True, 
          model_weights_file="output/quickdraw_weights.h5"):
    """Compile and fit ``model`` on padded stroke sequences.

    Args:
        model: Keras model to train; compiled here with categorical
            cross-entropy, RMSprop and an accuracy metric.
        batch_size: mini-batch size passed to ``model.fit``.
        epochs: maximum number of epochs (early stopping may end sooner).
        train_x, valid_x: unpadded stroke sequences; padded/truncated to
            ``max_seq_len`` via ``pad_stroke_sequence``.
        train_y, valid_y: targets passed straight to ``model.fit``.
        max_seq_len: sequence length every sample is padded/truncated to.
        load_previous_weights: when true and ``model_weights_file`` exists,
            its weights are loaded before training.
        model_weights_file: path used for loading and checkpointing weights;
            ``None`` disables both.

    Returns:
        Tuple ``(model, history)`` where ``history`` is the value returned by
        ``model.fit``.
    """
    has_weights_file = model_weights_file is not None

    # load previous weights (if applicable)
    if has_weights_file and load_previous_weights and os.path.isfile(model_weights_file):
        print("Loading weights from file {}".format(model_weights_file))
        model.load_weights(model_weights_file)

    # compile model
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'])

    # prepare training and validation data; honour the caller's sequence
    # length instead of silently using pad_stroke_sequence's default
    train_x = pad_stroke_sequence(train_x, max_len=max_seq_len)
    valid_x = pad_stroke_sequence(valid_x, max_len=max_seq_len)

    training_callbacks = []

    if has_weights_file:
        # make sure the checkpoint directory exists before Keras writes to it
        weights_dir = os.path.dirname(model_weights_file)
        if weights_dir and not os.path.isdir(weights_dir):
            os.makedirs(weights_dir)

        # checkpoint the best (lowest val_loss) weights, checked every 2 epochs
        training_callbacks.append(
            callbacks.ModelCheckpoint(model_weights_file,
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=True,
                                      save_weights_only=True,
                                      mode='auto',
                                      period=2))

    # stop once val_loss has not improved for 5 consecutive epochs
    training_callbacks.append(callbacks.EarlyStopping(monitor='val_loss', patience=5))

    history = model.fit(train_x, train_y,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(valid_x, valid_y),
                        shuffle=True,
                        callbacks=training_callbacks)

    return model, history

### Load training and validation data 

In [106]:
# Directory holding the preprocessed .npy arrays. Set the QUICKDRAW_DATA_DIR
# environment variable to point elsewhere without editing this cell.
DEST_DIR = os.environ.get(
    'QUICKDRAW_DATA_DIR',
    '/Volumes/Storage/quickdraw_dataset (subset)/sketchrnn_training_data/')
# Upper bound on training epochs.
EPOCHS = 1000
# Mini-batch size used for training.
BATCH_SIZE = 8 
# Sequence length used for the model's input shape and passed to train().
MAX_SEQ_LEN = 80

In [107]:
# train_x is a 1-D object array (one stroke array per drawing), which current
# NumPy releases refuse to load unless pickling is explicitly allowed.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
train_x = np.load(os.path.join(DEST_DIR, "train_x.npy"), allow_pickle=True)
train_y = np.load(os.path.join(DEST_DIR, "train_y.npy"))

print("train_x {}, train_y {}".format(train_x.shape, train_y.shape))

train_x (11000,), train_y (11000, 11)


In [108]:
# valid_x is a 1-D object array (one stroke array per drawing), which current
# NumPy releases refuse to load unless pickling is explicitly allowed.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
valid_x = np.load(os.path.join(DEST_DIR, "validation_x.npy"), allow_pickle=True)
valid_y = np.load(os.path.join(DEST_DIR, "validation_y.npy"))

# Label the shapes as validation data (the label previously said train_x/train_y).
print("valid_x {}, valid_y {}".format(valid_x.shape, valid_y.shape))

train_x (110,), train_y (110, 11)


### Create model and train 

In [103]:
# Size the softmax layer from the one-hot label width instead of repeating the
# class count as a magic number.
model = create_model(input_shape=(MAX_SEQ_LEN, 3), num_classes=train_y.shape[1])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_0 (Conv1D)            (None, 80, 48)            768       
_________________________________________________________________
dropout_1 (Dropout)          (None, 80, 48)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 80, 64)            15424     
_________________________________________________________________
dropout_2 (Dropout)          (None, 80, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 80, 96)            18528     
_________________________________________________________________
lstm_0 (Bidirectional)       (None, 80, 256)           230400    
_________________________________________________________________
lstm_1 (Bidirectional)       (None, 80, 256)           394240    
__________

In [109]:
# Fit the model; keep both the trained model and the history returned by train().
model, training_history = train(
    model,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    train_x=train_x,
    train_y=train_y,
    valid_x=valid_x,
    valid_y=valid_y,
    max_seq_len=MAX_SEQ_LEN,
    load_previous_weights=True,
    model_weights_file="output/quickdraw_weights_0.h5",
)

(11000,)
(11000, 80, 3)
(110,)
(110, 80, 3)
Train on 11000 samples, validate on 110 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100

KeyboardInterrupt: 

In [None]:
# The history returned by train() is bound to `training_history`; the name
# `history` is never defined at notebook level.
plot_accuracy_loss(training_history)