In [78]:
from keras import layers
from keras.models import Model
from keras.optimizers import Adam
from keras.metrics import top_k_categorical_accuracy
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [82]:
import datetime
import os
from glob import glob

import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

import nbimporter
from DataParser import generateDf, getXYfromDf

In [42]:
""" Hyperparameters defined here """
# Root folder of the QuickDraw dataset (machine-specific; adjust when running elsewhere).
base_dir = '/Volumes/JS/QuickDraw'
# The file name must NOT start with '/': os.path.join discards every earlier
# component when it meets an absolute one, which would drop base_dir entirely.
test_path = os.path.join(base_dir, 'test_simplified.csv')
all_train_paths = glob(os.path.join(base_dir, 'train_simplified', '*.csv'))
cols = ['countrycode', 'drawing', 'key_id', 'recognized', 'timestamp', 'word']
# WaveNet parameters
n_filters = 64        # channels of every convolution in the dilated stack
kernel_size = 2       # kernel width; also the base of the dilation rate (kernel_size ** i)
dilation_depth = 8    # number of residual blocks stacked in WaveNet()
pool_size_1 = 4       # kernel width of the first post-processing convolution
pool_size_2 = 8       # kernel width / pooling window of the later post-processing layers
batch_size = 4096     # mini-batch size passed to model.fit
activation = 'softmax'  # activation applied to the final pooled output

In [80]:
# Build the train / validation / test frames and the label encoder in one call.
train_df, valid_df, test_df, word_encoder = generateDf(
    n_train=750,
    n_valid=75,
    n_test=50,
    n_strokes=196,
    path=all_train_paths,
)

# Turn every split into an (x, y) pair, in train -> valid -> test order.
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = [
    getXYfromDf(split_df, word_encoder)
    for split_df in (train_df, valid_df, test_df)
]

words 340 => The Eiffel Tower, The Great Wall of China, The Mona Lisa, airplane, alarm clock, ambulance, angel, animal migration, ant, anvil, apple, arm, asparagus, axe, backpack, banana, bandage, barn, baseball, baseball bat, basket, basketball, bat, bathtub, beach, bear, beard, bed, bee, belt, bench, bicycle, binoculars, bird, birthday cake, blackberry, blueberry, book, boomerang, bottlecap, bowtie, bracelet, brain, bread, bridge, broccoli, broom, bucket, bulldozer, bus, bush, butterfly, cactus, cake, calculator, calendar, camel, camera, camouflage, campfire, candle, cannon, canoe, car, carrot, castle, cat, ceiling fan, cell phone, cello, chair, chandelier, church, circle, clarinet, clock, cloud, coffee cup, compass, computer, cookie, cooler, couch, cow, crab, crayon, crocodile, crown, cruise ship, cup, diamond, dishwasher, diving board, dog, dolphin, donut, door, dragon, dresser, drill, drums, duck, dumbbell, ear, elbow, elephant, envelope, eraser, eye, eyeglasses, face, fan, feathe

In [8]:
def residual_block(x, i):
    """Apply one gated, dilated, causal convolution block to `x`.

    Two parallel causal Conv1D layers (tanh and sigmoid activations) with
    dilation rate `kernel_size ** i` are multiplied element-wise, then
    projected by a width-1 convolution to `n_filters` channels.

    Args:
        x: input tensor; it is added to the width-1 projection, so its shape
           must match that projection (i.e. `n_filters` channels).
        i: block index; sets the dilation rate and is embedded in layer names.

    Returns:
        A `(res, skip)` tuple: `skip` is the width-1 projection of the gated
        output and `res` is `skip + x`.
    """
    dilation = kernel_size ** i
    # Settings shared by the two gate branches.
    branch_kwargs = dict(dilation_rate=dilation, padding='causal')

    filter_branch = layers.Conv1D(
        n_filters,
        kernel_size,
        name='dilated_conv_%d_tanh' % dilation,
        activation='tanh',
        **branch_kwargs
    )(x)
    gate_branch = layers.Conv1D(
        n_filters,
        kernel_size,
        name='dilated_conv_%d_sigm' % dilation,
        activation='sigmoid',
        **branch_kwargs
    )(x)

    gated = layers.Multiply(name='gated_activation_%d' % i)([filter_branch, gate_branch])
    skip = layers.Conv1D(n_filters, 1, name='skip_%d' % i)(gated)
    return layers.Add(name='residual_block_%d' % i)([skip, x]), skip

In [39]:
def WaveNet(inputShape, outputShape):
    """Build the WaveNet-style stroke classifier and print its summary.

    The network is a causal Conv1D stem followed by `dilation_depth` residual
    blocks; the blocks' skip outputs are summed and passed through a small
    convolutional head that ends in global average pooling and the
    module-level `activation`.

    Args:
        inputShape: shape of one sample without the batch axis; its first
            entry (sequence length) sizes the final convolution kernel.
        outputShape: shape of one target without the batch axis; its first
            entry is used as the number of output channels (classes).

    Returns:
        The uncompiled Keras `Model`.
    """
    # Use the function's own arguments rather than same-named notebook globals,
    # so the function works regardless of cell execution order.
    n_classes = outputShape[0]
    # Guard against a zero-width kernel when the sequence is shorter than
    # pool_size_1 * pool_size_2.
    final_kernel = max(1, inputShape[0] // (pool_size_1 * pool_size_2))

    stroke_input = layers.Input(shape=inputShape, name='featureInput')
    x = layers.Conv1D(n_filters, kernel_size, dilation_rate=1, padding='causal',
                      name='dilated_conv_1')(stroke_input)

    # Stack the residual blocks, keeping every skip output for the final sum.
    skip_connections = []
    for i in range(1, dilation_depth + 1):
        x, skip = residual_block(x, i)
        skip_connections.append(skip)
    x = layers.Add(name='skip_connections')(skip_connections)
    x = layers.LeakyReLU(alpha=0.1)(x)

    x = layers.Conv1D(n_filters, pool_size_1, strides=1, padding='same',
                      name='conv_5ms', activation='relu')(x)
    x = layers.Conv1D(n_classes, pool_size_2, padding='same', activation='relu',
                      name='conv_500ms')(x)
    x = layers.Conv1D(n_classes, pool_size_2, padding='same', activation='relu',
                      name='conv_500ms_target_shape')(x)
    x = layers.AveragePooling1D(pool_size_2, padding='same', name='downsample_to_2Hz')(x)
    x = layers.Conv1D(n_classes, final_kernel, padding='same', name='final_conv')(x)
    x = layers.GlobalAveragePooling1D(name='final_pooling')(x)
    x = layers.Activation(activation, name='final_activation')(x)

    # Keras 2 spells these keyword arguments `inputs` / `outputs`.
    model = Model(inputs=stroke_input, outputs=x)
    # summary() prints by itself and returns None, so it must not be wrapped in print().
    model.summary()
    return model

In [40]:
# Per-sample shapes: drop the leading batch axis from the training arrays.
input_shape, output_shape = x_train.shape[1:], y_train.shape[1:]

In [43]:
# Instantiate the network for the shapes derived from the training arrays.
model = WaveNet(inputShape=input_shape, outputShape=output_shape)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
featureInput (InputLayer)       (None, 196, 3)       0                                            
__________________________________________________________________________________________________
dilated_conv_1 (Conv1D)         (None, 196, 64)      448         featureInput[0][0]               
__________________________________________________________________________________________________
dilated_conv_2_tanh (Conv1D)    (None, 196, 64)      8256        dilated_conv_1[0][0]             
__________________________________________________________________________________________________
dilated_conv_2_sigm (Conv1D)    (None, 196, 64)      8256        dilated_conv_1[0][0]             
__________________________________________________________________________________________________
gated_acti



In [79]:
def top_3_accuracy(x, y):
    """Top-3 categorical accuracy metric.

    Forwards both arguments unchanged to Keras'
    `top_k_categorical_accuracy` with k fixed at 3.
    """
    return top_k_categorical_accuracy(x, y, k=3)

In [75]:
def train(model):
    """Compile `model` and fit it on the notebook-level training arrays.

    Reads the globals `x_train`, `y_train`, `x_valid`, `y_valid` and
    `batch_size`. The best weights (lowest validation loss) are written to
    `./model/stroke_wn_<HH_MM_mm_dd>.h5`.

    Args:
        model: an uncompiled Keras model, e.g. the one returned by `WaveNet`.
    """
    date = datetime.datetime.today().strftime('%H_%M_%m_%d')
    weight_save_path = './model/stroke_wn_%s' % date + '.h5'
    # ModelCheckpoint does not create missing directories itself.
    os.makedirs(os.path.dirname(weight_save_path), exist_ok=True)

    checkpoint = ModelCheckpoint(weight_save_path, monitor='val_loss',
                                 verbose=1, save_best_only=True, period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.6,
                                  patience=5, min_lr=1e-6, mode='auto')
    # NOTE(review): early stopping uses the same patience as reduce_lr, so the
    # learning-rate reduction likely never takes effect before training stops --
    # consider a larger patience here.
    early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=5)
    callback = [checkpoint, early_stop, reduce_lr]
    optimizer = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy', top_3_accuracy])
    model.fit(x_train, y_train,
              validation_data=(x_valid, y_valid),
              batch_size=batch_size,
              epochs=50,
              callbacks=callback)

In [84]:
def evaluate(model, weight_path, batch_size=4096):
    """Load saved weights into `model` and score it on the test arrays.

    Reads the notebook-level globals `x_test` and `y_test`.

    Args:
        model: a compiled Keras model whose architecture matches the weights.
        weight_path: path of the weights file to load.
        batch_size: batch size used for evaluation (default 4096).

    Returns:
        The list returned by `model.evaluate`. The printed line takes entries
        1 and 2 as accuracy and top-3 accuracy, which matches the metric order
        `train` compiles with.
    """
    # load_weights() updates the model in place and returns None, so its
    # result must not be assigned back to `model`.
    model.load_weights(weight_path)
    result = model.evaluate(x_test, y_test, batch_size=batch_size)
    print('Accuracy: %2.1f%%, Top 3 Accuracy %2.1f%%' % (100*result[1], 100*result[2]))
    return result

In [85]:
def sklearnReport(model, weight_path):
    """Load saved weights, then show a confusion matrix and a per-class report.

    Reads the notebook-level globals `x_test`, `y_test` and `word_encoder`.

    Args:
        model: a Keras model whose architecture matches the weights.
        weight_path: path of the weights file to load.
    """
    # load_weights() updates the model in place and returns None, so its
    # result must not be assigned back to `model`.
    model.load_weights(weight_path)

    class_names = list(word_encoder.classes_)
    # Passing explicit labels keeps target_names aligned even if a class is
    # missing from the test split.
    # Assumes column k of y_test corresponds to word_encoder.classes_[k] -- TODO confirm.
    class_ids = np.arange(len(class_names))

    test_cat = np.argmax(y_test, 1)
    pred_y = model.predict(x_test, batch_size=4096)
    pred_cat = np.argmax(pred_y, 1)

    # NOTE(review): `plt` is not imported in the visible cells; it must be
    # matplotlib.pyplot, imported elsewhere.
    plt.matshow(confusion_matrix(test_cat, pred_cat, labels=class_ids))
    print(classification_report(test_cat, pred_cat,
                                labels=class_ids,
                                target_names=class_names))