<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Code Library, Style and Links</h1>

In [None]:
%%html
<style>
/* Load the Ewert and Roboto web fonts together with the "3d" and "ice" font effects. */
@import url('https://fonts.googleapis.com/css?family=Ewert|Roboto&effect=3d|ice|');
/* Every <span>: Roboto, black text with a grey drop shadow. */
span {font-family:'Roboto'; color:black; text-shadow: 5px 5px 5px #aaa;}  
/* Preformatted text inside output areas: Roboto, 110% size, steel blue. */
div.output_area pre{font-family:'Roboto'; font-size:110%; color: steelblue;}      
</style>

In [None]:
import numpy as np 
import pandas as pd 
import keras as ks

import os
import ast
import cv2
import warnings

import matplotlib.pylab as plt
%matplotlib inline

os.listdir("../input")

In [None]:
# Notebook-wide settings; every later cell reads these globals.
I = 64     # image size in pixels (images are I x I)
S = 15     # current number of the label set {1,...,17} -> labels {1-20,..., 321-340}
T = 20     # number of labels in one set
N = 24000  # number of images taken per label

file_path = '../input/quickdraw-doodle-recognition/train_simplified/'
# os.listdir returns entries in arbitrary order. Later cells pick the current
# label set by position (labels[(S-1)*T:S*T]), so the listing is sorted to make
# that selection identical on every run and every machine.
files = sorted(os.listdir(file_path))
# One label per file: spaces become underscores and the last four characters
# (the file extension) are dropped. `labels[i]` always corresponds to `files[i]`.
labels = [el.replace(" ", "_")[:-4] for el in files]
print(sorted(labels))

In [None]:
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Activation, Dropout, Dense
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D

# Hide UserWarning messages for the rest of the session.
warnings.filterwarnings('ignore', category=UserWarning)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so use whichever one this
# installation actually provides (new name preferred).
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# CSS properties applied to the results table through DataFrame.style.
style_dict = {'background-color':'gainsboro', 'color':'steelblue', 
              'border-color': 'white', 'font-family':'Roboto'}

In [None]:
# additional functions
def display_drawing(n_labels=7, n_samples=7):
    """Plot sample drawings for the first labels of the current label set.

    One figure is produced per label, titled with the label's file name, and
    each figure holds `n_samples` panels in a single row.

    Reads the notebook globals `data` (one column of stroke strings per
    label), `labels`, `files`, `S` and `T`.

    Parameters
    ----------
    n_labels : int, default 7
        How many labels of the current set to show (one figure each).
    n_samples : int, default 7
        How many drawings to show per label (one panel each).
    """
    for k in range(n_labels):
        plt.figure(figsize=(14,2))
        plt.suptitle(files[(S-1)*T+k])
        for i in range(n_samples):
            # Each cell of `data` is the string form of a list of strokes;
            # every stroke unpacks into a sequence of x and a sequence of y.
            picture = ast.literal_eval(data[labels[(S-1)*T+k]].values[i])
            # Select the panel once per drawing, not once per stroke, so a
            # drawing without strokes still gets its own (empty) panel and the
            # axis tweaks below never land on the previous panel.
            plt.subplot(1, n_samples, i+1)
            for x, y in picture:
                plt.plot(x, y, '-o', color='gainsboro')
            plt.xticks([]); plt.yticks([])
            # Flip the y axis so that y grows downwards, as in image coordinates.
            plt.gca().invert_yaxis()
            plt.axis('equal');
            
def get_image(data, lw=6, time_color=True):
    """Rasterise one drawing into an I x I greyscale image.

    Parameters
    ----------
    data : str
        String form of a list of strokes; element 0 of a stroke holds the x
        coordinates and element 1 the y coordinates.
    lw : int, default 6
        Line thickness passed to cv2.line.
    time_color : bool, default True
        If true, stroke number t is drawn with intensity 255 - min(t, 10) * 15,
        so later strokes are darker; otherwise every stroke is drawn at 255.

    Returns
    -------
    numpy.ndarray
        The 280 x 280 uint8 canvas resized to (I, I) with cv2.resize.
    """
    strokes = ast.literal_eval(data)
    canvas = np.zeros((280, 280), np.uint8)
    for order, stroke in enumerate(strokes):
        # The intensity depends only on the stroke's position in the drawing.
        if time_color:
            shade = 255 - min(order, 10) * 15
        else:
            shade = 255
        n_segments = len(stroke[0]) - 1
        for k in range(n_segments):
            # Every point is shifted by 10 pixels on both axes before drawing.
            start = (stroke[0][k] + 10, stroke[1][k] + 10)
            end = (stroke[0][k + 1] + 10, stroke[1][k + 1] + 10)
            cv2.line(canvas, start, end, shade, lw)
    return cv2.resize(canvas, (I, I))

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Data Exploration</h1>

In [None]:
# One column per label of the current set, holding that label's first N drawings.
data = pd.DataFrame(index=range(N), columns=labels[(S-1)*T:S*T])
for i in range((S-1)*T,S*T):
    # Only the first N values of the 'drawing' column are kept, so let pandas
    # parse just that column and just those rows instead of the whole file.
    data[labels[i]] = pd.read_csv(file_path + files[i],
                                  usecols=['drawing'], nrows=N).drawing.values

data.shape

In [None]:
# Plot sample drawings for the first labels of the current set; reads the global `data`.
display_drawing()

In [None]:
# Rasterise every loaded drawing. Images are stored label by label, so the
# j-th drawing of the k-th label of the current set ends up at row k*N + j.
current_labels = labels[(S-1)*T:S*T]

# Fill a preallocated uint8 array instead of collecting a Python list and
# converting it afterwards: the list and the final array would otherwise both
# be held in memory at the same time.
images = np.empty((len(current_labels) * N, I, I), dtype=np.uint8)
for k, label in enumerate(current_labels):
    drawings = data[label].values
    for j in range(N):
        images[k * N + j] = get_image(drawings[j])

# The stroke strings are no longer needed once the images exist.
del data

images.shape

In [None]:
# Show three rasterised drawings side by side (rows 0, 30000 and 60000 of `images`).
plt.figure(figsize=(15,5))
for position, index in enumerate((0, 30000, 60000), start=1):
    plt.subplot(1, 3, position)
    plt.imshow(images[index])
plt.suptitle('Key Lines in the Pictures');

In [None]:
# Integer class ids laid out as a (T, N) grid: row k holds N copies of k,
# matching the label-by-label order in which the images were generated.
targets = np.repeat(np.arange(T), N).reshape(T, N)
# One-hot encode, then flatten the grid to one row per image: (N*T, T).
targets = ks.utils.to_categorical(targets, T).reshape(N*T,T)
targets.shape

In [None]:
# Hold out 20% of the samples, then cut the held-out part in half:
# the first half becomes the validation set, the second half the test set.
x_train, x_test, y_train, y_test = train_test_split(
    images, targets, test_size = 0.2, random_state = 1)
n = len(x_test) // 2
x_valid, x_test = x_test[:n], x_test[n:]
y_valid, y_test = y_test[:n], y_test[n:]

# The unsplit arrays are not used again.
del images, targets

# Append a single-channel axis: (samples, I, I) -> (samples, I, I, 1).
x_train = x_train.reshape(-1,I,I,1)
x_valid = x_valid.reshape(-1,I,I,1)
x_test = x_test.reshape(-1,I,I,1)
y_train.shape, y_valid.shape, y_test.shape

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">The Model</h1>

In [None]:
def model():
    """Build and compile the convolutional classifier.

    Two convolution stages (each followed by LeakyReLU, 2x2 max pooling and
    dropout), a global max pooling layer, one hidden dense layer and a
    softmax output with T units. The input shape is taken from the global
    `x_train`.

    Returns
    -------
    keras.models.Sequential
        The network compiled with categorical cross-entropy, the Adam
        optimizer and the accuracy metric.
    """
    net = Sequential([
        # First convolution stage; 'same' padding keeps the spatial size.
        Conv2D(32, (5, 5), padding='same', input_shape=x_train.shape[1:]),
        LeakyReLU(alpha=0.02),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),

        # Second convolution stage.
        Conv2D(196, (5, 5)),
        LeakyReLU(alpha=0.02),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),

        # Reduce every feature map to its maximum value.
        GlobalMaxPooling2D(),

        # Classifier head.
        Dense(1024),
        LeakyReLU(alpha=0.02),
        Dropout(0.5),
        Dense(T),
        Activation('softmax'),
    ])

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

# Note: this rebinds the name `model` from the factory function above to the
# compiled network, so the factory is no longer reachable under that name.
model = model()

In [None]:
# Write the weights to disk, keeping only the best checkpoint seen so far.
checkpointer = ModelCheckpoint(
    filepath='weights.best.model.cv281-300.hdf5',
    save_best_only=True,
    verbose=2,
)
# Halve the learning rate when the validation loss has not improved for 5 epochs.
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=2,
)

In [None]:
# Train for up to 100 epochs, validating on the held-out validation split and
# running the checkpoint and learning-rate callbacks after every epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=1024,
    epochs=100,
    verbose=2,
    callbacks=[checkpointer, lr_reduction],
    validation_data=(x_valid, y_valid),
)

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Evaluation</h1>

In [None]:
# Restore the weights saved by the checkpoint callback, then score the test split.
model.load_weights('weights.best.model.cv281-300.hdf5')
score = model.evaluate(x=x_test, y=y_test)
score

In [None]:
# Count, per label, how many test samples were classified correctly.
p_test = model.predict(x_test)
predicted_ids = np.argmax(p_test, axis=1)
true_ids = np.argmax(y_test, axis=1)
# Keep the class ids of the correct predictions and map each one back to its
# label name inside the current label set.
well_predicted = [labels[(S-1) * T + class_id]
                  for class_id in predicted_ids[predicted_ids == true_ids]]
u = np.unique(well_predicted, return_counts=True)
summary = pd.DataFrame({'labels':u[0],'correct predictions':u[1]})
(
    summary
    .sort_values('correct predictions', ascending=False)
    .style.set_properties(**style_dict)
)