<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Code Library, Style and Links</h1>

In [None]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Ewert|Roboto&effect=3d|ice|');
span {font-family:'Roboto'; color:black; text-shadow: 5px 5px 5px #aaa;}  
div.output_area pre{font-family:'Roboto'; font-size:110%; color: steelblue;}      
</style>

In [None]:
import numpy as np 
import pandas as pd 
import keras as ks

import os
import ast
import h5py
import warnings

import matplotlib.pylab as plt
%matplotlib inline

In [None]:
I = 64    # side length, in pixels, of the square images produced by get_image
S = 1     # 1-based number of the label set; set S uses label files (S-1)*T ... S*T-1 of `files`
T = 20    # number of labels in one set
N = 7000  # number of drawings loaded for each label

# os.listdir returns entries in arbitrary order. Every later cell indexes
# `files` (and the `labels` derived from it) by position, so the list itself
# must be sorted, not only the printed copy, for label sets to be reproducible.
files = sorted(os.listdir("../input/train_simplified"))
print(files)

In [None]:
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Activation, Dropout, Dense
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D

warnings.filterwarnings('ignore', category=UserWarning)

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever spelling this
# installation provides. If neither exists the default style is kept.
for _style_name in ('seaborn-whitegrid', 'seaborn-v0_8-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# CSS properties applied to the DataFrames displayed through `.style`.
style_dict = {'background-color':'gainsboro', 
              'color':'steelblue', 
              'border-color': 'white', 
              'font-family':'Roboto'}

In [None]:
# https://stackoverflow.com/questions/25837544/get-all-points-of-a-straight-line-in-python
def get_line(x1, y1, x2, y2):
    """Return the integer grid points on the segment from (x1, y1) to (x2, y2).

    The points are produced with an integer error accumulator (Bresenham-style)
    and are listed from the start point to the end point, both included.
    Coordinates are expected to be integers because they are used in `range`.
    """
    # Walk along the axis with the larger extent; for steep segments the
    # roles of x and y are exchanged and swapped back when emitting points.
    steep = abs(y2 - y1) > abs(x2 - x1)
    if steep:
        x1, y1, x2, y2 = y1, x1, y2, x2

    # Always iterate with increasing x; remember to restore the direction.
    reverse = x1 > x2
    if reverse:
        x1, y1, x2, y2 = x2, y2, x1, y1

    dx = x2 - x1
    dy = abs(y2 - y1)
    step = 1 if y1 < y2 else -1
    err = int(dx / 2)
    y = y1

    result = []
    for x in range(x1, x2 + 1):
        result.append((y, x) if steep else (x, y))
        err -= dy
        if err < 0:
            y += step
            err += dx

    return result[::-1] if reverse else result

In [None]:
# additional functions
def display_drawing():
    """Plot the first five drawings of the first five labels of the current set.

    Reads the notebook-level `files`, `labels`, `data`, `S` and `T`. One
    figure is created per label (titled with its file name) and holds five
    subplots, one per drawing; every stroke is drawn as a connected polyline.
    """
    for k in range(5):
        label_index = (S-1)*T + k
        plt.figure(figsize=(10,2))
        plt.suptitle(files[label_index])
        for i in range(5):
            picture = ast.literal_eval(data[labels[label_index]].values[i])
            # Select the subplot once per drawing rather than once per stroke,
            # so a drawing without strokes still gets its own axes and the
            # axis settings below never touch the previous subplot.
            plt.subplot(1,5,i+1)
            for x,y in picture:
                plt.plot(x, y, '-o', color='gainsboro')
            plt.xticks([]); plt.yticks([])
            # Flip the y-axis (presumably the stroke coordinates use a
            # top-left origin, as image coordinates do; confirm against data).
            plt.gca().invert_yaxis()
            plt.axis('equal');
            
def get_image(data, k, close_strokes=True):
    """Rasterize drawing number `k` of `data` into an `I` x `I` image.

    Parameters
    ----------
    data : object with a `.values` sequence of string-encoded drawings.
        Each string is parsed with `ast.literal_eval` into a sequence of
        strokes, every stroke being a pair `(xs, ys)` of coordinate lists.
    k : int
        Position of the drawing inside `data.values`.
    close_strokes : bool, default True
        If True (the original behaviour), an extra segment is drawn from the
        last point of every stroke back to its first point. Pass False to
        rasterize strokes as open polylines.
        NOTE(review): closing every stroke looks unintended for pen strokes,
        but the default is kept so images stay consistent with weights that
        were already trained on them.

    Returns
    -------
    The 280 x 280 canvas resized to `(I, I)` with `skimage.transform.resize`.
    """
    margin = 10        # offset added to every coordinate before drawing
    canvas_size = 280  # assumes coordinates + margin stay below 280; TODO confirm
    img = np.zeros((canvas_size, canvas_size))
    picture = ast.literal_eval(data.values[k])
    for x, y in picture:
        n_points = len(x)
        for i in range(n_points):
            # Mark the point itself so single-point strokes are drawn too.
            img[y[i] + margin, x[i] + margin] = 1
            if i < n_points - 1:
                j = i + 1
            elif close_strokes:
                j = 0
            else:
                continue
            for xl, yl in get_line(x[i], y[i], x[j], y[j]):
                img[yl + margin, xl + margin] = 1

    return resize(img, (I, I))

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Data Exploration</h1>

In [None]:
# Load one raw label file and show its last three rows, transposed so that
# every column of the CSV becomes a row of the styled table.
data_alarm_clock = pd.read_csv(
    '../input/train_simplified/alarm clock.csv',
    index_col='key_id',
)
alarm_clock_preview = data_alarm_clock.tail(3).T
alarm_clock_preview.style.set_properties(**style_dict)

In [None]:
file_path = '../input/train_simplified/'
# Label names: file names with spaces replaced and the 4-character
# extension (".csv") removed.
labels = [el.replace(" ", "_")[:-4] for el in files]

# One column per label of the current set, N drawings (rows) each.
data = pd.DataFrame(index=range(N), columns=labels[(S-1)*T:S*T])
for i in range((S-1)*T,S*T):
    # Only the `drawing` column of the first N rows is used, so parse just
    # that instead of the whole file.
    data[labels[i]] = pd.read_csv(file_path + files[i],
                                  usecols=['drawing'], nrows=N).drawing.values
data.shape

In [None]:
# Plot sample drawings from the loaded `data` table: five figures (one per
# label) with five drawings each, as defined in display_drawing above.
display_drawing()

In [None]:
# Rasterize every loaded drawing, label by label, so that the images of one
# label occupy N consecutive positions of the resulting array.
images = np.array([
    get_image(data[label], i)
    for label in labels[(S-1)*T:S*T]
    for i in range(N)
])

# The raw string tables are not used after this point.
del data, data_alarm_clock

images.shape

In [None]:
# Show two rasterized examples side by side: images 0 and 10000.
plt.figure(figsize=(10,5))
for position, image_index in enumerate((0, 10000), start=1):
    plt.subplot(1, 2, position)
    plt.imshow(images[image_index])
plt.suptitle('Key Points in the Pictures');

In [None]:
# Class index k repeated N times for k = 0..T-1, in the same order as `images`,
# then one-hot encoded into an (N*T, T) array.
targets = ks.utils.to_categorical(
    [k for k in range(T) for _ in range(N)], T
)
targets.shape

In [None]:
# Name the file after the 1-based range of labels it holds, so that another
# label set (S > 1) is not written under the "001-020" name.
# For S = 1, T = 20 this is 'QuickDrawImages001-020.h5'.
h5_name = 'QuickDrawImages%03d-%03d.h5' % ((S-1)*T + 1, S*T)

# The `with` block closes the file on exit; no explicit close() is needed.
with h5py.File(h5_name, 'w') as f:
    f.create_dataset('images', data=images)
    f.create_dataset('targets', data=targets)

In [None]:
# Hold out 20% of the samples, then cut the held-out part in half:
# the first half is used for validation, the second half for testing.
x_train, x_test, y_train, y_test = train_test_split(
    images, targets, test_size=0.2, random_state=1
)
n = len(x_test) // 2
x_valid, x_test = x_test[:n], x_test[n:]
y_valid, y_test = y_test[:n], y_test[n:]

del images, targets

# Add the trailing single-channel axis expected by the Conv2D input layer.
x_train, x_valid, x_test = (
    part.reshape(-1, I, I, 1) for part in (x_train, x_valid, x_test)
)
y_train.shape, y_valid.shape, y_test.shape

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">The Model</h1>

In [None]:
def build_model(input_shape, num_classes):
    """Build and compile the convolutional classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first Conv2D layer.
    num_classes : int
        Number of units of the final softmax layer.

    Returns
    -------
    A compiled `Sequential` model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    net = Sequential()

    # First convolutional stage.
    net.add(Conv2D(32, (5, 5), padding='same', input_shape=input_shape))
    net.add(LeakyReLU(alpha=0.02))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.2))

    # Second convolutional stage.
    net.add(Conv2D(196, (5, 5)))
    net.add(LeakyReLU(alpha=0.02))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.2))

    # Collapse the spatial dimensions before the dense classifier head.
    net.add(GlobalMaxPooling2D())

    net.add(Dense(1024))
    net.add(LeakyReLU(alpha=0.02))
    net.add(Dropout(0.5))

    net.add(Dense(num_classes))
    net.add(Activation('softmax'))

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

# The builder has its own name so that binding the result to `model` does not
# overwrite the function that created it.
model = build_model(x_train.shape[1:], T)

In [None]:
# Write weights to 'weights.best.model.hdf5', keeping only the best ones seen
# so far (save_best_only=True).
checkpointer = ModelCheckpoint(
    filepath='weights.best.model.hdf5',
    verbose=2,
    save_best_only=True,
)
# Halve the learning rate when the validation loss has not improved for
# five epochs.
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    verbose=2,
    factor=0.5,
)

In [None]:
# Train on the training split and validate on the validation split after each
# epoch; the callbacks handle checkpointing and learning-rate reduction.
history = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=1024,
    verbose=2,
    validation_data=(x_valid, y_valid),
    callbacks=[checkpointer, lr_reduction],
)

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">Evaluation</h1>

In [None]:
# Restore the checkpointed weights and evaluate them on the test split.
best_weights_path = 'weights.best.model.hdf5'
model.load_weights(best_weights_path)
score = model.evaluate(x_test, y_test)
score

In [None]:
# Count, per label, the test samples whose predicted class matches the
# true class.
p_test = model.predict(x_test)
predicted_idx = np.argmax(p_test, axis=1)
actual_idx = np.argmax(y_test, axis=1)

# Label names of the correctly classified samples, in test-set order.
well_predicted = [
    labels[(S-1) * T + j]
    for j in predicted_idx[predicted_idx == actual_idx]
]
u = np.unique(well_predicted, return_counts=True)

correct_by_label = pd.DataFrame({'labels':u[0],'correct predictions':u[1]})
correct_by_label = correct_by_label.sort_values('correct predictions',ascending=False)
correct_by_label.style.set_properties(**style_dict)

<h1 style="color:steelblue; font-family:Ewert; font-size:200%;" class="font-effect-3d">The Next Step</h1>

The weights for each label set have been saved in a special database and will be used for image recognition on the test data.

The next notebook [Quick, Draw! Doodle Recognition 2](https://www.kaggle.com/olgabelitskaya/quick-draw-doodle-recognition-2)