In [23]:
import h5py

import numpy as np
from   sklearn.model_selection import train_test_split
import tensorflow as tf
from   tensorflow.keras.layers import (
    Conv2D, Dense, Dropout, Flatten, MaxPooling2D)
from   tensorflow.keras.losses import categorical_crossentropy as cxe
from   tensorflow.keras.models import load_model

In [2]:
!ls ../../../../data/quickdraw

full_numpy_bitmap_ambulance.npy   full_numpy_bitmap_mermaid.npy
full_numpy_bitmap_crocodile.npy   full_numpy_bitmap_raccoon.npy
full_numpy_bitmap_eye.npy         full_numpy_bitmap_rifle.npy
full_numpy_bitmap_flamingo.npy    full_numpy_bitmap_snail.npy
full_numpy_bitmap_harp.npy        full_numpy_bitmap_stethoscope.npy


In [3]:
DATA = '../../../../data/quickdraw'
FILE_TEMPLATE = f'{DATA}/full_numpy_bitmap_%s.npy'
categories = ['ambulance', 'crocodile', 'eye', 'flamingo', 'harp', 
              'mermaid', 'raccoon', 'rifle', 'snail', 'stethoscope']
filenames = [FILE_TEMPLATE % x for x in categories]

In [4]:
BATCH = 128
DIM = 28
N_IMAGES = 100000 # reduce if mem issues
N_FILES = len(categories)
IMAGES_PER_CATEGORY = N_IMAGES // N_FILES
IMAGES_PER_CATEGORY

10000

### Preprocessing

In [5]:
i = 0
for path in filenames:
    x = np.load(path)
    x = x.astype('float32') / 255.
    y = [i] * len(x)
    x = x[:IMAGES_PER_CATEGORY]
    y = y[:IMAGES_PER_CATEGORY]
    if i == 0:
        x_all = x
        y_all = y
    else:
        x_all = np.concatenate((x, x_all), axis=0)
        y_all = np.concatenate((y, y_all), axis=0)
    i += 1

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    x_all, y_all, test_size=0.2, random_state=1103)

In [7]:
X_train = X_train.reshape(X_train.shape[0], DIM, DIM, 1)
X_test = X_test.reshape(X_test.shape[0], DIM, DIM, 1)
input_shape = (DIM, DIM, 1)

In [8]:
y_train = tf.keras.utils.to_categorical(y_train, N_FILES)
y_test = tf.keras.utils.to_categorical(y_test, N_FILES)

In [9]:
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, random_state=1103)

### ConvNet model

In [10]:
DROP = 0.25

In [11]:
mod = tf.keras.Sequential()

mod.add(Conv2D(
    32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
mod.add(MaxPooling2D(pool_size=(2, 2)))
mod.add(Dropout(DROP))

mod.add(Conv2D(64, (3, 3), activation='relu'))
mod.add(MaxPooling2D(pool_size=(2, 2)))
mod.add(Dropout(DROP))

mod.add(Flatten())
mod.add(Dense(128, activation='relu'))
mod.add(Dropout(2 * DROP))
mod.add(Dense(N_FILES, activation='softmax'))

In [12]:
mod.compile(loss=cxe, 
            optimizer=tf.keras.optimizers.Adadelta(), 
            metrics=['accuracy'])

In [13]:
mod.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0

In [14]:
EPOCHS = 25

In [15]:
callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./tb_log_dir')]

In [16]:
mod.fit(X_train, 
        y_train,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1,
        validation_data=(X_valid, y_valid))

Train on 72000 samples, validate on 8000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x1a4d62bb90>

In [17]:
score = mod.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {score[0]}\nTest Acc: {score[1]}')

Test loss: 1.4075708951950072
Test Acc: 0.5745000243186951


In [18]:
print(categories)
print('Predicted       Actual')
print('----------------------')

for _ in range(20):
    t = np.random.randint(len(X_test))
    x1 = X_test[t].reshape(1, DIM, DIM, 1)
    p = mod.predict(x1)
    print(f'{categories[np.argmax(p)]:15s} '
          f'{categories[np.argmax(y_test[t])]}')

['ambulance', 'crocodile', 'eye', 'flamingo', 'harp', 'mermaid', 'raccoon', 'rifle', 'snail', 'stethoscope']
Predicted       Actual
----------------------
flamingo        flamingo
rifle           rifle
mermaid         mermaid
ambulance       ambulance
harp            raccoon
mermaid         mermaid
harp            harp
ambulance       eye
harp            harp
harp            harp
stethoscope     stethoscope
harp            harp
harp            rifle
rifle           rifle
crocodile       eye
flamingo        flamingo
eye             crocodile
harp            harp
flamingo        stethoscope
flamingo        stethoscope


In [None]:
#!tensorboard --logdir=./tb_log_dir

In [19]:
mod.save('./QDrawModel.h5')

In [21]:
mod = load_model('./QDrawModel.h5')
mod.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0

In [22]:
print('Predicted       Actual')
print('----------------------')

for _ in range(20):
    t = np.random.randint(len(X_test))
    x1 = X_test[t].reshape(1, DIM, DIM, 1)
    p = mod.predict(x1)
    print(f'{categories[np.argmax(p)]:15s} '
          f'{categories[np.argmax(y_test[t])]}')

Predicted       Actual
----------------------
flamingo        flamingo
snail           snail
flamingo        flamingo
rifle           rifle
raccoon         raccoon
ambulance       ambulance
mermaid         mermaid
ambulance       ambulance
flamingo        flamingo
crocodile       crocodile
snail           snail
crocodile       rifle
rifle           rifle
raccoon         raccoon
ambulance       crocodile
harp            stethoscope
eye             eye
mermaid         stethoscope
rifle           rifle
harp            mermaid


In [24]:
for filename, data in zip(['X_train', 'y_train', 'X_test', 'y_test'],
                          [X_train, y_train, X_test, y_test]):
    with h5py.File(f'{filename}.h5', 'w') as hf:
        hf.create_dataset('QuickDraw', data=data)

In [26]:
# To read again (e.g.):
hf = h5py.File('y_test.h5', 'r')
y_test = np.array(hf['QuickDraw'][:])