# Classify "Quick, draw!" drawings

"Can a neural network learn to recognize doodling?" - [quickdraw.withgoogle.com][quickdraw]

<a href="https://quickdraw.withgoogle.com/">
    <img src="images/quick-draw.png" width="400px" />
</a>

[quickdraw]:https://quickdraw.withgoogle.com/

Download "QuickDraw" data
---

Download the **NumPy bitmap files (`.npy`)** - [npy files from Google Cloud][quickdraw-npy] / [GitHub repository][quickdraw-github]

[quickdraw-npy]:https://console.cloud.google.com/storage/quickdraw_dataset/full/numpy_bitmap
[quickdraw-github]:https://github.com/googlecreativelab/quickdraw-dataset

In [None]:
# Optional: Execute this cell to download data using "wget" (linux/mac)
# Maps the short class name used for the local file (data/<name>.npy)
# to the URL of the corresponding bitmap file.
files = {
    'plane': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/airplane.npy',
    'car': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/car.npy',
    'cat': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cat.npy',
    'ship': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cruise%20ship.npy',
    'bird': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bird.npy',
    'sheep': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/sheep.npy',
    'strawberry': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/strawberry.npy',
    'flower': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/flower.npy',
    'chair': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/chair.npy',
    'book': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/book.npy'
    # Todo - add more classes from the Google Cloud page!
}
# Create the target directory (-p: no error if it already exists)
!mkdir -p 'data'
# Download every file to data/<class name>.npy, showing only the progress bar
for c, url in files.items():
    !wget '{url}' -O 'data/{c}.npy' -q --show-progress

In [None]:
import glob
import os

# List .npy files and classes
# Sort the paths: glob returns them in arbitrary (filesystem-dependent) order,
# and the position of a file in this list is used as its numeric class label.
npy_files = sorted(glob.glob('data/*.npy'))
print('Files:', npy_files)

# Class name = file name without directory and extension
classes = [os.path.splitext(os.path.basename(path))[0] for path in npy_files]
print('Class names:', classes)

Create a "QuickDraw" data set
---

In [None]:
from PIL import Image
import numpy as np

# Create a class for our data set
class QuickDraw():
    """Indexable data set built from several "QuickDraw" .npy bitmap files.

    Each file provides the drawings of one class; the label of an image is the
    position of its file in `npy_files`. Images are addressed with one global
    index: the first `lengths[0]` indices map to the first file, the next
    `lengths[1]` indices to the second file, and so on.
    """

    def __init__(self, npy_files, max_img_per_class=np.inf):
        """Open the files.

        npy_files: list of paths to .npy files, one per class
        max_img_per_class: use at most this many images from each file
        """
        # Open .npy files (memory-mapped: pixels are only read when accessed)
        self.X_list = [np.load(f, mmap_mode='r') for f in npy_files]
        # int(): keep the lengths integral even if the cap is given as a float
        self.lengths = [int(min(len(X), max_img_per_class)) for X in self.X_list]

        self.n_images = sum(self.lengths)
        self.n_classes = len(npy_files)

    def __len__(self):
        """Return the total number of images (after applying the per-class cap)."""
        return self.n_images

    def get_pixels(self, idx):
        """Return `(pixels, label)` for the image at global index `idx`.

        Negative indices count from the end, as for Python sequences.
        Raises IndexError when `idx` is out of range.
        """
        n_images = self.n_images
        if idx < 0:
            idx += n_images
        if not 0 <= idx < n_images:
            raise IndexError('QuickDraw index out of range')

        # Walk through the files, converting the global index into a local one
        for label, (X, l) in enumerate(zip(self.X_list, self.lengths)):
            if idx < l:
                return X[idx], label
            idx -= l

    def __getitem__(self, idx):
        """Return `(img, label)` where `img` is a 28x28 array with a white background."""
        # Get image
        img, label = self.get_pixels(idx)
        img = img.reshape(28, 28) # Reshape
        img = 255 - img # White background

        return img, label
    
# Create the data set
# (use at most 5000 images from each class file)
quickdraw = QuickDraw(npy_files, max_img_per_class=5000)
print('Total size:', quickdraw.n_images)

# Fetch the first image with its numeric label and print the matching class name
img, label = quickdraw[0]
print('First image:', classes[label])
# Last expression of the cell: the notebook displays the resulting PIL image
Image.fromarray(img)

Create Keras data generators
---

In [None]:
import tensorflow as tf
import tensorflow.keras as keras

# Create a Keras data generator that serves batches from our data set
class DataGenerator(keras.utils.Sequence):
    """Keras `Sequence` that serves batches of (images, one-hot labels).

    dataset: indexable object returning `(img, label)` and exposing `n_classes`
    idxs: array of data set indices this generator is allowed to serve
    batch_size: number of images per batch
    """

    def __init__(self, dataset, idxs, batch_size):
        # Initialise the Keras base class (recent Keras versions warn when this is skipped)
        super().__init__()

        self.dataset = dataset
        self.idxs = idxs.copy() # We shuffle images between epochs: safer to work on a copy!
        self.batch_size = batch_size

    def __len__(self):
        # Number of full batches (a trailing partial batch is dropped)
        return len(self.idxs) // self.batch_size

    def on_epoch_end(self):
        np.random.shuffle(self.idxs) # Shuffle images after each epoch

    def __getitem__(self, i_batch):
        """Return batch `i_batch` as `(imgs, labels)` float32 arrays.

        imgs has shape (n, 28, 28, 1) with values in 0..1,
        labels has shape (n, n_classes) and is one-hot encoded.
        """
        start = i_batch*self.batch_size
        batch_idxs = self.idxs[start:start + self.batch_size]

        # Allocate the whole batch once instead of growing Python lists
        imgs = np.empty((len(batch_idxs), 28, 28, 1), dtype=np.float32)
        labels = np.zeros((len(batch_idxs), self.dataset.n_classes), dtype=np.float32)

        for row, idx in enumerate(batch_idxs):
            img, label = self.dataset[idx]

            # Preprocess image for keras
            img = np.asarray(img, dtype=np.float32).reshape(28, 28, 1)
            imgs[row] = img/255 # Normalize (from 0..255 to 0..1)

            # One-hot encode labels
            labels[row, label] = 1

        return imgs, labels
    
# Define train/validation sets
# Fix the seed so that the shuffle, and therefore the split, is the same on every run
np.random.seed(0)
idx = np.arange(quickdraw.n_images)
np.random.shuffle(idx) # Shuffle data points

valid_size = 1000
# Without this check a small data set would silently produce an empty training set
assert valid_size < len(idx), 'Not enough images to hold out a validation set'
train_idxs = idx[:-valid_size]
valid_idxs = idx[-valid_size:]

# Create data generators
train_generator = DataGenerator(quickdraw, train_idxs, 32)
valid_generator = DataGenerator(quickdraw, valid_idxs, 32)

# Sanity check: inspect the first training batch
imgs, labels = train_generator[0]
print('First images:', imgs.shape, imgs.dtype, 'min/max:', imgs.min(), imgs.max())
print('Classes:', labels.shape, labels.dtype)

Visualize predictions
---

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

# Plot a few batches
def plot_predictions(imgs, preds):
    """Show the images in a grid, each one titled with its most likely class.

    imgs: array of images, indexed as imgs[i, :, :, 0]
    preds: one score vector per image; the position of its largest value
           is looked up in the global `classes` list
    """
    n_imgs = len(imgs)

    # Grid layout: 8 columns and as many rows as needed
    ncols = 8
    nrows = int(np.ceil(n_imgs/ncols))
    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols, nrows))
    cells = axes.flatten()

    # Fill the first cells with the images and their class names
    for i_img in range(n_imgs):
        cell = cells[i_img]
        pixels = imgs[i_img, :, :, 0]
        best_class = np.argmax(preds[i_img])

        cell.imshow(pixels, cmap=plt.cm.gray)
        cell.set_title(classes[best_class], transform=cell.transAxes)

    # Hide the axes everywhere, including the unused cells
    for cell in cells:
        cell.axis('off')
    plt.show()

# Plot images with true labels
# (the one-hot labels are passed as "predictions": their argmax is the true class)
imgs, labels = train_generator[0]
plot_predictions(imgs, preds=labels)

Convolutional Network
---

"Make some assumptions about the inputs to make learning more efficient" - [Andrej Karpathy Lecture][karpathy-lecture]

<a href="https://youtu.be/Y1ugnb0bobk">
    <img src="https://img.youtube.com/vi/Y1ugnb0bobk/maxresdefault.jpg" width="400px" />
</a>

[karpathy-lecture]:https://youtu.be/u6aEYuemt0M?t=10s

In [None]:
# (1st option) Dense Network
model = keras.Sequential([
    # Turn each 28x28x1 image into a flat vector
    keras.layers.Flatten(input_shape=(28, 28, 1)),
    # One hidden layer
    keras.layers.Dense(units=64, activation='relu'),
    # One softmax output per class
    keras.layers.Dense(units=len(classes), activation='softmax'),
])
model.summary()

In [None]:
# (2nd option) Convolutional Network
model = keras.Sequential([
    # Two convolution + max-pooling stages
    keras.layers.Conv2D(filters=16, kernel_size=5, strides=2, activation='relu', input_shape=(28, 28, 1)),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=16, kernel_size=3, strides=1, activation='relu'),
    keras.layers.MaxPool2D(pool_size=2),
    # Classifier: flatten the feature maps, then one softmax output per class
    keras.layers.Flatten(),
    keras.layers.Dense(units=len(classes), activation='softmax'),
])
model.summary()

In [None]:
# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # Categorical cross-entropy: the data generators provide one-hot encoded labels
    loss='categorical_crossentropy',
    # The training and plotting cells read this metric as 'acc' / 'val_acc'
    metrics=['acc']
)

Train model
---

In [None]:
# Trick: end training when accuracy stops improving (optional)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=2)

# Train model
# Model.fit accepts keras.utils.Sequence objects directly; fit_generator is
# deprecated and no longer available in recent TensorFlow/Keras releases.
history = model.fit(
    train_generator, epochs=20,
    validation_data=valid_generator,
    callbacks=[early_stopping]
)

In [None]:
# Two panels side by side: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
curves = history.history

# Loss curves, with the last validation loss in the title
final_val_loss = curves['val_loss'][-1]
ax1.plot(curves['val_loss'], label='validation')
ax1.plot(curves['loss'], label='training')
ax1.set_title('loss: {:.4f}'.format(final_val_loss))
ax1.legend()

# Accuracy curves, with the last validation accuracy (in percent) in the title
final_val_acc = curves['val_acc'][-1]
ax2.plot(curves['val_acc'], label='validation')
ax2.plot(curves['acc'], label='training')
ax2.set_title('accuracy: {:.2f}%'.format(final_val_acc*100))
ax2.legend()

plt.show()

In [None]:
# Plot images with the labels predicted by the model
# (first batch of the training generator; its true labels are not used here)
imgs, labels = train_generator[0]
preds = model.predict(imgs)
plot_predictions(imgs, preds)

Save and export model
---

In [None]:
# Output locations: the saved Keras model and the folder for the TensorFlow.js conversion
model_path = os.path.join('data', 'doodle-model.h5')
tfjs_target_dir = os.path.join('data', 'tfjs')

# Save model
keras.models.save_model(model, model_path)

If you are running the notebook via [Google Colab](https://colab.research.google.com/), first install the TensorFlow.js converter by running the following in a code cell:

```bash
!pip install tensorflowjs
```

In [None]:
import shutil

# Prepare model for TensorFlow.js
# (converts the saved Keras model file into the tfjs_target_dir folder)
!tensorflowjs_converter --input_format keras '{model_path}' '{tfjs_target_dir}'

# Zip the result!
# NOTE(review): 'tjfs-model' looks like a typo for 'tfjs-model' - confirm before
# renaming, since it changes the name of the produced archive
zip_file = os.path.join('data', 'tjfs-model')
# make_archive adds the '.zip' extension itself; the trailing ';' hides the returned path
shutil.make_archive(zip_file, 'zip', tfjs_target_dir);

In [None]:
import json

# Save class names
# (the list order matches the numeric labels, which are positions in npy_files)
classes_file = os.path.join('data', 'classes.json')
with open(classes_file, 'w') as f:
    json.dump(classes, f)
    
# Show the beginning of the written file as a quick check
!head '{classes_file}'