In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf

from tensorflow.keras import layers, Model, Input, Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten, AveragePooling2D, MaxPooling2D

# Download data

In [None]:
import zipfile
from urllib.request import urlretrieve

def download_file(url, path):
    """Download a zip archive and extract it relative to the current directory.

    The archive is stored temporarily as ``path + ".zip"`` and is deleted
    afterwards, whether or not the download and extraction succeeded.

    Args:
        url: Location of the zip archive; handed to ``urlretrieve``.
        path: Base name of the temporary archive (".zip" is appended).

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
        Errors raised by ``urlretrieve`` are propagated unchanged.
    """
    print("Downloading from %s" % url)
    zip_path = path + ".zip"
    try:
        urlretrieve(url, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # An empty target path makes member paths resolve against the
            # current working directory.
            zip_ref.extractall("")
    finally:
        # Never leave a partial or invalid archive behind on failure.
        if os.path.exists(zip_path):
            os.remove(zip_path)
    
# NOTE(review): this looks like a time-limited signed link (it carries `Expires`
# and `Signature` query parameters); once it lapses the request will presumably
# be rejected and a fresh link has to be pasted here.
url = "https://storage.googleapis.com/kaggle-data-sets/3258/5337/bundle/archive.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1586457678&Signature=Fpk8Z%2BNCFNBkVKBztZZR5QHgmGBdfjXb6g3f8SWjGz8bXPgdh3jqslYfZ3EorgG6SvMcZ17OlGQ8zkrzWbGkN0Va5cHRVq5LYfDrJBw2rIbmaw3S4TUFR6XZTgwjmfIrD69G6w7Q1US4SydNmKb42Duf7pbP51VrK3RTPBHqowBvvM5wzQpkmDd6UMkIxKVltsZRtV%2F8SLuQKz%2BtUXP5dc88vXUU1M3REQ2NuFjrOakd3%2BINEzPnIDbIbpdedD4hFe95zBjNiMURqW4a8u4FplVVw9xI4ugdqOukzKC6pXnBetlgy5R0eyUJP2Udf63tp0s6NN%2FjOp8Clf5P9yOe%2FQ%3D%3D&response-content-disposition=attachment%3B+filename%3Dsign-language-mnist.zip"

# Skip the download when the CSV files read later in this notebook are already
# on disk, so a re-run does not depend on the link still being valid.
if not (os.path.exists("sign_mnist_train.csv") and os.path.exists("sign_mnist_test.csv")):
    download_file(url, "sign-mnist")

# Read data

In [None]:
def unzip(data):
    """Split a sequence of pairs into two parallel NumPy arrays.

    Args:
        data: Sequence of indexable items; index 0 and index 1 of each item
            are collected separately.

    Returns:
        Tuple ``(firsts, seconds)`` of ``np.ndarray`` objects, in input order.
    """
    heads = np.array([pair[0] for pair in data])
    tails = np.array([pair[1] for pair in data])
    return heads, tails

def read_images_and_labels(filename, image_shape=(28, 28, 1)):
    """Parse a CSV of labelled, flattened images into (label, image) pairs.

    The first line is treated as a header and skipped. Every other non-blank
    line holds an integer label followed by comma-separated pixel values,
    which are divided by 255 and reshaped to ``image_shape``.

    Args:
        filename: Path of the CSV file to read.
        image_shape: Shape given to each row's pixel values.

    Returns:
        List of ``(label, image)`` tuples, where ``label`` is an ``int`` and
        ``image`` is a float array of shape ``image_shape``.

    Raises:
        ValueError: If a label or pixel value cannot be parsed, or a row does
            not hold exactly as many pixel values as ``image_shape`` requires.
    """
    expected_size = int(np.prod(image_shape))
    images = []
    with open(filename) as f:
        f.readline()  # skip the header row
        for line_no, line in enumerate(f, start=2):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            label, *values = line.split(',')
            image = np.array([float(v) for v in values], dtype=float) / 255
            # An in-place resize would silently zero-pad or truncate a
            # malformed row; reject it explicitly instead.
            if image.size != expected_size:
                raise ValueError(
                    "%s, line %d: expected %d pixel values, found %d"
                    % (filename, line_no, expected_size, image.size)
                )
            images.append((int(label), image.reshape(image_shape)))

    return images

def plot_images(images):
    """Show images side by side in a single row of five grayscale panels.

    Args:
        images: Iterable of array-likes. Each one is copied and the copy is
            resized in place to 28x28 before being drawn. Images beyond the
            number of panels are not drawn.
    """
    _, panels = plt.subplots(1, 5, figsize=(28, 28))
    for picture, panel in zip(images, panels.flatten()):
        pixels = np.array(picture)
        # Resize the copy so the caller's array is left untouched.
        pixels.resize((28, 28))
        panel.imshow(pixels, cmap='gray')
        panel.axis('off')
    plt.tight_layout()
    plt.show()

# Load both CSV files as lists of (label, image) pairs.
test_images = read_images_and_labels("sign_mnist_test.csv")
train_images = read_images_and_labels("sign_mnist_train.csv")

# Hold out the leading 5% of the training rows as the validation split.
train_len = len(train_images)
val_len = int(train_len * 0.05)
val_images, train_images = train_images[:val_len], train_images[val_len:]

# Turn each list of pairs into a label array and an image array.
val_labels, val_images = unzip(val_images)
train_labels, train_images = unzip(train_images)
test_labels, test_images = unzip(test_images)

# Every label seen in any split; the class count is the largest label id
# plus one, so label values can be used directly as output indices.
classes = sorted(set(train_labels) | set(val_labels) | set(test_labels))
num_classes = max(classes) + 1

print("Train len: %d" % len(train_images))
print("Val len: %d" % len(val_images))
print("Test len: %d" % len(test_images))

# Preview five randomly chosen training images.
plot_images([random.choice(train_images) for _ in range(5)])

# Simple model

In [None]:
# Small CNN for 28x28x1 inputs: three convolution/pooling stages followed by
# a dense classifier head with one output per class.
model = Sequential()

# Stage 1: 'same' padding keeps 28x28; pooling halves it to 14x14.
model.add(Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding="same",
                 activation="tanh", input_shape=(28, 28, 1)))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
model.add(Dropout(0.5))

# Stage 2: 5x5 'valid' convolution gives 10x10; pooling halves it to 5x5.
model.add(Conv2D(64, kernel_size=(5, 5), strides=(1, 1), padding="valid",
                 activation="tanh"))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))

# Stage 3: 2x2 'valid' convolution gives 4x4; max pooling halves it to 2x2.
model.add(Conv2D(64, kernel_size=(2, 2), strides=(1, 1), padding="valid",
                 activation="tanh"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))

# Classifier head: two dropout-regularised dense layers, then softmax.
model.add(Flatten())
model.add(Dense(120, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(120, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation="softmax"))

# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Train on the un-augmented training split, validating on the held-out split
# after every epoch; the returned History object is kept for plotting.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=10,
    validation_data=(val_images, val_labels),
)

In [None]:
def plot_history(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' to per-epoch
            sequences (as returned by the ``fit`` calls in this notebook).
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Take the x-axis from the recorded data: a fixed range(10) makes
    # plt.plot fail for any run that is not exactly 10 epochs long
    # (this notebook also plots a 20-epoch run).
    epochs_range = range(len(acc))

    plt.figure(figsize=(8, 8))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()
    
# Draw the accuracy/loss curves for the training run stored in `history`.
plot_history(history=history)

In [None]:
# Score the trained model on the test split, which is not used for fitting.
model.evaluate(x=test_images, y=test_labels)

# Augmented images

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Random augmentation settings shared by the training and validation generators.
augmentation_params = dict(
    rotation_range=45,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True,
    zoom_range=0.5,
)

image_gen_train = ImageDataGenerator(**augmentation_params)

# NOTE(review): the validation images receive the same random transforms as
# the training images, so validation metrics are measured on distorted inputs.
# Consider a plain ImageDataGenerator() here if validation should reflect the
# unmodified images; confirm the intent.
image_gen_val = ImageDataGenerator(**augmentation_params)


train_data_gen = image_gen_train.flow(
    train_images, train_labels,
    batch_size=64, shuffle=True)

# Validation batches come from the dedicated validation generator, so changes
# to its configuration actually take effect.
val_data_gen = image_gen_val.flow(
    val_images, val_labels,
    batch_size=64, shuffle=True)

In [None]:
# Read the first image of batch 0 five times. Each access presumably re-runs
# the generator's random transforms, giving five differently augmented
# samples; confirm against the generator's behaviour.
augmented_images = [
    train_data_gen[0][0][0]  # batch 0 -> image array -> first image
    for _ in range(5)
]
plot_images(images=augmented_images)

In [None]:
# Continue training the same `model` object on augmented batches.
# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated.
history = model.fit(train_data_gen, epochs=20, validation_data=val_data_gen)

In [None]:
# Curves for the augmented training run, then the score on the test split.
plot_history(history=history)
model.evaluate(x=test_images, y=test_labels)

# VGG19 network

In [None]:
# Transfer-learning model on an ImageNet-pretrained VGG19 base. Despite the
# `inception_` prefix (kept because later cells use these names), the backbone
# is VGG19. Inputs are 32x32 RGB images.
# NOTE(review): the images in this notebook are scaled to [0, 1]; pretrained
# VGG19 weights are normally paired with
# `tf.keras.applications.vgg19.preprocess_input`. Confirm whether that
# preprocessing should be applied.
inception_base = tf.keras.applications.VGG19(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

# Freeze the first five layers of the base; the remaining layers stay trainable.
for layer in inception_base.layers[:5]:
    layer.trainable = False

# Classification head on top of the convolutional features.
x = inception_base.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
# Size the output from the data-derived class count, as the first model does,
# instead of a hard-coded 25.
predictions = Dense(num_classes, activation="softmax")(x)

inception_model = Model(inception_base.input, predictions)

inception_model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
inception_model.summary()

## Resize data to fit VGG19

In [None]:
def gray_to_rgb(img):
    """Repeat every entry of ``img`` three times along axis 2.

    For a single-channel ``(H, W, 1)`` image this yields an ``(H, W, 3)``
    array whose three channels are identical.

    Args:
        img: Array-like with at least three dimensions.

    Returns:
        ``np.ndarray`` whose axis 2 is three times as long as the input's.
    """
    return np.repeat(img, repeats=3, axis=2)

def _resize_to_rgb(images):
    """Resize a batch of images to 32x32 and repeat each along axis 2 via gray_to_rgb."""
    resized = tf.image.resize(images, (32, 32))
    return np.array([gray_to_rgb(frame) for frame in resized])


# Build the 32x32 three-channel versions of every split for the VGG19 model.
train_images_3p = _resize_to_rgb(train_images)
val_images_3p = _resize_to_rgb(val_images)
test_images_3p = _resize_to_rgb(test_images)

# Visual sanity check on one converted training image.
plt.imshow(train_images_3p[1])

In [None]:
# Fine-tune the VGG19-based model on the resized three-channel images.
# NOTE(review): validation_steps=10 presumably limits validation to 10 batches
# per epoch rather than the whole validation split; confirm this is intended.
inception_model.fit(
    x=train_images_3p,
    y=train_labels,
    epochs=2,
    verbose=1,
    validation_data=(val_images_3p, val_labels),
    validation_steps=10,
)

In [None]:
# Score the VGG19-based model on the resized test split.
inception_model.evaluate(x=test_images_3p, y=test_labels)