In [0]:
# Colab-only: attach the user's Google Drive to the runtime file system so the
# project folder becomes reachable under /content/gdrive.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

In [0]:
# Make the project folder on the mounted Drive the working directory, so the
# relative 'data/...' paths used by later cells resolve from there.
# NOTE(review): presumably the local `preprocessing` module imported below also
# lives in this folder — confirm.
%cd /content/gdrive/My\ Drive/COMP\ 551\ -\ Mini\ Project\ 3

In [0]:
!pip uninstall tensorflow

In [0]:
# Install the GPU-enabled TensorFlow distribution. No version is pinned, so
# whatever release pip resolves at run time is used.
# NOTE(review): the `tensorflow-gpu` package name has been deprecated upstream in
# favour of plain `tensorflow` — confirm this install still succeeds, and
# consider pinning the version the rest of the notebook was written against.
!pip install tensorflow-gpu

In [0]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dropout, MaxPool2D, BatchNormalization, Dense, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import preprocessing as pre

# Seed every random number generator the notebook relies on.
# Seeding NumPy alone does not cover TensorFlow's own RNG, which drives weight
# initialisation and dropout masks, so both are seeded here.
# NOTE(review): some GPU kernels can still be non-deterministic — seeding makes
# runs comparable, not necessarily bit-identical.
np.random.seed(1)
tf.random.set_seed(1)

In [0]:
# Load the training set: labels come from the "Label" column of the CSV,
# images from a pickled object.
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
y_train = pd.read_csv("data/train_max_y.csv")["Label"].values
x_train = pd.read_pickle('data/train_max_x')

In [0]:
# Run the project's digit-extraction preprocessing over the training images.
# Note: this overwrites x_train, so re-running the cell preprocesses the
# already-processed images again.
x_train = pre.extract_digits(x_train)

# Dump one processed sample as raw values for a quick sanity check.
print(x_train[1])

# Preview the first 20 processed samples on a 5x5 grid (last row left empty),
# each drawn with inverted intensities and captioned with its label.
preview_fig = plt.figure(figsize=(20,20))
for slot in range(1, 21):
    ax = preview_fig.add_subplot(5, 5, slot)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(255 - x_train[slot - 1], cmap=plt.cm.binary)
    ax.set_xlabel(y_train[slot - 1])
plt.show()

In [0]:
# Model I/O dimensions: number of output classes and the per-image input shape
# (height, width, channels) handed to the first convolution.
num_classes = 10
input_shape = (128,128,1)

# Append a trailing single-channel axis: (N, H, W[, ...]) -> (N, H, W, 1).
x_train = x_train.reshape(*x_train.shape[:3], 1)

In [0]:
# Data augmentation: random rotations (up to 20 degrees) and shear, no flips.
batch_size = 128
seed = 1

datagen = ImageDataGenerator(
    rotation_range=20,
    shear_range=0.10,
    horizontal_flip=False,
    vertical_flip=False,
    validation_split=0.0,  # no hold-out split: every sample is used for training
)

# Let the generator compute any data-dependent statistics from the training set.
# NOTE(review): none of the options enabled above appear to require this step — confirm.
datagen.fit(x_train, seed=seed)

# Endless iterator of augmented (images, labels) batches consumed by training.
train_iterator = datagen.flow(
    x_train,
    y_train,
    batch_size=batch_size,
    seed=seed,
)
# A validation iterator would need validation_split > 0 and subset='validation':
# val_iterator = datagen.flow(x_train, y_train, batch_size=batch_size, seed=seed, subset='validation')

In [0]:
# Custom CNN: six stages of three 3x3 ReLU convolutions, each stage followed by
# 2x2 max pooling and batch normalisation, then a small dense classifier head.
model = keras.Sequential()

# Stage 1 (32 filters) uses the layer's default padding; only the very first
# layer declares the input shape.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
for conv_idx in range(2):
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(BatchNormalization())

# Stages 2-6 double the filter count each time and use 'same' padding so only
# the pooling layers shrink the feature maps.
for stage_filters in (64, 128, 256, 512, 1024):
    for conv_idx in range(3):
        model.add(Conv2D(stage_filters, (3, 3), activation='relu', padding='same'))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(BatchNormalization())

# Classifier head: flatten -> 128-unit dense -> batch norm -> dropout -> softmax.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [0]:
# VGG16-style network. Running this cell rebinds `model`, replacing the custom
# CNN built in the previous cell.
model = Sequential()

# Block 1 is spelled out because its first layer carries the input shape.
model.add(Conv2D(64, (3,3), padding="same", input_shape=input_shape, activation="relu"))
model.add(Conv2D(64, (3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))

# Blocks 2-5: (filters, number of 3x3 convolutions), each closed by 2x2 pooling.
for block_filters, convs_in_block in ((128, 2), (256, 3), (512, 3), (512, 3)):
    for conv_idx in range(convs_in_block):
        model.add(Conv2D(block_filters, (3,3), padding="same", activation="relu"))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))

# Classifier head with a single 4096-unit dense layer; a second one is
# deliberately left disabled:
model.add(Flatten())
model.add(Dense(4096, activation="relu"))
#model.add(Dense(4096,activation="relu"))
model.add(Dense(num_classes, activation="softmax"))

# Plain SGD with default settings; integer labels -> sparse cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
)

In [0]:
# Train on the augmented batches produced by `train_iterator`.
# `Model.fit` accepts generators / Sequence iterators directly; the separate
# `fit_generator` entry point is deprecated and absent from recent Keras
# releases, so `fit` is used instead. The number of steps per epoch is taken
# from the iterator's length.
# No validation data is supplied, so `history.history` will only contain
# training metrics (no "val_*" keys).
history = model.fit(train_iterator,
                    # validation_data = val_iterator,
                    epochs = 40)

In [0]:
# Sanity check after training: print the architecture, then score the model on
# the (un-augmented) training images. This measures fit to the training set,
# not generalisation.
model.summary()
(train_loss, train_acc) = model.evaluate(x=x_train, y=y_train, verbose=2)
print('\nTrain accuracy:', train_acc)

In [0]:
# Plot training accuracy (and validation accuracy, when it was recorded)
# against the epoch number, then save the figure to disk.
#
# The epoch count is taken from the recorded history rather than hard-coded:
# a mismatch between a fixed `epochs` value and the number of epochs actually
# trained makes the x and y arrays differ in length and `plt.plot` fail.
train_accuracy = history.history["accuracy"]
epochs = len(train_accuracy)
epoch_axis = np.arange(0, epochs)

plt.style.use("ggplot")
plt.figure(figsize=(10,10))
# plt.plot(np.arange(0, epochs), history3.history["loss"], label="train_loss")
# plt.plot(np.arange(0, epochs), history3.history["val_loss"], label="val_loss")
plt.plot(epoch_axis, train_accuracy, label="Train Accuracy")
# "val_accuracy" only exists when validation data was passed to training;
# skip the curve instead of raising KeyError when it is missing.
if "val_accuracy" in history.history:
    plt.plot(epoch_axis, history.history["val_accuracy"], label="Validation Accuracy")
plt.title("Training and Validation Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend(loc="lower left")
plt.savefig("our_cnn_80_epochs.png")

In [0]:
# Load the pickled test images, run them through the same digit-extraction
# preprocessing as the training set, and cast to float.
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
x_test = pre.extract_digits(pd.read_pickle('data/test_max_x')).astype(float)

# Append a trailing single-channel axis: (N, H, W[, ...]) -> (N, H, W, 1).
x_test = x_test.reshape(*x_test.shape[:3], 1)

In [0]:
# Predict a class for every test image and write the results to a CSV file.
#
# `model.predict` returns one row of class probabilities per image; argmax over
# axis 1 picks the most likely class id. (`model.evaluate` only returns the
# scalar loss/metric values and cannot be used to obtain predictions.)
class_probabilities = model.predict(x_test)
predictions = np.argmax(class_probabilities, axis=1)

# One prediction per row; the DataFrame index (test sample position) is written
# as the first CSV column.
# NOTE(review): confirm the column naming matches the expected submission format.
df = pd.DataFrame(pd.Series(predictions))
df.to_csv('data/test.csv')