# COMP 551 Assignment 3 - Localization and Modified VGG Classifier

In [4]:
import cv2
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mnist import MNIST

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D
from keras.utils import plot_model, to_categorical
from keras.callbacks import History
from keras.optimizers import Nadam, Adam, SGD
from keras.utils import np_utils
from keras import backend as K
from keras import regularizers

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

# Load the original MNIST hand-written digits.
# The loader is pointed at '' (presumably resolved relative to the working
# directory - TODO confirm) and told to read the gzip-compressed files.
mndata = MNIST('')
mndata.gz = True

# `images` holds the training pixels and `labels` the matching digit classes;
# the next cell reshapes `images` to (-1, 28, 28).
images, labels = mndata.load_training()

In [2]:
# Stack the MNIST training images into 28x28 frames and binarize them in one
# pass: pixels below 100 become 0, every other pixel becomes 255.
imgs = np.where(np.asarray(images).reshape(-1, 28, 28) < 100, 0, 255)

# Quick visual sanity check of one thresholded digit (left disabled):
# plt.imshow(imgs[1], cmap='gray')
# plt.show()

In [5]:
def cnn(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile the VGG-style digit classifier.

    The network is three convolutional stages followed by a dense head.
    Each stage is two zero-padded 3x3 ReLU convolutions and a 2x2 max-pool;
    the second and third stages are additionally followed by 50% dropout.
    The head is Flatten -> Dense(512, relu) -> Dropout(0.5) -> softmax.

    Args:
        input_shape: Shape of one input sample (height, width, channels).
            Defaults to (28, 28, 1), the shape used by this notebook.
        num_classes: Number of output classes of the softmax layer.
            Defaults to 10 (digits 0-9).

    Returns:
        A compiled keras ``Sequential`` model using categorical
        cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    # (number of filters, dropout rate applied after pooling or None).
    # The first stage deliberately has no dropout.
    stages = ((32, None), (64, 0.5), (128, 0.5))

    model = Sequential()
    for stage_index, (filters, dropout_rate) in enumerate(stages):
        # Only the very first layer of the model declares the input shape.
        if stage_index == 0:
            model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        # Padding by 1 before each 3x3 convolution keeps the spatial size.
        model.add(ZeroPadding2D((1, 1)))
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        if dropout_rate is not None:
            model.add(Dropout(dropout_rate))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Instantiate the classifier and print its layer-by-layer summary
# (output shapes and parameter counts).
model = cnn()
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_1 (ZeroPaddin (None, 30, 30, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 64)       

In [6]:
# Hold out 30% of the thresholded MNIST digits for evaluation. The split is
# stratified on the labels and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    imgs,
    labels,
    test_size=0.30,
    random_state=0,
    stratify=labels,
)

# Scale pixels from {0, 255} to {0, 1} and add the trailing channel axis
# expected by the network input (28, 28, 1).
X_train = (np.array(X_train) / 255).reshape(-1, 28, 28, 1)
X_test = (np.array(X_test) / 255).reshape(-1, 28, 28, 1)

# One-hot encode the digit labels. The encoder is fitted on the training
# labels only and then re-used, unchanged, for the held-out labels.
onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
y_train = onehot_encoder.fit_transform(np.asarray(y_train).reshape(-1, 1))
y_test = onehot_encoder.transform(np.asarray(y_test).reshape(-1, 1))

In [8]:
# Train on the training split for 20 epochs with mini-batches of 500 samples,
# keeping the returned training history, then persist the trained model to disk.
history = model.fit(X_train, y_train, batch_size=500, epochs=20)
model.save('localization_cnn.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
# Evaluate on the held-out split. The model is compiled with one loss and the
# accuracy metric, so `score` is [loss, accuracy].
score = model.evaluate(X_test, y_test, batch_size=1000)
print(score)

[0.02568431534907884, 0.9927777647972107]


In [11]:
# Re-display the held-out [loss, accuracy] computed by the evaluation cell.
print(score)

[0.02568431534907884, 0.9927777647972107]


In [12]:
# Load the multi-digit task data: the per-image target labels (columns Id, Label),
# the pre-extracted digit crops for the training images, and the raw test images.
# NOTE(review): torch.load and pd.read_pickle both unpickle their input, which
# can execute arbitrary code - only load these files from a trusted source.
train_labels = pd.read_csv('train_max_y.csv')
# Presumably one entry per training image, each holding that image's digit
# crops (the prediction loop iterates it that way) - TODO confirm the layout.
train_images = torch.load('win15Thresh210DigitData.pkl')
test_images = pd.read_pickle('test_max_x')

# Quick sanity check of what was loaded.
print(train_labels.head())
print(test_images.shape)

   Id  Label
0   0      6
1   1      7
2   2      2
3   3      9
4   4      7
(10000, 128, 128)


In [13]:
def _binarize_digit(digit):
    """Return a digit crop as a (28, 28, 1) array of 0.0/1.0 values.

    Pixels greater than 0 map to 1.0 and all others to 0.0, which is the same
    result as ``np.where(digit > 0, 255, 0) / 255``.
    Assumes every crop holds exactly 28*28 pixels - TODO confirm.
    """
    return np.reshape(np.where(digit > 0, 1.0, 0.0), (28, 28, 1))


def _predict_max_digits(classifier, images, images_per_chunk=1000):
    """Predict, for every image, the largest digit among its digit crops.

    Calling ``predict`` once per crop is dominated by per-call overhead, so
    the crops of ``images_per_chunk`` images are stacked and classified in a
    single ``predict`` call; chunking keeps the stacked batch small.

    Args:
        classifier: Model exposing ``predict(batch)`` that returns one row of
            class scores per sample.
        images: Iterable with one entry per image, each entry being a
            sequence of digit crops (possibly empty).
        images_per_chunk: Number of images whose crops are classified together.

    Returns:
        A list with one prediction per image: the maximum predicted digit
        class over the image's crops, or -1 when the image has no crops.
    """
    images = list(images)
    predictions = []
    for start in range(0, len(images), images_per_chunk):
        chunk = images[start:start + images_per_chunk]
        crops = [_binarize_digit(digit) for img in chunk for digit in img]

        if crops:
            class_scores = classifier.predict(np.stack(crops), verbose=0)
            classes = np.argmax(class_scores, axis=1)
        else:
            # Every image in this chunk is empty; nothing to classify.
            classes = np.empty(0, dtype=np.int64)

        # Walk the flat class array back into per-image groups.
        offset = 0
        for img in chunk:
            count = len(img)
            if count == 0:
                # No digit was extracted for this image: keep the -1 sentinel.
                predictions.append(-1)
            else:
                predictions.append(np.amax(classes[offset:offset + count]))
            offset += count
    return predictions


y_pred = _predict_max_digits(model, train_images)

In [14]:
# Accuracy of the max-digit predictions against the provided labels.
# Images with no extracted digit were given the -1 sentinel by the prediction loop.
print(accuracy_score(train_labels['Label'], y_pred))

0.68258
