# **MAIS 202**
Team members: Ahmad Ghawanmeh, Feng Xia, Tahseen Bin Taj

Team name: Deeply Convoluted Team


## Loading Required Sources
Load the required libraries and mount Google Drive in Google Colab.

In [1]:
#Importing the required libraries 
import numpy as np
import pandas as pd
import keras
import h5py

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model, Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, MaxPool2D, Convolution2D
from keras.callbacks import ModelCheckpoint

from scipy.ndimage import gaussian_filter
from skimage import img_as_float
from skimage.morphology import reconstruction

In [2]:
import os
from google.colab import drive

# Mount Google Drive inside the Colab VM, then make the Drive root the working
# directory so the relative data paths used later in the notebook resolve there.
drive.mount('/content/drive')
os.chdir("/content/drive/My Drive/")

Mounted at /content/drive


# Loading Modified MNIST Dataset and Data Augmentation


Uncomment the code below to download the train and test datasets to your Google Drive if they haven't been downloaded yet.

In [3]:
# One-time dataset setup. Everything in this cell is intentionally commented out;
# uncomment and run it once if the data is not yet on your Drive.

# Step 1: upload your Kaggle API token (kaggle.json) into the Drive working directory.
#os.chdir("/content/drive/My Drive/")
#from google.colab import files
#files.upload() #this will prompt you to upload the kaggle.json file

# Step 2: install the Kaggle CLI and copy the token into ~/.kaggle with owner-only permissions.
#!pip install -q kaggle
#!mkdir -p ~/.kaggle
#!cp kaggle.json ~/.kaggle/
#!ls ~/.kaggle
#!chmod 600 /root/.kaggle/kaggle.json  # set permission
# NOTE(review): the next line tries to `cd` into a file, not a directory; it looks
# unnecessary -- confirm before uncommenting.
#!cd ~/.kaggle/kaggle.json

# Step 3: download the competition archives into the current directory.
#!kaggle competitions download -c mais-202-fall-2020-kaggle-competition

# Step 4: create train/ and test/ and unzip the image arrays into them.
#os.chdir('/content/drive/My Drive')  #change dir
#!mkdir train  #create a directory named train/
#!mkdir test  #create a directory named test/

#!unzip -q train_x.npy.zip -d train/  #unzip data in train/
#!unzip -q test_x.npy.zip -d test/  #unzip data in test/

In [4]:
# Load the image arrays and the training labels.
# Paths are relative to the working directory chosen when Drive was mounted.
train_x = np.load("./train/train_x.npy")
test_x = np.load("./test/test_x.npy")

# Only the 'Label' column of the CSV is used as the training target.
labels_frame = pd.read_csv("train_y.csv")
y_train = labels_frame['Label']

In [5]:
# removing the background noise for each image
# Disabled experiment: the loop below is commented out, so the images are used as loaded.
# Per image it would convert to float, apply a Gaussian blur (sigma 0.5), build a seed
# that keeps the image border and sets the interior to the image minimum, and subtract
# the morphological reconstruction (by dilation) of that seed from the blurred image.
# NOTE(review): only train_x is processed; test_x would need the same treatment before
# prediction if this is re-enabled.
# NOTE(review): the float result is written back into train_x; if train_x has an integer
# dtype the values would be truncated on assignment -- confirm the dtype first.
#for i in range(train_x.shape[0]):
#  image = img_as_float(train_x[i])
#  image = gaussian_filter(image, 0.5)

#  seed = np.copy(image)
#  seed[1:-1, 1:-1] = image.min()
#  mask = image

#  dilated = reconstruction(seed, mask, method='dilation')
#  dilated_image = image - dilated
#  train_x[i] = dilated_image

In [6]:
# One hot encoding
# The cell overwrites its own input, so it is guarded on rank: labels that are still a
# 1-D vector of class ids get encoded, while an already encoded (2-D) array is left
# untouched. This keeps the cell safe to re-run without restarting the kernel.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train)

In [7]:
# Give every image an explicit trailing channel axis: (samples, 128, 128, 1),
# which is the input shape the convolutional model is declared with.
IMAGE_SHAPE = (128, 128, 1)
train_x = np.reshape(train_x, (train_x.shape[0],) + IMAGE_SHAPE)
test_x = np.reshape(test_x, (test_x.shape[0],) + IMAGE_SHAPE)

In [8]:
# Hold out 10% of the labelled data for validation; the fixed random_state makes
# the split reproducible.
# CAUTION: y_train is both an input and an output of this cell, so run it only once
# per kernel session -- a second run would pair the full train_x with the already
# reduced y_train.
X_train, X_test, y_train, y_test = train_test_split(
    train_x,
    y_train,
    test_size=0.1,
    random_state=77,
)

In [9]:
# augmenting the pictures
# Random geometric augmentation applied on the fly to each training batch:
# rotations up to 25 degrees, horizontal/vertical shifts, shear and zoom.
aug = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.15,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
)

# No aug.fit(X_train) call: fitting is only required when featurewise centering /
# std-normalisation or ZCA whitening is enabled. None of those options is set here,
# so fitting would only copy the whole training set to float without changing how
# the generator behaves.

# Model Training

In [10]:
def _add_conv_stage(network, filters, **first_conv_options):
    """Append one convolutional stage to ``network``.

    A stage is three 3x3, same-padded, ReLU convolutions with ``filters`` channels,
    followed by batch normalisation, 2x2 max-pooling and 20% dropout.
    ``first_conv_options`` are extra keyword arguments passed to the first
    convolution of the stage only (used to declare the model's input).
    """
    network.add(Convolution2D(filters, kernel_size=3, padding="same", activation='relu',
                              **first_conv_options))
    for _ in range(2):
        network.add(Convolution2D(filters, kernel_size=3, padding="same", activation='relu'))
    network.add(BatchNormalization())
    network.add(MaxPool2D(pool_size=(2, 2)))
    network.add(Dropout(0.2))


# Four convolutional stages with 32, 64, 128 and 256 filters; each stage ends in a
# 2x2 max-pool. Only the very first convolution declares the input shape.
model = Sequential()
_add_conv_stage(model, 32, input_shape=(128,128,1), data_format="channels_last")
for stage_filters in (64, 128, 256):
    _add_conv_stage(model, stage_filters)

# Classifier head: two 512-unit dense layers, dropout, then a 10-way softmax.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Write the model to disk only when the validation accuracy improves, so the file
# always holds the best epoch seen so far.
best_model="best_model.hdf5"
checkpoint = ModelCheckpoint(
    best_model,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
)
callbacks_list = [checkpoint]

In [None]:
# Train for 40 epochs on augmented batches of 400 images, validating on the
# un-augmented hold-out split after every epoch. The checkpoint callback saves the
# best model seen so far.
# Model.fit accepts generators directly; fit_generator is deprecated (see the
# "Please use Model.fit" warning in this cell's recorded output).
model.fit(aug.flow(X_train, y_train, batch_size=400),
          validation_data=(X_test, y_test),
          epochs=40, callbacks=callbacks_list)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/40

Epoch 00001: val_accuracy improved from -inf to 0.22375, saving model to best_model.hdf5
Epoch 2/40
Epoch 00002: val_accuracy improved from 0.22375 to 0.23375, saving model to best_model.hdf5
Epoch 3/40
Epoch 00003: val_accuracy did not improve from 0.23375
Epoch 4/40
Epoch 00004: val_accuracy did not improve from 0.23375
Epoch 5/40
Epoch 00005: val_accuracy improved from 0.23375 to 0.25475, saving model to best_model.hdf5
Epoch 6/40
Epoch 00006: val_accuracy did not improve from 0.25475
Epoch 7/40
Epoch 00007: val_accuracy improved from 0.25475 to 0.40625, saving model to best_model.hdf5
Epoch 8/40
Epoch 00008: val_accuracy improved from 0.40625 to 0.64875, saving model to best_model.hdf5
Epoch 9/40
Epoch 00009: val_accuracy improved from 0.64875 to 0.70500, saving model to best_model.hdf5
Epoch 10/40
Epoch 00010: val_accuracy improved from 0.70500 to 0.73725, saving model to best_model.hdf5
Epoch 

# Accuracy Calculation

In [None]:
# Reload the best checkpoint written during training (not the last-epoch weights)
# and predict a class for every image of the hold-out split.
model = load_model(best_model)
validation_probabilities = model.predict(X_test)
results = validation_probabilities.argmax(axis=1)

In [None]:
# Turn the one-hot validation labels back into class ids for scoring.
# Guarded on rank so the cell can be re-run: once y_test is already a 1-D vector of
# class ids, a second argmax over axis 1 would raise an axis error.
# NOTE: after this cell y_test is no longer one-hot, so re-run the split cell before
# training again with validation_data=(X_test, y_test).
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis=1)

In [None]:
# print the accuracy on the held-out validation split (X_test / y_test)
# accuracy_score is documented as (y_true, y_pred); pass the arguments in that order.
# Accuracy is symmetric, so the printed value is unchanged.
validation_accuracy = accuracy_score(y_test, results)
print (f"accuracy: {validation_accuracy*100}%")

# Result Storage

In [None]:
# Predict a class for every competition test image and write the submission file
# with columns Id (row number) and Label (predicted class).
# Note: this reuses the name `results`, replacing the validation predictions.
test_probabilities = model.predict(test_x)
results = test_probabilities.argmax(axis=1)

res_pd = pd.DataFrame({"Id": range(len(results)), "Label": results})
res_pd.to_csv("test_y.csv", index=False, header=True)