COMP5318 Assignment2 Best Model

README

To use the pre-trained model, upload the pre-trained model file (`group66_pretrained_model.pt`), run the "Import Data" section and the "Data preprocessing" section (both "Normalisation" and "Increasing the Background Area") of this notebook, and then skip to the last section, which loads and evaluates the pre-trained model. Otherwise, run all cells in order to retrain the model from scratch.

# 1. Import Data

In [1]:
# Code to download file into Colaboratory:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

#get the data from the drive
id = '1BMXvqjvrF4cm1J5D0AvITo9jgScUrZW9'
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('emnist-byclass.mat')

In [2]:
# Load the downloaded .mat file and pull the image/label arrays out of each split.
from scipy import io as sio

mat = sio.loadmat('emnist-byclass.mat')
data = mat['dataset']

# The repeated [0,0] indexing unwraps the nested object arrays returned by
# loadmat (presumably 1x1 MATLAB struct arrays -- confirm against the file).
train_split = data['train'][0,0]
test_split = data['test'][0,0]

X_train, y_train = train_split['images'][0,0], train_split['labels'][0,0]
X_test, y_test = test_split['images'][0,0], test_split['labels'][0,0]

In [None]:
# Show the shape of the training images, then of the test images.
for split_images in (X_train, X_test):
    print(split_images.shape)

(697932, 784)
(116323, 784)


# 2. Data preprocessing

## Normalisation

In [3]:
# Min-max normalisation.
# The scaler learns each feature's min and max from the training set only and
# the same transformation is then applied to both the training and test sets.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler().fit(X_train)  # fit() returns the fitted scaler itself

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Increasing the Background Area Around the Bodies of the Characters

In [4]:
# Background area for the training set.
# Every 28x28 image is enlarged to 40x40 by adding a 6-pixel border on each
# side; border pixels repeat the nearest edge pixel of the image.
import cv2
import matplotlib.pyplot as plt
import numpy as np

X_train = X_train.reshape((X_train.shape[0], 28, 28))
# np.pad(mode='edge') yields the same pixels as calling
# cv2.copyMakeBorder(img, 6, 6, 6, 6, borderType=cv2.BORDER_REPLICATE) on each
# image, but pads the whole stack in one vectorised call instead of building a
# Python list of several hundred thousand small arrays and copying it again.
# The leading (0, 0) leaves the sample axis unpadded.
X_train = np.pad(X_train, ((0, 0), (6, 6), (6, 6)), mode='edge')
X_train.shape

(697932, 40, 40)

In [5]:
# Background area for the test set.
# Every 28x28 image is enlarged to 40x40 by adding a 6-pixel border on each
# side; border pixels repeat the nearest edge pixel of the image.
import cv2
import matplotlib.pyplot as plt
import numpy as np

X_test = X_test.reshape((X_test.shape[0], 28, 28))
# np.pad(mode='edge') yields the same pixels as calling
# cv2.copyMakeBorder(img, 6, 6, 6, 6, borderType=cv2.BORDER_REPLICATE) on each
# image, but pads the whole stack in one vectorised call instead of building a
# Python list of small arrays and copying it again.
# The leading (0, 0) leaves the sample axis unpadded.
X_test = np.pad(X_test, ((0, 0), (6, 6), (6, 6)), mode='edge')
X_test.shape

(116323, 40, 40)

# 3. Model architecture

## Train data and validation data

In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import time
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Seed NumPy and TensorFlow with the same value and reset Keras' global state
# so that the notebook's output is stable across runs.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
keras.backend.clear_session()

# Set up plotting and figure/axis labels
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Split the data into a training set (90%) and a validation set (10%).
# random_state is passed explicitly so the split does not depend on how much of
# NumPy's global random stream has been consumed before this cell runs.
# NOTE: this cell overwrites X_train / y_train, so running it a second time
# splits the already-reduced training set again; re-run the preprocessing cells
# first if a fresh split is needed.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, train_size=0.9, random_state=42)

print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_valid: {X_valid.shape}")
print(f"Shape of X_test: {X_test.shape}")

Shape of X_train: (628138, 40, 40)
Shape of X_valid: (69794, 40, 40)
Shape of X_test: (116323, 40, 40)


In [None]:
# Append a trailing axis of length 1 to every split: (N, 40, 40) -> (N, 40, 40, 1).
X_train = X_train[..., np.newaxis]
X_valid = X_valid[..., np.newaxis]
X_test = X_test[..., np.newaxis]

print(f"New shape of X_train: {X_train.shape}")

New shape of X_train: (628138, 40, 40, 1)


## Alexnet

In [None]:
# Build the AlexNet-style CNN as a Keras Sequential model.
# The layers are grouped into named stages and concatenated in order below.
layers = keras.layers

# Stage 1: convolution (no padding) -> batch norm -> overlapping max-pool
conv_block_1 = [
    layers.Conv2D(48, kernel_size=(3, 3), activation="relu", padding='valid'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(3, 3), strides=2),
]

# Stage 2: convolution ('same' padding) -> batch norm -> overlapping max-pool
conv_block_2 = [
    layers.Conv2D(128, kernel_size=(3, 3), activation="relu", padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(3, 3), strides=2),
]

# Stage 3: three stacked convolutions followed by a single max-pool
conv_block_3 = [
    layers.Conv2D(filters=192, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.Conv2D(filters=192, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=2),
]

# Classifier head: flatten, two dense layers with dropout, 62-way softmax output
classifier_head = [
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(62, activation="softmax"),
]

alexnet_model = keras.Sequential([
    # Input: 40x40 images with a single channel
    keras.Input(shape=(40, 40, 1)),
    *conv_block_1,
    *conv_block_2,
    *conv_block_3,
    *classifier_head,
])

In [None]:
# Compile alexnet_model with its loss, optimiser and metric.
# Hyper-parameters follow the fine-tuning described in the report; the best
# model uses: optimiser Adam, learning rate 0.0001, batch size 128.
batch_size = 128

opt = keras.optimizers.Adam(learning_rate=1e-4)

alexnet_model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train alexnet_model. Running this cell could take several minutes.
import time

# The number of epochs was changed to 15 to make the best model perform better.
epochs = 15

start_time = time.time()

history = alexnet_model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=batch_size,
    epochs=epochs,
)

# Wall-clock training time in seconds
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
929.1870260238647


In [None]:
# Evaluate the trained classifier on the held-out test data.
# evaluate() returns the loss followed by the metrics given at compile time.
test_metrics = alexnet_model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Accuracy on test data: {accuracy:.4f}")

Accuracy on test data: 0.8771


In [None]:
# Save the trained model to disk so it can be reloaded later without retraining.
# torch.save is used to serialise the whole Keras model object into a single
# file; the file must be read back with torch.load.
import torch

pretrained_model_path = './group66_pretrained_model.pt'
torch.save(alexnet_model, pretrained_model_path)

INFO:tensorflow:Assets written to: ram://a9fdfa91-1941-4cbc-9b07-85659ea56879/assets


# Use the pretrained model

In [7]:
# Load the pre-trained model and evaluate it on the test set.
# The accuracy of this model is better than that in the report.
import numpy as np
import tensorflow
# torch must be imported here: when only the data-import and preprocessing
# cells have been run, no earlier cell has imported it.
import torch

# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load a model file that comes from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which may reject a pickled model object -- confirm against the installed
# version and pass weights_only=False if required.
best_model = torch.load("group66_pretrained_model.pt")

# The network was built for inputs of shape (40, 40, 1). When the preprocessing
# cells were run without the training section, X_test is still (N, 40, 40), so
# add the trailing channel axis if it is missing.
X_test_eval = X_test if X_test.ndim == 4 else np.expand_dims(X_test, -1)

loss, accuracy = best_model.evaluate(X_test_eval, y_test)
print(f"Accuracy on test data: {accuracy:.4f}")

Accuracy on test data: 0.8771
