In [370]:
# Mount Google Drive at /content/drive (Colab-only helper) so files stored
# there can be read by path in later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [371]:
import pandas
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import scipy.io as sio
from sklearn.utils import shuffle

In [372]:
# Load the MNIST .mat archive into a dict-like object and list its keys.
M = sio.loadmat("/content/drive/MyDrive/MAT4373/mnist_all.mat")
M.keys()

dict_keys(['__header__', '__version__', '__globals__', 'train0', 'test0', 'train1', 'test1', 'train2', 'test2', 'train3', 'test3', 'train4', 'test4', 'train5', 'test5', 'train6', 'test6', 'train7', 'test7', 'train8', 'test8', 'train9', 'test9'])

Concatenate the training data

In [373]:
# Stack the per-digit arrays train0 ... train9 row-wise, in digit order.
training_data = np.concatenate([M[f"train{digit}"] for digit in range(10)], axis=0)

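Insert a constant 1 as the first feature of every sample so that the bias can be learned as a row of the weight matrix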

In [374]:
# Prepend a column of 1s at index 0 of every row.
# NOTE: this reassigns training_data from itself, so re-running the cell
# adds one more column each time.
training_data = np.insert(training_data,0,1,axis=1)

In [375]:
training_data.shape

(60000, 785)

In [376]:
training_data

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=uint8)

Concatenate the testing data

In [377]:
# Stack the per-digit arrays test0 ... test9 row-wise, in digit order.
testing_data = np.concatenate([M[f"test{digit}"] for digit in range(10)], axis=0)

In [378]:
# Prepend a column of 1s at index 0 of every row.
# NOTE: this reassigns testing_data from itself, so re-running the cell
# adds one more column each time.
testing_data = np.insert(testing_data,0,1,axis=1)

In [379]:
testing_data.shape

(10000, 785)

Create the training labels

In [380]:
# Number of training samples stored for each digit 0-9.
training_size = [M[f"train{digit}"].shape[0] for digit in range(10)]
training_size

[5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949]

In [381]:
# Build the label list in concatenation order: digit d repeated
# training_size[d] times.
training_target = []
for digit, count in enumerate(training_size):
    training_target.extend([digit] * count)

Create the testing labels

In [382]:
# Number of test samples stored for each digit 0-9.
testing_size = [M[f"test{digit}"].shape[0] for digit in range(10)]
testing_size

[980, 1135, 1032, 1010, 982, 892, 958, 1028, 974, 1009]

In [383]:
# Build the label list in concatenation order: digit d repeated
# testing_size[d] times.
testing_target = []
for digit, count in enumerate(testing_size):
    testing_target.extend([digit] * count)

In [384]:
np.unique(np.asarray(testing_target), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009]))

In [385]:
# Shuffle samples and labels with the same permutation (fixed seed) so the
# class-ordered data is mixed while each row keeps its label.
training_data, training_target = shuffle(training_data, training_target, random_state=10)

In [386]:
# Shuffle test samples and labels with the same permutation (fixed seed).
testing_data, testing_target = shuffle(testing_data, testing_target, random_state=20)

In [387]:
import numpy as np 
import math

# Seed NumPy's global RNG so the np.random.randn weight initialisation in
# Layer_Dense is reproducible.
np.random.seed(0)

class Layer_Dense:
    """Fully connected layer whose bias is stored as row 0 of ``weights``."""

    def __init__(self, n_inputs=784, n_neurons=10):
        """Create the weight matrix of shape ``(n_inputs + 1, n_neurons)``.

        Rows 1.. hold small Gaussian values (standard normal scaled by 0.10);
        row 0 is the bias row and starts at zero.
        """
        gaussian_rows = 0.10 * np.random.randn(n_inputs, n_neurons)
        bias_row = np.zeros((1, n_neurons))
        self.weights = np.vstack((bias_row, gaussian_rows))

    def forward(self, inputs):
        """Store the product ``inputs . weights`` in ``self.output``."""
        product = np.dot(inputs, self.weights)
        self.output = product

In [388]:
class Activation_Softmax:
    """Row-wise softmax activation."""

    def forward(self, inputs):
        """Turn each row of ``inputs`` into probabilities, stored in ``self.output``.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow; this does not change the result.
        """
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted_exp = np.exp(inputs - row_max)
        row_totals = np.sum(shifted_exp, axis=1, keepdims=True)
        self.output = shifted_exp / row_totals

In [389]:
# mean cross-entropy (log loss) over a batch
def calculate_loss(y_pred, y_true):
  """Return the average negative log-likelihood of the true classes.

  y_pred holds one row of predicted probabilities per sample and y_true the
  matching one-hot rows; multiplying them and summing along axis 1 picks out
  the probability assigned to the correct class.
  """
  # keep probabilities away from 0 and 1 so the log stays finite
  bounded = np.clip(y_pred, 1e-7, 1-1e-7)
  true_class_prob = (bounded * y_true).sum(axis=1)
  per_sample_loss = -np.log(true_class_prob)
  return np.mean(per_sample_loss)



In [390]:
# one-hot encode the target variable
def encode(target, size=10):
    """Return a ``(len(target), size)`` float array with a 1 at each label's column.

    ``target`` must be a NumPy array of integer class indices.
    """
    one_hot = np.zeros(shape=(target.shape[0], size))
    # iterating a 2-D array yields row views, so writing to `row` updates one_hot
    for row, label in zip(one_hot, target):
        row[label] = 1
    return one_hot

In [391]:
# # def encode(target, size=10):
# #     encoded = np.zeros(shape=(size))
# #     encoded[target] = 1
# #     return encoded
    
    
# def calculate_derivatives(probabilities,input,target):
#     # probabilities is of shape (50, 10)
#     # input is of shape (50, 785)
#     # target is of shape (50)
    
#     # # initial a np array of shape (785,10) to store the sum of the derivatives
#     # sum_derivatives = np.zeros(shape=(input.shape[1], probabilities.shape[1]))
#     # # Batch number = 50
#     # batch_number = input.shape[0]
#     # # Loop through the batch
#     # for i in range(batch_number):
#     #     # Initialize a np array of shape (785,10) to zero
#     #     derivative = np.zeros(shape=(input.shape[1], probabilities.shape[1]))
#     #     # Consider each input with its corresponding output probabilities
#     #     current_input = input[i]
#     #     current_probability = probabilities[i]
#     #     # Loop through all the 785 elements of  each input
#     #     for j in range(current_input.shape[0]):
#     #         product = current_input[j] * (current_probability - encode(target[i]))
#     #         derivative[j] = product
#     #     sum_derivatives = sum_derivatives + derivative
#     # return sum_derivatives/batch_number
    
#     # probabilities is of shape (50, 10)
#     # input is of shape (50, 785)
#     # target is of shape (50,10)
#     # sum_derivatives is of size (50, 785, 10)
#     derivatives = np.zeros(shape=(50,785,10))
#     w = probabilities - target
#     for i in range(50):
#       # current_w contains 10 outputs for each 10 neurons
#       current_w = w[i]
#       # current_input contains 785 entries
#       current_input = input[i]
#       for j in range(785):
#         input_entry = current_input[j]
#         derivative = input_entry * current_w
#         derivatives[i][j] = derivative
#     avg_derivative = np.mean(derivatives,axis=0)
#     return avg_derivative
      





# def adjust_weights(derivative, previous_weights, alpha = 0.01):
#     new_weights = previous_weights - (alpha * derivative)
#     return new_weights
        

In [392]:
  
def calculate_derivatives(probabilities,input,target):
  """Return the batch-averaged gradient of the softmax cross-entropy loss.

  For one sample the gradient with respect to the weights is the outer
  product of the input row and (probabilities - target). Averaging those
  outer products over the batch equals ``input.T @ (probabilities - target)``
  divided by the batch size, which is what is computed here.

  Parameters
  ----------
  probabilities : array of shape (batch, n_outputs), softmax outputs.
  input : array of shape (batch, n_inputs), the rows fed to the layer.
  target : array of shape (batch, n_outputs), one-hot labels.

  Returns
  -------
  Array of shape (n_inputs, n_outputs).

  Raises
  ------
  ValueError if the batch is empty.
  """
  # Shapes are taken from the arguments, so a final batch shorter than the
  # nominal batch size (or a different layer width) is handled correctly.
  features = np.asarray(input, dtype=float)
  diff = np.asarray(probabilities) - np.asarray(target)
  batch_size = features.shape[0]
  if batch_size == 0:
    raise ValueError("calculate_derivatives requires a non-empty batch")
  return features.T @ diff / batch_size
    
def adjust_weights(derivative, previous_weights, alpha = 0.01):
    """Take one gradient-descent step: move the weights against the gradient.

    ``alpha`` is the learning rate (step size).
    """
    step = alpha * derivative
    return previous_weights - step

In [397]:
# train the neural network
def train(training_data, training_target, layer, activation, batch_size=50):
  """Run one pass of mini-batch gradient descent over the training set.

  For each consecutive slice of ``batch_size`` rows: forward the batch
  through ``layer`` and ``activation``, add the batch loss to a running
  total, then replace ``layer.weights`` with the adjusted weights. The start
  index of every batch is printed as a progress trace.

  Returns ``(total_loss, layer)`` where ``total_loss`` is the sum of the
  per-batch mean losses and ``layer`` is the same object, updated in place.
  """
  total_loss = 0
  n_samples = training_data.shape[0]
  for start in range(0, n_samples, batch_size):
    stop = start + batch_size
    batch = training_data[start:stop]
    # labels for this slice, converted to one-hot rows
    batch_target = encode(np.array(training_target[start:stop]))

    # forward pass: weighted sums, then softmax probabilities
    layer.forward(batch)
    activation.forward(layer.output)
    probabilities = activation.output

    # accumulate the loss of the current batch
    total_loss += calculate_loss(probabilities, batch_target)

    # backward pass: gradient of the loss, then one descent step
    derivative = calculate_derivatives(probabilities, batch, batch_target)
    layer.weights = adjust_weights(derivative, layer.weights)
    print(start)
  return total_loss, layer

In [394]:
# Use the trained network to make predictions

def test(testing_data, testing_target,layer,activation, batch_size=50):
  predictions = []
  for i in range(0,testing_data.shape[0],batch_size):
    test_batch = testing_data[i: i + batch_size]
    test_batch_target = testing_target[i: i + batch_size]
    test_batch_target = encode(np.array(testing_target[i: i + batch_size]))

    # forward pass
    layer.forward(test_batch)
    logits = layer.output
    activation.forward(logits)
    probabilities = activation.output

    # obtain predicted values
    prediction = np.argmax(probabilities, axis=1)
    predictions.extend(prediction.tolist())
  return predictions
  

In [None]:
# Build a freshly initialised layer and softmax, make a single pass over the
# training set, then predict labels for the test set with the updated layer.
layer = Layer_Dense()
activation = Activation_Softmax()
# train() updates `layer` in place and also returns it as trained_layer
total_loss, trained_layer = train(training_data, training_target, layer, activation)
predictions = test(testing_data, testing_target,trained_layer,activation)

In [399]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(testing_target, predictions))

[[ 921    0   21    7    2   19    6    3    1    0]
 [   0 1118    4    3    0    3    2    2    3    0]
 [   2    9  962   10    5    8    9    9   13    5]
 [   0    0   32  906    1   48    1    6    8    8]
 [   1    2   15    3  912    1    8    2    6   32]
 [   2    1   10   45   11  781    8    8   22    4]
 [  10    3   18    3   11   32  877    2    2    0]
 [   4   10   46   14    8    1    1  919    2   23]
 [   4   17   42   46   25   71    7   12  735   15]
 [   7    7   10   11   53   22    0   22    1  876]]


In [400]:
print(classification_report(testing_target, predictions))

              precision    recall  f1-score   support

           0       0.97      0.94      0.95       980
           1       0.96      0.99      0.97      1135
           2       0.83      0.93      0.88      1032
           3       0.86      0.90      0.88      1010
           4       0.89      0.93      0.91       982
           5       0.79      0.88      0.83       892
           6       0.95      0.92      0.93       958
           7       0.93      0.89      0.91      1028
           8       0.93      0.75      0.83       974
           9       0.91      0.87      0.89      1009

    accuracy                           0.90     10000
   macro avg       0.90      0.90      0.90     10000
weighted avg       0.90      0.90      0.90     10000



In [401]:
from sklearn.metrics import accuracy_score
print(accuracy_score(testing_target, predictions))

0.9007


In [402]:
total_loss

3067.999129093812