In [1]:
# Read Fashion MNIST dataset

import util_mnist_reader
# Load the 'train' split from ../data/fashion (path is relative to the notebook's
# working directory); load_mnist's result is unpacked into a features/labels pair.
# NOTE(review): presumably X_* holds one flattened image per row and y_* integer
# class ids — confirm against util_mnist_reader.
X_train, y_train = util_mnist_reader.load_mnist('../data/fashion', kind='train')
# Same loader, but with kind='t10k'; the result is kept as the test set.
X_test, y_test = util_mnist_reader.load_mnist('../data/fashion', kind='t10k')

In [2]:
# import necessary libraries here
import numpy as np
import pandas as pd
from sklearn import model_selection
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# Optionally hold out 5% of the training data as a validation set with train_test_split.
# NOTE: the split below is currently commented out, so X_val / y_val are not defined.
# X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train, test_size = 0.05, random_state = 7)

In [5]:
class NeuralNet():
    """Single-hidden-layer classifier trained with full-batch gradient descent.

    The forward computation is ``softmax(act(X @ W1 + b1) @ W2 + b2)`` where
    ``act`` is one of ``"sigmoid"``, ``"relu"`` or ``"tanh"`` (case-insensitive).
    The loss is the mean cross-entropy over the batch.
    """

    # Lower bound applied to probabilities before taking the log, so that a
    # probability that underflowed to 0 yields a large finite loss, not inf.
    _LOG_EPS = 1e-12

    def __init__(self, input_columns, num_hidden, num_classes):
        """Initialise all weights and biases from a standard normal distribution.

        Parameters
        ----------
        input_columns : int
            Number of features per sample (columns of ``X``).
        num_hidden : int
            Number of units in the hidden layer.
        num_classes : int
            Number of output classes.
        """
        self.W1 = np.random.randn(input_columns, num_hidden)
        self.b1 = np.random.randn(1, num_hidden)
        self.W2 = np.random.randn(num_hidden, num_classes)
        self.b2 = np.random.randn(1, num_classes)

    def sigmoid(self, X):
        """Element-wise logistic function ``1 / (1 + exp(-X))``.

        Evaluated through ``exp(-|X|)``, which lies in (0, 1], so large
        negative inputs no longer overflow inside ``np.exp``.
        """
        X = np.asarray(X, dtype=float)
        decay = np.exp(-np.abs(X))
        # X >= 0: 1 / (1 + e^-X);  X < 0: e^X / (1 + e^X) -- the same function.
        return np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))

    def ReLU(self, X):
        """Element-wise rectified linear unit, ``max(0, X)``."""
        return np.maximum(0, X)

    def tanH(self, X):
        """Element-wise hyperbolic tangent."""
        return np.tanh(X)

    def softmax(self, X):
        """Softmax over the last axis, so each row is its own distribution.

        The per-row maximum is subtracted before exponentiating to avoid
        overflow; this does not change the result. A 1-D input is treated as
        a single distribution.
        """
        X = np.asarray(X, dtype=float)
        exps = np.exp(X - np.max(X, axis=-1, keepdims=True))
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def _activate(self, z1, hidden_activation):
        """Apply the named hidden activation to ``z1``; ValueError if unknown."""
        name = hidden_activation.lower()
        if name == "sigmoid":
            return self.sigmoid(z1)
        if name == "relu":
            return self.ReLU(z1)
        if name == "tanh":
            return self.tanH(z1)
        raise ValueError(name)

    def forward_pass(self, X, y, hidden_activation):
        """Run the network on ``X``.

        Parameters
        ----------
        X : array of shape (num_samples, input_columns)
        y : unused; kept so existing callers do not break.
        hidden_activation : str
            ``"sigmoid"``, ``"relu"`` or ``"tanh"`` (case-insensitive).

        Returns
        -------
        (a1, a2) : hidden-layer activations and per-row class probabilities.

        Raises
        ------
        ValueError
            If ``hidden_activation`` is not a supported name.
        """
        z1 = np.dot(X, self.W1) + self.b1
        a1 = self._activate(z1, hidden_activation)
        z2 = np.dot(a1, self.W2) + self.b2
        a2 = self.softmax(z2)
        return a1, a2

    def calc_loss(self, fp_result, y):
        """Mean cross-entropy of probabilities ``fp_result`` against labels ``y``.

        ``y`` holds one integer class index per row of ``fp_result``.
        Probabilities are clipped from below at ``_LOG_EPS`` before the log.
        """
        num_samples = y.shape[0]

        true_class_prob = fp_result[range(num_samples), y]
        log_likelihood = -np.log(np.clip(true_class_prob, self._LOG_EPS, None))
        loss = np.sum(log_likelihood) / num_samples
        return loss

    def backPropagation(self, X, y, activation1_result, fp_result,
                        hidden_activation="relu"):
        """Gradients of the mean cross-entropy loss w.r.t. all parameters.

        Parameters
        ----------
        X : array of shape (num_samples, input_columns)
        y : integer class index per sample.
        activation1_result : hidden activations returned by ``forward_pass``.
        fp_result : class probabilities returned by ``forward_pass``.
            This array is NOT modified; a copy is used internally.
        hidden_activation : str, default ``"relu"``
            Activation that produced ``activation1_result``; it selects the
            derivative applied to the hidden layer.

        Returns
        -------
        (dW1, db1, dW2, db2)

        Raises
        ------
        ValueError
            If ``hidden_activation`` is not a supported name.
        """
        name = hidden_activation.lower()
        if name not in ("relu", "sigmoid", "tanh"):
            raise ValueError(name)

        num_samples = X.shape[0]

        # d(loss)/d(z2) = (softmax - one_hot(y)) / N, computed on a copy so the
        # caller's probability array is left untouched.
        dz2 = np.array(fp_result, dtype=float)
        dz2[range(num_samples), y] -= 1
        dz2 /= num_samples

        dW2 = np.dot(activation1_result.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        # Back through the hidden layer; each derivative is written in terms
        # of the activation output a1 rather than the pre-activation z1.
        dz1 = np.dot(dz2, self.W2.T)
        if name == "relu":
            dz1[activation1_result <= 0] = 0
        elif name == "sigmoid":
            dz1 *= activation1_result * (1 - activation1_result)
        else:  # tanh
            dz1 *= 1 - activation1_result ** 2

        dW1 = np.dot(X.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        return dW1, db1, dW2, db2

    def train_model(self, X, y, X_val, y_val, hidden_activation = "relu", epochs = 10, learning_rate = 0.01):
        """Train with full-batch gradient descent and return per-epoch loss.

        Parameters
        ----------
        X, y : training features and integer labels.
        X_val, y_val : accepted for interface compatibility; currently unused.
        hidden_activation : str, default ``"relu"``
        epochs : int, default 10
            Number of full passes (one parameter update each).
        learning_rate : float, default 0.01

        Returns
        -------
        list of float
            Training loss measured before each epoch's update.
        """
        train_loss = []

        for epoch in range(epochs):
            activation1_result, fp_result = self.forward_pass(X, y, hidden_activation)
            train_loss.append(self.calc_loss(fp_result, y))
            dW1, db1, dW2, db2 = self.backPropagation(
                X, y, activation1_result, fp_result, hidden_activation
            )

            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2
        return train_loss

    def predict(self, X, hidden_activation = "relu"):
        """Return the most likely class index for each row of ``X``.

        ``hidden_activation`` must match the activation used for training.
        The argmax is taken over the output logits, which gives the same
        index as the argmax over their softmax.

        Raises
        ------
        ValueError
            If ``hidden_activation`` is not a supported name.
        """
        z1 = np.dot(X, self.W1) + self.b1
        a1 = self._activate(z1, hidden_activation)
        z2 = np.dot(a1, self.W2) + self.b2
        y_pred = np.argmax(z2, axis=1)
        return y_pred