# Training using Neural Networks from scratch

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import util_mnist_reader
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from keras import optimizers
from keras.models import Sequential

In [None]:
# Load the 'train' and 't10k' splits from the data/fashion directory.
# NOTE(review): presumably load_mnist returns an (images, labels) pair of arrays — confirm against util_mnist_reader.
X, y = util_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = util_mnist_reader.load_mnist('data/fashion', kind='t10k')

In [None]:
def one_hot(y, n_labels):
    """Return a one-hot encoding of the integer labels in ``y``.

    Args:
        y: Sequence or array of integer class indices. Either a flat
            sequence of length ``n`` or an ``(n, k)`` array; in the latter
            case every index in row ``i`` is set in output row ``i``.
        n_labels: Number of columns (classes) in the output.

    Returns:
        A float array of shape ``(len(y), n_labels)`` holding 1 at each
        labelled position and 0 elsewhere.

    Raises:
        IndexError: If a label is out of range or not an integer.
    """
    labels = np.asarray(y)
    mat = np.zeros((len(labels), n_labels))
    if labels.size:
        # One vectorized assignment instead of a Python loop over every row.
        # The row index is a column vector so it broadcasts against (n, k).
        rows = np.arange(len(labels)).reshape(-1, 1)
        mat[rows, labels.reshape(len(labels), -1)] = 1
    return mat

In [None]:
# Divide both image arrays by 255 and reshape the training labels into a column vector.
# NOTE(review): presumably pixel values are 0-255, so this maps them to [0, 1] — confirm.
# NOTE: X and X_test are rebound here, so re-running this cell divides by 255 again.
X = X/255.0
y = y.reshape(-1,1)
X_test = X_test/255.0

In [None]:
# Replace the training label column with a one-hot matrix that has 10 columns.
y = one_hot(y,10)

In [None]:
class nn_classifier():
    """Network with one sigmoid hidden layer and a softmax output, built on NumPy.

    The network has no bias terms. Training is mini-batch gradient descent:
    the data is split once into ``n_batches`` contiguous batches and every
    epoch visits them in the same order.

    Args:
        epochs: Number of passes over the training data made by ``fit``.
        alpha: Learning rate; it multiplies the batch-summed gradients.
        n_batches: Number of mini-batches the training data is split into.
        hidden_layer_nodes: Number of units in the hidden layer.
        n_features: Input dimensionality. Defaults to 784, the value that was
            previously hard-coded.
        n_classes: Number of output classes. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, epochs, alpha, n_batches, hidden_layer_nodes,
                 n_features=784, n_classes=10):
        self.epochs = epochs
        self.alpha = alpha
        self.n_batches = n_batches
        self.hidden_layer_nodes = hidden_layer_nodes
        self.n_features = n_features
        self.n_classes = n_classes
        self.w1, self.w2 = self.weight_init()
        self.preds = []
        # NOTE: an unused ``self.error = []`` used to be assigned here. It
        # shadowed the ``error`` method and made it uncallable on instances,
        # so the attribute is intentionally no longer created.
        self.losstrack = []  # mean loss per epoch, appended by fit()
        self.accuracy_track = []  # mean training accuracy per epoch

    def weight_init(self):
        """Return ``(w1, w2)`` drawn uniformly from [-1, 1).

        ``w1`` has shape ``(hidden_layer_nodes, n_features)`` and ``w2`` has
        shape ``(n_classes, hidden_layer_nodes)``.
        """
        w1 = np.random.uniform(
            -1.0, 1.0, size=(self.hidden_layer_nodes, self.n_features))
        w2 = np.random.uniform(
            -1.0, 1.0, size=(self.n_classes, self.hidden_layer_nodes))
        return w1, w2

    def score(self, preds, actuals):
        """Return the fraction of entries of ``preds`` that equal ``actuals``."""
        return np.sum(actuals == preds, axis=0) / float(actuals.shape[0])

    def cross_entropy(self, y_target, outputs):
        """Return the per-sample cross-entropy.

        Args:
            y_target: One-hot targets, shape ``(n_samples, n_classes)``.
            outputs: Predicted probabilities with the same shape.

        Returns:
            Array of shape ``(n_samples,)``.
        """
        # Floor the probabilities so log(0) cannot turn the loss into inf/nan.
        safe_outputs = np.maximum(outputs, 1e-12)
        return -np.sum(np.log(safe_outputs) * y_target, axis=1)

    def softmax(self, z):
        """Apply softmax over the class axis (axis 0) of ``z``.

        ``z`` is laid out as ``(n_classes, n_samples)``; the result has the
        same shape and each column sums to 1.
        """
        # Subtracting the per-column maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing on large logits.
        shifted = z - np.max(z, axis=0, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=0, keepdims=True)

    def sigmoid(self, z):
        """Return the element-wise logistic function of ``z``."""
        return 1 / (1 + np.exp(-z.astype(float)))

    def sigmoid_prime(self, z):
        """Return the element-wise derivative of ``sigmoid`` at ``z``."""
        a1 = self.sigmoid(z)
        return a1 * (1 - a1)

    def get_argmax(self, y):
        """Return the index of the largest entry in each row of ``y``."""
        return np.argmax(y, axis=1)

    def forward_pass(self, X):
        """Run the network on ``X`` of shape ``(n_samples, n_features)``.

        Returns:
            ``(z1, a1, z2, a2)``: hidden pre-activation and activation, then
            output pre-activation and softmax output. Every array has one
            column per sample.
        """
        z1 = self.w1.dot(X.T)
        a1 = self.sigmoid(z1)
        z2 = self.w2.dot(a1)
        a2 = self.softmax(z2)
        return z1, a1, z2, a2

    def error(self, y_actual, y_preds):
        """Return the batch loss: half the mean per-sample cross-entropy."""
        return 0.5 * np.mean(self.cross_entropy(y_actual, y_preds))

    def backward_pass(self, X_train, y_train, a1, a2, z1):
        """Apply one gradient-descent update to ``w1`` and ``w2`` in place.

        Args:
            X_train: Batch inputs, shape ``(n_samples, n_features)``.
            y_train: One-hot batch targets, shape ``(n_samples, n_classes)``.
            a1, a2, z1: Values returned by ``forward_pass`` for this batch.
        """
        # Output-layer error (softmax output minus one-hot target).
        delta_out = a2 - y_train.T
        # Error propagated back through w2 and the sigmoid non-linearity.
        delta_hidden = self.w2.T.dot(delta_out) * self.sigmoid_prime(z1)
        # The dot products sum over the samples in the batch (no averaging).
        dw1 = delta_hidden.dot(X_train)
        dw2 = delta_out.dot(a1.T)
        self.w1 -= self.alpha * dw1
        self.w2 -= self.alpha * dw2

    def predict(self, X):
        """Return the predicted class index for every row of ``X``."""
        _, _, _, a2 = self.forward_pass(X)
        return self.get_argmax(a2.T)

    def fit(self, X, y):
        """Train on inputs ``X`` and one-hot targets ``y``.

        After each epoch the mean batch loss is appended to ``losstrack`` and
        the mean batch accuracy to ``accuracy_track``. Both are computed from
        the activations obtained before that batch's weight update.
        """
        # array_split only slices its input and nothing below mutates the
        # batches, so no defensive copy of the dataset is needed.
        X_batches = np.array_split(X, self.n_batches)
        y_batches = np.array_split(y, self.n_batches)
        for _ in range(self.epochs):
            batch_accuracies = []
            batch_losses = []
            for X_batch, y_batch in zip(X_batches, y_batches):
                z1, a1, _, a2 = self.forward_pass(X_batch)
                self.backward_pass(X_batch, y_batch, a1, a2, z1)

                batch_losses.append(self.error(y_batch, a2.T))

                y_preds = self.get_argmax(a2.T)
                y_actual = self.get_argmax(y_batch)
                batch_accuracies.append(self.score(y_preds, y_actual))

            self.losstrack.append(np.mean(batch_losses))
            self.accuracy_track.append(np.mean(batch_accuracies))

    def plot_loss(self):
        """Plot the per-epoch training loss recorded by ``fit``."""
        plt.title("Loss vs. Epochs")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.plot(self.losstrack, label="Loss")
        plt.legend()
        plt.show()

    def plot_accuracy(self):
        """Plot the per-epoch training accuracy recorded by ``fit``."""
        plt.title("Accuracy vs. Epochs")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy")
        plt.plot(self.accuracy_track, label="Accuracy")
        plt.legend()
        plt.show()

In [None]:
# First from-scratch run: 500 epochs, learning rate 10e-4, 15 batches, 300 hidden units.
model = nn_classifier(
    epochs=500,
    alpha=10e-4,
    n_batches=15,
    hidden_layer_nodes=300,
)
model.fit(X, y)
# Show the loss and accuracy curves recorded during training.
model.plot_loss()
model.plot_accuracy()

In [None]:
# Predict class indices for the test set and compute the fraction that equal y_test.
y_test_preds = model.predict(X_test)
test_accuracy = model.score(y_test_preds, y_test)
test_accuracy  # bare expression so the notebook displays the value

In [None]:
# Draw the test-set confusion matrix as an annotated heatmap on a 10x10 figure.
plt.figure(figsize=(10, 10))
cm = confusion_matrix(y_test, y_test_preds)
sns.heatmap(cm, annot=True)

In [None]:
# Build and print scikit-learn's text summary of per-class classification metrics.
report = classification_report(y_test, y_test_preds)
print(report)

## Changed learning rate to 10e-5 (i.e. 1e-4) and epochs to 100

In [None]:
# Second from-scratch run: 100 epochs, learning rate 10e-5, 15 batches, 300 hidden units.
model = nn_classifier(
    epochs=100,
    alpha=10e-5,
    n_batches=15,
    hidden_layer_nodes=300,
)
model.fit(X, y)
# Show the loss and accuracy curves recorded during training.
model.plot_loss()
model.plot_accuracy()

In [None]:
# Predict class indices for the test set and compute the fraction that equal y_test.
y_test_preds = model.predict(X_test)
test_accuracy = model.score(y_test_preds, y_test)
test_accuracy  # bare expression so the notebook displays the value

In [None]:
# Draw the test-set confusion matrix as an annotated heatmap on a 10x10 figure.
plt.figure(figsize=(10, 10))
cm = confusion_matrix(y_test, y_test_preds)
sns.heatmap(cm, annot=True)

In [None]:
# Build and print scikit-learn's text summary of per-class classification metrics.
report = classification_report(y_test, y_test_preds)
print(report)

# Training using Keras Neural Network Model

In [None]:
# Reload both splits for the Keras experiments; X_test and y_test are rebound to the raw arrays.
# NOTE(review): unlike the other two sections, these arrays are not divided by 255 before training — confirm this is intended.
X_train, y_train = util_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = util_mnist_reader.load_mnist('data/fashion', kind='t10k')

In [None]:
# Fully connected network: 784 inputs -> 300 -> 200 -> 75 sigmoid units -> 10-way softmax.
model_nn = Sequential([
    Dense(units=300, activation='sigmoid', input_dim=784),
    Dense(units=200, activation='sigmoid'),
    Dense(units=75, activation='sigmoid'),
    Dense(units=10, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss, the 'sgd' optimizer with its default settings, accuracy as the tracked metric.
model_nn.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])

In [None]:
# Train for 300 epochs in batches of 60, one-hot encoding the labels on the fly; the return value is kept for the plots below.
train_model = model_nn.fit(X_train, one_hot(y_train, 10), epochs = 300, batch_size = 60)

In [None]:
# Evaluate on the test split and print the result (presumably [loss, accuracy], given metrics=['accuracy']).
print(model_nn.evaluate(X_test, one_hot(y_test, 10), batch_size = 25))

In [None]:
# Plot the 'accuracy' series from the training history against the epoch index.
plt.plot(train_model.history['accuracy'])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("With Learning rate 0.01")

In [None]:
# Plot the 'loss' series from the training history against the epoch index.
plt.plot(train_model.history['loss'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("With Learning rate 0.01")

In [None]:
# Sequential.predict_classes was removed from Keras; taking the argmax of the
# softmax output returned by predict() yields the same class indices.
y_test_preds = np.argmax(model_nn.predict(X_test), axis=-1)
cm = confusion_matrix(y_test, y_test_preds)
sns.heatmap(cm, annot=True)

In [None]:
# Build and print scikit-learn's text summary of per-class classification metrics.
report = classification_report(y_test, y_test_preds)
print(report)

In [None]:
# Smaller fully connected network: 784 inputs -> 200 -> 100 -> 50 sigmoid units -> 10-way softmax.
model_nn = Sequential()
model_nn.add(Dense(units = 200, activation = 'sigmoid', input_dim = 784))
model_nn.add(Dense(units = 100, activation = 'sigmoid'))
model_nn.add(Dense(units = 50, activation = 'sigmoid'))
model_nn.add(Dense(units = 10, activation = 'softmax'))
# Pass the configured optimizer instance to compile(). Previously the optimizer
# object was created and discarded, and the string 'sgd' selected the default
# learning rate instead.
# NOTE(review): 10e-4 equals 0.001, while the plot titles for this run say 0.0001 — confirm the intended value.
model_nn.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.SGD(learning_rate=10e-4),
    metrics=['accuracy'],
)

In [None]:
# Train for 300 epochs in batches of 60, one-hot encoding the labels on the fly; the return value is kept for the plots below.
train_model = model_nn.fit(X_train, one_hot(y_train, 10), epochs = 300, batch_size = 60)

In [None]:
# Evaluate on the test split and print the result (presumably [loss, accuracy], given metrics=['accuracy']).
print(model_nn.evaluate(X_test, one_hot(y_test, 10), batch_size = 25))

In [None]:
# Plot the 'accuracy' series from the training history against the epoch index.
# NOTE(review): the title says 0.0001, whereas the 10e-4 written in the model cell equals 0.001 — confirm the intended rate and align the two.
plt.plot(train_model.history['accuracy'])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("With Learning rate 0.0001")

In [None]:
# Plot the 'loss' series from the training history against the epoch index.
# NOTE(review): the title says 0.0001, whereas the 10e-4 written in the model cell equals 0.001 — confirm the intended rate and align the two.
plt.plot(train_model.history['loss'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("With Learning rate 0.0001")

In [None]:
# Sequential.predict_classes was removed from Keras; taking the argmax of the
# softmax output returned by predict() yields the same class indices.
y_test_preds = np.argmax(model_nn.predict(X_test), axis=-1)
cm = confusion_matrix(y_test, y_test_preds)
plt.figure(figsize=(10,10))
sns.heatmap(cm, annot=True)

In [None]:
# Build and print scikit-learn's text summary of per-class classification metrics.
report = classification_report(y_test, y_test_preds)
print(report)

# Training with Convolutional Neural Networks

In [None]:
# Reload the 'train' and 't10k' splits for the CNN experiments, rebinding X, y, X_test and y_test.
X, y = util_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = util_mnist_reader.load_mnist('data/fashion', kind='t10k')

In [None]:
# Divide both image arrays by 255 and reshape the training labels into a column vector.
# NOTE(review): presumably pixel values are 0-255, so this maps them to [0, 1] — confirm.
# NOTE: X and X_test are rebound here, so re-running this cell divides by 255 again.
X = X/255.0
y = y.reshape(-1,1)
X_test = X_test/255.0

In [None]:
# Reshape the flat rows into 28x28 single-channel images for Conv2D.
# Using -1 lets NumPy infer the sample count instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
X_train = X.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

In [None]:
# Rebind y_test to its one-hot form (10 columns).
# NOTE: from here on y_test is a matrix, not a vector of class indices, so later cells must not one-hot encode it again.
y_test = one_hot(y_test,10)

In [None]:
# CNN: two 3x3 sigmoid conv layers (200 then 300 filters), each followed by
# max pooling, then a flattened 10-way softmax classifier.
model_cnn = Sequential([
    Conv2D(200, kernel_size=3, activation='sigmoid', input_shape=(28, 28, 1)),
    MaxPool2D(),
    Conv2D(300, kernel_size=3, activation='sigmoid'),
    MaxPool2D(),
    Flatten(),
    Dense(units=10, activation='softmax'),
])

In [None]:
# Configure and train the first CNN: 10 epochs, batches of 600.
model_cnn.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
# Use this section's own labels `y`. The previous `y_train` was a leftover
# global from the Keras dense-network section and is undefined when this
# section is run on its own.
train_model = model_cnn.fit(X_train, one_hot(y, 10), epochs=10, batch_size=600)

In [None]:
# Evaluate the CNN on the test split. Three fixes relative to the original line:
#  - print() does not accept an arbitrary keyword such as loss_acc=, so assign first;
#  - `model` is the from-scratch classifier, so the Keras model `model_cnn` is used;
#  - y_test was already one-hot encoded earlier in this section, so it is passed as is.
loss_acc = model_cnn.evaluate(X_test, y_test, batch_size=25)
print(loss_acc)

In [None]:
# Plot training accuracy per epoch. The history key is 'accuracy', matching
# metrics=['accuracy'] in compile() and the earlier plots in this notebook;
# 'acc' was only used by older Keras releases.
plt.plot(train_model.history['accuracy'])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("With Learning rate 0.01")

In [None]:
# Plot the 'loss' series from the training history against the epoch index.
plt.plot(train_model.history['loss'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("With Learning rate 0.01")

In [None]:
# y_test was one-hot encoded earlier in this section; scikit-learn's metrics
# need class indices, so recover them with argmax.
y_test_labels = np.argmax(y_test, axis=1)
# Predict with the CNN (not `model`, the from-scratch classifier).
# predict_classes was removed from Keras; argmax over predict() is equivalent.
y_test_preds = np.argmax(model_cnn.predict(X_test), axis=-1)
# The original passed the not-yet-defined name `test_preds` here.
cm = confusion_matrix(y_test_labels, y_test_preds)
print(cm)
report = classification_report(y_test_labels, y_test_preds)
print(report)

In [None]:
# Smaller CNN: two 3x3 sigmoid conv layers (100 then 50 filters), each followed
# by max pooling, then a flattened 10-way softmax classifier.
model_cnn = Sequential()
model_cnn.add(Conv2D(100, kernel_size = 3, activation = 'sigmoid', input_shape = (28, 28, 1)))
model_cnn.add(MaxPool2D())
model_cnn.add(Conv2D(50, kernel_size = 3, activation = 'sigmoid'))
model_cnn.add(MaxPool2D())
model_cnn.add(Flatten())
model_cnn.add(Dense(units = 10, activation = 'softmax'))
# Pass the configured optimizer instance to compile(). Previously the optimizer
# object was created and discarded, and the string 'sgd' selected the default
# learning rate instead. `learning_rate` is the current argument name (`lr` is
# the legacy alias).
# NOTE(review): 10e-4 equals 0.001, while the plot titles for this run say 0.0001 — confirm the intended value.
model_cnn.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.SGD(learning_rate=10e-4),
    metrics=['accuracy'],
)
# Use this section's own labels `y` rather than the leftover `y_train` global
# from the Keras dense-network section.
train_model = model_cnn.fit(X_train, one_hot(y, 10), epochs=200, batch_size=600)

In [None]:
# Plot training accuracy per epoch. The history key is 'accuracy', matching
# metrics=['accuracy'] in compile() and the earlier plots in this notebook;
# 'acc' was only used by older Keras releases.
# NOTE(review): the title says 0.0001, whereas the 10e-4 written in the model cell equals 0.001 — confirm the intended rate and align the two.
plt.plot(train_model.history['accuracy'])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("With Learning rate 0.0001")

In [None]:
# Plot the 'loss' series from the training history against the epoch index.
# NOTE(review): the title says 0.0001, whereas the 10e-4 written in the model cell equals 0.001 — confirm the intended rate and align the two.
plt.plot(train_model.history['loss'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("With Learning rate 0.0001")

In [None]:
# y_test was one-hot encoded earlier in this section; scikit-learn's metrics
# need class indices, so recover them with argmax.
y_test_labels = np.argmax(y_test, axis=1)
# Predict with the CNN (not `model`, the from-scratch classifier).
# predict_classes was removed from Keras; argmax over predict() is equivalent.
test_preds = np.argmax(model_cnn.predict(X_test), axis=-1)
cm = confusion_matrix(y_test_labels, test_preds)
print(cm)
report = classification_report(y_test_labels, test_preds)
print(report)