# 1. Neural network
#### In this one of the most important assignments of the program, you are expected to build a neural network from scratch and train it on the MNIST data set. Some template code has been provided as help.

#### You will create an instance of your NN class with 4 arguments: number of input layer neurons, number of hidden layer neurons, number of output layer neurons, and learning rate.

#### While initializing the instance, create two weight matrices of shapes (#hidden, #input) and (#output, #hidden).

#### You also need to define `train()` and `predict_proba()` functions. It may be more convenient to code prediction function first as it is forward propagation part of the train function.

#### Once you have coded the prediction function, you have the first part (forward propagation) of the train function. Now you need to find the error (final outputs - targets) and update the weights.

In [1]:
import numpy as np

In [2]:
def sigmoid(a):
    return np.reciprocal(np.add(1, np.exp(np.negative(a))))

In [3]:
def cost(dataset, w):
    h = sigmoid(np.matmul(w.T, x.T))
    h = h.T

    loss = np.matmul(y, np.log(h)) + np.matmul(1-y, np.log(1 - h))

    j = (-1/np.size(y)) * loss
    j = j[0]
    return j

In [4]:
# cost_evolution = []

#     h = sigmoid(np.matmul(x, weights.T))
#     loss = h - y.flatten()
#     weights = weights - alpha*np.matmul(loss, x)/y.size
#     if (i%100 == 0):
#         print((i/n)*100)

# cost_evolution.append(cost(ndf, weights))

In [5]:
class MyNN:
    
    def __init__(self, n_input_layers, n_hidden_layers, n_output_layer, alpha,
                 n_iterations):
        self.n_input_layers = n_input_layers
        self.n_hidden_layers = n_hidden_layers
        self.n_output_layer = n_output_layer
        self.alpha = alpha
        
        self.weights1 = np.random.rand(n_hidden_layers, n_input_layers)
        self.weights2 = np.random.rand(n_output_layer, n_hidden_layers)
        
        self.activation_function = sigmoid
        
        #added by me
        self.train_history = []
        self.n_iterations = n_iterations
        
    def predict_proba(self, features):
        #neuron function, forward propagation
        z1 = np.matmul(self.weights1, features)
        z1 = self.activation_function(z1)
        new_features = z1
        z2 = np.matmul(self.weights2, new_features)
        z2 = self.activation_function(z2)

        return z1, z2
    
    def train(self, features, labels):
        h_pred, y_pred = self.predict_proba(features)
        y_pred = y_pred.reshape(-1, 1)
        h_pred = h_pred.reshape(-1, 1)
        loss_y = labels - y_pred
        weights2 = self.weights2 - (self.alpha*np.matmul((self.weights2).T, loss_y*y_pred*(1 - y_pred))).T

        #might be error in the next 2 lines
        weights2_pred = self.activation_function(np.matmul(self.weights1,features)) 
        loss_h = weights2_pred - h_pred
        weights1 = self.weights1 - (self.alpha*np.matmul((self.weights1).T, loss_h*h_pred*(1 - h_pred))).T

# 2. Training on MNIST
#### Initialize your NN object with 784 input neurons, 200 hidden neurons, and 10 output neurons. Set learning rate to 0.1.

In [6]:
input_neurons = 784
hidden_neurons = 200
output_neurons = 10

learning_rate = 0.1
iterations = 1000

my_NN = MyNN(n_input_layers=input_neurons, n_hidden_layers=hidden_neurons,
             n_output_layer=output_neurons, alpha=learning_rate, 
             n_iterations= iterations)

#### Load the MNIST train data set.

In [7]:
import pandas as pd
df = pd.read_csv('mnist_train.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
x_train = df.iloc[:, 1:]
y_train = df.iloc[:, 0]
x_train = x_train.to_numpy()
y_train = y_train.to_numpy()

#### Train your NN on the data set for given number of epochs.

In [9]:
def encode(index):
    a = np.zeros(10)
    a[index] = 1
    return a

In [10]:
encode(5)

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [11]:
epochs = 10

for e in range(epochs):
    for i in range(x_train.shape[0]):
        my_NN.train(x_train[i, :], encode(y_train[i]))

#### Load the MNIST test set and test your NN.

In [12]:
df2 = pd.read_csv('mnist_test.csv', header=None)
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
x_test = df.iloc[:, 1:]
y_test = df.iloc[:, 0]
x_test = x_test.to_numpy()
y_test = y_test.to_numpy()

In [14]:
l = []
for image in x_train:
    print(my_NN.predict_proba(image)[1], "\n\n\n")

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



[1. 1. 1. 1. 1. 1. 1. 1. 1. 

In [16]:
print(y_test)

[5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9 4 0 9 1 1 2 4 3 2 7 3 8 6 9 0 5 6
 0 7 6 1 8 7 9 3 9 8 5 9 3 3 0 7 4 9 8 0 9 4 1 4 4 6 0 4 5 6 1 0 0 1 7 1 6
 3 0 2 1 1 7 9 0 2 6 7 8 3 9 0 4 6 7 4 6 8 0 7 8 3 1]
