In [1]:
import numpy as np
import pandas as pd

In [2]:
def one_hot(Y):
    one_hot_Y = np.zeros((Y.size,Y.max()+1))
    one_hot_Y[np.arange(Y.size),Y]=1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [3]:
#we define our activation functions and its derivative
def sig(x):
    return (1/(1+np.exp(-x)))

def d_sig(x):
    return (sig(x)*(1-sig(x)))
#we define our activation functions and its derivative
# def sig(x):
#     return np.tanh(x)

# def d_sig(x):
#     return (1/np.cosh(x))**2

In [4]:
#we initlaize weights and biases as random
def init_params(X,n1=10,n2=10):
    global W_3,B_3,W_2,B_2,W_1,B_1
    m,n = X.shape
    W_3= np.random.randn(10,n2)
    W_2= np.random.randn(n2,n1)    
    W_1= np.random.randn(n1,m)  
    B_3 = np.random.randn(10,1)
    B_2 = np.random.randn(n2,1)    
    B_1 = np.random.randn(n1,1)    

In [5]:
#we define the network functions
def network_function(X,Y=None):
    global W_3,B_3,W_2,B_2,W_1,B_1
    A_0 = X
    Z_1 = W_1@A_0 +B_1
    A_1 = sig(Z_1)
    Z_2 = W_2@A_1 +B_2
    A_2 = sig(Z_2)
    Z_3 = W_3@A_2 +B_3 
    #A_3 is actual output
    A_3 = sig(Z_3)
    #if Y is given it is training and if not it is testing
    if(not type(Y) == None):
        C=A_3-Y
        a,b=C.shape
        C=np.sum(C,axis=1,keepdims=True)/b
        C=np.linalg.norm(C)
        return C,A_3,Z_3,A_2,Z_2,A_1,Z_1,A_0
    return A_3

In [6]:
def back_propagation(X,A_3,Z_3,A_2,Z_2,A_1,Z_1,A_0,Y):
    global W_3,B_3,W_2,B_2,W_1,B_1
    (m,n)=X.shape
    
    H_3= (A_3-Y)*d_sig(Z_3)
    
    J_W3=2*(H_3@A_2.T)/n
    J_B3=2*(np.sum(H_3,axis=1,keepdims=True))/n
    
    H_2= (H_3.T@ W_3).T * d_sig(Z_2)
    
    J_W2 = 2*(H_2@A_1.T)/n
    J_B2 = 2*(np.sum(H_2,axis=1,keepdims=True))/n
    
    H_1= (H_2.T@ W_2).T * d_sig(Z_1)
    
    J_W1 = 2*(H_1@A_0.T)/n
    J_B1 = 2*(np.sum(H_1,axis=1,keepdims=True))/n
    
    #we return our jacobians
    return J_W3,J_B3,J_W2,J_B2,J_W1,J_B1

In [7]:
def get_predicitons(A3):
    #if A3 is
    # 0.3 0.5 1.2 ... 
    # 0.4 0.1 0.6 ...  ^
    # 0.2 1.3 0.4 ...  | axis 0 max value index
    #gives[1,2,0] perdictions of each column
    return np.argmax(A3,0)

def get_accuracy(predictions,Y):
    #gives label of each column
    Y_values=np.argmax(Y,0)
    print(predictions,Y_values)
    return np.sum(predictions==Y_values)/Y_values.size


#No need any explanation
#X should be a m*n array and y an 10*n array
def train_network(X,Y,iteration,learning_rate=0.1):
    global W_3,B_3,W_2,B_2,W_1,B_1
    alpha = learning_rate
    for i in range(0,iteration):
        C,A_3,Z_3,A_2,Z_2,A_1,Z_1,A_0 = network_function(X,Y)

        J_W3,J_B3,J_W2,J_B2,J_W1,J_B1 = back_propagation(X,A_3,Z_3,A_2,Z_2,A_1,Z_1,A_0,Y)
        W_3 = W_3 - alpha *J_W3
        W_2 = W_2 - alpha *J_W2
        W_1 = W_1 - alpha *J_W1
        B_3 = B_3 - alpha *J_B3
        B_2 = B_2 - alpha *J_B2
        B_1 = B_1 - alpha *J_B1
        if(i%10==0):
            print("Iteration: ",i)
            print("Accuracy: ",get_accuracy(get_predicitons(A_3),Y))
#             print("error: ",C)

In [8]:
#X will be one data set i.e mx1 array
def network_prediction(X):
    prediction = network_function(X)
    #use an decoder to ouput something like 0,0,0,1,0,0,0,0,0 = 3
    return prediction

In [9]:
#below we take data and oragnize it
data= pd.read_csv("./train.csv")

In [10]:
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
#we take array
data = data.to_numpy()
#n=np.of inputs and m= no.of features+1 i.e  (label)
n,m = data.shape
#to avoid overfitting, we train on portion of train data
np.random.shuffle(data) #shuffles along row

In [12]:
#now we take out some train and test data from the array
data_test=data[0:1000].T #making sure 1 column is one dataset
# label: a b c d ....
# p1:    a b c d ....
# p2:    a b c d ....
Y_test = data_test[0] #label row
X_test = data_test[1:m] #pixels row

data_train = data[1000:n].T
Y_train = data_train[0]
X_train=data_train[1:m]

In [13]:
print(X_train.shape)
#Y is just a label value 1 ,2 ,3 ,4 .. we need to encode it to output values
print(Y_train.shape)
print(Y_train[0])

Y_train = one_hot(Y_train)
print(Y_train.shape)
print(Y_train[:,0])

(784, 41000)
(41000,)
5
(10, 41000)
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [14]:
init_params(X_train)

In [None]:
train_network(X_train,Y_train,iteration=1000,learning_rate=10)

  return (1/(1+np.exp(-x)))


Iteration:  0
[7 7 2 ... 7 2 7] [5 7 2 ... 4 5 7]
Accuracy:  0.13621951219512196
Iteration:  10
[0 0 0 ... 0 8 0] [5 7 2 ... 4 5 7]
Accuracy:  0.10307317073170731
Iteration:  20
[0 9 0 ... 0 9 0] [5 7 2 ... 4 5 7]
Accuracy:  0.1073170731707317
Iteration:  30
[0 9 0 ... 0 9 9] [5 7 2 ... 4 5 7]
Accuracy:  0.11636585365853659
Iteration:  40
[0 9 0 ... 0 9 9] [5 7 2 ... 4 5 7]
Accuracy:  0.11926829268292682
Iteration:  50
[0 9 0 ... 0 8 9] [5 7 2 ... 4 5 7]
Accuracy:  0.12453658536585366
Iteration:  60
[0 9 0 ... 0 8 9] [5 7 2 ... 4 5 7]
Accuracy:  0.13326829268292684
Iteration:  70
[0 9 0 ... 9 8 9] [5 7 2 ... 4 5 7]
Accuracy:  0.1731951219512195
Iteration:  80
[0 8 0 ... 9 8 8] [5 7 2 ... 4 5 7]
Accuracy:  0.18953658536585366
Iteration:  90
[5 3 8 ... 8 8 4] [5 7 2 ... 4 5 7]
Accuracy:  0.2031219512195122
Iteration:  100
[0 3 5 ... 3 8 4] [5 7 2 ... 4 5 7]
Accuracy:  0.2188048780487805
Iteration:  110
[5 3 5 ... 3 8 4] [5 7 2 ... 4 5 7]
Accuracy:  0.24680487804878048
Iteration:  120
[5 

array([2, 1])