In [13]:
import pandas as pd
import numpy as np

In [14]:
x=pd.read_csv("train.csv")
X=np.array(x)
X=X[:,1:]
y=X[:,0]

In [15]:
X.shape,y.shape

((42000, 784), (42000,))

In [25]:
X_train=X[:36000,:]
X_val=X[36000:,:]
y_train=y[:36000]
y_val=y[36000:]

In [26]:
IMG_SIZE = 28*28
H1_SIZE = 256
H2_SIZE = 64
OUT_SIZE = 10
BATCH_SIZE = 256
EPOCH = 50
ALPHA = 0.001

In [27]:
def accuracy(pred,y):
    return ( 100.0* np.sum(pred==y) / y.shape[0])

In [28]:
def initial_weights():
    np.random.seed(0)
    model = {}
    model['W1'] = np.random.randn(IMG_SIZE,H1_SIZE)/ np.sqrt(IMG_SIZE)
    model['B1'] = np.zeros((1,H1_SIZE))
    model['W2'] = np.random.randn(H1_SIZE,H2_SIZE)/ np.sqrt(H1_SIZE)
    model['B2'] = np.zeros((1,H2_SIZE))
    model['W3'] = np.random.randn(H2_SIZE,OUT_SIZE)/ np.sqrt(H2_SIZE)
    model['B3'] = np.zeros((1,OUT_SIZE))
    return model

In [29]:
def forward_prop(model,x):
    z1 = x.dot(model['W1']) + model['B1']
    a1 = np.tanh(z1)
    z2 = a1.dot(model['W2']) + model['B2']
    a2 = np.tanh(z2)
    z3 = a2.dot(model['W3']) + model['B3']
    h_x = np.exp(z3)
    y_out = h_x/ np.sum(h_x, axis=1, keepdims=True)
    return a1, a2, y_out

In [30]:
def back_prop(model, x ,a1 , a2, y, y_out):
    delta4 = y_out
    delta4[range(y.shape[0]), y] -= 1
    dw3 = (a2.T).dot(delta4)
    db3 = np.sum(delta4, axis = 0)
    delta3 = (1 - np.square(a2))*delta4.dot(model['W3'].T)
    dw2 = (a1.T).dot(delta3)
    db2 = np.sum(delta3, axis = 0)
    delta2 = (1 - np.square(a1))*delta3.dot(model['W2'].T)
    dw1 = (x.T).dot(delta2)
    db1 = np.sum(delta2, axis = 0)
    
    model['W1'] += -ALPHA*dw1
    model['B1'] += -ALPHA*db1
    model['W2'] += -ALPHA*dw2
    model['B2'] += -ALPHA*db2
    model['W3'] += -ALPHA*dw3
    model['B3'] += -ALPHA*db3
    
    return model

In [31]:
def loss(model, p, y):
    correct_logprobs = -np.log(p[range(y.shape[0]),y])
    l = np.sum(correct_logprobs)
    
    return 1.0/y.shape[0] * l

In [32]:
def predict(y_out):
    return np.argmax(y_out, axis = 1)

In [33]:
def main():
    model = initial_weights()
    for ix in range(EPOCH):
        print ("\nEpoch : %d" %(ix+1))
        count = 0
        while (count+BATCH_SIZE) < y_train.shape[0]:
            batch_data = X_train[count:(count+BATCH_SIZE),:]
            batch_labels = y_train[count:(count+BATCH_SIZE),]
            count += BATCH_SIZE
            
            a1, a2 , p = forward_prop(model, batch_data)
            model = back_prop(model,batch_data,a1,a2,batch_labels,p)
        
        _,_, p = forward_prop(model, X_train)
        training_loss=[]
        val_los=[]
        print ('training_loss : % .3f' % (loss(model,p,y_train)))
        training_loss.append(loss(model,p,y_train)))
        _,_,p = forward_prop(model, X_val)
        pred = predict(p)
        print ('val_accuracy : % .3f' % (accuracy(pred,y_val)))
        val_los.append(accuracy(pred,y_val)))
        print ('val_loss : % .3f' % loss(model,p,y_val))
    print("*************Completed***********")

In [34]:
main()


Epoch : 1
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 2
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 3
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 4
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 5
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 6
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 7
training_loss :  0.000
val_accuracy :  100.000
val_loss :  0.000

Epoch : 8


KeyboardInterrupt: 

In [None]:
plt.figure(0)
plt.plot(training_loss,color='blue')
plt.plot(val_loss,color='red')
