In [150]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [151]:
input_data = pd.read_csv("./train.csv")

labels = input_data['label']

numerical_columns = input_data.drop(columns=['label'])

standardized_data = (input_data-np.mean(input_data,axis=0))/np.std(input_data,axis=0)

input_data = pd.concat([labels, standardized_data], axis=1)


In [152]:
data=np.array(input_data)
m,n=data.shape
np.random.shuffle(data)

In [153]:
data_dev = data[0:1000].T
Xdev=data_dev[1:n]
ydev=data_dev[0]

data_train=data[1000:m].T
Xtrain=data_train[1:n]
ytrain = data_train[0]

In [154]:
def RELU(x):
    return np.maximum(0,x)

def dRELU(x):
    return x>0

def LRELU(x):
    a=0.01
    return np.maximum(a*x, x)

def dLRELU(x):
    a=0.01
    return (x>0) + a*(x<=0)

def tanh(x):
    return np.tanh(x)

def dtanh(x):
    return 1 - np.tanh(x)**2

def softmax(x):
    e = np.exp(x)
    return e/np.sum(e, axis=0)

In [155]:
def onehot(x):
    ohX=np.zeros((x.size,10))
    ohX[np.arange(x.size),x.astype(int)]=1
    return ohX.T

In [156]:
def initparams(X):
    m,n=X.shape
    c=50
    W1 = np.random.rand(c, m) - 0.5
    b1 = np.random.rand(c, 1) - 0.5
    
    W2 = np.random.rand(10, c) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    
    return W1,b1,W2,b2

def fpass(X,W1,b1,W2,b2):
    Z1=np.dot(W1,X)+b1
    A1=RELU(Z1)
    
    Z2 = np.dot(W2,A1)+b2
    A2=softmax(Z2)
    
    return Z1,A1,Z2,A2
    
def bpass(X,y,Z1,A1,Z2,A2,W2):
    m=y.size
    y=onehot(y)
    
    dZ2 = A2 - y
    dW2 = 1 / m * np.dot(dZ2,A1.T)
    db2 = 1 / m * np.sum(dZ2, axis = 1, keepdims = True)
    
    dZ1 = np.dot(W2.T,dZ2) * dRELU(A1)
    dW1 = 1 / m * np.dot(dZ1,X.T)
    db1 = 1 / m * np.sum(dZ1, axis = 1, keepdims = True)
    
    return dW1,db1,dW2,db2

def updateparams(W1,b1,W2,b2,dW1,db1,dW2,db2,lr):
    W1= W1 - lr*dW1
    b1= b1 - lr*db1
    
    W2= W2 - lr*dW2
    b2= b2 - lr*db2
    
    return W1,b1,W2,b2

def predict(x):
    return np.argmax(x,0)

def accuracy(preds,x):
    return np.sum(preds==x)/x.size

def train(X,y,Xd,yd,itr,lr):
    W1,b1,W2,b2=initparams(X)
    
    for i in range(itr+1):
        Z1,A1,Z2,A2=fpass(X,W1,b1,W2,b2)
        devZ1,devA1,devZ2,devA2=fpass(Xd,W1,b1,W2,b2)
        dW1,db1,dW2,db2=bpass(X,y,Z1,A1,Z2,A2,W2)
        W1,b1,W2,b2=updateparams(W1,b1,W2,b2,dW1,db1,dW2,db2,lr)
        
        if i%50 ==0:
            print(f"iteration: {i}")
            print(f"accuracy: {accuracy(predict(A2),y)}")
            print(f"DEV accuracy: {accuracy(predict(devA2),yd)}")
    

In [157]:
train(Xtrain, ytrain, Xdev, ydev, 200, 0.1)


iteration: 0
accuracy: 0.08552542372881355
DEV accuracy: 0.095
iteration: 50
accuracy: 0.7331864406779661
DEV accuracy: 0.713
iteration: 100
accuracy: 0.7664406779661017
DEV accuracy: 0.757
iteration: 150
accuracy: 0.7880508474576271
DEV accuracy: 0.78
iteration: 200
accuracy: 0.8057966101694916
DEV accuracy: 0.797
