In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the labelled training data and the unlabelled test data from the
# working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [3]:
# The first 1000 rows of train.csv are held out as a dev set; every later row
# is used for training. Column 0 is taken as the label and the remaining
# columns as features. Feature frames are transposed so that each column
# holds one example.
Y_dev = train_df.iloc[:1000, 0]
X_dev = train_df.iloc[:1000, 1:].T

Y_train = train_df.iloc[1000:, 0]
X_train = train_df.iloc[1000:, 1:].T

# The test file is used in full as features (no label column is split off).
X_test = test_df.iloc[:, :].T

In [4]:
def params_init(height, width):
    """Create randomly initialised parameters for one dense layer.

    Both arrays are filled with ``np.random.rand``, i.e. uniform samples
    from [0, 1). The weight matrix is drawn before the bias.

    Args:
        height: Number of rows of the weight matrix (and of the bias).
        width: Number of columns of the weight matrix.

    Returns:
        A tuple ``(W, b)`` where ``W`` has shape ``(height, width)`` and
        ``b`` has shape ``(height, 1)``.
    """
    weights = np.random.rand(height, width)
    bias = np.random.rand(height, 1)
    return weights, bias

In [5]:
# Gradient-descent settings: step size and number of full-batch updates.
learning_rate, iteration = 0.9, 500

# X_train was transposed when it was built, so its first axis is the feature
# count (n) and its second axis is the number of training examples (m).
n = X_train.shape[0]
m = X_train.shape[1]

In [6]:
def ReLu(Z):
    """Rectified linear unit: element-wise maximum of ``Z`` and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

def softmax(Z):
    """Column-wise softmax of a score matrix.

    Each column of ``Z`` is treated as one example's class scores and is
    turned into a probability distribution that sums to 1.

    The per-column maximum is subtracted before exponentiating. Softmax is
    invariant to adding a constant to a column, so the result is unchanged
    mathematically, but ``np.exp`` can no longer overflow to ``inf`` (large
    scores) or underflow every entry to 0 (very negative scores), both of
    which previously produced NaN.

    Args:
        Z: Array of scores; normalisation is performed along axis 0.

    Returns:
        Array with the same shape as ``Z`` whose entries along axis 0 sum
        to 1.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

def ReLu_deriv(Z):
    """Derivative mask of ReLU: True where ``Z`` is strictly positive.

    The boolean result acts as 0/1 when multiplied with a numeric array.
    """
    is_positive = Z > 0
    return is_positive

def normalization(A):
    """Standardise each column of ``A`` to zero mean and unit variance.

    Mean and standard deviation are computed along axis 0, so every column
    is normalised independently of the others.

    A column whose entries are all equal has a standard deviation of 0.
    Dividing by it would fill that column with NaN (0/0), which would then
    propagate through the rest of the network. For such columns the divisor
    is replaced by 1, so the column becomes all zeros. Columns with a
    non-zero standard deviation are computed exactly as before.

    Args:
        A: Numeric array to standardise along axis 0.

    Returns:
        Array of the same shape as ``A`` holding the standardised values.
    """
    mean = np.mean(A, axis=0)
    std = np.std(A, axis=0)
    # Guard against division by zero for constant columns.
    safe_std = np.where(std == 0, 1.0, std)
    return (A - mean) / safe_std
    
def one_hot(Y, num_classes=10):
    """One-hot encode integer class labels, one column per example.

    Args:
        Y: Array-like of integer class labels (NumPy array, pandas Series,
            list, ...). It is flattened before encoding.
        num_classes: Number of distinct classes, i.e. the number of rows of
            the result. Defaults to 10, the value that was previously
            hard-coded.

    Returns:
        Float array of shape ``(num_classes, len(Y))`` in which entry
        ``[k, i]`` is 1 when example ``i`` has label ``k`` and 0 otherwise.

    Raises:
        IndexError: If a label is not an integer or is ``>= num_classes``
            (raised by NumPy indexing).
    """
    # np.asarray lets plain sequences work and drops any pandas index, so
    # labels are matched to columns purely by position.
    labels = np.asarray(Y).ravel()
    one_hot_y = np.zeros((labels.size, num_classes))
    one_hot_y[np.arange(labels.size), labels] = 1
    return one_hot_y.T

In [7]:
def foward_prop(W1, b1, W2, b2, X):
    """Forward pass of the two-layer network.

    The hidden pre-activation ``W1 @ X + b1`` is standardised column-wise
    with ``normalization`` and passed through ``ReLu``. The output scores
    ``W2 @ A1 + b2`` are fed to ``softmax`` without normalisation.

    Args:
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.
        X: Input features with one example per column.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the normalised hidden pre-activation,
        the hidden activation, the output scores and the softmax output.
    """
    # Hidden layer: affine map, then per-column standardisation, then ReLU.
    Z1 = normalization(np.dot(W1, X) + b1)
    A1 = ReLu(Z1)

    # Output layer: affine map followed directly by softmax.
    Z2 = np.dot(W2, A1) + b2
    return Z1, A1, Z2, softmax(Z2)

In [8]:
def backward_prop(A2, Y, A1, W2, Z1, X):
    """Backward pass: gradients of the loss w.r.t. both layers' parameters.

    The gradients are averaged over the examples in the batch. The batch
    size is read from ``A2`` (one column per example) rather than from the
    notebook-level global ``m``, so the function gives correctly scaled
    gradients for any batch and no longer depends on hidden notebook state.
    For the full training set the result is unchanged.

    NOTE(review): the forward pass standardises ``Z1`` with
    ``normalization``, but no derivative of that step is applied here; the
    hidden-layer gradient treats the normalised ``Z1`` as if it were the raw
    affine output. Confirm whether this approximation is intended.

    Args:
        A2: Softmax output of the network, one column per example.
        Y: Integer class labels for the batch (passed to ``one_hot``).
        A1: Hidden-layer activation.
        W2: Output-layer weights.
        Z1: Hidden-layer pre-activation as returned by the forward pass.
        X: Input features with one example per column.

    Returns:
        Tuple ``(dw2, dw1, db2, db1)`` in that order.
    """
    batch_size = A2.shape[1]

    # Output layer: softmax output minus the one-hot targets.
    dz2 = A2 - one_hot(Y)
    dw2 = 1 / batch_size * np.dot(dz2, A1.T)
    db2 = 1 / batch_size * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer: back-propagate through W2 and mask with the ReLU derivative.
    dz1 = np.dot(W2.T, dz2) * ReLu_deriv(Z1)
    dw1 = 1 / batch_size * np.dot(dz1, X.T)
    db1 = 1 / batch_size * np.sum(dz1, axis=1, keepdims=True)
    return dw2, dw1, db2, db1
    

In [9]:
# Network layout: 784 inputs -> 64 hidden units -> 10 output classes.
W1, b1 = params_init(64, 784)
W2, b2 = params_init(10, 64)

# Full-batch gradient descent: every iteration runs one forward and one
# backward pass over the whole training set and then updates all parameters.
for i in range(iteration):
    Z1, A1, Z2, A2 = foward_prop(W1, b1, W2, b2, X_train)
    dw2, dw1, db2, db1 = backward_prop(A2, Y_train, A1, W2, Z1, X_train)
    W1, b1 = W1 - learning_rate * dw1, b1 - learning_rate * db1
    W2, b2 = W2 - learning_rate * dw2, b2 - learning_rate * db2



In [10]:
def predict(W1, b1, W2, b2, X):
    """Predict a class index for every column of ``X``.

    Runs the forward pass and, for each example, picks the row index with
    the largest softmax output.

    Args:
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.
        X: Input features with one example per column.

    Returns:
        1-D array of predicted class indices, one per example.
    """
    # Only the last element of the forward pass (the softmax output) is needed.
    probabilities = foward_prop(W1, b1, W2, b2, X)[-1]
    return np.argmax(probabilities, axis=0)
    

In [11]:
# Accuracy on the dev set: fraction of held-out examples predicted correctly.
Y_predict_dev = predict(W1, b1, W2, b2, X_dev)
(Y_predict_dev == Y_dev).sum() / Y_dev.size

0.957

In [12]:
# Predict labels for the unlabelled test set; the trailing expression shows
# a preview of the result as the cell output.
Y_predict = predict(W1, b1, W2, b2, X=X_test)
Y_predict

array([2, 0, 9, ..., 3, 9, 2])

In [13]:
# Build the submission table: ImageId counts from 1 in test-set row order,
# Label is the predicted class. The table is written without the index column.
image_id = np.arange(1, Y_predict.size + 1)
submission = pd.DataFrame({'ImageId': image_id, 'Label': Y_predict})
submission.to_csv('submission.csv', index=False)