In [2]:
import numpy as np
import pandas as pd

In [3]:
data = pd.read_csv('Data/digit-recognizer/train.csv')


In [4]:
data = np.array(data)
np.random.shuffle(data)
val_rate = 0.2
val_num = int(data.shape[0] * val_rate)

m, n=data.shape

x_val = data[:val_num, 1:]
t_val = data[:val_num, 0]
x_train = data[val_num: , 1:]
t_train = data[val_num: , 0]

In [5]:
def init_params():
    W1 = np.random.randn(784, 10) *0.01
    b1 = np.zeros((1,10))
    W2 = np.random.randn(10, 10) *0.01
    b2 = np.zeros((1,10))
    
    return W1, b1, W2, b2

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 
    x = x - np.max(x) 
    return np.exp(x) / np.sum(np.exp(x))

class propagation:
    def __init__(self):
        self.A1 = None
        self.Z1 = None
        self.A2 = None
        self.Y = None
        self.T = None
        self.x = None
        self.W2 = None
    
    def forward(self, x, W1, b1, W2, b2):
        self.x = x
        self.W2 = W2
        self.A1 = np.dot(self.x, W1) + b1
        self.Z1 = relu(self.A1)
        self.A2 = np.dot(self.Z1, W2) + b2
        self.Y = softmax(self.A2)
    
        return self.Y
    
    def backward(self, t):
        self.T = num_key(t)
        m = self.T.size
        dA2 = (self.Y - self.T) / m
        dW2 = np.dot(self.Z1.T, dA2)
        db2 = np.sum(dA2, axis = 0)    
        dZ1 = np.dot(dA2, self.W2.T)
        dA1 = dZ1 * relu(self.Z1) 
        db1 = np.sum(dA1, axis = 0) 
        dW1 = np.dot(self.x.T, dA1)
        
        return dW1, dW2, db1, db2
def num_key(x):
    x.reshape(1, x.size)
    batch_size = len(x)
    t = np.zeros((batch_size, 10))
    t[np.arange(batch_size), x] = 1
    
    return t    

def update(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate):
    db1.reshape(1, db1.size)
    db2.reshape(1, db2.size)
    lr = learning_rate
    W1 = W1 - lr * dW1
    W2 = W2 - lr * dW2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2
    
    return W1, b1, W2, b2


def prediction(Y):
    return np.argmax(Y, axis = 1)

def accuracy(Y, t):
    K = prediction(Y)
    t.reshape(1, t.size)
    return np.sum(K == t) / K.size


def train_network(x, t, iter, learning_rate):
    W1, b1, W2, b2 = init_params()
    prop = propagation()
    
    for i in range(iter):
        Y = prop.forward(x, W1, b1, W2, b2)
        dW1, dW2, db1, db2 = prop.backward(t)
        W1, b1, W2, b2 = update(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate)
        if (i%10 == 0):
            print('iteration: ', i)
            print('accuracy: ', accuracy(Y, t))
            
            
    return W1, b1, W2, b2

def test_network(x, t, W1, b1, W2, b2):
    prop = propagation()
    Y = prop.forward(x, W1, b1, W2, b2)
    print(Y)
    print(accuracy(Y, t))
    return prediction(Y)

In [6]:
W1, b1, W2, b2 = train_network(x_train, t_train, 500, 0.02)

iteration:  0
accuracy:  0.12321428571428572
iteration:  10
accuracy:  0.11678571428571428
iteration:  20
accuracy:  0.10529761904761904
iteration:  30
accuracy:  0.10529761904761904
iteration:  40
accuracy:  0.10529761904761904
iteration:  50
accuracy:  0.10529761904761904
iteration:  60
accuracy:  0.10422619047619047
iteration:  70
accuracy:  0.10422619047619047
iteration:  80
accuracy:  0.10422619047619047
iteration:  90
accuracy:  0.10422619047619047
iteration:  100
accuracy:  0.10422619047619047
iteration:  110
accuracy:  0.10419642857142856
iteration:  120
accuracy:  0.10419642857142856
iteration:  130
accuracy:  0.10419642857142856
iteration:  140
accuracy:  0.11095238095238096
iteration:  150
accuracy:  0.11095238095238096
iteration:  160
accuracy:  0.11098214285714286
iteration:  170
accuracy:  0.11098214285714286
iteration:  180
accuracy:  0.11104166666666666
iteration:  190
accuracy:  0.11104166666666666
iteration:  200
accuracy:  0.11104166666666666
iteration:  210
accuracy

In [7]:
test_ans=test_network(x_val, t_val, W1, b1, W2, b2)

[[0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]
 [0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]
 [0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]
 ...
 [0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]
 [0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]
 [0.09976305 0.10093889 0.09999545 ... 0.10049951 0.09960984 0.09999008]]
0.11


In [8]:
def submission(x, W1, b1, W2, b2):
    prop = propagation()
    Y = prop.forward(x, W1, b1, W2, b2)
    
    return prediction(Y)

test_data = pd.read_csv('Data/digit-recognizer/test.csv')
test_data = np.array(test_data)
m, n = test_data.shape

ans=submission(test_data, W1, b1, W2, b2)
print(ans)

submission_dict = {"ImageId":np.array(range(1,m+1)),"Label":submission(test_data, W1, b1, W2, b2)}
Submission = pd.DataFrame(submission_dict)
Submission.head()
Submission.to_csv('submission.csv', index=False)

[1 1 1 ... 1 1 1]
