In [22]:
import numpy as np
import pandas as pd

In [23]:
# Read the training CSV into a DataFrame; later cells convert it to an ndarray
# and use column 0 as the target and the remaining columns as inputs.
train_csv_path = 'Data/digit-recognizer/train.csv'
data = pd.read_csv(train_csv_path)

In [24]:
# Work on a plain ndarray: column 0 is used as the target, the remaining
# columns as the inputs (see the slices below).
data = np.array(data)

# Seed NumPy's global RNG so the shuffled split below (and any later
# np.random draws in this kernel) are identical on every Restart & Run All.
# Without a seed, each run trains and validates on different rows.
SEED = 42
np.random.seed(SEED)
np.random.shuffle(data)  # shuffles the rows in place

val_rate = 0.2  # fraction of rows held out for validation
val_num = int(data.shape[0] * val_rate)

m, n = data.shape

# The first val_num shuffled rows form the validation split, the rest train.
x_val = data[:val_num, 1:]
t_val = data[:val_num, 0]
x_train = data[val_num:, 1:]
t_train = data[val_num:, 0]

In [25]:
def init_params(n_in=784, n_hidden=10, n_out=10):
    """Create initial parameters for a two-layer fully connected network.

    Weights are drawn from a standard normal distribution (NumPy's global
    RNG) and scaled by 0.01; biases start at zero.

    Parameters
    ----------
    n_in : int, default 784
        Number of input features.
    n_hidden : int, default 10
        Number of hidden units.
    n_out : int, default 10
        Number of output units.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` with shapes ``(n_in, n_hidden)``,
        ``(1, n_hidden)``, ``(n_hidden, n_out)`` and ``(1, n_out)``.
    """
    # W1 is drawn before W2 so the default call consumes the RNG stream in
    # the same order as the original hard-coded version.
    W1 = np.random.randn(n_in, n_hidden) * 0.01
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden, n_out) * 0.01
    b2 = np.zeros((1, n_out))

    return W1, b1, W2, b2

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def softmax(x):
    """Softmax of ``x``.

    A 2-D array is normalized row by row (each row sums to 1). An array of
    any other rank is normalized over all of its elements at once. The
    maximum is subtracted before exponentiating so large inputs do not
    overflow ``exp``.
    """
    if x.ndim != 2:
        shifted = x - np.max(x)
        weights = np.exp(shifted)
        return weights / np.sum(weights)

    # Work on the transpose so each original row becomes a column, reduce
    # down the columns, then transpose back.
    cols = x.T
    cols = cols - np.max(cols, axis=0)
    weights = np.exp(cols)
    return (weights / np.sum(weights, axis=0)).T

class propagation:
    """Forward and backward pass of a two-layer network.

    The hidden layer uses ReLU and the output layer uses softmax.
    ``forward`` caches the intermediate values on the instance and
    ``backward`` reads them, so ``backward`` must be called after
    ``forward`` on the same instance.
    """

    def __init__(self):
        # Caches populated by forward()/backward(); all start empty.
        self.x = None    # input batch passed to forward()
        self.W2 = None   # second-layer weights passed to forward()
        self.A1 = None   # hidden pre-activation: x . W1 + b1
        self.Z1 = None   # hidden activation: relu(A1)
        self.A2 = None   # output pre-activation: Z1 . W2 + b2
        self.Y = None    # network output: softmax(A2)
        self.T = None    # one-hot targets built by backward()

    def forward(self, x, W1, b1, W2, b2):
        """Run the network on ``x`` and return the softmax output.

        Stores ``x``, ``W2`` and every intermediate (``A1``, ``Z1``,
        ``A2``, ``Y``) on the instance for use by ``backward``.
        """
        self.x, self.W2 = x, W2

        hidden_pre = np.dot(x, W1) + b1
        hidden_act = relu(hidden_pre)
        self.A1, self.Z1 = hidden_pre, hidden_act

        output_pre = np.dot(hidden_act, W2) + b2
        self.A2 = output_pre
        self.Y = softmax(output_pre)

        return self.Y

    def backward(self, t):
        """Compute parameter gradients for the batch last seen by ``forward``.

        ``t`` holds the integer class labels; it is one-hot encoded and
        kept in ``self.T``. Returns ``(dW1, dW2, db1, db2)`` (note the
        order: both weight gradients first, then both bias gradients).

        Shape comments below assume the default 784-10-10 layout with a
        batch of N rows.
        """
        self.T = one_hot(t)

        # NOTE(review): T.size counts every one-hot entry (rows x columns),
        # not just the number of rows, so the gradients are scaled by
        # 1 / (N * num_classes) rather than 1 / N. Left unchanged because
        # the learning rate used by callers is presumably tuned against
        # this scale -- confirm before changing.
        scale = self.T.size
        active = np.array(self.Z1 > 0, dtype=int)   # ReLU derivative mask

        dA2 = (self.Y - self.T) / scale             # N x 10
        dZ1 = np.dot(dA2, self.W2.T)                # N x 10
        dA1 = dZ1 * active                          # N x 10

        dW2 = np.dot(self.Z1.T, dA2)                # 10 x 10
        db2 = np.sum(dA2, axis=0)
        dW1 = np.dot(self.x.T, dA1)                 # 784 x 10
        db1 = np.sum(dA1, axis=0)

        return dW1, dW2, db1, db2
def one_hot(x, num_classes=10):
    """One-hot encode integer class labels.

    Parameters
    ----------
    x : array-like of int
        Class labels in ``[0, num_classes)``. Any shape is accepted (for
        example ``(N,)`` or ``(N, 1)``); it is flattened to one label per
        output row. Labels must have an integer dtype because they are
        used as array indices.
    num_classes : int, default 10
        Number of columns in the encoding.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, num_classes)`` with a single 1 per row.
    """
    # reshape() returns a new array rather than modifying its operand, so
    # the result has to be kept. Flattening also stops a column-vector
    # input from broadcasting inside the fancy index below.
    labels = np.asarray(x).reshape(-1)
    batch_size = labels.size
    t = np.zeros((batch_size, num_classes))
    t[np.arange(batch_size), labels] = 1

    return t

def SGD(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    Each parameter is replaced by ``param - learning_rate * grad``. The
    inputs are not modified; new arrays are returned.

    Parameters
    ----------
    W1, b1, W2, b2 : numpy.ndarray
        Current weights and biases.
    dW1, dW2, db1, db2 : numpy.ndarray
        Gradients for the matching parameters. A bias gradient may have any
        shape with the same number of elements as its bias (for example
        ``(10,)`` for a ``(1, 10)`` bias).
    learning_rate : float
        Step size.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` after the update.
    """
    lr = learning_rate

    # reshape() does not work in place, so its result must be used. Matching
    # each bias gradient to its bias's shape keeps the bias shape fixed even
    # when the gradient arrives as a column or a flat vector.
    db1 = np.reshape(db1, np.shape(b1))
    db2 = np.reshape(db2, np.shape(b2))

    W1 = W1 - lr * dW1
    W2 = W2 - lr * dW2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2

    return W1, b1, W2, b2


def prediction(Y):
    """Return, for every row of ``Y``, the column index of its largest value."""
    best_class = np.argmax(Y, axis=1)
    return best_class

def accuracy(Y, t):
    """Fraction of rows in ``Y`` whose arg-max class equals the label in ``t``.

    Parameters
    ----------
    Y : numpy.ndarray
        2-D array of per-class scores, one row per sample.
    t : array-like
        True class labels, one per row of ``Y``. Any shape with one label
        per sample is accepted (for example ``(N,)`` or ``(N, 1)``).

    Returns
    -------
    float
        Number of matching predictions divided by the number of predictions.
    """
    K = prediction(Y)
    # reshape() returns a new array, so the flattened labels must be kept.
    # Without this a column-vector ``t`` would broadcast ``K == t`` to an
    # N x N comparison and inflate the count.
    labels = np.asarray(t).reshape(-1)
    return np.sum(K == labels) / K.size


def train_network(x, t, iter, learning_rate):
    """Train the two-layer network with full-batch gradient descent.

    Every iteration runs a forward pass over all of ``x``, back-propagates
    against ``t`` and applies one ``SGD`` update. Every 10th iteration
    (starting with iteration 0) the iteration number and the training
    accuracy are printed; that accuracy comes from the forward pass made
    before the iteration's update.

    Parameters
    ----------
    x : numpy.ndarray
        Training inputs, one sample per row.
    t : numpy.ndarray
        Integer class labels for ``x``.
    iter : int
        Number of iterations. (The name shadows the built-in ``iter``; it
        is kept so existing keyword callers continue to work.)
    learning_rate : float
        Step size passed to ``SGD``.

    Returns
    -------
    tuple
        The trained ``(W1, b1, W2, b2)``.
    """
    net = propagation()
    weights = init_params()  # (W1, b1, W2, b2)

    for step in range(iter):
        probs = net.forward(x, *weights)
        grads = net.backward(t)  # (dW1, dW2, db1, db2)
        weights = SGD(*weights, *grads, learning_rate)

        if step % 10 == 0:
            print('iteration: ', step)
            print('accuracy: ', accuracy(probs, t))

    W1, b1, W2, b2 = weights
    return W1, b1, W2, b2

def test_network(x, t, W1, b1, W2, b2):
    """Evaluate the given parameters on ``x`` against the labels ``t``.

    Prints the network's output array and then the accuracy, and returns
    the predicted class index for every row of ``x``.
    """
    probs = propagation().forward(x, W1, b1, W2, b2)
    print(probs)
    print(accuracy(probs, t))
    return prediction(probs)

In [26]:
# Train on the training split: 500 iterations with a learning rate of 0.02.
W1, b1, W2, b2 = train_network(x_train, t_train, iter=500, learning_rate=0.02)

iteration:  0
accuracy:  0.09601190476190476
iteration:  10
accuracy:  0.46098214285714284
iteration:  20
accuracy:  0.729047619047619
iteration:  30
accuracy:  0.7713095238095238
iteration:  40
accuracy:  0.7566964285714286
iteration:  50
accuracy:  0.7956547619047619
iteration:  60
accuracy:  0.8045238095238095
iteration:  70
accuracy:  0.846279761904762
iteration:  80
accuracy:  0.8486011904761904
iteration:  90
accuracy:  0.8582440476190476
iteration:  100
accuracy:  0.8700297619047619
iteration:  110
accuracy:  0.8745238095238095
iteration:  120
accuracy:  0.8760416666666667
iteration:  130
accuracy:  0.8786309523809523
iteration:  140
accuracy:  0.8820535714285714
iteration:  150
accuracy:  0.8651190476190476
iteration:  160
accuracy:  0.8863392857142857
iteration:  170
accuracy:  0.8927678571428571
iteration:  180
accuracy:  0.8947619047619048
iteration:  190
accuracy:  0.8966964285714286
iteration:  200
accuracy:  0.8982440476190476
iteration:  210
accuracy:  0.8863988095238096

In [27]:
# Evaluate the trained parameters on the held-out validation split and keep
# the predicted class for each validation row.
test_ans = test_network(x=x_val, t=t_val, W1=W1, b1=b1, W2=W2, b2=b2)

[[2.04532061e-06 8.80571798e-07 4.24430556e-01 ... 1.07865273e-05
  2.04886677e-01 6.08274687e-04]
 [3.60152958e-06 9.71354305e-01 1.05002263e-02 ... 6.57232805e-04
  1.29179001e-02 1.30493128e-05]
 [1.52791461e-06 1.48888347e-08 1.76616388e-03 ... 1.65766898e-08
  1.31033502e-05 2.36364747e-06]
 ...
 [2.97354983e-08 5.51679343e-11 4.89400408e-08 ... 9.99952706e-01
  1.39490785e-08 4.68616363e-05]
 [1.75203161e-04 6.70163922e-11 3.32125608e-01 ... 9.98511026e-07
  7.61102001e-05 1.67488080e-04]
 [4.37280906e-06 4.34990263e-09 2.93296280e-03 ... 1.05298396e-06
  3.48338808e-04 4.35841762e-03]]
0.9113095238095238


In [28]:
# Show the predicted class for each validation row (NumPy abbreviates long arrays).
print(test_ans)

[2 1 6 ... 7 4 6]


In [29]:
def submission(x, W1, b1, W2, b2):
    """Return the predicted class index for every row of ``x``.

    Runs one forward pass with the given parameters and takes the arg-max
    of each output row.
    """
    net = propagation()
    return prediction(net.forward(x, W1, b1, W2, b2))

# Load the test set; every column of every row is fed to the network as input.
test_data = pd.read_csv('Data/digit-recognizer/test.csv')
test_data = np.array(test_data)
m, n = test_data.shape

# Run inference once and reuse the result for both the printout and the CSV
# (previously the forward pass over the whole test set ran twice).
ans = submission(test_data, W1, b1, W2, b2)
print(ans)

# ImageId is 1-based and follows the row order of the test file.
submission_dict = {"ImageId": np.arange(1, m + 1), "Label": ans}
Submission = pd.DataFrame(submission_dict)
Submission.to_csv('submission.csv', index=False)

[2 0 9 ... 3 9 2]
