In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('Data/digit-recognizer/train.csv')

In [3]:
data = np.array(data)
np.random.shuffle(data)
val_rate = 0.2
val_num = int(data.shape[0] * val_rate)

m, n=data.shape

x_val = data[:val_num, 1:]
t_val = data[:val_num, 0]
x_train = data[val_num: , 1:]
t_train = data[val_num: , 0]

In [4]:
def init_params():
    W1 = np.random.randn(784, 10) *0.01
    b1 = np.zeros((1,10))
    W2 = np.random.randn(10, 10) *0.01
    b2 = np.zeros((1,10))
    
    return W1, b1, W2, b2

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 
    x = x - np.max(x) 
    return np.exp(x) / np.sum(np.exp(x))

class propagation:
    def __init__(self):
        self.A1 = None
        self.Z1 = None
        self.A2 = None
        self.Y = None
        self.T = None
        self.x = None
        self.W2 = None
    
    def forward(self, x, W1, b1, W2, b2):
        self.x = x
        self.W2 = W2
        self.A1 = np.dot(self.x, W1) + b1
        self.Z1 = relu(self.A1)
        self.A2 = np.dot(self.Z1, W2) + b2
        self.Y = softmax(self.A2)
    
        return self.Y
    
    def backward(self, t):
        self.T = one_hot(t)
        m = self.T.size
        dA2 = (self.Y - self.T) / m       #Nx10
        dW2 = np.dot(self.Z1.T, dA2)   #10x10
        db2 = np.sum(dA2, axis = 0)    
        dZ1 = np.dot(dA2, self.W2.T)  #ㅒNx10
        dA1 = dZ1 * np.array(self.Z1 > 0, dtype=int) #Nx10
        #dA1 = dZ1 * d_relu(self.Z1) 
        db1 = np.sum(dA1, axis = 0) 
        dW1 = np.dot(self.x.T, dA1)    #784x10
        
        return dW1, dW2, db1, db2
def one_hot(x):
    x.reshape(1, x.size)
    batch_size = len(x)
    t = np.zeros((batch_size, 10))
    t[np.arange(batch_size), x] = 1
    
    return t    

def SGD(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate):
    db1.reshape(1, db1.size)
    db2.reshape(1, db2.size)
    lr = learning_rate
    W1 = W1 - lr * dW1
    W2 = W2 - lr * dW2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2
    
    return W1, b1, W2, b2


def prediction(Y):
    return np.argmax(Y, axis = 1)

def accuracy(Y, t):
    K = prediction(Y)
    t.reshape(1, t.size)
    return np.sum(K == t) / K.size


def train_network(x, t, iter, learning_rate):
    W1, b1, W2, b2 = init_params()
    prop = propagation()
    
    for i in range(iter):
        Y = prop.forward(x, W1, b1, W2, b2)
        dW1, dW2, db1, db2 = prop.backward(t)
        W1, b1, W2, b2 = SGD(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate)
        if (i%10 == 0):
            print('iteration: ', i)
            print('accuracy: ', accuracy(Y, t))
            
            
    return W1, b1, W2, b2

def test_network(x, t, W1, b1, W2, b2):
    prop = propagation()
    Y = prop.forward(x, W1, b1, W2, b2)
    print(Y)
    print(accuracy(Y, t))
    return prediction(Y)

In [5]:
W1, b1, W2, b2 = train_network(x_train, t_train, 1000, 0.02)

iteration:  0
accuracy:  0.10982142857142857
iteration:  10
accuracy:  0.48386904761904764
iteration:  20
accuracy:  0.5638690476190477
iteration:  30
accuracy:  0.7631845238095238
iteration:  40
accuracy:  0.7408630952380952
iteration:  50
accuracy:  0.7605357142857143
iteration:  60
accuracy:  0.7500892857142857
iteration:  70
accuracy:  0.8295238095238096
iteration:  80
accuracy:  0.8131845238095238
iteration:  90
accuracy:  0.8333333333333334
iteration:  100
accuracy:  0.8475595238095238
iteration:  110
accuracy:  0.8566964285714286
iteration:  120
accuracy:  0.8506547619047619
iteration:  130
accuracy:  0.8499107142857143
iteration:  140
accuracy:  0.8786904761904762
iteration:  150
accuracy:  0.8808333333333334
iteration:  160
accuracy:  0.875625
iteration:  170
accuracy:  0.8786904761904762
iteration:  180
accuracy:  0.8870833333333333
iteration:  190
accuracy:  0.8885416666666667
iteration:  200
accuracy:  0.877529761904762
iteration:  210
accuracy:  0.881547619047619
iteration

In [6]:
test_ans=test_network(x_val, t_val, W1, b1, W2, b2)

[[1.72676902e-03 1.22081855e-05 8.64675979e-04 ... 5.03166630e-01
  1.13437084e-03 4.51495772e-01]
 [1.14912573e-03 5.83971478e-05 3.70855295e-02 ... 2.01850119e-06
  1.71103443e-01 2.47941483e-03]
 [4.76235970e-05 1.54224096e-07 2.38411365e-02 ... 4.26419890e-02
  1.55115277e-03 8.71191666e-01]
 ...
 [1.71142064e-06 5.60047771e-05 9.20433041e-04 ... 5.35960390e-03
  2.83491562e-03 6.51280528e-02]
 [1.49453663e-08 9.89645327e-01 5.80203522e-03 ... 2.41405251e-04
  2.21566200e-03 1.15360453e-04]
 [8.57045715e-05 1.35150799e-11 1.91172926e-05 ... 7.37853733e-04
  2.41293673e-03 8.15442277e-01]]
0.9245238095238095


In [7]:
print(test_ans)

[7 6 9 ... 4 1 9]


In [8]:
def submission(x, W1, b1, W2, b2):
    prop = propagation()
    Y = prop.forward(x, W1, b1, W2, b2)
    
    return prediction(Y)

test_data = pd.read_csv('Data/digit-recognizer/test.csv')
test_data = np.array(test_data)
m, n = test_data.shape

ans=submission(test_data, W1, b1, W2, b2)
print(ans)

submission_dict = {"ImageId":np.array(range(1,m+1)),"Label":submission(test_data, W1, b1, W2, b2)}
Submission = pd.DataFrame(submission_dict)
Submission.head()
Submission.to_csv('submission.csv', index=False)

[2 0 9 ... 3 9 2]
