In [31]:
import numpy as np
import pandas as pd

In [32]:
# Load the labelled training CSV. Later cells treat column 0 as the digit label
# and every remaining column as an input feature.
data = pd.read_csv('Data/digit-recognizer/train.csv')


In [33]:
# Seed NumPy's global generator so the shuffle below (and any later
# np.random draw made through the same global generator) is identical on
# every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# np.array makes a copy, so the in-place shuffle only reorders this array and
# re-running the cell is safe.
data = np.array(data)
np.random.shuffle(data)  # reorders rows only; each row keeps its own label and pixels

val_rate = 0.2  # fraction of rows held out for validation
val_num = int(data.shape[0] * val_rate)

m, n = data.shape  # m rows, n columns (label column + feature columns)

# Column 0 is the label, the remaining columns are the features.
# The first val_num shuffled rows form the validation split, the rest the training split.
x_val = data[:val_num, 1:]
t_val = data[:val_num, 0]
x_train = data[val_num:, 1:]
t_train = data[val_num:, 0]

# Every row must land in exactly one of the two splits.
assert x_val.shape[0] + x_train.shape[0] == m

In [34]:
def init_params(n_input=784, n_hidden=10, n_output=10, scale=0.01):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution (NumPy's global
    generator) and multiplied by ``scale``; biases start at zero. Called
    without arguments it produces the 784 -> 10 -> 10 layout.

    Args:
        n_input: Number of input features per sample.
        n_hidden: Number of hidden units.
        n_output: Number of output classes. The rest of this notebook one-hot
            encodes labels into 10 classes, so keep this at 10 unless that
            encoding is changed as well.
        scale: Multiplier applied to the random weights.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_input, n_hidden)``,
        ``(1, n_hidden)``, ``(n_hidden, n_output)`` and ``(1, n_output)``.
    """
    # W1 is drawn before W2 so a seeded run yields the same values as always.
    W1 = np.random.randn(n_input, n_hidden) * scale
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden, n_output) * scale
    b2 = np.zeros((1, n_output))

    return W1, b1, W2, b2

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified

def softmax(x):
    """Numerically stable softmax.

    A 2-D input is normalised row by row, giving one probability distribution
    per row. An input of any other dimensionality is normalised over all of
    its elements, which is the expected behaviour for a single 1-D score
    vector.

    Args:
        x: NumPy array of scores.

    Returns:
        Array of the same shape as ``x`` with non-negative entries that sum
        to 1 along each row (2-D) or over the whole array (otherwise).
    """
    if x.ndim == 2:
        # Subtracting the row maximum does not change the result but keeps
        # np.exp from overflowing on large scores. The exponentials are
        # computed once and reused for numerator and denominator.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

class propagation:
    """Forward and backward pass of a two-layer network.

    The network is affine -> ReLU -> affine -> softmax. ``forward`` caches
    the intermediate values on the instance; ``backward`` reads them, so it
    must be called after ``forward`` on the same instance.
    """

    def __init__(self):
        # Caches; all None until forward()/backward() fill them in.
        self.A1 = None  # first affine output (before ReLU)
        self.Z1 = None  # relu(A1)
        self.A2 = None  # second affine output (class scores)
        self.Y = None   # softmax(A2)
        self.T = None   # encoded targets, set by backward()
        self.x = None   # input batch
        self.W2 = None  # second-layer weights, needed to back-propagate into layer 1

    def forward(self, x, W1, b1, W2, b2):
        """Run the network on ``x`` and return the softmax output.

        Stores ``x``, ``W2`` and every intermediate result on the instance
        for use by ``backward``.
        """
        self.x, self.W2 = x, W2
        self.A1 = np.dot(x, W1) + b1
        self.Z1 = relu(self.A1)
        self.A2 = np.dot(self.Z1, W2) + b2
        self.Y = softmax(self.A2)
        return self.Y

    def backward(self, t):
        """Compute parameter gradients for the batch passed to ``forward``.

        Args:
            t: Labels for that batch; encoded with ``num_key`` before use.

        Returns:
            Tuple ``(dW1, dW2, db1, db2)``. Note the order: both weight
            gradients come first, then both bias gradients.
        """
        self.T = num_key(t)

        # NOTE(review): T is the encoded target matrix, so T.size counts
        # samples * classes rather than samples. The gradients are therefore
        # smaller than the batch-mean gradient by a factor of the class count.
        # Left unchanged on purpose: any learning rate used with this code
        # absorbs that factor, so switching to self.T.shape[0] needs the
        # learning rate re-tuned at the same time.
        scale = self.T.size

        grad_scores = (self.Y - self.T) / scale            # N x classes
        dW2 = np.dot(self.Z1.T, grad_scores)               # hidden x classes
        db2 = np.sum(grad_scores, axis=0)

        grad_hidden = np.dot(grad_scores, self.W2.T)       # N x hidden
        # ReLU passes gradient only where its output was positive.
        grad_pre = grad_hidden * (self.Z1 > 0).astype(int)  # N x hidden
        db1 = np.sum(grad_pre, axis=0)
        dW1 = np.dot(self.x.T, grad_pre)                   # features x hidden

        return dW1, dW2, db1, db2
def num_key(x, num_classes=10):
    """One-hot encode integer class labels.

    Args:
        x: Array-like of integer labels. Any shape is accepted (for example
            ``(N,)`` or an ``(N, 1)`` column); it is flattened before
            encoding so that each label yields exactly one row.
        num_classes: Width of the encoding. Defaults to 10.

    Returns:
        Float array of shape ``(number of labels, num_classes)`` holding a 1
        in the column given by each label and 0 elsewhere.

    Raises:
        IndexError: If a label is not a valid column index (for example it is
            ``>= num_classes`` or not an integer).
    """
    # ndarray.reshape returns a new array; the result has to be kept, otherwise
    # a column-shaped input broadcasts against the row index below and marks
    # the wrong cells.
    labels = np.asarray(x).reshape(-1)
    batch_size = labels.size

    t = np.zeros((batch_size, num_classes))
    t[np.arange(batch_size), labels] = 1

    return t

def _match_shape(grad, param):
    """Return ``grad`` reshaped like ``param`` when both hold the same number of elements."""
    grad = np.asarray(grad)
    if grad.size == np.size(param):
        return grad.reshape(np.shape(param))
    # Different element counts: leave it alone and let broadcasting decide.
    return grad


def SGD(W1, b1, W2, b2, dW1, dW2, db1, db2, learning_rate):
    """Apply one plain gradient-descent step to all four parameters.

    Args:
        W1, b1, W2, b2: Current weights and biases.
        dW1, dW2, db1, db2: Gradients of the loss with respect to them.
        learning_rate: Step size.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of updated parameters. The inputs are not
        modified.
    """
    lr = learning_rate

    # ndarray.reshape returns a new array, so its result has to be used. The
    # bias gradients are given the exact shape of their bias so the
    # subtraction can never broadcast a bias row into a 2-D matrix.
    db1 = _match_shape(db1, b1)
    db2 = _match_shape(db2, b2)

    W1 = W1 - lr * dW1
    W2 = W2 - lr * dW2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2

    return W1, b1, W2, b2


def prediction(Y):
    """Return, for each row of ``Y``, the column index of its largest value."""
    labels = np.argmax(Y, axis=1)
    return labels

def accuracy(Y, t):
    """Fraction of samples whose predicted class equals the true label.

    Args:
        Y: Network output; passed to ``prediction`` to obtain one class per row.
        t: True labels, one per row of ``Y``. Any shape with the right number
            of elements is accepted (for example ``(N,)`` or ``(N, 1)``).

    Returns:
        The share of matching predictions as a float.

    Raises:
        ValueError: If ``t`` does not hold exactly one label per prediction.
    """
    K = prediction(Y)

    # ndarray.reshape returns a new array, so the flattened labels must be
    # kept. Comparing against an (N, 1) column would otherwise broadcast to an
    # N x N matrix and yield a meaningless ratio.
    labels = np.asarray(t).reshape(-1)
    if labels.size != K.size:
        raise ValueError(
            "accuracy: got %d labels for %d predictions" % (labels.size, K.size)
        )

    return np.mean(K == labels)


def train_network(x, t, iter, learning_rate):
    """Train the two-layer network with full-batch gradient descent.

    Every iteration runs a forward pass over all of ``x``, a backward pass
    against ``t`` and one ``SGD`` update. On every 10th iteration (starting
    with iteration 0) the iteration number and the accuracy of that
    iteration's forward pass are printed.

    Args:
        x: Input samples, one per row.
        t: Labels for ``x``.
        iter: Number of iterations (the name shadows the builtin inside this
            function; kept because it is part of the signature).
        learning_rate: Step size handed to ``SGD``.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.
    """
    W1, b1, W2, b2 = init_params()
    net = propagation()

    for step in range(iter):
        probs = net.forward(x, W1, b1, W2, b2)
        # backward() returns (dW1, dW2, db1, db2), which is exactly the order
        # in which SGD expects its gradient arguments.
        grads = net.backward(t)
        W1, b1, W2, b2 = SGD(W1, b1, W2, b2, *grads, learning_rate)

        if step % 10:
            continue
        # The reported accuracy belongs to the parameters before this update.
        print('iteration: ', step)
        print('accuracy: ', accuracy(probs, t))

    return W1, b1, W2, b2

def test_network(x, t, W1, b1, W2, b2):
    prop = propagation()
    Y = prop.forward(x, W1, b1, W2, b2)
    print(Y)
    print(accuracy(Y, t))
    return prediction(Y)

In [35]:
# Train on the training split: 500 full-batch iterations with a learning rate of 0.02.
W1, b1, W2, b2 = train_network(x_train, t_train, 500, 0.02)

iteration:  0
accuracy:  0.08431547619047619
iteration:  10
accuracy:  0.5217261904761905
iteration:  20
accuracy:  0.7025
iteration:  30
accuracy:  0.6442857142857142
iteration:  40
accuracy:  0.7520535714285714
iteration:  50
accuracy:  0.76625
iteration:  60
accuracy:  0.8111607142857142
iteration:  70
accuracy:  0.8363095238095238
iteration:  80
accuracy:  0.8559523809523809
iteration:  90
accuracy:  0.8654761904761905
iteration:  100
accuracy:  0.8475
iteration:  110
accuracy:  0.8711904761904762
iteration:  120
accuracy:  0.8851488095238095
iteration:  130
accuracy:  0.8793452380952381
iteration:  140
accuracy:  0.8657142857142858
iteration:  150
accuracy:  0.8923511904761905
iteration:  160
accuracy:  0.8962797619047619
iteration:  170
accuracy:  0.8940178571428572
iteration:  180
accuracy:  0.8697916666666666
iteration:  190
accuracy:  0.8980357142857143
iteration:  200
accuracy:  0.9039880952380952
iteration:  210
accuracy:  0.9053869047619048
iteration:  220
accuracy:  0.9063

In [36]:
# Evaluate the trained parameters on the held-out validation split and keep
# the predicted label of every validation row.
test_ans=test_network(x_val, t_val, W1, b1, W2, b2)

[[4.61389560e-05 4.66741587e-03 9.65137089e-01 ... 1.19421593e-04
  2.43262114e-02 9.59739121e-07]
 [2.67578594e-07 6.23185625e-07 1.31227856e-04 ... 1.01745395e-07
  9.99479590e-01 1.07619431e-04]
 [3.03942409e-05 1.50808034e-07 3.45742594e-05 ... 2.31895639e-03
  3.76551657e-03 9.48758864e-01]
 ...
 [5.54097314e-04 7.91572155e-06 9.93410874e-01 ... 2.11843871e-03
  3.37717373e-04 8.81015634e-04]
 [1.16892963e-05 1.64548763e-09 9.99685954e-01 ... 2.51912821e-06
  2.70896046e-07 5.46231676e-09]
 [6.58213564e-08 3.02337340e-01 6.67868253e-05 ... 6.02916534e-01
  3.89383794e-03 8.89053218e-02]]
0.9125


In [37]:
# Show the predicted validation labels (NumPy abbreviates long arrays with "...").
print(test_ans)

[2 8 9 ... 2 2 7]


In [38]:
def submission(x, W1, b1, W2, b2):
    """Return the predicted class for every row of ``x`` under the given parameters."""
    probs = propagation().forward(x, W1, b1, W2, b2)
    return prediction(probs)

# Load the competition test set. Unlike the training data, every column is fed
# to the network as a feature (no label column is split off).
test_data = pd.read_csv('Data/digit-recognizer/test.csv')
test_data = np.array(test_data)
m, n = test_data.shape

# Run the network once and reuse the result for both the printout and the CSV.
ans = submission(test_data, W1, b1, W2, b2)
print(ans)

# ImageId is 1-based: row i of the test set gets id i + 1.
submission_dict = {"ImageId": np.arange(1, m + 1), "Label": ans}
Submission = pd.DataFrame(submission_dict)
Submission.to_csv('submission.csv', index=False)

# Last expression of the cell, so the notebook actually renders the preview.
Submission.head()

[2 0 9 ... 3 9 2]
