In [75]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [76]:
np.random.seed(42)

In [77]:
"""
ny: nbr of neurons in each layer

w1, b1 for input layer
w2, b2 for hidden layer
"""

def init_params(nx, nh, ny):
    w1 = np.random.normal(0, 0.3, size=(nh, nx))
    w2 = np.random.normal(0, 0.3, size=(ny, nh))
    b1 = np.zeros((nh, 1)).reshape(-1,1)
    b2 = np.zeros((ny, 1)).reshape(-1,1)

    return {
        'w1': w1, 'w2': w2, 
        'b1': b1, 'b2': b2, 
    }

In [78]:
def softmax(value):
    tmp = np.exp(value - np.max(value))
    return tmp / np.sum(tmp, axis=0)

def forward(params, x):
    w1, w2, b1, b2 = params['w1'], params['w1'], params['b1'], params['b2']

    a1 = x
    z1 = np.matmul(w1, a1.T) + b1
    a2 = np.tanh(z1)
    z2 = np.matmul(w2, a2) + b2
    a3 = softmax(z2)
    int_values = {
        'z1': z1, 'z2': z2,
        'a3': a3, 'a2': a2,
    }

    return a3, int_values

In [79]:
def loss_accuracy(y_pred,y_true):
    epsilon = 1e-10
    m = y_true.shape[0]
    
    loss = -np.sum(y_true * np.log(y_pred + epsilon))     
    loss = loss / m
    
    accuracy = np.mean(np.argmax(y_pred, axis=0) == np.argmax(y_true, axis=0))
    return loss,accuracy    

In [80]:
y_true = np.array([[1, 0.],
                   [0 , 1],
                   [0., 0.]])

y_pred = np.array([[0.8, 0.2],
                   [0.1, 0.7],
                   [0.4, 0.3]])

loss,accuracy = loss_accuracy(y_true, y_pred)

print("loss: ", loss)
print("accuracy: ", accuracy)

loss:  7.675283643263486
accuracy:  1.0


In [81]:
def backward(params, outputs, x, y):
    w1, w2 = params['w1'], params['w1']
    a2, a3 = outputs['a2'], outputs['a3']
    
    d_z2 = a3 - y # sigmoid
    
    d_z1 = np.matmul(w2.T, d_z2) * (1 - np.power(a2, 2)) # tanh
    
    # gradients w.r.t. parameters
    d_w2 = np.matmul(d_z2, a2.T) / x.shape[1]
    d_w1 = np.matmul(d_z1, x) / x.shape[1]
    d_b2 = np.sum(d_z2, axis=1, keepdims=True) / x.shape[1]
    d_b1 = np.sum(d_z1, axis=1, keepdims=True) / x.shape[1]
    
    result = {
        "d_w1": d_w1, "d_w2": d_w2,
        "d_b1": d_b1, "d_b2": d_b2
    }
    
    return result

In [82]:
def sgd(params, grads, eta):
    
    params["w1"] = params["w1"] - eta * grads["d_w1"]
    params["w2"] = params["w2"] - eta * grads["d_w2"]
    params["b1"] = params["b1"] - eta * grads["d_b1"]
    params["b2"] = params["b2"] - eta * grads["d_b2"]
    
    return params

In [83]:
x = np.random.randn(3, 50)
y = np.array([[1, 0, 0],
              [0, 1, 0],
              [0, 0, 0],
              [0, 0, 1],
             ])

nx = x.shape[1]
nh = 4
ny = y.shape[0]
params = init_params(nx, nh, ny)

num_epochs = 1000
eta = 0.1

loss_history = []
accuracy_history = []
for i in range(num_epochs):
    y_hat, cache = forward(params, x)
    
    loss,accuracy = loss_accuracy(y_hat, y)
    
    loss_history.append(loss)
    accuracy_history.append(accuracy)
    
    result = backward(params, cache, x, y)
    
    params = sgd(params, result, eta)

plt.plot(loss_history)
plt.title("loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

plt.plot(accuracy_history)
plt.title("accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.show()


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 50)

## hand written digit recognition dataset mnist

In [69]:
mnist = pd.read_csv('./mnist.csv')
mnist.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
mnist.describe()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,4.4434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1793,0.1636,0.0526,0.0006,0.0,0.0,0.0,0.0,0.0,0.0
std,2.895865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.674149,5.736072,2.420004,0.06,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,253.0,253.0,156.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
mnist.isna().sum()

label    0
1x1      0
1x2      0
1x3      0
1x4      0
        ..
28x24    0
28x25    0
28x26    0
28x27    0
28x28    0
Length: 785, dtype: int64

In [72]:
y = mnist['label']
x = mnist.drop(['label'],axis=1)

In [73]:
y = pd.get_dummies(y).T.to_numpy()
x = x.to_numpy()

In [None]:
nx = x.shape[1]
nh = 4
ny = y.shape[0]
params = init_params(nx, nh, ny)

num_epochs = 5000
eta = 0.1

loss_history = []
accuracy_history = []
for i in range(num_epochs):
    y_hat, cache = forward(params, x)
    
    loss,accuracy = loss_accuracy(y_hat,y)
    
    loss_history.append(loss)
    accuracy_history.append(accuracy)
    
    result = backward(params, cache, x, y)
    
    params = sgd(params, result, eta)

plt.plot(loss_history)
plt.title("loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

plt.plot(accuracy_history)
plt.title("accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.show()


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 784)