# Build a simple Neural Network from scratch

In [2]:
# Basic functions to build NN
import numpy as np

def softmax(x):
    x = x - np.max(x, axis=-1, keepdims=True)
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)


def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    if t.size == y.size:
        t = t.argmax(axis=1)
             
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


def numerical_gradient_2d(f, X):
    h = 1e-4
    grad = np.zeros_like(X)
    for i, row in enumerate(X):
        for j, val in enumerate(row):
            X[i,j] -= h
            y1 = f(X)
            X[i,j] += 2*h
            y2 = f(X)
            grad[i,j] = (y2-y1)/(2*h)
            X[i,j] = val
    return grad


# Extend the above 2-D version to N-D version as below
def numerical_gradient_nd(f, X):
    '''Simply nemerically calcurate gradient of parameter matrix X'''
    h = 1e-4
    grad = []
    Xf = X.flatten()
    for i, val in enumerate(Xf):
        Xf[i] -= h
        # Updating the content of passed pointer X since f() might not take value of X directly
        # like in this loss functiojn case but f() would refer to X implicitly, where X means 'W'
        X[...] = Xf.reshape(X.shape)
        y1 = f(X)

        Xf[i] += 2*h
        X[...] = Xf.reshape(X.shape)
        y2 = f(X)
        
        Xf[i] = val # Restore
        X[...] = Xf.reshape(X.shape)
        grad.append((y2-y1) / (2*h))

    return np.array(grad).reshape(X.shape)

numerical_gradient = numerical_gradient_nd # Use N-dimensional by default

# Build with functions

In [21]:
np.random.seed(0)
x0 = np.array([[0.6, 0.9],[0,0],[0.8, 0.7]])
t0 = np.array([[0, 0, 1],[0,1,0],[0,0,1]])
W1 = np.random.rand(2,3)
b1 = np.zeros(1)
W2 = np.random.rand(2,3)
b2 = np.zeros(1)

def predict(x):
    a1 = np.dot(x, W1) + b1
    a1[a1 < 0] = 0
    z1 = a1
    a2 = np.dot(x, W2) + b2
    z2 = softmax(a2)
    y = z2
    return y

def loss(x, t):
    y = predict(x)
    return cross_entropy_error(y, t)

# Test
for i in np.arange(100):
    dW1 = numerical_gradient(lambda W:loss(x0,t0), W1)
    db1 = numerical_gradient(lambda W:loss(x0,t0), b1)
    dW2 = numerical_gradient(lambda W:loss(x0,t0), W2)
    db2 = numerical_gradient(lambda W:loss(x0,t0), b2)
    W1 -= 0.5 * dW1
    b1 -= 0.5 * db1
    W2 -= 0.5 * dW2
    b2 -= 0.5 * db2
    print(f"i={i} loss={loss(x0, t0)}") 

i=0 loss=0.9014364865040848
i=1 loss=0.7923575877606321
i=2 loss=0.7127532872798903
i=3 loss=0.654089719733962
i=4 loss=0.6100884007646726
i=5 loss=0.5763983492213015
i=6 loss=0.5500643840354014
i=7 loss=0.5290768246094334
i=8 loss=0.5120525055355898
i=9 loss=0.4980233187361935
i=10 loss=0.48629901182559493
i=11 loss=0.4763782218015049
i=12 loss=0.46789023746821107
i=13 loss=0.46055633003103624
i=14 loss=0.4541636557754536
i=15 loss=0.44854734463547613
i=16 loss=0.4435780012187287
i=17 loss=0.4391528422898799
i=18 loss=0.43518931673298306
i=19 loss=0.43162044663413557
i=20 loss=0.42839137941743594
i=21 loss=0.42545680417111464
i=22 loss=0.4227789928647938
i=23 loss=0.42032629908362323
i=24 loss=0.4180719956765711
i=25 loss=0.4159933662321868
i=26 loss=0.4140709886258119
i=27 loss=0.4122881653205524
i=28 loss=0.41063046682065524
i=29 loss=0.4090853631187786
i=30 loss=0.4076419241254044
i=31 loss=0.4062905745886232
i=32 loss=0.40502289236569805
i=33 loss=0.40383144141949184
i=34 loss=0.4

# Try with a Class

In [4]:
class Net:
    '''Simple NN class which holds parameter W'''
    def __init__(self):
        self.W = np.random.randn(2,3)
        
    def predict(self, x):
        a = np.dot(x, self.W)
        z = softmax(a)
        y = z
        return y

    def loss(self, x, t):
        y = self.predict(x)
        _loss = cross_entropy_error(y, t)
        return _loss
    
    def gradient(self, x, t):
        f = lambda W: self.loss(x, t)
        return numerical_gradient(f, net.W)

# Test    
net = Net()
for i in np.arange(9):
    dW = net.gradient(x0, t0)
    net.W -=  0.5 * dW
    print(f"loss={net.loss(x0, t0)}")

loss=0.49818359250842825
loss=0.3721732894742513
loss=0.2926569470459612
loss=0.2392641046837959
loss=0.2014525634086171
loss=0.17349119992150133
loss=0.1520794863966645
loss=0.13521166003877994
loss=0.12161009078258005
