# Build a simple Neural Network from scratch

In [1]:
# Basic functions to build NN
import numpy as np

def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    The maximum along the last axis is subtracted before exponentiating,
    which keeps ``np.exp`` from overflowing without changing the result.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


def cross_entropy_error(y, t):
    """Return the mean cross-entropy of predictions ``y`` against targets ``t``.

    ``y`` is either a single sample (1-D) or a batch with one row per sample;
    its entries are passed through ``log`` (presumably probabilities, e.g.
    softmax output). ``t`` is either one-hot encoded (same number of elements
    as ``y``) or an array of class indices.
    """
    # Promote a single sample to a batch containing one row.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets have as many elements as y; reduce them to class indices.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    picked = y[np.arange(n_samples), labels]
    # The small constant keeps log() away from zero.
    return -np.sum(np.log(picked + 1e-7)) / n_samples


def numerical_gradient_2d(f, X):
    """Estimate the gradient of ``f`` at the 2-D array ``X`` by central differences.

    Each entry of ``X`` is nudged in place to either side, ``f(X)`` is
    evaluated at both points, and the entry is put back before moving on.
    Returns an array shaped like ``X`` holding the estimated partial derivatives.
    """
    eps = 1e-4
    grad = np.zeros_like(X)
    for r, row in enumerate(X):
        for c in range(len(row)):
            saved = X[r, c]
            X[r, c] -= eps
            below = f(X)
            X[r, c] += 2 * eps
            above = f(X)
            grad[r, c] = (above - below) / (2 * eps)
            X[r, c] = saved  # Put the original value back
    return grad


# Generalize the 2-D version above to arrays with any number of dimensions
def numerical_gradient_nd(f, X):
    """Numerically estimate the gradient of ``f`` with respect to array ``X``.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2 * h)`` for each
    element of ``X`` in turn.

    Args:
        f: Callable invoked as ``f(X)``. It may ignore its argument and read
            the array through another reference (for example a loss function
            that uses a global or attribute ``W``), which is why ``X`` itself
            is perturbed in place rather than a copy.
        X: NumPy array holding the parameters. It is modified temporarily
            while probing and restored to its original values afterwards,
            even if ``f`` raises.

    Returns:
        An array with the same shape as ``X`` containing the estimated
        partial derivatives.
    """
    h = 1e-4
    grad = []
    # np.ndindex visits every index in C order, so each element is perturbed
    # directly instead of copying the whole array back and forth per element.
    for idx in np.ndindex(X.shape):
        original = X[idx]
        try:
            X[idx] = original - h
            y1 = f(X)

            X[idx] = original + h
            y2 = f(X)
        finally:
            X[idx] = original  # Restore
        grad.append((y2 - y1) / (2 * h))

    return np.array(grad).reshape(X.shape)

numerical_gradient = numerical_gradient_nd  # Default to the N-dimensional implementation

# Build with functions

In [2]:
np.random.seed(0)  # Fix the seed so the random initial weights are reproducible
x0 = np.array([0.6, 0.9])  # A single input sample with two features
t0 = np.array([0, 0, 1])  # One-hot target selecting the third class
W = np.random.rand(2,3)  # Weight matrix (2 inputs x 3 outputs), read as a global by predict()

def predict(x):
    """Return the softmax of ``x`` multiplied by the module-level weights ``W``."""
    return softmax(np.dot(x, W))

def loss(x, t):
    """Return the cross-entropy error of ``predict(x)`` against target ``t``."""
    return cross_entropy_error(predict(x), t)

# Test: run nine gradient-descent steps and print the loss after each one
for _ in range(9):
    # The lambda ignores its argument: loss() reads the global W, which
    # numerical_gradient perturbs in place while probing.
    dW = numerical_gradient(lambda _w: loss(x0, t0), W)
    W -= dW * 0.5
    print(f"loss={loss(x0, t0)}")

loss=0.6959934206606462
loss=0.49938552069857356
loss=0.37757736153655047
loss=0.29870927133781544
loss=0.24493252040955152
loss=0.20649812932314293
loss=0.17791777406640177
loss=0.15595756807225267
loss=0.13862179022094626


# Try with a Class

In [3]:
class Net:
    """Simple single-layer network that holds its own weight matrix ``W``."""

    def __init__(self):
        # 2 inputs x 3 outputs, drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the softmax of ``x`` multiplied by this network's weights."""
        return softmax(np.dot(x, self.W))

    def loss(self, x, t):
        """Return the cross-entropy error of ``predict(x)`` against target ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def gradient(self, x, t):
        """Return the numerical gradient of the loss with respect to ``self.W``."""
        # Differentiate this instance's own weights, so the method works for
        # any instance regardless of what name it is bound to.
        # The lambda ignores its argument on purpose: self.loss() reads
        # self.W, which numerical_gradient perturbs in place while probing.
        return numerical_gradient(lambda _w: self.loss(x, t), self.W)

# Test: run nine gradient-descent steps on a Net and print the loss after each one
net = Net()
for _ in range(9):
    dW = net.gradient(x0, t0)
    net.W -= dW * 0.5
    print(f"loss={net.loss(x0, t0)}")

loss=0.49818359250842825
loss=0.3721732894742513
loss=0.2926569470459612
loss=0.2392641046837959
loss=0.2014525634086171
loss=0.17349119992150133
loss=0.1520794863966645
loss=0.13521166003877994
loss=0.12161009078258005
