# Trying to build a simple Neural Network from scratch

In [41]:
# Basic functions to build NN
import numpy as np

def softmax(x):
    '''Return the softmax of x taken along its last axis.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow; this shift does not change the result.
    '''
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


def cross_entropy_error(y, t):
    '''Return the mean cross-entropy between predictions y and targets t.

    y holds one row of predicted probabilities per sample; a 1-D y (with its
    t) is treated as a batch of one. t may either have as many elements as y
    (e.g. one-hot rows), in which case it is reduced to class indices with
    argmax, or already contain the class indices.
    '''
    if y.ndim == 1:
        # Promote a single sample to a batch of size one.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # Same element count as y -> per-class scores; keep only the winning index.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    picked = y[np.arange(n_samples), labels]
    # The small constant keeps log() finite when a picked probability is 0.
    return -np.sum(np.log(picked + 1e-7)) / n_samples

def numerical_gradient(f, X):
    '''Numerically estimate the gradient of f at X using central differences.

    f -- callable taking an array shaped like X and returning the value to
         differentiate. f must compute its result from the array it is
         given: the perturbations are applied to a private copy of X, so
         anything f reads from elsewhere never sees them.
    X -- array (or array-like) of parameters; it is not modified.

    Returns an array shaped like X whose entries are
    (f(X + h*e_i) - f(X - h*e_i)) / (2*h) for each element i.
    '''
    h = 1e-4
    X = np.asarray(X)
    Xf = X.flatten()  # flatten() copies, so the caller's array stays intact
    if not np.issubdtype(Xf.dtype, np.inexact):
        # Integer/bool storage would truncate "value +/- h" back to the
        # original value and produce a meaningless (usually zero) gradient.
        Xf = Xf.astype(float)

    grad = []
    for i, val in enumerate(Xf):
        Xf[i] -= h
        y1 = f(Xf.reshape(X.shape))

        Xf[i] += 2*h
        y2 = f(Xf.reshape(X.shape))

        Xf[i] = val  # restore before perturbing the next element
        grad.append((y2-y1) / (2*h))

    return np.array(grad).reshape(X.shape)

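Here I implement two NN classes, Net0 and Net1. Net0 doesn't work, but Net1 seems to work. The only difference between them is `gradient()`. In `gradient()`, the loss function is passed to `numerical_gradient`. Compared with `Net0.gradient()`, `Net1.gradient()` additionally stores the matrix it receives into the network's parameter `W` before evaluating the loss. This matters because `numerical_gradient` perturbs a flattened copy of the matrix, so the loss only sees the perturbation if that copy is written back to the network.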

In [42]:
# This doesn't work :(
class Net0:
    '''Single-layer softmax classifier that holds one weight matrix W.

    NOTE: gradient() in this class is the notebook's deliberately failing
    example. Its closure ignores the perturbed matrix it is handed, so the
    returned gradient is always zero. Net1 overrides gradient() with the
    working version.
    '''
    def __init__(self, W):
        self.W = W

    def predict(self, x):
        '''Return softmax(x . W) for the input x.'''
        a = np.dot(x, self.W)
        return softmax(a)

    def loss(self, x, t):
        '''Return the cross-entropy error of predict(x) against target t.'''
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def gradient(self, x, t):
        '''Numerically differentiate the loss with respect to W (broken here).

        numerical_gradient perturbs a copy of the matrix and passes that copy
        to f, but f below evaluates the loss with the unperturbed self.W.
        Both probe evaluations are therefore equal and every gradient entry
        is 0.
        '''
        def f(W):
            # W is intentionally unused -- this is the bug being demonstrated.
            return self.loss(x, t)
        # Use this instance's weights rather than a module-level `net`, so
        # the method works no matter what name the instance is bound to.
        return numerical_gradient(f, self.W)


# This works. The only change from Net0 is the gradient() method.
class Net1(Net0):
    '''Net0 with a gradient() whose loss actually depends on the probed W.'''
    def gradient(self, x, t):
        '''Numerically differentiate the loss with respect to W.

        Returns an array shaped like self.W. self.W is the same object
        before and after the call.
        '''
        original_W = self.W

        def f(W):
            # numerical_gradient perturbs a copy of the weights and passes
            # that copy in as W. loss() reads self.W, so the copy must be
            # installed on the instance for the perturbation to have any
            # effect on the loss.
            self.W = W
            return self.loss(x, t)

        try:
            return numerical_gradient(f, original_W)
        finally:
            # Do not leave the scratch buffer installed as the weights.
            self.W = original_W

The following test with **Net0** doesn't work: the loss stays constant.

In [43]:
# Toy data: a single 2-feature sample with a one-hot target over 3 classes
np.random.seed(0)
x0 = np.array([0.6, 0.9])
t0 = np.array([0, 0, 1])
W0 = np.random.randn(2, 3)

# Net0 does NOT train: its gradient is all zeros, so the loss never changes
net = Net0(W0)
for _ in range(9):
    dW = net.gradient(x0, t0)
    net.W -= 0.5 * dW  # gradient-descent step, learning rate 0.5
    print(f"loss={net.loss(x0, t0)}")

loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104
loss=3.6674507891066104


In [44]:
# Net1 trains as expected: the loss decreases at every step
net = Net1(W0)
for _ in range(9):
    dW = net.gradient(x0, t0)
    net.W -= 0.5 * dW  # gradient-descent step, learning rate 0.5
    print(f"loss={net.loss(x0, t0)}")

loss=2.78161533871936
loss=2.0003343081582976
loss=1.3686725260622286
loss=0.9190010078462153
loss=0.6335065534649551
loss=0.45993115419801334
loss=0.35191450377644246
loss=0.2812081474959238
loss=0.23243191883336756
