In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

## Gradient Descet in Neural Network

In [2]:
def softmax(x):
    cons = np.max(x)
    exp_x = np.exp(x - cons)
    sum_exp_x = np.sum(exp_x)
    return exp_x / sum_exp_x

In [3]:
def cross_entropy_error(y, t):
    delta = 1e-7
    return (-1)*np.sum(t * np.log(y + delta))

In [4]:
def __numerical_gradient_body(f, x):    
    h = 1e-4
    grad = np.zeros_like(x)
    
    for i in range(0, x.size, 1):
        tmp_val = x[i]
        
        # calculate f(x+h)
        x[i] = tmp_val + h
        fxh1 = f(x)
        
        # calculate f(x-h)
        x[i] = tmp_val - h
        fxh2 = f(x)
        
        grad[i] = (fxh1 - fxh2) / (2*h)
        
        # restore the value
        x[i] = tmp_val
        
    return grad

In [5]:
def numerical_gradient(f, X):
    if X.ndim == 1:
        return __numerical_gradient_body(f, X)
    else:
        grad = np.zeros_like(X)
        
        dim,ttl = X.shape
        
        for i in range(ttl):
            grad[:,i] = __numerical_gradient_body(f, X[:,i])
        
        return grad

In [6]:
class SimpleNet():
    def __init__(self):
        self.W = np.random.randn(2,3)
    
    def predict(self, x):
        return np.dot(x, self.W)
    
    def loss(self, x, t):
        # x: input data
        # t: label
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss

### Calculate the gradinet of the Weights

In [7]:
net = SimpleNet()
print(net.W)

x = np.array([0.6, 0.9])
t = np.array([0, 0, 1])

y = net.predict(x)
print(y)

loss = net.loss(x, t)
print(loss)

[[-0.8339355  -0.13553598  1.21013523]
 [-0.52139142  1.6686686  -0.82705022]]
[-0.96961358  1.42048015 -0.01826406]
1.7230549742562002


In [8]:
def f(W):
    return net.loss(x, t)

In [9]:
dw = numerical_gradient(f, net.W)
print(dw)

[[ 0.04136868  0.45151914 -0.49288782]
 [ 0.06205302  0.67727871 -0.73933173]]


### Learning Procedure

In [10]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [11]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=1.0):
        self.params = {}
        self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)
        
    def predict(self, x):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        
        return y
    
    # x: input data
    # t: the labeling result
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y,t)
    
    def accuracy(self, x, t):
        y = self.predict(x)
        infer = np.argmax(y, axis=1)
        label = np.argmax(t, axis=1)
        return np.sum(infer == label) / len(label)
    
    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)
        
        grads = {}
        grads["W1"] = numerical_gradient(loss_w, self.params["W1"])
        grads["b1"] = numerical_gradient(loss_w, self.params["b1"])
        grads["W2"] = numerical_gradient(loss_w, self.params["W2"])
        grads["b2"] = numerical_gradient(loss_w, self.params["b2"])
        
        return grads

In [12]:
# simple example
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params["W1"].shape)
print(net.params["b1"].shape)
print(net.params["W2"].shape)
print(net.params["b1"].shape)

# generate 100 images
x = np.random.rand(100, 784)
t = np.random.rand(100, 10)
print(x)
print(t)

(784, 100)
(100,)
(100, 10)
(100,)
[[0.02630838 0.53062906 0.67946543 ... 0.19935913 0.84168082 0.61418998]
 [0.67744476 0.08857352 0.84677061 ... 0.28646855 0.72083026 0.93161104]
 [0.00427215 0.46231823 0.20705598 ... 0.82023305 0.92859347 0.3585523 ]
 ...
 [0.29416256 0.37663556 0.61873699 ... 0.59829586 0.9930617  0.35422951]
 [0.71090586 0.65534099 0.88012781 ... 0.94882501 0.97726324 0.79848995]
 [0.4058097  0.41479547 0.01366396 ... 0.65600233 0.74998662 0.56895135]]
[[8.24486032e-01 2.04194147e-01 9.85686209e-02 5.65337837e-01
  1.95071495e-01 1.88140352e-01 9.75649395e-01 8.57580286e-01
  7.57288497e-01 2.45381929e-01]
 [9.59120476e-01 4.81382650e-01 1.08981950e-01 9.16388692e-02
  5.33020308e-01 9.61379110e-01 3.72305628e-01 7.98830377e-01
  3.56607781e-01 6.08114875e-01]
 [4.86528155e-01 2.81778550e-01 7.81313058e-01 1.99554220e-01
  3.70568096e-01 6.72944337e-02 2.36137694e-01 6.54437973e-01
  2.17752625e-02 5.91807114e-01]
 [1.78983806e-01 5.03590358e-01 3.95128864e-01 2.0

In [13]:
# example images
y = net.predict(x)
print(np.argmax(y,1))

[4 4 4 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 0 4 4 4 4 4 0 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 0 4 4 4 4 4 4 4 0 4 4 4 4 4 4 0 4 4 4 4 4 4 4 4 4 0 4 4 4 4
 4 4 0 4 4 4 4 4 4 4 4 1 4 0 4 4 4 0 4 4 0 4 4 4 0 4]


In [14]:
grads = net.numerical_gradient(x, t)
print(grads["W1"].shape)
print(grads["b1"].shape)
print(grads["W2"].shape)
print(grads["b1"].shape)

(784, 100)
(100,)
(100, 10)
(100,)


In [15]:
print(grads["W2"])

[[-9.22952127e-01 -1.54109675e-01 -1.56134547e-02 -2.29465513e-05
  -1.27292727e+00 -8.12431563e-02 -2.49201821e-02 -4.28681733e-02
  -3.75220043e-02  0.00000000e+00]
 [-3.01383912e+01 -8.95967840e+00 -1.20546527e+00 -6.26466772e-03
   7.42123122e+01 -7.59957697e+00 -2.15472089e+00 -4.27636371e+00
  -3.53176525e+00 -1.59161573e-07]
 [-3.45098977e+01 -1.12273830e+01 -1.51640910e+00 -6.62040748e-03
   6.77661096e+01 -8.15417111e+00 -2.92525400e+00 -5.37697008e+00
  -4.02075686e+00 -1.72803993e-07]
 [ 9.39476604e-02 -7.55797828e-03 -4.97392284e-02 -3.02816261e-05
  -1.17037580e-01 -1.83278512e-01 -8.14878695e-02 -1.09260327e-03
  -4.94698770e-02  0.00000000e+00]
 [-8.79146413e+00 -2.73240678e+00 -8.36590084e-01 -8.05312084e-04
  -7.82897911e+00 -2.77440222e+00 -6.05412092e-01 -1.92551483e+00
  -8.60323107e-01 -6.82121026e-08]
 [-3.45101780e+01 -1.12273995e+01 -1.51505655e+00 -6.62133516e-03
   6.77103581e+01 -8.15287075e+00 -2.92539659e+00 -5.37746110e+00
  -4.03022340e+00 -1.72803993e-07