In [1]:
import numpy as np

In [2]:
def mean_squared_error(y_hat, y):
    """Return the mean of the squared differences between ``y_hat`` and ``y``.

    The squared residuals are summed over every element and divided by
    ``y.size``, so ``y`` has to be a NumPy array.
    """
    residual = y_hat - y
    total_squared = np.sum(np.square(residual))
    return total_squared / y.size

In [3]:
# Targets and a first set of predictions that stay within 0.2 of every target,
# so the mean squared error displayed below is small.
y      = np.array([1,   2,   3,    4])
y_hat1 = np.array([1.2, 1.9, 2.9,  4.2]) 
mean_squared_error(y_hat1, y)

0.025000000000000022

In [4]:
# Second set of predictions: three of the four values are off by more than 1,
# which raises the error compared with y_hat1.
y_hat2 = np.array([2.2, 0.9, 2.9,  5.2]) 
mean_squared_error(y_hat2, y)

1.0250000000000004

In [6]:
def cross_entropy_error(y_hat, y):
    """Cross-entropy between predicted probabilities ``y_hat`` and targets ``y``.

    Computes ``-sum(y * log(y_hat + 1e-7))`` over all elements.
    """
    eps = 1e-7  # keeps log() finite when a predicted probability is exactly 0
    log_probs = np.log(y_hat + eps)
    return -np.sum(y * log_probs)

In [7]:
y = np.array([0, 1, 0, 0, 0])

In [8]:
y_hat1 = np.array([0.1, 0.7, 0.1, 0.1, 0])

In [9]:
cross_entropy_error(y_hat1, y)

0.3566748010815999

In [10]:
y_hat2 = np.array([0.7, 0.05, 0.05, 0.2, 0])

In [11]:
cross_entropy_error(y_hat2, y)

2.9957302735559908

In [14]:
import mnist

my_mnist = mnist.Mnist()

File: train-images-idx3-ubyte.gz already exists.
File: train-labels-idx1-ubyte.gz already exists.
File: t10k-images-idx3-ubyte.gz already exists.
File: t10k-labels-idx1-ubyte.gz already exists.
Pickle: dataset/mnist.pkl already exists.
loading....
Done


In [15]:
(train_images, train_labels), (_, _) = my_mnist.load()

In [16]:
train_size = train_images.shape[0]
batch_size = 32

In [17]:
# Draw batch_size indices from range(train_size). np.random.choice samples with
# replacement by default, so an index may repeat within a batch. No seed is set,
# so the indices printed below differ from run to run.
batch_mask = np.random.choice(train_size, batch_size)
print(batch_mask)

[7525  238 5493  182 5741 9754 6413 8917 7754 9650  601 8500 3811 1354
 9556 7014 8085  251 9375 6784 9925 8755 6982 4096 6801 2293 7888 8675
  379 3341 6540 6872]


In [18]:
train_size

10000

In [19]:
def cross_entropy_error(y_hat, y):
    """Cross-entropy averaged over the batch.

    A 1-D ``y_hat`` is treated as a single sample; otherwise the summed loss is
    divided by the number of rows, ``y_hat.shape[0]``.
    """
    eps = 1e-7  # keeps log() finite when a predicted probability is exactly 0
    if y_hat.ndim == 1:
        n_samples = 1
    else:
        n_samples = y_hat.shape[0]
    total = -np.sum(y * np.log(y_hat + eps))
    return total / n_samples

In [20]:
# A batch of two samples over five classes: one row of predicted values per
# sample in y_hat_batch, and the matching one-hot target row in y_batch.
y_hat_batch = np.array([ [0.2, 0.2, 0.3, 0.1, 0.2], [0.1, 0.1, 0.1, 0.1, 0.6]])
y_batch =     np.array([ [0,   0,   1,   0,    0],   [0,   0,   0,   0,   1]])

In [21]:
cross_entropy_error(y_hat_batch, y_batch)

0.8573989640459981

In [22]:
def numerical_diff(f, x):
    """Forward-difference estimate of ``f'(x)`` with a step of ``10e-50``.

    The step is far below double-precision resolution for ordinary values of
    ``x``, so ``x + h`` rounds back to ``x`` and the estimate collapses to 0.0.
    """
    h = 10e-50
    rise = f(x + h) - f(x)
    return rise / h

In [23]:
def func(x):
    """Evaluate the sample function ``x**2 + 0.1*x``."""
    quadratic_term = x**2
    linear_term = 0.1*x
    return quadratic_term + linear_term

In [24]:
numerical_diff(func, 0.8)

0.0

In [25]:
def numerical_diff(f, x):
    """Central-difference estimate of ``f'(x)`` using a step of ``1e-4``."""
    h = 1e-4
    ahead, behind = f(x + h), f(x - h)
    return (ahead - behind)/(2*h)

In [26]:
numerical_diff(func, 0.8)

1.6999999999994797

In [27]:
numerical_diff(func, 0.2)

0.49999999999994493

In [28]:
def func(x):
    """Return ``x[0]**2 + x[1]**2`` for an indexable ``x``."""
    x0, x1 = x[0], x[1]
    return x0**2 + x1**2

def func_tmp1(x0):
    """``x0**2 + x1**2`` with the second variable held fixed at 4.0."""
    fixed_x1 = 4.0
    return x0**2 + fixed_x1**2

In [29]:
numerical_diff(func_tmp1, 3.0)

6.00000000000378

In [30]:
def func_tmp2(x1):
    """``x0**2 + x1**2`` with the first variable held fixed at 3.0."""
    fixed_x0 = 3.0
    return fixed_x0**2 + x1**2

In [31]:
numerical_diff(func_tmp2, 4.0)

7.999999999999119

# Numerical Gradient

In [32]:
def func2(x):
    """Sum of the squares of the first two components of ``x``."""
    first, second = x[0], x[1]
    return first**2 + second**2

In [33]:
def func_tmp1(x0):
    """``func2`` restricted to its first variable, with the second fixed at 4."""
    fixed_x1 = 4
    return x0**2 + fixed_x1**2

def func_tmp2(x1):
    """``func2`` restricted to its second variable, with the first fixed at 3."""
    fixed_x0 = 3
    return fixed_x0**2 + x1**2

In [34]:
def _numerical_diff(f, x):
    h = 1e-4
    return (f(x + h) - f(x - h))/(2*h)

In [35]:
def _numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x) 
    
    for idx in range(x.size):
        tmp_val = x[idx]
        
        # f(x+h) 
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)
        
        # f(x-h) 
        x[idx] = tmp_val - h 
        fxh2 = f(x) 
        
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val 
        
    return grad

In [36]:
_numerical_diff(func_tmp1, 3.0)

6.00000000000378

In [37]:
_numerical_diff(func_tmp2, 4.0)

7.999999999999119

In [38]:
_numerical_gradient(func2, np.array([3.0, 4.0]))

array([6., 8.])

In [39]:
def gradient_descent(f, init_x, lr=0.1, step_num = 100):
    """Minimise ``f`` with fixed-step gradient descent.

    Args:
        f: Callable taking a 1-D array and returning a scalar.
        init_x: Starting point (array-like). It is copied, so the caller's
            array is not overwritten by the updates.
        lr: Learning rate multiplying the gradient at every step.
        step_num: Number of update steps to perform.

    Returns:
        A new float array holding the point reached after ``step_num`` updates.
    """
    # Work on a private float copy: the in-place update below would otherwise
    # write through to init_x, and would fail outright on an integer array.
    x = np.array(init_x, dtype=float)
    for _ in range(step_num):
        grad = _numerical_gradient(f, x)
        x -= lr*grad  # x = x - lr*grad

    return x

In [40]:
# Start far from the origin and run 10000 small steps; the result displayed
# below is close to (0, 0).
init_x = np.array([2800.0, 1000.0])
# func2 = x0**2 + x1**2
gradient_descent(func2, init_x, step_num=10000, lr=0.001)

array([5.65680105e-06, 2.02028609e-06])

# SimpleNet

In [41]:
class SimpleNet:
    """Single linear layer mapping 2 input features to 3 class scores.

    The weight matrix ``w`` has shape (2, 3) and is drawn from a standard
    normal distribution. The class bundles the softmax, cross-entropy and
    numerical-gradient helpers needed to evaluate and differentiate the loss.
    """

    def __init__(self):
        self.w = np.random.randn(2, 3)


    # for multi-dimensional x
    def softmax(self, x):
        """Softmax of ``x``: per row for a 2-D array, over all elements otherwise.

        The maximum is subtracted before exponentiating so large scores do not
        overflow.
        """
        if x.ndim == 2:
            # Transpose so each sample is a column and the axis-0 reductions
            # broadcast against it; transpose back before returning.
            x = x.T
            x = x - np.max(x, axis=0)
            exp_x = np.exp(x)
            return (exp_x / np.sum(exp_x, axis=0)).T

        x = x - np.max(x)
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x)


    def cross_entropy_error(self, y, t):
        """Cross-entropy of predictions ``y`` against targets ``t``, per sample.

        A 1-D ``y`` counts as one sample; otherwise the summed loss is divided
        by ``y.shape[0]``.
        """
        delta = 1e-7  # keeps log() finite when a probability is exactly 0
        batch_size = 1 if y.ndim == 1 else y.shape[0]

        return -np.sum(t*np.log(y + delta)) / batch_size

    # Original (misspelled) name, kept so existing callers continue to work.
    cross_entroy_error = cross_entropy_error


    # for multi-dimensional x
    def numerical_gradient(self, f, x):
        """Central-difference gradient of ``f`` with respect to every entry of ``x``.

        ``x`` is perturbed in place, one entry at a time, so ``f`` may read the
        perturbed values either from its argument or from wherever the same
        array is stored (for example ``self.w``). Each entry is restored before
        moving on, even when ``f`` raises.

        Args:
            f: Callable invoked as ``f(x)`` that returns a scalar.
            x: NumPy array of any shape. It is expected to be floating-point:
                storing ``value + h`` in an integer array would truncate the step.

        Returns:
            Array shaped like ``x`` with the estimated partial derivatives.
        """
        h = 1e-4 # 0.0001
        grad = np.zeros_like(x)

        for idx in np.ndindex(x.shape):
            tmp_val = x[idx]
            try:
                x[idx] = float(tmp_val) + h
                fxh1 = f(x) # f(x+h)

                x[idx] = tmp_val - h
                fxh2 = f(x) # f(x-h)
            finally:
                # Never leave the parameter shifted by +/- h.
                x[idx] = tmp_val

            grad[idx] = (fxh1 - fxh2) / (2*h)

        return grad


    def predict(self, x):
        """Return the raw class scores ``x . w`` (no softmax applied)."""
        return np.dot(x, self.w)


    def loss(self, x, y):
        """Cross-entropy between ``softmax(predict(x))`` and the targets ``y``."""
        z = self.predict(x)
        y_hat = self.softmax(z)
        loss = self.cross_entropy_error(y_hat, y)

        return loss

In [42]:
net = SimpleNet()
print(net.w)

[[-1.40792789 -1.5466065  -0.63994022]
 [ 0.26208877 -1.71393661  1.05970771]]


In [44]:
# One input with two features; predict returns the three raw scores x . w
# (softmax is only applied inside net.loss).
x = np.array([0.7, 0.19])
p = net.predict(x)
print(p)

[-0.93575266 -1.40827251 -0.24661369]


In [45]:
np.argmax(p)

2

In [46]:
y = np.array([0, 1, 0])
net.loss(x, y)

1.757729778639131

In [47]:
y = np.array([0, 0, 1])
net.loss(x, y)

0.5960713599277233

In [48]:
def loss_function(w):
    """Loss of ``net`` on the notebook-level sample ``(x, y)``.

    The argument ``w`` is not used: the loss is read through ``net``, so a
    change to the weights is only seen when it is made to ``net.w`` itself.
    """
    current_loss = net.loss(x, y)
    return current_loss

In [49]:
dw = net.numerical_gradient(loss_function, net.w)
print(dw)

[[ 0.19361461  0.12070503 -0.31431964]
 [ 0.05255254  0.03276279 -0.08531533]]


In [50]:
# Lambda form of the loss closure. The argument w is ignored; the loss is read
# from net, whose weight array numerical_gradient perturbs in place.
loss_function = lambda w: net.loss(x, y)

In [51]:
dw = net.numerical_gradient(loss_function, net.w)
print(dw)

[[ 0.19361461  0.12070503 -0.31431964]
 [ 0.05255254  0.03276279 -0.08531533]]


# TwoLayerNet class

In [52]:
class Activations:
    """Activation functions used by the network classes."""

    def sigmoid(self, x):
        """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
        denominator = 1 + np.exp(-x)
        return 1/denominator

    # for multi-dimensional x
    def softmax(self, x):
        """Softmax of ``x``: per row for a 2-D array, over all elements otherwise.

        The maximum is subtracted first so the exponentials cannot overflow.
        """
        if x.ndim != 2:
            shifted = x - np.max(x)
            exps = np.exp(shifted)
            return exps / np.sum(exps)

        # Put samples in columns so the axis-0 reductions broadcast cleanly.
        columns = x.T
        shifted = columns - np.max(columns, axis=0)
        exps = np.exp(shifted)
        normalised = exps / np.sum(exps, axis=0)
        return normalised.T

In [53]:
class Errors:
    """Loss functions used by the network classes."""

    def cross_entropy_error(self, y, t):
        """Cross-entropy of predictions ``y`` against targets ``t``, per sample.

        Args:
            y: Predicted probabilities; a 1-D array counts as a single sample,
                otherwise the first axis is the batch.
            t: Targets with the same shape as ``y``.

        Returns:
            ``-sum(t * log(y + 1e-7))`` divided by the batch size.
        """
        delta = 1e-7  # keeps log() finite when a probability is exactly 0
        batch_size = 1 if y.ndim == 1 else y.shape[0]

        return -np.sum(t*np.log(y + delta)) / batch_size

    # Original (misspelled) name, kept so existing callers continue to work.
    cross_entroy_error = cross_entropy_error

In [54]:
import activations
import errors

In [56]:
class TwoLayerNet:
    """Fully connected network with one sigmoid hidden layer and a softmax output.

    Parameters are kept in ``self.params``:
        w1: (input_size, hidden_size) weights, scaled normal initialisation
        b1: (hidden_size,) zeros
        w2: (hidden_size, output_size) weights, scaled normal initialisation
        b2: (output_size,) zeros
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}

        self.params['w1'] = weight_init_std*np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)

        self.params['w2'] = weight_init_std*np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.activations = activations.Activations()
        self.errors = errors.Errors()

    def predict(self, x):
        """Forward pass: affine -> sigmoid -> affine -> softmax."""
        w1, w2 = self.params['w1'], self.params['w2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, w1) + b1
        z1 = self.activations.sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        y = self.activations.softmax(a2)

        return y

    def loss(self, x, y):
        """Cross-entropy between the prediction for ``x`` and the targets ``y``."""
        y_hat = self.predict(x)

        return self.errors.cross_entropy_error(y_hat, y)


    def accuracy(self, x, y):
        """Fraction of rows whose predicted class equals the target class.

        Both classes are taken as the argmax along axis 1, so ``x`` must be a
        batch and ``y`` a 2-D array with one row per sample.
        """
        y_hat = self.predict(x)
        p = np.argmax(y_hat, axis=1)
        y_p = np.argmax(y, axis=1)

        return np.sum(p == y_p)/float(x.shape[0])


    # for multi-dimensional x
    def _numerical_gradient(self, f, x):
        """Central-difference gradient of ``f`` with respect to every entry of ``x``.

        ``x`` is perturbed in place one entry at a time, so ``f`` sees the
        change even when it reads the array through ``self.params`` rather than
        through its argument. Each entry is restored in a ``finally`` block:
        if ``f`` raises, the parameter is not left shifted by +/- h.

        Args:
            f: Callable invoked as ``f(x)`` that returns a scalar.
            x: NumPy array of any shape, expected to be floating-point.

        Returns:
            Array shaped like ``x`` with the estimated partial derivatives.
        """
        h = 1e-4 # 0.0001
        grad = np.zeros_like(x)

        for idx in np.ndindex(x.shape):
            tmp_val = x[idx]
            try:
                x[idx] = float(tmp_val) + h
                fxh1 = f(x) # f(x+h)

                x[idx] = tmp_val - h
                fxh2 = f(x) # f(x-h)
            finally:
                x[idx] = tmp_val

            grad[idx] = (fxh1 - fxh2) / (2*h)

        return grad


    def numerical_gradient(self, x, y):
        """Numerical gradient of the loss on ``(x, y)`` for every parameter.

        Returns:
            Dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``; each value has
            the shape of the corresponding entry in ``self.params``.
        """
        # The argument w is ignored: the loss reads the parameters from
        # self.params, which _numerical_gradient perturbs in place.
        loss_w = lambda w: self.loss(x, y)

        grads = {}
        grads['w1'] = self._numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = self._numerical_gradient(loss_w, self.params['b1'])
        grads['w2'] = self._numerical_gradient(loss_w, self.params['w2'])
        grads['b2'] = self._numerical_gradient(loss_w, self.params['b2'])

        return grads
    
 

# Train TwoLayerNet

In [57]:
import mnist
from two_layer_net import TwoLayerNet

import matplotlib.pyplot as plt
import numpy as np

ModuleNotFoundError: No module named 'two_layer_net'