In [1]:
# Download the MNIST database files from http://yann.lecun.com/exdb/mnist/ and place the .gz files under ../source/
import numpy as np
import gzip

# Relative paths to the four gzipped MNIST IDX files, keyed by the name
# each array is stored under in the dataset dict.
key_file = {
    'x_train': '../source/train-images-idx3-ubyte.gz',
    'y_train': '../source/train-labels-idx1-ubyte.gz',
    'x_test': '../source/t10k-images-idx3-ubyte.gz',
    'y_test': '../source/t10k-labels-idx1-ubyte.gz',
}

def load_image(filename):
    """Read a gzipped image file and return its pixel bytes as a flat array.

    Args:
        filename: Path to the gzip-compressed file.

    Returns:
        A 1-D ``np.uint8`` array holding every byte after the first 16
        (the header bytes are skipped via ``offset=16``).
    """
    with gzip.open(filename, 'rb') as f:
        raw_bytes = f.read()
    # The array is built after the file is closed; it views the bytes object.
    return np.frombuffer(raw_bytes, dtype=np.uint8, offset=16)

def load_label(filename, num_classes=10):
    """Read a gzipped label file and return the labels one-hot encoded.

    Args:
        filename: Path to the gzip-compressed label file.
        num_classes: Width of the one-hot encoding. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        A float array of shape ``(n_labels, num_classes)`` where row ``i``
        is all zeros except for a 1 at column ``labels[i]``.

    Raises:
        IndexError: If a label value is not smaller than ``num_classes``.
    """
    with gzip.open(filename, 'rb') as f:
        # Skip the first 8 bytes (header); the rest is one byte per label.
        labels = np.frombuffer(f.read(), dtype=np.uint8, offset=8)

    n_labels = labels.shape[0]
    one_hot_labels = np.zeros((n_labels, num_classes))
    # Vectorized assignment: set (row i, column labels[i]) for all rows at
    # once instead of looping over every label in Python.
    one_hot_labels[np.arange(n_labels), labels] = 1
    return one_hot_labels

def convert_into_numpy(key_file):
    """Load the four MNIST arrays named in ``key_file`` into a dict.

    Args:
        key_file: Mapping with the keys 'x_train', 'y_train', 'x_test'
            and 'y_test', each giving the path of the file to load.

    Returns:
        A dict with the same four keys; the 'x_*' entries come from
        ``load_image`` and the 'y_*' entries from ``load_label``.
    """
    # (dataset key, loader) pairs, in the order the files are read.
    loaders = (
        ('x_train', load_image),
        ('y_train', load_label),
        ('x_test', load_image),
        ('y_test', load_label),
    )
    return {name: loader(key_file[name]) for name, loader in loaders}


def load_mnist():
    """Load MNIST from the paths in ``key_file`` and prepare the images.

    The image arrays are converted to float32, divided by 255.0 and
    reshaped to rows of 28*28 values. The label arrays are returned as
    produced by ``convert_into_numpy``.

    Returns:
        A dict with the keys 'x_train', 'y_train', 'x_test' and 'y_test'.
    """
    dataset = convert_into_numpy(key_file)

    # Both image splits get the same treatment: cast, scale, flatten.
    for split in ('x_train', 'x_test'):
        pixels = dataset[split].astype(np.float32)
        pixels /= 255.0
        dataset[split] = pixels.reshape(-1, 28*28)

    return dataset

In [2]:
# Load the prepared MNIST arrays once; later cells read from `dataset`.
dataset = load_mnist()
# Display the dict of arrays as the cell output.
dataset

{'x_train': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 'y_train': array([[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.]]),
 'x_test': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 'y_test': array([[0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0.,

In [3]:
# Take the first training image (a flat row of pixel values).
buff = dataset['x_train'][0]
# Binarize it: 1 where the pixel is greater than 0, otherwise 0.
np.where(buff>0, 1, 0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,

In [4]:
# One-hot label row of the first training sample.
dataset['y_train'][0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [11]:
def make_param(shape_list):
    """Create initial weights and biases for a chain of dense layers.

    Args:
        shape_list: Sequence of layer sizes; each consecutive pair
            ``(shape_list[i], shape_list[i + 1])`` defines one layer.

    Returns:
        ``(w_list, b_list)``: ``w_list[i]`` is drawn from
        ``np.random.randn`` with shape ``(shape_list[i], shape_list[i + 1])``
        and ``b_list[i]`` is a vector of length ``shape_list[i + 1]`` filled
        with 0.1. So w_list[0] = w', w_list[1] = w'', b_list[0] = b',
        b_list[1] = b''.
    """
    layer_dims = list(zip(shape_list[:-1], shape_list[1:]))
    # Weights are drawn layer by layer, in order; biases use no randomness.
    w_list = [np.random.randn(n_in, n_out) for n_in, n_out in layer_dims]
    b_list = [np.ones(n_out) / 10 for _, n_out in layer_dims]
    return w_list, b_list

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied elementwise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def inner_product(x, w, b):
    """Return the affine map ``np.dot(x, w) + b``."""
    weighted = np.dot(x, w)
    return weighted + b

def activation_func(x, w, b):
    """Apply one dense layer: affine map of ``x`` followed by ``sigmoid``."""
    pre_activation = inner_product(x, w, b)
    return sigmoid(pre_activation)

In [12]:
def calculate(x, w_list, b_list):
    """Run the forward pass of the two-layer network.

    Args:
        x: Input batch; the row-wise normalization below sums over axis 1,
            so a 2-D array (one sample per row) is expected.
        w_list: Weight matrices; only ``w_list[0]`` and ``w_list[1]`` are used.
        b_list: Bias vectors; only ``b_list[0]`` and ``b_list[1]`` are used.

    Returns:
        ``(y1, y2)``: the hidden-layer sigmoid activations and the
        output-layer sigmoid activations divided by their per-row sum.
    """
    hidden = sigmoid(inner_product(x, w_list[0], b_list[0]))
    scores = sigmoid(inner_product(hidden, w_list[1], b_list[1]))

    # Rescale each row so that its entries sum to 1.
    normalized = scores / np.sum(scores, axis=1, keepdims=True)

    return hidden, normalized

In [14]:
def update(x_train, w_list, b_list, y_train, lr):
    """Perform one gradient-descent step on the two-layer network.

    Runs the forward pass with ``calculate``, back-propagates the error
    through both layers, and updates the weights and biases in place.

    Args:
        x_train: Input batch, one sample per row.
        w_list: Weight matrices ``[w', w'']``; modified in place.
        b_list: Bias vectors ``[b', b'']``; modified in place.
        y_train: Target outputs, same shape as the network output.
        lr: Learning rate multiplying every gradient.

    Returns:
        The same ``(w_list, b_list)`` objects after the update.
    """
    y1, y2 = calculate(x_train, w_list, b_list)
    batch_size = x_train.shape[0]

    # Output-layer delta: batch-averaged error times the sigmoid derivative.
    # NOTE(review): calculate() divides the output activations by their row
    # sum, but y2 * (1 - y2) treats y2 as a raw sigmoid output and ignores
    # that normalization -- confirm this is the intended gradient.
    delta_out = 1 / batch_size * (y2 - y_train) * (y2 * (1 - y2))

    # Back-propagate to the hidden layer. This must be a matrix product with
    # w''^T: an elementwise product of an (N, out) array with an
    # (out, hidden) array does not broadcast for general layer sizes.
    delta_hidden = np.dot(delta_out, w_list[1].T) * (y1 * (1 - y1))

    # Compute every gradient before touching any parameter. w_list[1].T is a
    # view, so updating w_list[1] in place first would leak the new weights
    # into the hidden-layer gradient.
    grad_w_out = np.dot(y1.T, delta_out)
    grad_b_out = np.sum(delta_out, axis=0)
    grad_w_hidden = np.dot(x_train.T, delta_hidden)
    grad_b_hidden = np.sum(delta_hidden, axis=0)

    # In-place updates so callers holding references see the new values.
    b_list[1] -= lr * grad_b_out
    w_list[1] -= lr * grad_w_out
    b_list[0] -= lr * grad_b_hidden
    w_list[0] -= lr * grad_w_hidden

    return w_list, b_list

In [None]:
# See source/NN for the rest of the implementation.