In [1]:
import numpy as np

In [2]:
# Sum-of-squares error
def mean_squared_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Despite the function name, no averaging is performed: the squared
    residuals are summed over every element and the total is scaled by 0.5.
    """
    residual = y - t
    return 0.5 * np.sum(residual ** 2)

In [5]:
def cross_entropy_error(y, t):
    """Cross-entropy of prediction ``y`` against target ``t``, summed over all elements.

    A small constant is added to ``y`` before taking the logarithm so that a
    zero entry produces a large finite value instead of ``-inf``.
    """
    eps = 1e-7
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

In [8]:
import sys
sys.path.append('..')

In [10]:
from dataset.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = \
load_mnist(normalize=True, one_hot_label=True)

In [11]:
print(x_train.shape)
print(t_train.shape)

(60000, 784)
(60000, 10)


In [12]:
# Draw a random mini-batch of 10 samples from the training set.
train_size = x_train.shape[0]
batch_size = 10
# np.random.choice samples with replacement by default, so the same index
# may appear more than once in batch_mask.
batch_mask = np.random.choice(train_size, batch_size)
# Index both arrays with the same mask so inputs and targets stay paired.
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

In [13]:
batch_mask

array([29146,  4042, 42038, 31754, 38101,  8441, 54012, 51590, 46298,
       21170])

In [14]:
# t: one-hot
def cross_entropy_error(y, t):
    """Mean cross-entropy error for one-hot encoded targets.

    Args:
        y: NumPy array of predicted probabilities, either a single sample
            (1-D) or a batch with one row per sample (2-D).
        t: One-hot target array with the same shape as ``y``.

    Returns:
        The cross-entropy summed over all elements and divided by the number
        of samples (``y.shape[0]`` after a 1-D input is promoted to a batch
        of one).
    """
    # Promote a single sample to a batch of one so that shape[0] is always
    # the sample count.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # Small offset keeps log() finite when a predicted probability is exactly
    # 0; without it, 0 * log(0) evaluates to nan and poisons the whole sum.
    delta = 1e-7
    return -np.sum(t * np.log(y + delta)) / batch_size

In [18]:
# t: label
def cross_entropy_error(y, t):
    """Mean cross-entropy error for integer class-label targets.

    Args:
        y: NumPy array of predicted probabilities, either a single sample
            (1-D) or a batch with one row per sample (2-D).
        t: Integer class indices, one per sample. For a single 1-D sample a
            plain int, a 0-d array or a one-element array is accepted.

    Returns:
        The negative log of the probability assigned to each sample's correct
        class, summed and divided by the number of samples.
    """
    # Promote a single sample to a batch of one so that shape[0] is always
    # the sample count.
    if y.ndim == 1:
        t = np.asarray(t).reshape(1, -1)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    # Pair row i with column t[i]: picks the predicted probability of the
    # correct class for every sample.
    picked = y[np.arange(batch_size), t]
    # Small offset keeps log() finite when that probability is exactly 0.
    delta = 1e-7
    return -np.sum(np.log(picked + delta)) / batch_size

In [None]:
# Derivative by central difference
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    ``f`` is evaluated at ``x + step`` and ``x - step`` and the difference is
    divided by twice the step.
    """
    step = 1e-4
    forward = f(x + step)
    backward = f(x - step)
    return (forward - backward) / (2 * step)

In [None]:
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Every element of ``x`` is perturbed in place by ``+h`` and ``-h`` in turn,
    ``f`` is evaluated for each perturbation, and the element is restored
    before moving on. Arrays of any rank are supported.

    Args:
        f: Callable that is passed ``x`` and returns a scalar.
        x: NumPy array of floats. It is modified temporarily while the
            function runs and holds its original values again on return.
            (An integer array would truncate the ``h`` perturbation.)

    Returns:
        An array created with ``np.zeros_like(x)`` holding the partial
        derivative with respect to each element of ``x``.
    """
    h = 1e-4
    grad = np.zeros_like(x)

    # np.ndindex yields one index tuple per element, so the loop is valid for
    # any number of dimensions; a flat range(x.size) index is only valid for
    # 1-D input.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]

        # f(x+h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # f(x-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)

        # Restore the original value before perturbing the next element.
        x[idx] = tmp_val
    return grad

In [None]:
def function_2(x):
    """Return the sum of the squares of the elements of ``x``."""
    squared = x ** 2
    return np.sum(squared)

In [None]:
numerical_gradient(function_2, np.array([3.0, 4.0]))

In [None]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` by plain gradient descent using numerical gradients.

    Args:
        f: Callable that is passed the current point and returns a scalar.
        init_x: Starting point (array-like). It is copied, so the caller's
            object is left unchanged.
        lr: Learning rate; each step subtracts ``lr * grad`` from the point.
        step_num: Number of update steps to perform.

    Returns:
        The point reached after ``step_num`` updates, as a NumPy float array.
    """
    # Work on a private copy: the in-place update below would otherwise
    # overwrite the caller's init_x.
    x = np.array(init_x)
    # An integer array cannot hold the fractional updates (and the in-place
    # subtraction of a float array would raise), so promote it to float.
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad

    return x

In [None]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)

In [1]:
import sys
sys.path.append('..')

import numpy as np
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

In [2]:
class simpleNet:
    """Single weight matrix followed by a softmax / cross-entropy loss."""

    def __init__(self):
        # 2x3 weight matrix sampled from a standard normal (Gaussian) distribution
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the dot product of ``x`` with the weight matrix."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Cross-entropy error of the softmax of the prediction against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)
    

In [3]:
net = simpleNet()

In [4]:
print(net.W)

[[-1.27527668  0.98837035 -0.32584199]
 [ 0.73903408 -1.18023503  0.63850337]]


In [5]:
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)

[-0.10003534 -0.46918932  0.37914784]


In [6]:
np.argmax(p)

2

In [7]:
t = np.array([0, 0, 1])
net.loss(x, t)

0.7165779532416807

In [8]:
def f(W):
    """Return the loss of the global ``net`` on the global ``x`` and ``t``.

    The ``W`` argument is not used: the value depends only on the current
    contents of ``net`` at call time.
    """
    current_loss = net.loss(x, t)
    return current_loss

In [9]:
dW = numerical_gradient(f, net.W)

In [10]:
print(dW)

[[ 0.18148411  0.12546341 -0.30694752]
 [ 0.27222617  0.18819511 -0.46042128]]


In [5]:
import sys
sys.path.append('..')
import numpy as np
from common.functions import *
from common.gradient import *

In [6]:
class TwoLayerNet:
    """Two-layer network: affine -> sigmoid -> affine -> softmax."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are standard-normal samples scaled by weight_init_std;
        # biases start at zero.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass on ``x`` and return the softmax output."""
        params = self.params
        hidden = sigmoid(np.dot(x, params['W1']) + params['b1'])
        scores = np.dot(hidden, params['W2']) + params['b2']
        return softmax(scores)

    def loss(self, x, t):
        """Cross-entropy error of the prediction for input ``x`` against teacher data ``t``."""
        prediction = self.predict(x)
        return cross_entropy_error(prediction, t)

    def accuracy(self, x, t):
        """Fraction of rows whose arg-max prediction equals the arg-max of ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1 before being compared.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        return np.sum(predicted == expected) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Gradient of the loss for each parameter via the module-level ``numerical_gradient``.

        Returns a dict with the keys 'W1', 'b1', 'W2' and 'b2'.
        """
        def loss_W(W):
            # W is ignored; the loss is recomputed from self.params, which the
            # gradient routine presumably perturbs in place -- TODO confirm.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }
    


In [7]:
from dataset.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

In [13]:
from two_layer_net import TwoLayerNet

In [8]:
# Hyperparameters
iters_num = 1000  # number of mini-batch updates run by the training loop
train_size = x_train.shape[0]
batch_size = 100
lr = 0.1  # learning rate applied to every parameter update

In [14]:
network = TwoLayerNet(784, 50, 10)

In [17]:
import time

start = time.time()

# Mini-batch gradient descent: iters_num updates, logging elapsed time and
# recording the loss after every update.
for i in range(iters_num):
    # NOTE(review): the message says "epoch", but i counts mini-batch
    # iterations, not passes over the training set.
    print ('{0} epoch, {1}sec'.format(i+1, time.time() - start))
    # Sample a random mini-batch (indices are drawn with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # NOTE(review): the TwoLayerNet class defined in this notebook has no
    # gradient() method; this presumably relies on the TwoLayerNet imported
    # from two_layer_net -- confirm.
    grad = network.gradient(x_batch, t_batch)
    
    # Gradient-descent step on every parameter, in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= lr * grad[key]
    
    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    

1 epoch, 0.0001552104949951172sec
2 epoch, 0.003309965133666992sec
3 epoch, 0.004968166351318359sec
4 epoch, 0.006836891174316406sec
5 epoch, 0.0083770751953125sec
6 epoch, 0.010030031204223633sec
7 epoch, 0.012328147888183594sec
8 epoch, 0.014577150344848633sec
9 epoch, 0.016694068908691406sec
10 epoch, 0.01866316795349121sec
11 epoch, 0.020468950271606445sec
12 epoch, 0.022130250930786133sec
13 epoch, 0.023945093154907227sec
14 epoch, 0.02591705322265625sec
15 epoch, 0.027772188186645508sec
16 epoch, 0.029343128204345703sec
17 epoch, 0.031060218811035156sec
18 epoch, 0.03287696838378906sec
19 epoch, 0.03469991683959961sec
20 epoch, 0.03634500503540039sec
21 epoch, 0.038073062896728516sec
22 epoch, 0.040197134017944336sec
23 epoch, 0.04212808609008789sec
24 epoch, 0.04401397705078125sec
25 epoch, 0.045617103576660156sec
26 epoch, 0.047573089599609375sec
27 epoch, 0.049172163009643555sec
28 epoch, 0.050698280334472656sec
29 epoch, 0.052282094955444336sec
30 epoch, 0.05363297462463379se

283 epoch, 0.403217077255249sec
284 epoch, 0.4047970771789551sec
285 epoch, 0.40603208541870117sec
286 epoch, 0.4072561264038086sec
287 epoch, 0.40846824645996094sec
288 epoch, 0.40964293479919434sec
289 epoch, 0.4108290672302246sec
290 epoch, 0.41200923919677734sec
291 epoch, 0.4132421016693115sec
292 epoch, 0.4144411087036133sec
293 epoch, 0.4156031608581543sec
294 epoch, 0.416762113571167sec
295 epoch, 0.4179229736328125sec
296 epoch, 0.4190530776977539sec
297 epoch, 0.42022705078125sec
298 epoch, 0.42145395278930664sec
299 epoch, 0.4230661392211914sec
300 epoch, 0.4248340129852295sec
301 epoch, 0.42632508277893066sec
302 epoch, 0.42768406867980957sec
303 epoch, 0.42920899391174316sec
304 epoch, 0.4305598735809326sec
305 epoch, 0.4320070743560791sec
306 epoch, 0.4335150718688965sec
307 epoch, 0.4348442554473877sec
308 epoch, 0.4360790252685547sec
309 epoch, 0.4373950958251953sec
310 epoch, 0.4386422634124756sec
311 epoch, 0.43985819816589355sec
312 epoch, 0.44115209579467773sec
313 

594 epoch, 0.8067619800567627sec
595 epoch, 0.8082339763641357sec
596 epoch, 0.8096239566802979sec
597 epoch, 0.8108160495758057sec
598 epoch, 0.8120312690734863sec
599 epoch, 0.8132431507110596sec
600 epoch, 0.8144659996032715sec
601 epoch, 0.815669059753418sec
602 epoch, 0.8169162273406982sec
603 epoch, 0.8181040287017822sec
604 epoch, 0.8194222450256348sec
605 epoch, 0.8206682205200195sec
606 epoch, 0.8218789100646973sec
607 epoch, 0.8233871459960938sec
608 epoch, 0.8250889778137207sec
609 epoch, 0.8267061710357666sec
610 epoch, 0.828071117401123sec
611 epoch, 0.8294510841369629sec
612 epoch, 0.8308839797973633sec
613 epoch, 0.832237958908081sec
614 epoch, 0.8335092067718506sec
615 epoch, 0.8347833156585693sec
616 epoch, 0.8359930515289307sec
617 epoch, 0.8372552394866943sec
618 epoch, 0.8387401103973389sec
619 epoch, 0.8403429985046387sec
620 epoch, 0.8419539928436279sec
621 epoch, 0.8433902263641357sec
622 epoch, 0.8447160720825195sec
623 epoch, 0.8459591865539551sec
624 epoch, 0.

901 epoch, 1.2084541320800781sec
902 epoch, 1.2101669311523438sec
903 epoch, 1.2115540504455566sec
904 epoch, 1.2128722667694092sec
905 epoch, 1.2143831253051758sec
906 epoch, 1.215651035308838sec
907 epoch, 1.2171251773834229sec
908 epoch, 1.218473196029663sec
909 epoch, 1.219860315322876sec
910 epoch, 1.2212002277374268sec
911 epoch, 1.2225451469421387sec
912 epoch, 1.2240161895751953sec
913 epoch, 1.2256290912628174sec
914 epoch, 1.2270472049713135sec
915 epoch, 1.2287039756774902sec
916 epoch, 1.230147123336792sec
917 epoch, 1.231654167175293sec
918 epoch, 1.2330141067504883sec
919 epoch, 1.2343332767486572sec
920 epoch, 1.2356631755828857sec
921 epoch, 1.2370121479034424sec
922 epoch, 1.2385749816894531sec
923 epoch, 1.240246057510376sec
924 epoch, 1.241590976715088sec
925 epoch, 1.2428700923919678sec
926 epoch, 1.2444791793823242sec
927 epoch, 1.2458851337432861sec
928 epoch, 1.2477691173553467sec
929 epoch, 1.2495231628417969sec
930 epoch, 1.2514081001281738sec
931 epoch, 1.2530

In [20]:
# Hyperparameters and result lists for the longer training run
iters_num = 10000  # number of mini-batch updates run by the training loop
train_size = x_train.shape[0]
batch_size = 100
lr = 0.1  # learning rate applied to every parameter update
train_loss_list = []
train_acc_list = []
test_acc_list = []
# Number of mini-batch iterations counted as one epoch (never less than 1).
iter_per_epoch = max(int(train_size / batch_size), 1)

In [22]:
network = TwoLayerNet(784, 50, 10)

In [28]:
# NOTE(review): relies on `time` having been imported by an earlier cell.
start = time.time()

# Mini-batch gradient descent with an accuracy check every iter_per_epoch
# iterations.
for i in range(iters_num):
    # Sample a random mini-batch (indices are drawn with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # NOTE(review): the TwoLayerNet class defined in this notebook has no
    # gradient() method; this presumably relies on the TwoLayerNet imported
    # from two_layer_net -- confirm.
    grad = network.gradient(x_batch, t_batch)
    
    # Gradient-descent step on every parameter, in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= lr * grad[key]
    
    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    # Compute accuracy once per epoch (also fires at i == 0), using the full
    # training and test arrays.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)        
        print('{0} epoch, {1:.3f}'.format(int(i / iter_per_epoch), time.time() - start))
        print('train acc: {0}, test acc: {1}'.format(train_acc, test_acc))

0 epoch, 0.521
train acc: 0.9481666666666667, test acc: 0.9459
1 epoch, 1.680
train acc: 0.9493666666666667, test acc: 0.9468
2 epoch, 2.851
train acc: 0.9506333333333333, test acc: 0.9467
3 epoch, 4.033
train acc: 0.9520666666666666, test acc: 0.9478
4 epoch, 5.256
train acc: 0.9530333333333333, test acc: 0.9496
5 epoch, 6.504
train acc: 0.9547166666666667, test acc: 0.9502
6 epoch, 7.718
train acc: 0.95575, test acc: 0.9506
7 epoch, 8.983
train acc: 0.9567333333333333, test acc: 0.9518
8 epoch, 10.197
train acc: 0.9574833333333334, test acc: 0.9527
9 epoch, 11.456
train acc: 0.9586333333333333, test acc: 0.953
10 epoch, 12.675
train acc: 0.95955, test acc: 0.9536
11 epoch, 13.877
train acc: 0.9606666666666667, test acc: 0.9549
12 epoch, 15.116
train acc: 0.9614166666666667, test acc: 0.9558
13 epoch, 16.389
train acc: 0.9621333333333333, test acc: 0.9564
14 epoch, 17.614
train acc: 0.96275, test acc: 0.9571
15 epoch, 18.826
train acc: 0.9641166666666666, test acc: 0.9579
16 epoch, 20