In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from common.layers import *
from collections import OrderedDict
from common.gradient import numerical_gradient
from common.trainer import Trainer

fashion_mnist = tf.keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_train.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(60000,)


In [2]:
from dataset.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

In [3]:
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

In [4]:
X_train.shape

(60000, 28, 28)

In [5]:
X_train = X_train.reshape(60000, 1, 28, 28)
X_test = X_test.reshape(10000, 1, 28, 28)

In [6]:
X_train = X_train / X_train.max()
X_test = X_test / X_test.max()
print(X_train.max())
print(X_test.max())

1.0
1.0


In [7]:
num = np.unique(y_train, axis=0)
num = num.shape[0]
y_train = np.eye(num)[y_train]
print()




In [8]:
num = np.unique(y_test, axis=0)
num = num.shape[0]
y_test = np.eye(num)[y_test]

In [9]:
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)

In [10]:
class SGD:
    def __init__(self, lr = 0.01):
        self.lr = lr
        
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]

In [11]:
class Momentum:
    def __init__(self, lr = 0.01, momentum = 0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None
        
    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
                
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

In [12]:
class Adagrad:
    def __init__(self, lr = 0.01):
        self.lr = lr
        self.h = None
        
    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
                
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

In [18]:
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
        
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x
    
    def loss(self, x, t):
        y = self.predict(x)
        
        return self.last_layer.forward(y, t)
    
    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        
        acc = 0.0
        
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt) 
        
        return acc / x.shape[0]
    
    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
            
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads
    

In [19]:
network = SimpleConvNet(input_dim=(1,28,28), 
                        conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, X_train, y_train, X_test, y_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)

In [20]:
trainer.train()

train loss:2.2994064543605113
=== epoch:1, train acc:0.125, test acc:0.113 ===
train loss:2.2969476664107433
train loss:2.290298036571933
train loss:2.280443650557045
train loss:2.266592769850869
train loss:2.253694651645621
train loss:2.2244360315845295
train loss:2.1924547579626497
train loss:2.1551794207239614
train loss:2.104732418242105
train loss:2.0533324049405848
train loss:1.997900488539371
train loss:1.9396078698513777
train loss:1.8483585754960268
train loss:1.7710527961405713
train loss:1.7187800910171926
train loss:1.5833417307437379
train loss:1.549970989200826
train loss:1.4976883647556365
train loss:1.322835993528585
train loss:1.2490945199808694
train loss:1.276173025861903
train loss:1.080280620560267
train loss:1.1915264038996956
train loss:1.1422544057682387
train loss:1.080236211944789
train loss:1.1108539518923315
train loss:1.158492616157045
train loss:0.9948541303966054
train loss:0.9869303608008373
train loss:1.0658707246966732
train loss:0.9397610180226619
tra

KeyboardInterrupt: 

In [21]:
plt.plot(np.arange(0, 10000, 600), train_acc_list)
plt.title('SGD')
plt.show

NameError: name 'train_acc_list' is not defined