In [1]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import numpy as np

## 7.4 합성곱/풀링 계층 구현하기
### 7.4.1 4차원 배열

In [2]:
# Random 4-D batch; the three expressions below display (10, 1, 28, 28),
# (1, 28, 28) and the 28x28 values of the first channel of the first sample.
x = np.random.rand(10, 1, 28, 28)
x.shape       # whole batch
x[0].shape    # first sample
x[0][0]       # first sample, first channel; x[0, 0] selects the same data


(10, 1, 28, 28)

(1, 28, 28)

array([[0.74889139, 0.27610254, 0.34137729, 0.61994367, 0.91737665,
        0.89751647, 0.8079344 , 0.19678818, 0.98692891, 0.171172  ,
        0.0807035 , 0.3568013 , 0.74575297, 0.90463588, 0.31491693,
        0.04738363, 0.44858268, 0.71847949, 0.39784587, 0.76465392,
        0.07571548, 0.6193311 , 0.43459609, 0.05375284, 0.95126941,
        0.6655919 , 0.68171682, 0.83473206],
       [0.36177897, 0.37282158, 0.09157972, 0.44478709, 0.03796736,
        0.70124527, 0.99660157, 0.6407871 , 0.495408  , 0.76084226,
        0.21273683, 0.24002209, 0.13891527, 0.32957805, 0.02826579,
        0.48590079, 0.42947526, 0.64052464, 0.06660279, 0.07260489,
        0.79982405, 0.86805562, 0.98171806, 0.44989194, 0.70685372,
        0.84967202, 0.41717162, 0.12290211],
       [0.32414234, 0.52357886, 0.64517254, 0.09667518, 0.10166888,
        0.66071083, 0.08356545, 0.95413696, 0.33926739, 0.38648055,
        0.76011033, 0.51766856, 0.48281695, 0.39659225, 0.01956227,
        0.05381265, 0.3362

### 7.4.3 합성곱 계층 구현하기

In [3]:
import sys, os
sys.path.append(os.pardir)
from common.util import im2col

# Single sample laid out as (number of samples, channels, height, width).
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)

# Same layout with a batch of 10 samples.
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)

(9, 75)
(90, 75)


In [4]:
class Convolution:
    """Convolution layer (forward pass only) built on im2col and one matrix product."""

    def __init__(self, W, b, stride=1, pad=0):
        # forward() unpacks W.shape as
        # (filter count, channels, filter height, filter width).
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Apply the stored filters to a 4-D batch.

        x.shape is unpacked as (samples, channels, height, width); the result
        is arranged as (samples, filter count, out_h, out_w).
        """
        num_filters, _, filt_h, filt_w = self.W.shape
        batch, _, in_h, in_w = x.shape

        # Spatial extent of the output along each axis.
        out_h = int(1 + (in_h + 2 * self.pad - filt_h) / self.stride)
        out_w = int(1 + (in_w + 2 * self.pad - filt_w) / self.stride)

        patches = im2col(x, filt_h, filt_w, self.stride, self.pad)
        kernels = self.W.reshape(num_filters, -1).T  # one flattened filter per column
        flat = np.dot(patches, kernels) + self.b

        # Rows are regrouped as (samples, out_h, out_w, filters); the filter
        # axis is then moved to position 1.
        return flat.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)


In [21]:
class Pooling:
    """Max-pooling layer (forward pass only) built on im2col.

    Parameters
    ----------
    pool_h, pool_w : int
        Height and width of the pooling window.
    stride : int
        Step between consecutive windows.
    pad : int
        Padding forwarded to im2col. The border values im2col inserts take
        part in the max (presumably zeros - confirm against common.util.im2col).
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Return the per-window maximum of a 4-D batch.

        x.shape is unpacked as (N, C, H, W); the result has shape
        (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape

        # The output size must account for the padding handed to im2col below;
        # otherwise the final reshape does not match the number of windows
        # whenever pad > 0. Same formula as Convolution.forward.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # (1) Expand: regroup so that each row holds one window of one channel.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # (2) Maximum of every window.
        out = np.max(col, axis=1)

        # (3) Reshape to (N, out_h, out_w, C), then move channels to axis 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out


In [30]:
from collections import OrderedDict
from common.layers import Relu, Affine, SoftmaxWithLoss, Pooling, Convolution
class SimpleConvNet:
    """Small CNN: Conv1 -> Relu1 -> Pool1 (2x2, stride 2) -> Affine1 -> Relu2 -> Affine2.

    A SoftmaxWithLoss layer is kept separately as ``last_layer`` and is only
    used by ``loss``/``gradient``.

    Parameters
    ----------
    input_dim : tuple
        (channels, height, width) of one sample. A 2-tuple (channels, size) is
        still accepted and treated as a square input.
    conv_param : dict
        Keys 'filter_num', 'filter_size', 'pad', 'stride'. Only read, never
        modified.
    hidden_size : int
        Width of the first fully connected layer.
    output_size : int
        Width of the final fully connected layer.
    weight_init_std : float
        Scale applied to the standard-normal initial weights.

    Raises
    ------
    ValueError
        If the convolution output is smaller than the pooling window.
    """

    def __init__(self, input_dim = (1,28,28),
                 conv_param={'filter_num':30, 'filter_size':5,
                            'pad':0, 'stride':1},
                 hidden_size = 100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']

        input_h = input_dim[1]
        input_w = input_dim[2] if len(input_dim) > 2 else input_h

        # Window and stride of Pool1; shared by the size arithmetic and the layer.
        pool_size, pool_stride = 2, 2

        # Integer sizes per axis. Truncating a float product instead gives a
        # wrong W2 row count whenever the conv output is odd or non-square.
        conv_h = self._out_size(input_h, filter_size, filter_stride, filter_pad)
        conv_w = self._out_size(input_w, filter_size, filter_stride, filter_pad)
        if min(conv_h, conv_w) < pool_size:
            raise ValueError(
                "convolution output %dx%d is smaller than the %dx%d pooling window"
                % (conv_h, conv_w, pool_size, pool_size))
        pool_out_h = self._out_size(conv_h, pool_size, pool_stride)
        pool_out_w = self._out_size(conv_w, pool_size, pool_stride)
        pool_output_size = filter_num * pool_out_h * pool_out_w

        self.params = {}
        # Convolution weights
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        # Fully connected weights
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Insertion order is the forward order; gradient() walks it in reverse.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=pool_size, pool_w=pool_size, stride=pool_stride)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    @staticmethod
    def _out_size(size, window, stride, pad=0):
        """Number of window positions along one axis (floor division)."""
        return (size + 2 * pad - window) // stride + 1

    def predict(self, x):
        """Feed x through every layer in ``self.layers`` and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``last_layer.forward`` applied to the prediction for x and the targets t."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Run one forward and one backward pass; return the gradients keyed like ``self.params``."""
        # Forward pass (the layers are expected to cache what backward needs).
        self.loss(x, t)

        # Backward pass, starting from the loss layer.
        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the parameterised layers stored on themselves.
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db

        return grads

    def accuracy(self, x, t):
        """Fraction of samples in x whose arg-max prediction equals the label.

        t may be a 1-D array of class indices; any other rank is reduced with
        arg-max over axis 1 first.
        """
        y = self.predict(x)
        y = np.argmax(y, axis = 1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])

        return accuracy






In [37]:
# Load the data
from dataset.mnist import MnistDataloader
mnist_dataloader = MnistDataloader()
(x_train, t_train), (x_test, t_test) = mnist_dataloader.load_data()

network = SimpleConvNet()

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.001

train_loss_list = []
train_acc_list = []  # stays empty: training-set accuracy is not evaluated below
test_acc_list = []

# Integer number of iterations per epoch. With true division this is a float,
# and `i % iter_per_epoch == 0` would only hold at i == 0 whenever train_size
# is not an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)
print(iter_per_epoch)

# (number of samples, channels = 1 (grayscale), height, width)
x_train = x_train.reshape(-1, 1, 28, 28)
x_test = x_test.reshape(-1, 1, 28, 28)


for i in range(iters_num):
    # Progress marker every 100 iterations (i is an iteration index, not an epoch).
    if i % 100 == 0:
        print(f"epoch {i}")

    # Draw a random mini-batch (indices are sampled with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation
    grad = network.gradient(x_batch, t_batch)

    # Plain gradient-descent update, applied in place to the arrays in network.params
    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, measured after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: report the loss and the accuracy on the full test set
    if i % iter_per_epoch == 0:
        print("loss: ",loss)

        test_acc = network.accuracy(x_test, t_test)
        test_acc_list.append(test_acc)
        print("test_acc: ", test_acc)


600.0
epoch 0
loss:  2.304784320814451
test_acc:  0.0846
epoch 100
epoch 200
epoch 300
epoch 400
epoch 500
epoch 600
loss:  0.11589606454499154
test_acc:  0.94
epoch 700
epoch 800
epoch 900
epoch 1000
epoch 1100
epoch 1200
loss:  0.11527281013289763
test_acc:  0.9584
epoch 1300
epoch 1400
epoch 1500
epoch 1600
epoch 1700
epoch 1800
loss:  0.08976647872153644
test_acc:  0.9668
epoch 1900
epoch 2000
epoch 2100
epoch 2200
epoch 2300
epoch 2400
loss:  0.029601223826169275
test_acc:  0.9717
epoch 2500
epoch 2600
epoch 2700
epoch 2800
epoch 2900
epoch 3000
loss:  0.03241272556792776
test_acc:  0.9771
epoch 3100
epoch 3200
epoch 3300
epoch 3400
epoch 3500
epoch 3600
loss:  0.028027080486256795
test_acc:  0.9773
epoch 3700
epoch 3800
epoch 3900
epoch 4000
epoch 4100
epoch 4200
loss:  0.03084554204923514
test_acc:  0.9803
epoch 4300
epoch 4400
epoch 4500
epoch 4600
epoch 4700
epoch 4800
loss:  0.03527169840896817
test_acc:  0.982
epoch 4900
epoch 5000
epoch 5100
epoch 5200
epoch 5300
epoch 5400

KeyboardInterrupt: 