In [17]:
import keras
import tensorflow

# Record the library versions this notebook was executed with.
print(f'keras=={keras.__version__}')
print(f'tensorflow=={tensorflow.__version__}')

keras==2.3.1
tensorflow==2.0.0


In [18]:
import sys
# Relative locations of the Fashion-MNIST helper scripts and of the raw data files.
mnist_utils_path = '../fashion-mnist/utils/'
mnist_data_path  = '../fashion-mnist/data/fashion/'
# Put the helper directory on the import path so modules located there can be imported.
sys.path.append(mnist_utils_path)

In [19]:
from bokeh.plotting import output_notebook
from image_utils import show_image_grid

# Configure bokeh to render its plots inline in the notebook.
output_notebook()

In [20]:
import numpy as np
from mnist_reader import load_mnist

# Read both Fashion-MNIST splits as (images, labels) pairs.
train, train_labels = load_mnist(mnist_data_path, kind='train')
test, test_labels = load_mnist(mnist_data_path, kind='t10k')

# The class count is the number of distinct labels seen in the training split.
n_classes = len(np.unique(train_labels))

# Report the shape of each array, followed by the class count.
for array in (train, train_labels, test, test_labels):
    print(array.shape)
print(n_classes)

(60000, 784)
(60000,)
(10000, 784)
(10000,)
10


In [21]:
# First 12 training rows viewed as 28x28 arrays, together with their labels.
image_samples = train[:12].reshape(-1, 28, 28)
label_samples = train_labels[:12]

# Draw the samples as a grid. rot90=2 is forwarded to the helper; its exact
# effect is defined in image_utils (presumably a 180-degree rotation — TODO confirm).
rows = show_image_grid(image_samples, label_samples, rot90=2)

In [22]:
from keras import backend as K

# Display the backend's image layout setting; the reshape cell below branches on it.
K.image_data_format()

'channels_last'

In [23]:
# Pick the per-image shape according to where the backend expects the channel axis.
channels_first = K.image_data_format() == 'channels_first'
input_shape = (1, 28, 28) if channels_first else (28, 28, 1)

# Give every flat 784-value row an explicit single-channel image shape.
train_X = train.reshape((train.shape[0],) + input_shape)
test_X = test.reshape((test.shape[0],) + input_shape)

# Summary of the data before scaling.
print(f'range = ({train_X.min()}, {train_X.max()})')
print(f'dtype = {train_X.dtype}')
print(f'train_X shape = {train_X.shape}')
print(f'test_X  shape = {test_X.shape}')

# Convert to float32 and divide by 255 so pixel values lie in [0, 1].
train_X, test_X = (
    images.astype(np.float32) / 255
    for images in (train_X, test_X)
)

# Summary of the data after scaling.
print(f'\nrange = ({train_X.min()}, {train_X.max()})')
print(f'dtype = {train_X.dtype}')
print(f'train_X shape = {train_X.shape}')
print(f'test_X  shape = {test_X.shape}')

range = (0, 255)
dtype = uint8
train_X shape = (60000, 28, 28, 1)
test_X  shape = (10000, 28, 28, 1)

range = (0.0, 1.0)
dtype = float32
train_X shape = (60000, 28, 28, 1)
test_X  shape = (10000, 28, 28, 1)


In [24]:
# One-hot encode the integer labels of both splits into n_classes columns.
train_y, test_y = (
    keras.utils.to_categorical(labels, n_classes)
    for labels in (train_labels, test_labels)
)

print(f'train_y shape = {train_y.shape}')
print(f'test_y  shape = {test_y.shape}')

train_y shape = (60000, 10)
test_y  shape = (10000, 10)


In [25]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Two conv + max-pool stages followed by a three-layer dense classifier head.
# The layers are created in the same order as before, so their auto-generated
# names (conv2d_1, max_pooling2d_1, ...) are unchanged.
cnn = Sequential([
    Conv2D(
        32, (3, 3),
        strides=(1, 1),
        activation='relu',
        use_bias=True,
        kernel_initializer='glorot_uniform',
        input_shape=input_shape,
    ),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot targets and softmax output.
cnn.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [26]:
# Print the layer-by-layer output shapes and parameter counts of the CNN.
cnn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 800)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               102528    
_________________________________________________________________
dense_2 (Dense)              (None, 64)               

In [27]:
# Print floats with 7 decimals and without scientific notation (this setting
# is global and also affects later cells).
np.set_printoptions(precision=7, suppress=True)

# Run the untrained network once on the first three test images and reuse the
# result, instead of repeating the forward pass for each print.
untrained_probs = cnn.predict(test_X[:3])

print(f'shape of output = {untrained_probs.shape}')
# Transposed so that each column is one sample and each row one class.
print(untrained_probs.T)

shape of output = (3, 10)
[[0.1031251 0.0863935 0.1017048]
 [0.0949227 0.0992664 0.1013836]
 [0.1133611 0.1208772 0.1104651]
 [0.0985312 0.1006509 0.0973843]
 [0.097416  0.1104402 0.115727 ]
 [0.1100124 0.1270575 0.1147457]
 [0.1094344 0.1006733 0.1066365]
 [0.087649  0.0800953 0.0921294]
 [0.0965499 0.0949707 0.0830492]
 [0.0889982 0.0795749 0.0767745]]


In [28]:
from time import time


# Training hyper-parameters; the feed-forward model further down reuses them.
batch_size, epochs = 128, 20

# Wall-clock timing of the whole fit call.
train_start = time()

history = cnn.fit(
    train_X, train_y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,  # 0: silent, 1: progress bar, 2: one line per epoch
    validation_data=(test_X, test_y),
)

train_time = time() - train_start

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
 - 25s - loss: 0.5968 - accuracy: 0.7830 - val_loss: 0.4439 - val_accuracy: 0.8409
Epoch 2/20
 - 23s - loss: 0.3871 - accuracy: 0.8604 - val_loss: 0.3554 - val_accuracy: 0.8737
Epoch 3/20
 - 24s - loss: 0.3320 - accuracy: 0.8798 - val_loss: 0.3358 - val_accuracy: 0.8752
Epoch 4/20
 - 24s - loss: 0.3000 - accuracy: 0.8907 - val_loss: 0.3307 - val_accuracy: 0.8809
Epoch 5/20
 - 24s - loss: 0.2794 - accuracy: 0.8974 - val_loss: 0.3169 - val_accuracy: 0.8863
Epoch 6/20
 - 25s - loss: 0.2592 - accuracy: 0.9046 - val_loss: 0.3026 - val_accuracy: 0.8875
Epoch 7/20
 - 24s - loss: 0.2444 - accuracy: 0.9092 - val_loss: 0.2769 - val_accuracy: 0.9002
Epoch 8/20
 - 29s - loss: 0.2307 - accuracy: 0.9133 - val_loss: 0.2761 - val_accuracy: 0.9001
Epoch 9/20
 - 26s - loss: 0.2179 - accuracy: 0.9180 - val_loss: 0.2808 - val_accuracy: 0.8986
Epoch 10/20
 - 23s - loss: 0.2083 - accuracy: 0.9226 - val_loss: 0.2780 - val_accuracy: 0.8996
Epoch 11/

In [29]:
# Time a full evaluation pass over the test split; `score` holds the values
# returned by evaluate (loss followed by the compiled metrics).
test_start = time()
score = cnn.evaluate(test_X, test_y)
test_time = time() - test_start



In [30]:
# Report the measured durations (the `.5` format keeps up to 5 significant
# digits) and the evaluation result.
print(f'train time = {train_time:.5} sec')
print(f'test  time = {test_time:.5} sec')
print(f'evaluation score = {score}')

train time = 482.85 sec
test  time = 1.003 sec
evaluation score = [0.2949613790631294, 0.9114999771118164]


In [31]:
# Run the trained network once on the first three test images and reuse the
# result, instead of repeating the forward pass for each print.
trained_probs = cnn.predict(test_X[:3])

print(f'shape of output = {trained_probs.shape}')
# Transposed so that each column is one sample and each row one class.
print(trained_probs.T)

shape of output = (3, 10)
[[0.        0.        0.       ]
 [0.        0.        1.       ]
 [0.        0.9999948 0.       ]
 [0.        0.        0.       ]
 [0.        0.0000052 0.       ]
 [0.000001  0.        0.       ]
 [0.        0.        0.       ]
 [0.0000092 0.        0.       ]
 [0.        0.        0.       ]
 [0.9999897 0.        0.       ]]


In [34]:
# Print the kernel and bias shapes of every convolutional / dense layer.
for layer in cnn.layers:
    # Select layers by type rather than by the auto-generated name prefix, so
    # that layers given an explicit name are not silently skipped.
    if not isinstance(layer, (Conv2D, Dense)):
        continue
    # get_weights() returns plain NumPy arrays: the kernel first, then the
    # bias when the layer has one.
    kernel, *rest = layer.get_weights()
    bias_shape = rest[0].shape if rest else None
    print(f'[{layer.name}] weights: {kernel.shape}, bias: {bias_shape}')

[conv2d_1] weights: (3, 3, 1, 32), bias: (32,)
[conv2d_2] weights: (3, 3, 32, 32), bias: (32,)
[dense_1] weights: (800, 128), bias: (128,)
[dense_2] weights: (128, 64), bias: (64,)
[dense_3] weights: (64, 10), bias: (10,)


In [35]:
def get_hidden_vectors(model, inputs, mode=0):
    """Evaluate the output of every layer of ``model`` for ``inputs``.

    Args:
        model: Model whose ``layers`` are traversed; the input tensor of the
            first layer is used as the feed point.
        inputs: Batch of samples fed to the first layer.
        mode: Learning-phase flag handed to the backend function.
            0 = evaluation mode, 1 = training mode (differs when dropout
            is used).

    Returns:
        A pair ``(hiddens, names)``: the per-layer outputs and the matching
        layer names, both in layer order.
    """
    layers = model.layers
    feed_tensors = [layers[0].input, K.learning_phase()]
    fetch_tensors = [lyr.output for lyr in layers]

    # One backend function that returns all layer outputs in a single call.
    run_layers = K.function(feed_tensors, fetch_tensors)
    activations = run_layers([inputs, mode])

    layer_names = [lyr.name for lyr in layers]
    return activations, layer_names

# Collect the output of every CNN layer for the first three test images.
hiddens, names = get_hidden_vectors(cnn, test_X[:3])

print(f'num layers = {len(hiddens)}\n')
# Layer names are padded to 16 characters so the shapes line up in a column.
for h, name in zip(hiddens, names):
    print(f'[{name:16}]: {h.shape}')

num layers = 8

[conv2d_1        ]: (3, 26, 26, 32)
[max_pooling2d_1 ]: (3, 13, 13, 32)
[conv2d_2        ]: (3, 11, 11, 32)
[max_pooling2d_2 ]: (3, 5, 5, 32)
[flatten_1       ]: (3, 800)
[dense_1         ]: (3, 128)
[dense_2         ]: (3, 64)
[dense_3         ]: (3, 10)


In [36]:
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show
from bokeh.models import SingleIntervalTicker

# Since bokeh 1.4.0 the `legend` keyword has been renamed to `legend_label`.
def linechart(x, y, line_color='grey', line_width=3, legend_label=None, p=None, title=None):
    """Add one dashed line to a bokeh figure, creating the figure if needed.

    Args:
        x: x coordinates of the line.
        y: y coordinates of the line.
        line_color: Colour of the line.
        line_width: Width of the line.
        legend_label: Legend entry for the line. When ``None`` the line is
            drawn without a legend entry.
        p: Existing figure to draw on. When ``None`` a new 800x400 figure
            with an 'Epoch' x-axis and integer tick spacing is created.
        title: Title of the new figure; unused when ``p`` is given.

    Returns:
        The figure that was drawn on, so further lines can be added to it.
    """
    if p is None:
        p = figure(width=800, height=400, title=title)
        p.xaxis.axis_label = 'Epoch'
        p.xaxis.ticker = SingleIntervalTicker(interval=1)
        p.xaxis.major_label_standoff = 1

    line_kwargs = dict(line_color=line_color, line_width=line_width,
                       alpha=0.8, line_dash=(4, 4))
    # bokeh rejects a non-string legend_label, so only forward the keyword
    # when the caller actually supplied one.
    if legend_label is not None:
        line_kwargs['legend_label'] = legend_label

    p.line(x=x, y=y, **line_kwargs)
    return p

In [37]:
# Plot every metric recorded during the CNN fit against the epoch index,
# one palette colour per metric.
title = 'Training performance of CNN'
curves = history.history
x = np.arange(len(curves['loss']))

p = None  # the first linechart() call creates the figure, later calls reuse it
for color, legend in zip(Spectral4, curves):
    p = linechart(x, curves[legend], line_color=color, legend_label=legend, p=p, title=title)
show(p)

In [38]:
# Feed-forward baseline: 784 -> 200 -> 50 -> n_classes.
ff = Sequential([
    Dense(200, activation='relu', name='hidden_1', input_shape=(784,)),
    Dense(50, activation='relu', name='hidden_2'),
    Dense(n_classes, activation='softmax', name='output'),
])
ff.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# The dense network takes flat vectors, so collapse each image to one row.
train_flat = train_X.reshape(train_X.shape[0], -1)
test_flat = test_X.reshape(test_X.shape[0], -1)

history = ff.fit(
    train_flat, train_y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,  # 0: silent, 1: progress bar, 2: one line per epoch
    validation_data=(test_flat, test_y),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
 - 2s - loss: 0.5288 - accuracy: 0.8166 - val_loss: 0.4443 - val_accuracy: 0.8431
Epoch 2/20
 - 2s - loss: 0.3829 - accuracy: 0.8624 - val_loss: 0.3844 - val_accuracy: 0.8631
Epoch 3/20
 - 2s - loss: 0.3416 - accuracy: 0.8753 - val_loss: 0.3697 - val_accuracy: 0.8665
Epoch 4/20
 - 2s - loss: 0.3176 - accuracy: 0.8831 - val_loss: 0.3764 - val_accuracy: 0.8632
Epoch 5/20
 - 1s - loss: 0.2984 - accuracy: 0.8896 - val_loss: 0.3423 - val_accuracy: 0.8793
Epoch 6/20
 - 2s - loss: 0.2868 - accuracy: 0.8944 - val_loss: 0.3516 - val_accuracy: 0.8760
Epoch 7/20
 - 1s - loss: 0.2720 - accuracy: 0.8991 - val_loss: 0.3571 - val_accuracy: 0.8690
Epoch 8/20
 - 2s - loss: 0.2599 - accuracy: 0.9040 - val_loss: 0.3310 - val_accuracy: 0.8788
Epoch 9/20
 - 1s - loss: 0.2485 - accuracy: 0.9064 - val_loss: 0.3357 - val_accuracy: 0.8841
Epoch 10/20
 - 2s - loss: 0.2388 - accuracy: 0.9106 - val_loss: 0.3402 - val_accuracy: 0.8821
Epoch 11/20
 - 1s -

In [39]:
# Plot every metric recorded during the feed-forward fit against the epoch
# index, one palette colour per metric.
title = 'Training performance of Feed-forward h=(200,50)'
curves = history.history
x = np.arange(len(curves['loss']))

p = None  # the first linechart() call creates the figure, later calls reuse it
for color, legend in zip(Spectral4, curves):
    p = linechart(x, curves[legend], line_color=color, legend_label=legend, p=p, title=title)
show(p)

In [40]:
# Print the layer-by-layer output shapes and parameter counts of the feed-forward model.
ff.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 200)               157000    
_________________________________________________________________
hidden_2 (Dense)             (None, 50)                10050     
_________________________________________________________________
output (Dense)               (None, 10)                510       
Total params: 167,560
Trainable params: 167,560
Non-trainable params: 0
_________________________________________________________________


In [41]:
# Print the kernel and bias shapes of each layer of the feed-forward model.
for dense in ff.layers:
    kernel_shape = dense.weights[0].numpy().shape
    bias_shape = dense.bias.numpy().shape
    # Names are padded to 9 characters so the columns line up.
    print(f'[{dense.name:9}] weights: {kernel_shape}, bias: {bias_shape}')

[hidden_1 ] weights: (784, 200), bias: (200,)
[hidden_2 ] weights: (200, 50), bias: (50,)
[output   ] weights: (50, 10), bias: (10,)


In [46]:
# Save the trained feed-forward model to an HDF5 file in the working directory.
ff.save('keras_ff.h5')

In [47]:
from keras.models import load_model

# Reload the saved model and evaluate it on the flattened test images to check
# that it survives the save/load round trip.
ff_loaded = load_model('keras_ff.h5')
ff_loaded.evaluate(
    test_X.reshape(test_X.shape[0],-1), test_y,
    verbose = 0
)

[0.34319481167793275, 0.8855000138282776]