## Work
1. 請比較 SGD optimizer 不同的 momentum 及使用 nesterov 與否的表現

In [None]:
import os
import keras

# This assignment does not require a GPU (enable one yourself if you want to use it).
# NOTE(review): the original note said the GPU is set to "none", but the value
# below is "1", which exposes CUDA device index 1 only — confirm which is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
# Load CIFAR-10 through Keras as a (train, test) pair; each element is
# unpacked into inputs and labels further down.
train, test = keras.datasets.cifar10.load_data()

In [None]:
## Data preprocessing
def preproc_x(x, flatten=True):
    """Scale inputs by 1/255 and optionally flatten each sample.

    Args:
        x: Array-like supporting division, ``len`` and ``reshape``.
        flatten: When true, reshape the result to ``(len(x), -1)`` so every
            sample becomes a single row.

    Returns:
        The scaled (and possibly flattened) array.
    """
    scaled = x / 255.
    return scaled.reshape((len(scaled), -1)) if flatten else scaled

def preproc_y(y, num_classes=10):
    """One-hot encode labels whose last axis has length 1.

    Args:
        y: Label array exposing ``shape``.
        num_classes: Number of classes passed to
            ``keras.utils.to_categorical``.

    Returns:
        ``y`` unchanged when its last axis is not of length 1; otherwise the
        result of ``keras.utils.to_categorical(y, num_classes)``.
    """
    # Anything whose trailing axis is not a single label column is passed through.
    if y.shape[-1] != 1:
        return y
    return keras.utils.to_categorical(y, num_classes)

In [None]:
# Split each dataset pair into its inputs and labels.
(x_train, y_train), (x_test, y_test) = train, test

# Preprocess inputs: scale by 1/255 and flatten each sample (preproc_x defaults).
x_train, x_test = preproc_x(x_train), preproc_x(x_test)

# Preprocess labels: convert to one-hot vectors.
y_train, y_test = preproc_y(y_train), preproc_y(y_test)

In [None]:
def build_mlp(input_shape, output_units=10, num_neurons=(512, 256, 128)):
    """Build a fully-connected softmax classifier with the Keras functional API.

    Args:
        input_shape: Shape forwarded to ``keras.layers.Input`` (callers in
            this file pass ``x_train.shape[1:]``).
        output_units: Number of units in the softmax output layer.
        num_neurons: Iterable with the width of each hidden ReLU layer, in
            order. May be empty, in which case the output layer is attached
            directly to the input layer.

    Returns:
        An uncompiled ``keras.models.Model`` mapping the input layer to the
        softmax output.
    """
    input_layer = keras.layers.Input(input_shape)
    # Start from the input so the loop needs no first-iteration special case
    # and `x` is defined even when no hidden layers are requested.
    x = input_layer
    for i, n_units in enumerate(num_neurons, start=1):
        x = keras.layers.Dense(units=n_units, activation='relu',
                               name='hidden_layer' + str(i))(x)
    out = keras.layers.Dense(units=output_units, activation='softmax', name='output')(x)
    model = keras.models.Model(inputs=[input_layer], outputs=[out])
    return model

In [None]:
"""Code Here
設定超參數
"""
# Hyperparameters shared by the training cells below.
# Number of epochs passed to model.fit.
EPOCHS = 50
# Learning rate passed to the SGD optimizer.
LEARNING_RATE = 0.01
# Mini-batch size passed to model.fit.
BATCH_SIZE = 256
# Momentum values to compare; one training run is made per value.
MOMENTUM = [0, 0.5, 0.95]
# Nesterov on/off settings for the comparison.
# NOTE(review): the training cells set the Nesterov flag themselves and do
# not read this list — confirm whether it is still needed.
NESTEROV = [True, False]

In [None]:
results = {}
"""Code Here
撰寫你的訓練流程並將結果用 dictionary 紀錄
"""

# This cell covers the runs with Nesterov momentum disabled. The flag is named
# once so the log line and the optimizer always agree.
use_nesterov = False

for moment in MOMENTUM:
    # Clear Keras session state before building the next model.
    keras.backend.clear_session()
    print('experiment with MOMENTUM= %.6f, NESTEROV= %s' % (moment, use_nesterov))
    model = build_mlp(input_shape=x_train.shape[1:])
    model.summary()
    optimizer = keras.optimizers.SGD(learning_rate=LEARNING_RATE,
                                     momentum=moment,
                                     nesterov=use_nesterov)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    # NOTE: the test split doubles as the validation set here.
    history = model.fit(x_train, y_train,
                        epochs=EPOCHS, batch_size=BATCH_SIZE,
                        validation_data=(x_test, y_test), shuffle=True)

    # Collect the per-epoch curves from the History object returned by fit.
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']
    train_acc = history.history['accuracy']
    valid_acc = history.history['val_accuracy']

    # One entry per momentum value; the plotting cells rely on these keys.
    exp_name_tag = 'exp-momentum-%s' % str(moment)
    results[exp_name_tag] = {'train-loss': train_loss,
                             'valid-loss': valid_loss,
                             'train-accu': train_acc,
                             'valid-accu': valid_acc}

In [None]:
results1 = {}

# This cell covers the runs with Nesterov momentum enabled. The flag is named
# once so the log line and the optimizer always agree.
use_nesterov = True

for moment in MOMENTUM:
    # Clear Keras session state before building the next model.
    keras.backend.clear_session()
    print('experiment with MOMENTUM= %.6f, NESTEROV= %s' % (moment, use_nesterov))
    model = build_mlp(input_shape=x_train.shape[1:])
    model.summary()
    optimizer = keras.optimizers.SGD(learning_rate=LEARNING_RATE,
                                     momentum=moment,
                                     nesterov=use_nesterov)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    # NOTE: the test split doubles as the validation set here.
    history = model.fit(x_train, y_train,
                        epochs=EPOCHS, batch_size=BATCH_SIZE,
                        validation_data=(x_test, y_test), shuffle=True)

    # Collect the per-epoch curves from the History object returned by fit.
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']
    train_acc = history.history['accuracy']
    valid_acc = history.history['val_accuracy']

    # One entry per momentum value; the plotting cells rely on these keys.
    exp_name_tag = 'exp-momentum-%s' % str(moment)
    results1[exp_name_tag] = {'train-loss': train_loss,
                              'valid-loss': valid_loss,
                              'train-accu': train_acc,
                              'valid-accu': valid_acc}

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
"""Code Here
將結果繪出
"""
color_bar = ['r', 'y', 'g', 'b', 'm', 'k']

# One figure per metric: (key suffix used in the results dict, figure title).
for metric, title in (('loss', 'Loss with Nesterov = False'),
                      ('accu', 'Accuracy with Nesterov = False')):
    plt.figure(figsize=(8, 6))
    for i, (cond, curves) in enumerate(results.items()):
        # Cycle through the palette so more than six runs cannot raise IndexError.
        color = color_bar[i % len(color_bar)]
        # With only y given, matplotlib uses the epoch index as x.
        # Solid = training curve, dashed = validation curve; the labels say
        # which is which so the legend entries are distinguishable.
        plt.plot(curves['train-' + metric], '-', label='%s (train)' % cond, color=color)
        plt.plot(curves['valid-' + metric], '--', label='%s (valid)' % cond, color=color)
    plt.title(title)
    plt.xlabel('epoch')
    plt.legend()
    plt.show()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
"""Code Here
將結果繪出
"""
color_bar = ['r', 'y', 'g', 'b', 'm', 'k']

# One figure per metric: (key suffix used in the results1 dict, figure title).
for metric, title in (('loss', 'Loss with Nesterov = True'),
                      ('accu', 'Accuracy with Nesterov = True')):
    plt.figure(figsize=(8, 6))
    # Iterate results1 itself (not results) so the keys always match the
    # dictionary being plotted.
    for i, (cond, curves) in enumerate(results1.items()):
        # Cycle through the palette so more than six runs cannot raise IndexError.
        color = color_bar[i % len(color_bar)]
        # With only y given, matplotlib uses the epoch index as x.
        # Solid = training curve, dashed = validation curve; the labels say
        # which is which so the legend entries are distinguishable.
        plt.plot(curves['train-' + metric], '-', label='%s (train)' % cond, color=color)
        plt.plot(curves['valid-' + metric], '--', label='%s (valid)' % cond, color=color)
    plt.title(title)
    plt.xlabel('epoch')
    plt.legend()
    plt.show()