## Work
1. 請自行定義一個 loss function，為 0.3 * focal loss + 0.7 * cross-entropy，訓練並比較結果


In [1]:
import os
import keras
from keras import backend as k
from keras.datasets import cifar10
from keras.models import Model
from keras.layers import Input, Dense, BatchNormalization, Activation
from keras.optimizers import SGD
from keras.utils import to_categorical

# Disable GPU: expose an empty CUDA device list to the process.
# NOTE(review): this runs after `import keras`; confirm the backend has not
# already initialised its devices by this point.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})

Using TensorFlow backend.


In [2]:
# Load CIFAR-10; the two items returned by load_data() are unpacked into
# `train` and `test`, each of which is split into inputs/labels further below.
(train, test) = cifar10.load_data()

In [3]:
## Data preprocessing
def preproc_x(x, flatten=True):
    """Scale inputs by 1/255 and optionally flatten every sample.

    Args:
        x: array supporting true division, ``len()`` and ``.reshape``
            (e.g. a NumPy array).
        flatten: when truthy, reshape the scaled array to ``(len(x), -1)``.

    Returns:
        A new array holding ``x / 255.``; 2-D when ``flatten`` is truthy,
        otherwise with the same shape as ``x``. The input is not modified.
    """
    scaled = x / 255.
    if not flatten:
        return scaled
    n_samples = len(scaled)
    return scaled.reshape((n_samples, -1))

def preproc_y(y, num_classes=10):
    """One-hot encode labels whose last axis has length 1.

    Args:
        y: label array exposing ``.shape``.
        num_classes: number of classes passed to ``to_categorical``.

    Returns:
        ``keras.utils.to_categorical(y, num_classes)`` when ``y.shape[-1] == 1``;
        otherwise ``y`` itself, unchanged.
    """
    needs_encoding = y.shape[-1] == 1
    if not needs_encoding:
        return y
    return keras.utils.to_categorical(y, num_classes)

In [4]:
# Split each dataset tuple into its inputs and labels
(x_train, y_train), (x_test, y_test) = train, test

# Preprocess the inputs: scale by 1/255 and flatten every sample
x_train, x_test = preproc_x(x_train), preproc_x(x_test)

# Preprocess the outputs: one-hot encode the labels
y_train, y_test = preproc_y(y_train), preproc_y(y_test)

In [5]:
def build_mlp(input_shape, output_units=10, num_neurons=(512, 256, 128)):
    """Build a fully connected classifier with batch-normalised hidden layers.

    Every hidden block is Dense -> BatchNormalization -> ReLU; the network ends
    in a softmax Dense layer.

    Args:
        input_shape: shape of one input sample, passed to ``Input``.
        output_units: width of the final softmax layer (number of classes).
        num_neurons: iterable of hidden-layer widths, in order. May be empty,
            in which case the softmax layer is applied directly to the input.

    Returns:
        An uncompiled ``Model`` mapping the input layer to the softmax output.
    """
    input_layer = Input(input_shape)
    hidden = input_layer
    for num in num_neurons:
        hidden = Dense(num)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation(activation='relu')(hidden)
    # The classifier head must be sized by `output_units`, not by the width of
    # the last hidden layer.
    output = Dense(output_units, activation='softmax')(hidden)
    model = Model(inputs=[input_layer], outputs=[output])
    return model

In [6]:
## Hyperparameter settings

# Model dimensions: shape of one sample (first axis of x_train dropped) and class count
INPUT_SHAPE = x_train.shape[1:]
OUTPUT_SHAPE = 10

# Training schedule
EPOCHS = 25
BATCH_SIZE = 1024

# Focal-loss parameters: exponent (gamma) and scale factor (alpha)
GAMMA = 2
ALPHA = 0.25

# SGD settings and the optimizer built from them
LEARNING_RATE = 1e-3
MOMENTUM = 0.95
sgd = SGD(lr=LEARNING_RATE, momentum=MOMENTUM)

In [24]:
import tensorflow as tf
import keras.backend as K

# Focal_loss = -alpha * (1 - p_t)^{gamma} * log(p_t)
    
def combined_loss(gamma, alpha, ce_weight):
    """Create a Keras loss mixing categorical cross-entropy with focal loss.

    For every sample the returned loss evaluates

        ce_weight * CE + (1 - ce_weight) * FL

    where, with one-hot targets ``y`` and predicted probabilities ``p``,

        CE = -sum_c y_c * log(p_c)
        FL = -alpha * sum_c y_c * (1 - p_c)^gamma * log(p_c)

    Args:
        gamma: focusing exponent of the focal term.
        alpha: scale factor of the focal term.
        ce_weight: weight of the cross-entropy term; the focal term receives
            ``1 - ce_weight``.

    Returns:
        A function ``custom_loss(y_true, y_pred)`` usable as ``loss=`` in
        ``model.compile``. It returns one loss value per sample.
    """
    gamma = float(gamma)
    alpha = float(alpha)
    ce_weight = float(ce_weight)
    fl_weight = 1. - ce_weight

    def custom_loss(y_true, y_pred):
        """Per-sample weighted sum of cross-entropy and focal loss.

        Both terms are summed over the class axis before they are mixed, so
        they are on the same scale and the weights apply as stated.
        """
        epsilon = 1e-7
        # Keep probabilities inside (0, 1): log() stays finite and
        # (1 - p) never becomes negative before being raised to `gamma`.
        prob = K.clip(y_pred, epsilon, 1. - epsilon)

        # Element-wise cross-entropy terms; non-zero only where y_true is.
        ce_terms = -y_true * K.log(prob)

        ce = K.sum(ce_terms, axis=-1)
        fl = alpha * K.sum(((1. - prob) ** gamma) * ce_terms, axis=-1)

        return ce_weight * ce + fl_weight * fl
    return custom_loss

# Cross-entropy weights to sweep, one experiment per entry; combined_loss
# assigns the focal term the complementary weight (1 - ce_weight).
ce_weights_list = [0.0, 0.3, 0.5, 0.7, 1]

In [26]:
import itertools
# Per-experiment training curves, keyed by "exp-<index>"
results = {}

for i, ce_w in enumerate(ce_weights_list):
    print("Numbers of exp: %i, ce_weight: %.2f" % (i, ce_w))

    # Start every experiment from an empty backend session so that graphs of
    # previously trained models do not pile up in memory.
    keras.backend.clear_session()

    model = build_mlp(input_shape=INPUT_SHAPE, output_units=OUTPUT_SHAPE)
    loss_fun = combined_loss(alpha=ALPHA, gamma=GAMMA, ce_weight=ce_w)

    # Give each model its own optimizer instance instead of sharing one
    # across experiments, so no optimizer state is carried between runs.
    optimizer = SGD(lr=LEARNING_RATE, momentum=MOMENTUM)
    model.compile(optimizer=optimizer, loss=loss_fun, metrics=['accuracy'])

    # fit() returns the History object for this run
    history = model.fit(x_train, y_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        validation_data=(x_test, y_test),
                        shuffle=True
                        ).history

    # The accuracy metric is logged as "acc" by some Keras versions and as
    # "accuracy" by others; use whichever key is present.
    acc_key = "acc" if "acc" in history else "accuracy"

    # Collect results
    exp_name_tag = ("exp-%s" % (i))
    results[exp_name_tag] = {'train-loss': history["loss"],
                             'valid-loss': history["val_loss"],
                             'train-acc': history[acc_key],
                             'valid-acc': history["val_" + acc_key]}

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as mplcm
import matplotlib.colors as colors
%matplotlib inline
# One colour per experiment, spread evenly over the 'gist_rainbow' colormap
NUM_COLORS = len(results.keys())

cm = plt.get_cmap('gist_rainbow')
cNorm  = colors.Normalize(vmin=0, vmax=NUM_COLORS-1)
scalarMap = mplcm.ScalarMappable(norm=cNorm, cmap=cm)
color_bar = [scalarMap.to_rgba(i) for i in range(NUM_COLORS)]

# One figure per metric: (title, key of the training curve, key of the validation curve)
for title, train_key, valid_key in [("Loss", 'train-loss', 'valid-loss'),
                                    ("Accuracy", 'train-acc', 'valid-acc')]:
    plt.figure(figsize=(8,6))
    for i, cond in enumerate(results.keys()):
        train_curve = results[cond][train_key]
        valid_curve = results[cond][valid_key]
        # Solid line = training, dashed line = validation; both share the
        # experiment's colour but get distinct legend entries.
        plt.plot(range(len(train_curve)), train_curve, '-', label="%s (train)" % cond, color=color_bar[i])
        plt.plot(range(len(valid_curve)), valid_curve, '--', label="%s (valid)" % cond, color=color_bar[i])
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(title)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()