In [1]:
# Reload edited modules automatically before each cell runs, so changes to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:
import numpy as np
import sys
sys.path.append('utils/')

import keras
from keras import optimizers
from image_preprocessing_ver2 import ImageDataGenerator
from keras.applications.mobilenet import preprocess_input
from keras.models import Model
from keras.layers import Lambda, concatenate, Activation
from keras.losses import categorical_crossentropy as logloss
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy
from keras import backend as K

from mobilenet import get_mobilenet

In [3]:
# Root data directory. The cells below read train_logits.npy / val_logits.npy
# and the train/ and val/ image folders from it. The trailing slash matters
# because paths are built by plain string concatenation.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
data_dir = '/home/ubuntu/data/'

In [4]:
# Load the logits objects for the train and validation sets.
# Indexing with [()] unwraps the single element of a 0-d array
# (presumably a dict written with np.save -- confirm against the script that
# produced these files).
# allow_pickle=True is required for object arrays on NumPy >= 1.16.3, where
# the default became False and np.load would otherwise raise ValueError.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
train_logits = np.load(data_dir + 'train_logits.npy', allow_pickle=True)[()]
val_logits = np.load(data_dir + 'val_logits.npy', allow_pickle=True)[()]

In [5]:
# One shared generator config for both splits: channels-last images run
# through MobileNet's preprocess_input.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    data_format='channels_last',
)

# flow_from_directory comes from the local image_preprocessing_ver2 module and
# takes the split's logits as its second positional argument.
# The train iterator is created first, then the validation one.
train_generator, val_generator = (
    data_generator.flow_from_directory(
        data_dir + split_name, split_logits,
        target_size=(224, 224),
        batch_size=64,
    )
    for split_name, split_logits in (
        ('train', train_logits),
        ('val', val_logits),
    )
)

Found 25600 images belonging to 256 classes.
Found 5120 images belonging to 256 classes.


In [6]:
# Softmax temperature for distillation: both the model's logits and the soft
# part of the targets are divided by this value before the softened softmax.
temperature = 10

In [17]:
# Build the student network from the MobileNet returned by get_mobilenet():
# drop its final layer, train only the layer that produces the logits, and
# attach two softmax heads (plain and temperature-softened) for distillation.
model = get_mobilenet()

# The final layer is discarded and the layer before it is treated as the
# logits layer. Indexing with [-2] replaces `model.layers.pop()`: the result is
# identical where `layers` is a plain mutable list, and it stays correct on
# Keras versions where `model.layers` returns a copy (there pop() silently does
# nothing, so the softmax output would be mistaken for logits).
# NOTE(review): assumes get_mobilenet() ends with [..., logits, softmax] -- confirm.
logits_layer = model.layers[-2]

# Freeze everything below the logits layer.
for layer in model.layers[:-2]:
    layer.trainable = False

# NOTE(review): Keras registers regularization losses when a layer's weights
# are created, so assigning the attribute on an already-built layer likely has
# no effect on the training loss -- confirm, and if L2 is wanted either pass
# the regularizer when the layer is constructed or add the penalty explicitly.
logits_layer.kernel_regularizer = keras.regularizers.l2(1e-3)

# Head 1: ordinary class probabilities (used for the hard-label loss/accuracy).
logits = logits_layer.output
probabilities = Activation('softmax')(logits)

# Head 2: probabilities from temperature-scaled logits (used for the soft loss).
logits_T = Lambda(lambda x: x/temperature)(logits)
probabilities_T = Activation('softmax')(logits_T)

# Single output tensor: [probabilities | probabilities_T] along the last axis.
output = concatenate([probabilities, probabilities_T])
model = Model(model.input, output)

In [9]:
# Weight on the hard-label cross-entropy term of the distillation loss; the
# soft-target term is added with an implicit weight of 1.
lambda_const = 0.1

In [18]:
def knowledge_distillation_loss(y_true, y_pred):
    """Distillation loss: weighted hard-label term plus soft-target term.

    Both tensors are split at column 256. The first part of ``y_true`` is
    compared with the first part of ``y_pred``; the second part of ``y_true``
    is divided by the global ``temperature``, passed through a softmax, and
    compared with the second part of ``y_pred``.

    Returns ``lambda_const * hard_loss + soft_loss`` using categorical
    cross-entropy for both terms.
    """
    # Targets: [hard labels | values to be softened] -- presumably one-hot
    # labels followed by teacher logits; verify against the data generator.
    hard_targets = y_true[:, :256]
    soft_source = y_true[:, 256:]

    # Predictions: [plain softmax | temperature-softened softmax].
    hard_predictions = y_pred[:, :256]
    soft_predictions = y_pred[:, 256:]

    soft_targets = K.softmax(soft_source/temperature)

    hard_loss = logloss(hard_targets, hard_predictions)
    soft_loss = logloss(soft_targets, soft_predictions)
    return lambda_const*hard_loss + soft_loss

In [20]:
def accuracy(y_true, y_pred):
    """Categorical accuracy over the first 256 columns of targets and predictions.

    The remaining columns (the soft-target half used by the distillation loss)
    are ignored.
    """
    return categorical_accuracy(y_true[:, :256], y_pred[:, :256])

In [21]:
# Train with Adam on the distillation loss; report accuracy on the hard-label
# half of the output only.
model.compile(
    loss=knowledge_distillation_loss,
    metrics=[accuracy],
    optimizer=optimizers.Adam(lr=1e-3),
)

In [22]:
# Run 10 epochs of 150 generator batches each, validating on 16 batches per
# epoch. Batches are produced by a single worker without multiprocessing.
model.fit_generator(
    train_generator,
    epochs=10,
    steps_per_epoch=150,
    validation_data=val_generator,
    validation_steps=16,
    workers=1,
    use_multiprocessing=False,
    max_queue_size=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 