# In [None]:
import numpy as np
from keras.datasets import mnist
from keras.utils    import np_utils, to_categorical
from keras.models   import Sequential
from keras.models   import Model
from keras.layers   import Dense, Activation
from keras.layers   import Add, Lambda

# Number of epochs passed to every fit() call in main() (teacher and distillation).
NUM_EPOCH      = 2
# Mini-batch size passed to every fit() call in main().
NUM_BATCH_SIZE = 32

#----------------------------------------------------
# MNISTデータを返す
#----------------------------------------------------
def get_mnist():
    """Load MNIST and prepare it for a fully connected network.

    Each image is flattened to a 784-element row and converted to float32
    in the 0.0-1.0 range; labels are one-hot encoded over 10 classes.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``, using the same nesting
        as ``mnist.load_data()``.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten every image into a 1-D vector. The number of samples is read
    # from the array itself rather than hard-coded as 60000 / 10000.
    x_train = x_train.reshape(len(x_train), 784)
    x_test = x_test.reshape(len(x_test), 784)

    # Convert to float32 and rescale pixel values from 0-255 to 0.0-1.0.
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # One-hot encode the labels; `to_categorical` is already imported
    # directly, so there is no need to go through `np_utils`.
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    return (x_train, y_train), (x_test, y_test)

#----------------------------------------------------
# 教師モデルを返す(複雑なモデル)
#----------------------------------------------------
def get_model_teacher():
    """Build and compile the teacher network (the more complex model).

    Layer stack: 784 inputs -> Dense(50) -> sigmoid -> Dense(20) -> sigmoid
    -> Dense(10) -> softmax. Every activation, including the final softmax,
    is a separate ``Activation`` layer.

    Returns:
        A ``Sequential`` model compiled with the SGD optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Dense(50, input_dim=784),
        Activation('sigmoid'),
        Dense(20),
        Activation('sigmoid'),
        Dense(10),
        Activation('softmax'),
    ]
    teacher = Sequential(layer_stack)
    teacher.compile(
        loss='categorical_crossentropy',
        optimizer='sgd',
        metrics=['accuracy'],
    )
    return teacher

#----------------------------------------------------
# 温度付きソフトマックス層を追加する
#----------------------------------------------------
def get_softmax_temperature(model, temperature):
    """Return a model whose output is a temperature-scaled softmax.

    The logits feeding ``model``'s final softmax are divided by
    ``temperature`` and passed through a fresh softmax, producing softened
    probabilities. The new model is built on ``model``'s existing tensors,
    and ``model`` itself is left unmodified.

    Args:
        model: A model whose last layer is a standalone softmax
            ``Activation`` (as ``get_model_teacher`` builds it), so that
            ``model.layers[-2]`` outputs the raw logits.
        temperature: Divisor applied to the logits. Must be non-zero,
            because the logits are divided by it.

    Returns:
        A new, uncompiled ``Model`` from ``model.input`` to the softened
        probabilities.
    """
    # Read the logits from the layer just before the final softmax.
    # Calling `model.layers.pop()` first is unreliable: in Keras versions
    # where `layers` returns a copy of the list, nothing is removed and
    # `layers[-1]` is still the softmax, so the temperature would be applied
    # to probabilities instead of logits. Where the pop does mutate the
    # list, it silently alters the caller's model.
    logits = model.layers[-2].output

    # Softened probabilities: softmax(logits / T).
    logits_T = Lambda(lambda x: x / temperature)(logits)
    prob_T = Activation('softmax')(logits_T)

    return Model(model.input, prob_T)

#----------------------------------------------------
# 生徒モデルを返す（簡素化モデル)
#----------------------------------------------------
def get_model_student():
    """Build and compile the student network (the simplified model).

    Layer stack: 784 inputs -> Dense(10) -> softmax.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, mean
        squared error loss and the accuracy metric.
    """
    student = Sequential([
        Dense(10, input_dim=784),
        Activation('softmax'),
    ])
    student.compile(
        loss='mean_squared_error',
        optimizer='Adam',
        metrics=['accuracy'],
    )
    return student


#----------------------------------------------------
# 補助関数
#----------------------------------------------------
def negative_activation(x):
    """Return ``-x``.

    Helper used as an activation function to flip the sign of a layer's
    output, so that adding it to another tensor yields their difference.
    """
    negated = -x
    return negated



#----------------------------------------------------
# メイン関数
#----------------------------------------------------
def main():
    """Train a teacher on MNIST, then distil it into a smaller student.

    Steps:
      1. Train the teacher on the hard (one-hot) labels and print its
         test-set score.
      2. Replace the teacher's softmax with a temperature softmax and freeze
         the teacher.
      3. Build a combined model that outputs
         ``teacher_output - student_output`` and train it towards zero, so
         the student learns the teacher's soft targets.
      4. Print the student's test-set score.
    """
    # Fetch the MNIST data.
    (x_train, y_train), (x_test, y_test) = get_mnist()

    # --- Train the teacher model on hard targets -------------------------
    teacher = get_model_teacher()
    teacher.fit(x_train, y_train,
                batch_size=NUM_BATCH_SIZE,
                epochs=NUM_EPOCH,
                verbose=1,
                validation_data=(x_test, y_test))

    score = teacher.evaluate(x_test, y_test, verbose=0)
    print(score)

    # --- Train the student model -----------------------------------------

    # Swap the teacher's softmax for a temperature softmax (T = 10).
    teacher = get_softmax_temperature(teacher, 10)

    # Freeze the teacher BEFORE the combined model is compiled. Keras fixes
    # the set of trainable weights at compile time, so flipping `trainable`
    # afterwards would leave the teacher's weights being updated.
    for layer in teacher.layers:
        layer.trainable = False

    student = get_model_student()

    # Difference layer: teacher output + (-student output).
    negated_student = Activation(negative_activation)(student.output)
    diff = Add()([teacher.output, negated_student])

    # Combined model: (teacher input, student input) -> difference.
    model = Model(inputs=[teacher.input, student.input], outputs=[diff])
    model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['acc'])

    # The student should reproduce the teacher's soft targets, so the
    # difference is trained towards zero. The target shape is derived from
    # the data, and the real labels in `y_train` are left untouched.
    zero_targets = np.zeros((x_train.shape[0], y_train.shape[1]))

    model.fit([x_train, x_train], [zero_targets],
              batch_size=NUM_BATCH_SIZE,
              epochs=NUM_EPOCH,
              verbose=1)

    score = student.evaluate(x_test, y_test, verbose=0)
    print(score)

# Run the experiment only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()