# Setup

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

%load_ext tensorboard

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Reusing Pretrained Layers

## Data

In [None]:
def split_dataset(X, y):
    """Split a labelled dataset into task A (8 classes) and task B (binary).

    Task B holds the samples labelled 5 or 6 (sandals or shirts) with a
    float32 target: 1.0 for class 6 (shirt), 0.0 for class 5. Task A holds
    every other sample, with labels 7, 8, 9 shifted down to 5, 6, 7 so that
    the remaining class indices are contiguous.

    Args:
        X: array of samples, indexed here with a boolean mask over its
            first axis.
        y: NumPy array of integer class labels aligned with `X`. It is not
            modified.

    Returns:
        ((X_A, y_A), (X_B, y_B)) -- samples and labels for each task.
    """
    is_task_B = (y == 5) | (y == 6)
    is_task_A = ~is_task_B

    # Boolean-mask indexing returns a copy, so the in-place shift below
    # leaves the caller's `y` untouched.
    labels_A = y[is_task_A]
    labels_A[labels_A > 6] -= 2

    # Binary target for task B: is it a shirt (class 6)?
    labels_B = (y[is_task_B] == 6).astype(np.float32)

    task_A = (X[is_task_A], labels_A)
    task_B = (X[is_task_B], labels_B)
    return task_A, task_B

In [None]:
# One dict per array family, keyed by split name ('train' / 'test').
X, y = {}, {}
X_A, y_A = {}, {}
X_B, y_B = {}, {}

(X['train'], y['train']), (X['test'], y['test']) = keras.datasets.fashion_mnist.load_data()

# Scale the pixel values by 1/255 (presumably from 0..255 down to [0, 1]).
X['train'] = X['train'] / 255.0
X['test'] = X['test'] / 255.0


In [None]:
# Partition both splits into task A (8 classes) and task B (sandal vs. shirt).
(X_A['train'], y_A['train']), (X_B['train'], y_B['train']) = split_dataset(X['train'], y['train'])
(X_A['test'], y_A['test']), (X_B['test'], y_B['test']) = split_dataset(X['test'], y['test'])

# Keep only the first 200 task-B training samples (small-data setting for task B).
X_B['train'], y_B['train'] = X_B['train'][:200], y_B['train'][:200]

## Model A (for 8 classes)

In [None]:
def create_model_A():
    """Build the (uncompiled) dense classifier used for the 8-class task.

    Architecture: a Flatten layer for 28x28 inputs, then five hidden stages
    of 300, 100, 50, 50 and 50 units -- each a bias-free SELU Dense layer
    followed by BatchNormalization -- and an 8-unit softmax output layer.

    Returns:
        A new `keras.Sequential` model; the caller is expected to compile it.
    """
    stack = [keras.layers.Flatten(input_shape=[28,28])]

    for units in (300, 100, 50, 50, 50):
        stack.append(keras.layers.Dense(units, activation='selu', use_bias=False))
        stack.append(keras.layers.BatchNormalization())

    stack.append(keras.layers.Dense(8, activation='softmax'))
    return keras.Sequential(stack)

# Instantiate and compile the 8-class model; labels are integer class
# indices, hence the sparse categorical loss.
model_A = create_model_A()
model_A.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model_A.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_19 (Flatten)        (None, 784)               0         
                                                                 
 dense_45 (Dense)            (None, 300)               235200    
                                                                 
 batch_normalization_18 (Bat  (None, 300)              1200      
 chNormalization)                                                
                                                                 
 dense_46 (Dense)            (None, 100)               30000     
                                                                 
 batch_normalization_19 (Bat  (None, 100)              400       
 chNormalization)                                                
                                                                 
 dense_47 (Dense)            (None, 50)              

In [None]:
# Train on task A, holding out 30% of the training data for validation.
# restore_best_weights=True rolls the model back to the epoch with the best
# validation loss when early stopping triggers; without it the weights kept
# (and later saved) are those from `patience` epochs past the best one.
history = model_A.fit(X_A['train'], y_A['train'],
                      validation_split=0.3,
                      epochs=100,
                      callbacks=[keras.callbacks.EarlyStopping(patience=10,
                                                               restore_best_weights=True)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


In [None]:
# Persist the trained task-A model to "my_model_A.h5"; the transfer-learning
# cell reloads it from this file.
model_A.save("my_model_A.h5")

## Training a model for binary classification (model B)

In [None]:
# Baseline for task B: same architecture as model A, trained from scratch.
# NOTE(review): this reuses the 8-unit softmax head although task-B labels are
# only 0/1 -- consider a single sigmoid unit with binary cross-entropy so the
# baseline matches the transfer model's head.
model_B = create_model_A()

model_B.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the from-scratch baseline on task B. The previous version of this
# cell re-fitted model_A on the task-A data, leaving model_B untrained.
history = model_B.fit(X_B['train'], y_B['train'],
                      validation_split=0.3,
                      epochs=100,
                      callbacks=[keras.callbacks.EarlyStopping(patience=10)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


## Reusing A's weights

In [None]:

# Reload the saved task-A model from disk and reuse every layer except its
# output layer as the body of the task-B model.
transfer_A_model = keras.Sequential(keras.models.load_model('my_model_A.h5').layers[:-1])

# Freeze the reused layers before the new head is added, so that only the
# new output layer is trainable.
for layer in transfer_A_model.layers:
    layer.trainable = False

# New binary head for task B.
transfer_A_model.add(keras.layers.Dense(1, activation='sigmoid'))

# Compile after changing `trainable` so the setting takes effect.
transfer_A_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

transfer_A_model.summary()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_19 (Flatten)        (None, 784)               0         
                                                                 
 dense_45 (Dense)            (None, 300)               235200    
                                                                 
 batch_normalization_18 (Bat  (None, 300)              1200      
 chNormalization)                                                
                                                                 
 dense_46 (Dense)            (None, 100)               30000     
                                                                 
 batch_normalization_19 (Bat  (None, 100)              400       
 chNormalization)                                                
                                                                 
 dense_47 (Dense)            (None, 50)              

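Note that `transfer_A_model` is built from the layers of a freshly loaded copy of model A (`keras.models.load_model('my_model_A.h5')`), so it does not share layers with the in-memory `model_A`. If we had built it directly from `model_A.layers[:-1]` instead, the two models would share layers, and training one would update both. To avoid that, we would need to build `transfer_A_model` on top of a clone of `model_A`: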

```
>> model_A = keras.models.load_model("my_model_A.h5")
>> model_A_clone = keras.models.clone_model(model_A)
>> model_A_clone.set_weights(model_A.get_weights())
```


In [None]:
# Train the transfer model on the small task-B training set (the reused
# layers are frozen), with 30% held out for validation.
transfer_A_model.fit(
    X_B['train'],
    y_B['train'],
    epochs=100,
    validation_split=0.3,
    callbacks=[keras.callbacks.EarlyStopping(patience=10)],
)