In [1]:
%config Completer.use_jedi = False

In [2]:
from tensorflow import keras as keras
import seaborn as sns
import numpy as np

In [3]:
# Fetch Fashion-MNIST and divide the pixel values by 255.0.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first 5000 training samples form the validation set; the remainder
# is used for training.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [4]:
def split_dataset(X,y):
    """Split a labelled dataset into task A (all labels except 5 and 6) and task B.

    Args:
        X: array of samples, indexable with a boolean mask along its first axis.
        y: NumPy array of integer class labels aligned with ``X``.

    Returns:
        A tuple ``(X_A, X_B, y_A, y_B)``. ``y_A`` holds the task-A labels with
        every label greater than 6 lowered by 2. ``y_B`` is a float32 array
        that is 1.0 where the label is 6 and 0.0 where it is 5.
    """
    in_task_b = np.logical_or(y == 5, y == 6)
    in_task_a = np.logical_not(in_task_b)

    # Boolean-mask indexing produces a new array, so the relabelling below
    # does not touch the caller's ``y``.
    labels_a = y[in_task_a]
    above_gap = labels_a > 6
    labels_a[above_gap] = labels_a[above_gap] - 2

    labels_b = np.equal(y[in_task_b], 6).astype(np.float32)
    return (X[in_task_a], X[in_task_b], labels_a, labels_b)
# Apply the same A/B split to the training, validation and test sets.
X_train_A, X_train_B, y_train_A, y_train_B = split_dataset(X_train, y_train)
X_valid_A, X_valid_B, y_valid_A, y_valid_B = split_dataset(X_valid, y_valid)
X_test_A, X_test_B, y_test_A, y_test_B = split_dataset(X_test, y_test)

# Keep only the first 200 task-B training samples.
X_train_B, y_train_B = X_train_B[:200], y_train_B[:200]

In [5]:
# Model A: a flatten layer, five SELU hidden layers and an 8-way softmax
# output, trained on the task-A split.
model_A = keras.Sequential()
model_A.add(keras.layers.Flatten(input_shape=(28,28)))
for shape in (300,100,50,50,50):
    model_A.add(keras.layers.Dense(shape, activation='selu'))
model_A.add(keras.layers.Dense(8, activation='softmax'))
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                metrics=['accuracy'])
model_A.summary()

history = model_A.fit(X_train_A, y_train_A, epochs=20,
                    validation_data=(X_valid_A, y_valid_A))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 408       

### Transfer Learning with Keras

In [6]:

# Write model_A to "my_model.h5"; a later cell reloads it from this file.
model_A.save("my_model.h5")

In [7]:
# Model B: the same hidden stack as model A, but with a single sigmoid
# output, trained from scratch on the task-B split.
model_B = keras.Sequential()
model_B.add(keras.layers.Flatten(input_shape=(28,28)))
for shape in (300,100,50,50,50):
    model_B.add(keras.layers.Dense(shape, activation='selu'))
model_B.add(keras.layers.Dense(1, activation='sigmoid'))
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model_B.compile(loss="binary_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                metrics=['accuracy'])
model_B.summary()

history = model_B.fit(X_train_B, y_train_B, epochs=20,
                    validation_data=(X_valid_B, y_valid_B))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               235500    
_________________________________________________________________
dense_7 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_8 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_9 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_10 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 51        

In [8]:
# Reload model A from disk and build the task-B model from every layer of it
# except the last, followed by a fresh single-unit sigmoid output layer.
# The reused layer objects are passed in directly (not copied), so both
# models refer to the same layers.
model_A = keras.models.load_model("my_model.h5")
model_B_on_A = keras.models.Sequential(
    [*model_A.layers[:-1], keras.layers.Dense(1,activation='sigmoid')]
)

In [9]:
# Two 20-epoch training phases on task B: first with every reused layer
# frozen, then with those layers made trainable again. The model is
# recompiled after each change to `trainable` before fitting.
for reused_trainable in (False, True):
    for layer in model_B_on_A.layers[:-1]:
        layer.trainable = reused_trainable
    model_B_on_A.compile(loss="binary_crossentropy", optimizer="sgd",
                         metrics=["accuracy"])
    history = model_B_on_A.fit(X_train_B, y_train_B, epochs=20,
                               validation_data=(X_valid_B, y_valid_B))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
# Evaluate the task-B model built on model A's layers against the task-B test split.
model_B_on_A.evaluate(X_test_B,y_test_B)



[0.010568957775831223, 0.9980000257492065]

In [11]:
def exponential_decay(lr,s):
    """Build an exponentially decaying learning-rate schedule.

    Args:
        lr: initial learning rate (the value returned for epoch 0).
        s: number of epochs over which the rate shrinks by a factor of 10.

    Returns:
        A function mapping an epoch index to ``lr * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Use the enclosing arguments instead of hard-coded 0.01 / 20, so
        # that schedules built with other settings actually differ.
        return lr * 0.1**(epoch/s)
    return exponential_decay_fn
exponential_decay_fn = exponential_decay(0.01, 20)


In [None]:
# Callback that asks exponential_decay_fn for the learning rate to use.
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)

# 10-class classifier with two SELU hidden layers, trained with the schedule above.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[28,28]))
model.add(keras.layers.Dense(300, activation='selu'))
model.add(keras.layers.Dense(100, activation='selu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=25,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler],
)

In [13]:
def piecewise_constant_fn(epoch):
    """Return the piecewise-constant learning rate for ``epoch``.

    Returns 0.01 for epochs below 5, 0.005 for epochs below 15, and 0.001
    for every later epoch.
    """
    # (exclusive upper bound, learning rate), checked in ascending order.
    for upper_bound, rate in ((5, 0.01), (15, 0.005)):
        if epoch < upper_bound:
            return rate
    return 0.001

In [14]:
# Rebind lr_scheduler to a ReduceLROnPlateau callback configured with
# factor=0.5 and patience=5. Cells executed after this one that pass
# callbacks=[lr_scheduler] receive this callback.
# NOTE(review): no `monitor` argument is given, so the Keras default metric
# applies (presumably val_loss) — confirm.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

In [15]:
# Example of one regularized layer: 100 ELU units, "he_normal" kernel
# initialization, and an L2 kernel regularizer created with factor 0.01.
layer = keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal", kernel_regularizer = keras.regularizers.l2(0.01))

In [16]:
from functools import partial

# Dense-layer factory with the ELU activation, "he_normal" initializer and
# L2(0.01) kernel regularizer pre-filled, so callers pass only the unit count.
# The regularizer object is created once here and handed to every layer the
# factory produces.
RegularizedDense = partial(
    keras.layers.Dense,
    activation="elu",
    kernel_initializer="he_normal",   
    kernel_regularizer = keras.regularizers.l2(0.01)
)

In [24]:
# 10-class classifier whose two hidden layers come from the RegularizedDense
# factory; trained with whatever callback lr_scheduler currently refers to.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[28,28]))
model.add(RegularizedDense(300))
model.add(RegularizedDense(100))
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=25,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [25]:
# 10-class classifier with a Dropout(0.2) layer in front of every Dense layer.
dropout_stack = [keras.layers.Flatten(input_shape = [28,28])]
for n_units, activation_name in ((300, 'elu'), (100, 'elu'), (10, 'softmax')):
    dropout_stack.append(keras.layers.Dropout(0.2))
    dropout_stack.append(keras.layers.Dense(n_units, activation=activation_name))
model = keras.Sequential(dropout_stack)
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=25,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


### Exercises

In [50]:
# Fetch CIFAR-10 and divide the pixel values by 255.0. This rebinds the
# dataset variables used by the earlier Fashion-MNIST cells.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first 5000 training samples form the validation set; the remainder
# is used for training.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [44]:
# Show the shape of the training inputs.
# NOTE(review): the recorded output comes from an earlier execution (In [44])
# than the data-loading cell (In [50]); re-run to confirm the shape after the
# 5000-sample validation split.
X_train.shape

(50000, 32, 32, 3)

In [58]:
# Stop training once the monitored metric has gone 10 epochs without improving.
early_stopping = keras.callbacks.EarlyStopping(patience=10)

# Deep classifier: 20 hidden blocks of Dense(100, he_normal) ->
# BatchNormalization -> ELU, followed by a 10-way softmax output.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[32,32,3]))
for i in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer='he_normal'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation("elu"))
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
