In [None]:
from tensorflow import keras

### Implementing transfer learning

In [None]:
# Load the previously trained model A from disk.
model_A = keras.models.load_model("my_model_A.h5")

## Create a new model based on A: reuse every layer of A except its last one
## and put a single sigmoid unit on top as the new output layer.
model_B = keras.models.Sequential(
    [*model_A.layers[:-1], keras.layers.Dense(1, activation="sigmoid")]
)
## model_B is built from model_A's own layer objects, so the two models share
## those layers (and their weights): training model_B also changes model_A.



In [None]:
### To keep model_A from being modified when its layers are reused,
### work on an independent copy: clone the architecture, then copy the weights over.

model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

In [None]:
### Make the reused layers (all but the last one) untrainable.
# NOTE(review): model_B, built from model_A's layers in an earlier cell, is never
# compiled or trained in this notebook; model_A_clone is frozen and compiled here
# instead - confirm which model is intended.
for layer in model_A_clone.layers[:-1]:
    layer.trainable = False

# Compile after changing `trainable` so the new setting is used during training.
model_A_clone.compile(
    loss="binary_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

In [None]:
### Train the model for a few epochs on the task-B data.
# X_train_B (capital B) matches the naming of y_train_B / X_valid_B / y_valid_B;
# validation_data is passed as the documented (x_val, y_val) tuple.
history = model_A_clone.fit(
    X_train_B,
    y_train_B,
    epochs=4,
    validation_data=(X_valid_B, y_valid_B),
)

# SGD optimizer with a learning rate of 1e-4. It is created here but not attached
# to any model in this cell.
# NOTE(review): presumably meant for a later re-compile - confirm.
optimizer = keras.optimizers.SGD(learning_rate=1e-4)

### Faster optimizers
#### momentum optimizer

In [1]:
# SGD with momentum 0.9; `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.SGD(learning_rate=0.0001, momentum=0.9)

NameError: name 'keras' is not defined

### Nesterov 

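The Nesterov variant of the momentum update measures the gradient slightly ahead, in the direction of the momentum, which reduces oscillations.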

In [None]:
# SGD with Nesterov momentum; `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

### Ada Grad
- Achieves faster convergence by scaling the gradient vector down along the steepest dimensions, which points the updates more directly toward the global optimum


### RMSProp

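- AdaGrad runs the risk of slowing down a bit too fast and never converging to the global minimum.

RMSProp fixes this by accumulating only the gradients from the most recent iterations (via exponential decay) instead of all gradients since the start of training.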

In [None]:
# rho = decay rate
# `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.RMSprop(learning_rate=0.0001, rho=0.9)

### Adam and Nadam

Adam - adaptive moment estimation (combines the ideas of both RMSProp and momentum optimization)

- Typically the best



Nadam is Adam with the Nesterov trick; it sometimes converges slightly faster than Adam.



In [None]:
# Adam with explicit beta_1 / beta_2; `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999)

### learning rate scheduling

#### Power scheduling

In [5]:
# Power scheduling through the optimizer's `decay` argument.
# The original `k1e-4` was a typo: it parses as `k1e - 4` and fails with a NameError.
# NOTE(review): `decay` is a legacy optimizer argument; newer Keras releases may
# require keras.optimizers.schedules.InverseTimeDecay instead - confirm against
# the installed version.
optimizer = keras.optimizers.Adam(learning_rate=0.01, decay=1e-4)

#### Exponential scheduling

In [6]:
def exponential_decay(lr0, s):
    """Build an exponential learning-rate schedule.

    Args:
        lr0: Initial learning rate, i.e. the value returned for epoch 0.
        s: Number of epochs over which the rate is divided by 10.

    Returns:
        A function that maps an epoch index to ``lr0 * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Scale the caller-supplied lr0; it was previously ignored in favour
        # of a hard-coded 0.01.
        return lr0 * 0.1 ** (epoch / s)
    return exponential_decay_fn

In [7]:
# Calling the factory returns the inner schedule function; the result is only displayed, not stored.
exponential_decay(lr0=10, s=4)

<function __main__.exponential_decay.<locals>.exponential_decay_fn(epoch)>

In [None]:
# Schedule starting at a learning rate of 0.01 with s = 20 epochs.
exponential_decay_fn = exponential_decay(lr0=0.01, s=20)

### Create a learning rate scheduler

In [None]:
# Wrap the schedule function in a callback so fit() can apply it during training.
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)

# NOTE(review): `model` is only defined in later cells of this notebook, and no
# `epochs` argument is passed here - confirm the execution order and the
# intended number of epochs.
history = model.fit(X_train, y_train, callbacks=[lr_scheduler])

In [None]:
### using schedules class

### Performance Scheduling 

In [None]:
# Performance scheduling: multiply the learning rate by 0.5 once the monitored
# metric has stopped improving for 5 epochs. This rebinds the `lr_scheduler`
# name used by the earlier LearningRateScheduler cell.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

In [None]:
# Number of steps in 20 epochs (batch size = 32).
# The comment is kept on its own line: previously it wrapped and left a stray
# `32)` on the next line, which is a SyntaxError.
s = 20 * len(X_train) // 32
# Start at 0.01 and multiply the learning rate by 0.1 every `s` steps.
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=s,
    decay_rate=0.1,
)

In [None]:
# Pass the schedule object in place of a constant learning rate.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

### Regularization 

##### Lasso and ridge regularization

In [None]:
# Dense layer with an l2 (ridge) penalty of 0.01 on its kernel weights.
# The keyword is `kernel_regularizer` (singular); the original `kernel_regularizers`
# is not an argument of Dense.
layer = keras.layers.Dense(
    100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
)

In [None]:
from functools import partial

# Dense-layer factory carrying the settings shared by the layers below:
# ELU activation, He-normal initialization and an l2(0.01) kernel penalty.
RegularizedDense = partial(
    keras.layers.Dense,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
)

model = keras.models.Sequential(
    [
        keras.layers.Flatten(input_shape=[28, 28]),
        RegularizedDense(300),
        RegularizedDense(100),
        # The output layer overrides the activation and initializer but keeps
        # the l2 penalty supplied by the partial.
        RegularizedDense(
            10,
            activation="softmax",
            kernel_initializer="glorot_uniform",
        ),
    ]
)

### using dropout as a regularization technique

In [None]:
# Same layer stack as a literal list, built incrementally: a Dropout layer
# (rate 0.2) sits in front of every Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(10, activation="softmax"))