In [31]:
import tensorflow as tf
import numpy as np

In [32]:
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.fashion_mnist.load_data()

In [33]:
xtrain, xtest = xtrain/255.0, xtest/255.0
xval, yval = xtrain[50000:], ytrain[50000:]
xtrain, ytrain = xtrain[:50000], ytrain[:50000]

## 1. Testing different activations
### 1.1 Relu

In [34]:
tf.random.set_seed(42)
np.random.seed(42)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, activation='relu',kernel_initializer='he_normal'),
    tf.keras.layers.Dense(100, activation='relu',kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
relu_acc, relu_valacc = history.history['accuracy'], history.history['val_accuracy']
relu_loss, relu_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### 1.2 LeakyRELU

In [35]:
tf.random.set_seed(42)
np.random.seed(42)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer='he_normal'),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer='he_normal'),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
leaky_relu_acc, leaky_relu_valacc = history.history['accuracy'], history.history['val_accuracy']
leaky_relu_loss, leaky_relu_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### 1.3 Parametric ReLU

In [15]:
tf.random.set_seed(42)
np.random.seed(42)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer='he_normal'),
    tf.keras.layers.PReLU(),
    tf.keras.layers.Dense(100, kernel_initializer='he_normal'),
    tf.keras.layers.PReLU(),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
prelu_acc, prelu_valacc = history.history['accuracy'], history.history['val_accuracy']
prelu_loss, prelu_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### 1.4 ELU

In [16]:
tf.random.set_seed(42)
np.random.seed(42)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, activation = 'elu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(100, activation = 'elu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
elu_acc, elu_valacc = history.history['accuracy'], history.history['val_accuracy']
elu_loss, elu_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### 1.5 SELU

In [17]:
tf.random.set_seed(42)
np.random.seed(42)

pixel_means = xtrain.mean(axis=0, keepdims=True)
pixel_stds = xtrain.std(axis=0, keepdims=True)
xtrain_scaled = (xtrain - pixel_means) / pixel_stds
xval_scaled = (xval - pixel_means) / pixel_stds
xtest_scaled = (xtest - pixel_means) / pixel_stds

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, activation = 'selu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(100, activation = 'selu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain_scaled, ytrain, validation_data = (xval_scaled, yval), epochs = 10, batch_size = 256)
selu_acc, selu_valacc = history.history['accuracy'], history.history['val_accuracy']
selu_loss, selu_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
import seaborn as sns
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(20, 4))
plt.subplot(121)
plt.title("Accuracy")
sns.lineplot(np.arange(10), relu_acc, label='RELU')
sns.lineplot(np.arange(10), leaky_relu_acc, label='Leaky_RELU')
sns.lineplot(np.arange(10), prelu_acc, label='PRELU')
sns.lineplot(np.arange(10), elu_acc, label='ELU')
sns.lineplot(np.arange(10), selu_acc, label='SELU')

plt.subplot(122)
plt.title("Validation Accuracy")
sns.lineplot(np.arange(10), relu_valacc, label='RELU')
sns.lineplot(np.arange(10), leaky_relu_valacc, label='Leaky_RELU')
sns.lineplot(np.arange(10), prelu_valacc, label='PRELU')
sns.lineplot(np.arange(10), elu_valacc, label='ELU')
sns.lineplot(np.arange(10), selu_valacc, label='SELU')


ModuleNotFoundError: No module named 'seaborn'

## Observations

* Despite running for short epochs, SELU clearly outperforming other variants
* As the usual trend goes, we see SELU > ELU > PRELU, LeakyRelu > RELU

## 2. Batch Normalization

In [19]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, activation = 'relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation = 'relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation = 'softmax')    
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
bn_acc, bn_valacc = history.history['accuracy'], history.history['val_accuracy']
bn_loss, bn_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_10 (Flatten)         (None, 784)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 784)               3136      
_________________________________________________________________
dense_30 (Dense)             (None, 300)               235500    
_________________________________________________________________
batch_normalization_1 (Batch (None, 300)               1200      
_________________________________________________________________
dense_31 (Dense)             (None, 100)               30100     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)               400       
_________________________________________________________________
dense_32 (Dense)             (None, 10)              

* The number of parameters in batch norm are 4*number of features. 4 corresponding to $\gamma, \beta$ (the scaling and shifting of normalized inputs), $\mu, \sigma$ (moving avg of mean, std dev of training data)

In [21]:
fig = plt.figure(figsize=(20, 4))
plt.subplot(121)
plt.title("Accuracy")
sns.lineplot(np.arange(10), relu_acc, label='RELU')
sns.lineplot(np.arange(10), selu_acc, label='SELU')
sns.lineplot(np.arange(10), bn_acc, label='Batch_Norm_RELU')

plt.subplot(122)
plt.title("Validation Accuracy")
sns.lineplot(np.arange(10), relu_valacc, label='RELU')
sns.lineplot(np.arange(10), selu_valacc, label='SELU')
sns.lineplot(np.arange(10), bn_valacc, label='Batch_Norm_RELU')

NameError: name 'plt' is not defined

### Observations

* SELU and BatchNorm are almost similar performance. They indeed do similar transformation

* If you use SELU it does take care of normalizations and separate BatchNorm layer is not required

In [22]:
### Batch Norm without Bias

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(100, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(10, activation="softmax")
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain, ytrain, validation_data = (xval, yval), epochs = 10, batch_size = 256)
bn1_acc, bn1_valacc = history.history['accuracy'], history.history['val_accuracy']
bn1_loss, bn1_valloss = history.history['loss'], history.history['val_loss']

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
fig = plt.figure(figsize=(20, 4))
plt.subplot(121)
plt.title("Accuracy")
sns.lineplot(np.arange(10), relu_acc, label='RELU')
sns.lineplot(np.arange(10), selu_acc, label='SELU')
sns.lineplot(np.arange(10), bn_acc, label='Batch_Norm_RELU')
sns.lineplot(np.arange(10), bn1_acc, label='Batch_Norm1_RELU')

plt.subplot(122)
plt.title("Validation Accuracy")
sns.lineplot(np.arange(10), relu_valacc, label='RELU')
sns.lineplot(np.arange(10), selu_valacc, label='SELU')
sns.lineplot(np.arange(10), bn_valacc, label='Batch_Norm_RELU')
sns.lineplot(np.arange(10), bn1_valacc, label='Batch_Norm1_RELU')

NameError: name 'plt' is not defined

### Observations

* Using BN before or after activations doesnt seem to yield clearer difference in behavior

## 3. Reusing Save Models

In [24]:
# Create a model to save it before reusing it
## "A" fashion mnist with 8 target class
## "B" 200 items of Sandals and Shirts

def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7
    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?
    return ((X[~y_5_or_6], y_A),
            (X[y_5_or_6], y_B))

(xtrain_A, ytrain_A), (xtrain_B, ytrain_B) = split_dataset(xtrain, ytrain)
(xval_A, yval_A), (xval_B, yval_B) = split_dataset(xval, yval)
(xtest_A, ytest_A), (xtest_B, ytest_B) = split_dataset(xtest, ytest)
xtrain_B = xtrain_B[:200]
ytrain_B = ytrain_B[:200]

In [25]:
model_A = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(8, activation='softmax'),
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain_A, ytrain_A, validation_data = (xval_A, yval_A), epochs = 10, batch_size = 256)

Train on 39966 samples, validate on 8034 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [26]:
model_A.save("my_model_A.h5")

In [27]:
### Without Transfer Learning
model_B = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape = [28, 28]),
    tf.keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(1, activation='softmax'),
])
model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = 'SGD')
history = model.fit(xtrain_B, ytrain_B, validation_data = (xval_B, yval_B), epochs = 10, batch_size = 256)
modelB_scratch_acc, modelB_scratch_valacc = history.history['accuracy'], history.history['val_accuracy']
modelB_scratch_loss, modelB_scratch_valloss = history.history['loss'], history.history['val_loss']

Train on 200 samples, validate on 1966 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
import tensorflow as tf
from tensorflow import keras
model_B_with_A = keras.load_model("my_model_A.h5")

AttributeError: module 'tensorflow_core.keras' has no attribute 'load_model'

In [29]:
tf.__version__

'2.0.0'

In [30]:
keras.__version__

'2.2.4-tf'