## Chapter 10

In [455]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print('tensorflow version: ' + str(tf.__version__))
print('tf.keras version: ' + str(keras.__version__))

tensorflow version: 2.4.1
tf.keras version: 2.4.0


#### MLP Classification

In [456]:
# Pull Fashion-MNIST through the loader bundled with Keras and unpack the train / test splits
(X_train, y_train), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
fashion_mnist = keras.datasets.fashion_mnist  # keep a handle on the dataset module

In [457]:
# Carve the first N_VALIDATION training samples off as a validation set and divide all
# images by 255.
# NOTE: X_train / y_train / X_test are overwritten here, so this cell must run exactly once
# after load_data(); running it again would slice and rescale the already-processed arrays.
N_VALIDATION = 10000

X_validation = X_train[:N_VALIDATION] / 255
X_train = X_train[N_VALIDATION:] / 255
X_test = X_test / 255

y_validation = y_train[:N_VALIDATION]
y_train = y_train[N_VALIDATION:]

In [458]:
# Human-readable names for the integer labels; the list position is the label value
class_names = [
    "T-shirt/top",  # 0
    "Trouser",      # 1
    "Pullover",     # 2
    "Dress",        # 3
    "Coat",         # 4
    "Sandal",       # 5
    "Shirt",        # 6
    "Sneaker",      # 7
    "Bag",          # 8
    "Ankle boot",   # 9
]

In [459]:
np.array(class_names)[y_test]

array(['Ankle boot', 'Pullover', 'Trouser', ..., 'Bag', 'Trouser',
       'Sandal'], dtype='<U11')

* The sequential API allows for easy creation of linearly connected networks.
* Layers are simply added sequentially

In [460]:
# Build the classifier by handing Sequential the complete layer stack in one call
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),  # per-sample shape; does not include batch size
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])

# alternatively, start from an empty keras.models.Sequential() and call model.add(layer) once per layer
# NOTE(review): Flatten()'s own documentation lists data_format rather than input_shape;
# input_shape looks like a generic keyword that Keras layers accept when they are the first
# layer of a model -- TODO confirm against the base Layer documentation

In [461]:
model.summary()

Model: "sequential_32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_9 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_110 (Dense)            (None, 300)               235500    
_________________________________________________________________
dense_111 (Dense)            (None, 100)               30100     
_________________________________________________________________
dense_112 (Dense)            (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
_________________________________________________________________


* layers and layer names can be accessed from model
* you can iterate over a model's layers via model.layers

In [462]:
# Walk the layer list twice: first show each layer object, then only its name
layer_stack = model.layers

print("model layers:")
for lyr in layer_stack:
    print(lyr)

print("\nname of each layer:")
for lyr in layer_stack:
    print(lyr.name)

model layers:
<tensorflow.python.keras.layers.core.Flatten object at 0x00000237F0CD3EE0>
<tensorflow.python.keras.layers.core.Dense object at 0x00000237F0CD3A60>
<tensorflow.python.keras.layers.core.Dense object at 0x00000237FD301A00>
<tensorflow.python.keras.layers.core.Dense object at 0x00000237F0F90C40>

name of each layer:
flatten_9
dense_110
dense_111
dense_112


* weights and biases can be accessed too   
* because weights and biases are stored in the layer objects, reinitializing a model requires creating new instances of these objects (calling model.compile() alone does not reset them)

In [463]:
# Index 1 is the first Dense layer (index 0 is the parameter-free Flatten layer)
first_dense = model.layers[1]
weights, biases = first_dense.get_weights()

print(f"Shape of weights in first dense layer: {weights.shape}")
print(f"Shape of biases in first dense layer: {biases.shape}")

Shape of weights in first dense layer: (784, 300)
Shape of biases in first dense layer: (300,)


* weights are randomly initialized automatically
* biases are initialized to zero

In [464]:
# Summary statistics of the freshly initialised parameters.
# print() applies str() to every argument, so sep="" reproduces plain string concatenation.
print("Mean and std for weights from first dense layer: ", weights.mean(), ", ", weights.std(), sep="")
print("Mean and std for biases from first dense layer: ", biases.mean(), ", ", biases.std(), sep="")

Mean and std for weights from first dense layer: 0.00012342357, 0.042894926
Mean and std for biases from first dense layer: 0.0, 0.0


In [465]:
# Configure training: plain SGD, accuracy tracked alongside the loss
model.compile(
    optimizer='sgd',
    loss="sparse_categorical_crossentropy",  # classes are mutually exclusive
    metrics=['accuracy'],
)

In [466]:
# standard batch_size is 32, hence the 1563 steps per epoch
# verbose=False prevents from printing status after each epoch
# alternatively, use validation_split instead of validation_data
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_validation, y_validation),
    epochs=50,
)

Epoch 1/50


KeyboardInterrupt: 

In [None]:
pd.DataFrame(history.history)

In [None]:
pd.DataFrame(history.history).plot()

In [None]:
model.evaluate(X_test, y_test)

Using model.predict returns the class probabilities (output of the softmax layer):

In [None]:
model.predict(X_test)

In [None]:
np.argmax(model.predict(X_test), axis=1)

In [None]:
np.array(class_names)[np.argmax(model.predict(X_test), axis=1)]

#### MLP Regression

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Fetch the California housing data and hold out a test set.
# random_state pins the shuffle so the same rows land in train / test on every
# Restart & Run All, which keeps the results below comparable between runs.
housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    housing.data, housing.target, random_state=42)

In [None]:
# Fit the scaler on the training features only, then apply that same scaling to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
X_train.shape[1:]

In [None]:
# Regression MLP: one hidden layer of 30 ReLU units followed by a single output unit
model = keras.Sequential()
model.add(keras.layers.Dense(30, activation='relu',
                             input_shape=X_train.shape[1:]))  # shape needs to be tuple
model.add(keras.layers.Dense(1))

model.compile(optimizer='sgd', loss='mean_squared_error')
history = model.fit(X_train, y_train, validation_split=.20, epochs=100)

In [None]:
# Plot every series recorded in history.history against the epoch index
pd.DataFrame(history.history).plot()
# switch the y-axis of the current figure to a logarithmic scale
plt.yscale('log')

In [None]:
plt.scatter(model.predict(X_test), y_test, alpha = .2)

#### Functional API

In [None]:
# Functional API: every layer object is called on the tensor it should consume.
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept here because
# the following cell builds the model from `input` and `output`.
input = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation='relu')(input)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
# skip connection: the raw input is concatenated with the output of the second hidden layer,
# so the final Dense layer sees both
concat = keras.layers.Concatenate()([input, hidden2])
output = keras.layers.Dense(1)(concat)

In [None]:
model = keras.models.Model(inputs=[input], outputs=[output])

In [None]:
model.compile(loss='mean_squared_error', optimizer='adam') 
# the book favors 'sgd', but it is not numerically stable in this example

In [None]:
history = model.fit(X_train, y_train, epochs = 100, validation_split = .20)

In [None]:
pd.DataFrame(history.history)

In [None]:
pd.DataFrame(history.history).plot()

* To use multiple inputs/outputs, create multiple input/output layer objects and list them in the model declaration
* In the call to model.fit(), model.evaluate() or model.predict(), pass multiple inputs as a tuple
* For multiple outputs, the model.compile() method must receive either a list of loss functions (for each of the outputs) or a single one (which will be used for all outputs); the model.fit() etc methods also need to receive a tuple with the labels

#### Subclassing API

Model behavior can be customized by creating subclasses of the model class

#### Callbacks

* Models can be saved and restored in HDF5 format with model.save() and keras.models.load_model()
* To save during the training process, _callbacks_ can be used
* Callbacks are passed as list of keras callbacks objects from the keras.callbacks API
* Examples:
    * EarlyStopping stops after _patience_ epochs without performance improvement
    * ModelCheckpoint saves the model state periodically
    * LearningRateScheduler calls a user-supplied schedule function to adjust the learning rate at the beginning of each epoch
    * ReduceLROnPlateau reduces the learning rate when a metric stops improving

In [None]:
# Rebuild the skip-connection regression network so that training starts from fresh weights.
# The input tensor is called `input_` so the Python builtin input() is not shadowed.
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation='relu')(input_)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.Concatenate()([input_, hidden2])  # raw features alongside the deep path
output = keras.layers.Dense(1)(concat)

model = keras.models.Model(inputs=[input_], outputs=[output])
model.compile(optimizer='adam', loss='mean_squared_error')

# Stop once val_loss has gone 10 epochs without improving, and roll the model back to the
# weights of the best epoch seen.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=10,
                                                  restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=100, validation_split=0.2,
                    callbacks=[early_stopping_cb])

#### TensorBoard

Create a function to define a directory name to save the log:

In [None]:
def get_logdir(root_logdir='TensorBoard_logs'):
    """Return a timestamped log-directory path for one training run.

    Parameters
    ----------
    root_logdir : str, optional
        Folder, relative to the current working directory, under which the
        per-run sub-folder is placed. Defaults to 'TensorBoard_logs'.

    Returns
    -------
    str
        ``<cwd>/<root_logdir>/run_YYYY_MM_DD-HH_MM_SS``. Only the path string is
        built; no directory is created here.

    Notes
    -----
    The run id has one-second resolution, so two calls within the same second
    return the same path.
    """
    # imports stay local so the cell works on its own, as in the original notebook
    import os
    import time
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(os.getcwd(), root_logdir, run_id)

get_logdir()

Create a _callback_ object with the function and run the model  
__This creates the logs__

In [None]:
# Get a fresh directory name for this run and pass it to the TensorBoard callback.
# The variable is called run_logdir rather than `dir` so the builtin dir() stays usable
# in the rest of the notebook.
run_logdir = get_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

# Same early-stopping policy as before: give up after 10 epochs without a better val_loss
# and restore the best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=10,
                                                  restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=100, validation_split=.2,
                    callbacks=[early_stopping_cb, tensorboard_cb])

To run tensorboard, go to the Anaconda Prompt and run:   
    - _python -m tensorboard.main --logdir=./TensorBoard_logs --port=6060_   
    - (logdir should be the root dir with the logs, port will be used in browser)   
To view the tensorboard logs, go to the web browser and type the address:   
    - _http://localhost:6060_

#### Exercise 10 - MLP on MNIST

In [None]:
# Take the MNIST loader from tf.keras (the `keras` name already imported in this notebook)
# instead of the standalone `keras` package, so the whole notebook uses one Keras build.
mnist = keras.datasets.mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Divide the images by 255, the same preprocessing applied to Fashion-MNIST earlier in this
# notebook (presumably 8-bit pixels, giving inputs in [0, 1] -- TODO confirm).
# The data is reloaded above, so re-running this cell does not rescale twice.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
# MLP for the MNIST exercise: dropout in front of each of the first three Dense layers,
# three ELU hidden layers, softmax over the 10 output units.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=X_train.shape[1:], name='Flatten'),  # ignore the m dimension
    keras.layers.Dropout(rate=.2, name='Dropout_01'),
    keras.layers.Dense(units=200, activation='elu', name='Dense_01'),
    keras.layers.Dropout(rate=.2, name='Dropout_02'),
    keras.layers.Dense(units=100, activation='elu', name='Dense_02'),
    keras.layers.Dropout(rate=.2, name='Dropout_03'),
    keras.layers.Dense(units=100, activation='elu', name='Dense_03'),
    keras.layers.Dense(units=10, activation='softmax', name='Output'),
])

In [None]:
# Adam with its hyper-parameters written out explicitly
opt = keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=.9,
    beta_2=.999,
)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
# Callbacks for the long run, built in the same order as they are passed to fit():
#   - stop after 100 epochs without a better val_accuracy and restore the best weights
#   - log the run to a fresh TensorBoard directory
#   - halve the learning rate after 25 epochs without a better val_loss (floor at 1e-5)
exercise_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=100, restore_best_weights=True),
    keras.callbacks.TensorBoard(log_dir=get_logdir()),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=25, factor=.5, min_lr=0.00001),
]
model.fit(X_train, y_train, epochs=1000, validation_split=.1, callbacks=exercise_callbacks)

In [None]:
model.evaluate(X_test, y_test)

### Chapter 11

#### Initialization

* Keras uses Glorot uniform initialization (variance scaled by fan_avg) by default   
* Can be changed during layer creation

In [None]:
# model.add(keras.layers.Dense(kernel_initializer = 'he_uniform'))
# model.add(keras.layers.Dense(kernel_initializer = 'he_normal'))

* To scale He initialization by fan_avg instead of its default fan_in

In [None]:
# he_avg_init = keras.initializers.VarianceScaling(scale=2, mode='fan_avg', distribution='uniform')
# model.add(keras.layers.Dense(kernel_initializer = he_avg_init))

* Other initializations:

In [None]:
# lecun = keras.initializers.VarianceScaling(scale=1, distribution='normal', mode='fan_in')
# model.add(keras.layers.Dense(kernel_initializer = 'lecun_normal'))

#### Batch Normalization

In [None]:
# keras.layers.BatchNormalization

#### Gradient Clipping

* Added to the optimizer   
* _clipvalue_ clips each component of the gradient vector to the given range (this can change the direction of the gradient)   
* _clipnorm_ clips the norm of the gradient vector, maintaining the direction of the gradient descent step

In [None]:
# optimizer = keras.optimizers.SGD(clipvalue = 1.0)
# optimizer = keras.optimizers.Adam(clipnorm = 1.0)

#### Optimizers

Momentum:  
* Just add momentum term to SGD optimizer

In [None]:
# keras.optimizers.SGD(momentum=0.9) # Momentum
# keras.optimizers.SGD(momentum=0.9, nesterov=True) # Nesterov accelerated gradient

In [None]:
# keras.optimizers.RMSprop(learning_rate=, rho=)
# keras.optimizers.Adam(learning_rate=, beta_1=, beta_2=)

#### Regularization

In [None]:
# reg = keras.regularizers.l2()
# reg = keras.regularizers.l1()
# reg = keras.regularizers.l1_l2()
# layer = keras.layers.Dense(..., kernel_regularizer = reg)

In [1]:
# model.add(keras.layers.Dropout(rate=.1)) # rate means dropout rate

#### Exercises:

In [2]:
from tensorflow import keras

(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

ImportError: cannot import name '_pywrap_traceme' from 'tensorflow.python.profiler.internal' (unknown location)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the training images for validation.
# random_state pins the shuffle so the same images end up in each split on every run.
X_train, X_validate, y_train, y_validate = train_test_split(
    X_train_full, y_train_full, test_size=.1, shuffle=True, random_state=42)

In [None]:
# elu + he

#Input = keras.layers.Input(shape = X_train.shape[1:], name='Input')
#X = keras.layers.Flatten(input_shape = X_train.shape[1:], name='Flatten')(Input)
#for layer in range(20):
#    X = keras.layers.Dense(units=100, kernel_initializer='he_uniform', use_bias=False, name='Dense'+str(layer+1))(X)
#    X = keras.layers.BatchNormalization(name='BN'+str(layer+1))(X)
#    X = keras.layers.Activation('elu', name='Activation'+str(layer+1))(X)
#Y = keras.layers.Dense(units=10, activation='softmax', kernel_initializer='he_uniform', name='Output')(X) 

In [None]:
# selu + lecun + dropout

# Normalization layer whose statistics are learned from the training images
normalization = keras.layers.experimental.preprocessing.Normalization(axis=(1,2,3), name='Normalization')
normalization.adapt(X_train)

Input = keras.layers.Input(shape = X_train.shape[1:], name='Input')
X = normalization(Input)
X = keras.layers.Flatten(input_shape = X_train.shape[1:], name='Flatten')(X)
# The dropout layer must be *called* on X. Without the trailing (X), the name X is rebound
# to the layer object itself and the first Dense layer below receives a layer instead of
# a tensor.
X = keras.layers.AlphaDropout(rate=.001, name='DropoutInput')(X)

# 20 identical hidden blocks: Dense(100, selu, lecun_normal) followed by AlphaDropout
for layer in range(20):
    X = keras.layers.Dense(units=100, kernel_initializer='lecun_normal', activation='selu', name='Dense'+str(layer+1))(X)
    X = keras.layers.AlphaDropout(rate=.001, name='Dropout'+str(layer+1))(X)
Y = keras.layers.Dense(units=10, activation='softmax', kernel_initializer='lecun_normal', name='Output')(X)

In [None]:
# The run name doubles as the Keras model name
model_name = "22_selu_lecun_dropout_001_nadam_lr_0_001"
optimizer = keras.optimizers.Nadam(learning_rate=0.001)

# Tie the Input tensor and the softmax output Y together into a trainable model
model = keras.models.Model(inputs=Input, outputs=Y, name=model_name)

model.compile(
    loss='sparse_categorical_crossentropy',  # because labels provided as integers (not one-hot!)
    metrics=['accuracy'],
    optimizer=optimizer,
)

In [None]:
from os.path import join

# Logs for this model configuration go under TensorBoard_logs/<model_name>
log_dir = join("TensorBoard_logs", model_name)

# Early stopping on val_loss (30-epoch patience, best weights restored) plus TensorBoard
# logging with weight histograms every 10 epochs
es_cb = keras.callbacks.EarlyStopping(restore_best_weights=True, patience=30, monitor='val_loss')
tb_cb = keras.callbacks.TensorBoard(histogram_freq=10, log_dir=log_dir)

In [None]:
model.summary()

In [None]:
# Train with the explicit validation split; early stopping decides when the run actually ends
model.fit(X_train, y_train,
          validation_data=(X_validate, y_validate),
          callbacks=[tb_cb, es_cb],
          epochs=1000)

In [None]:
model.evaluate(X_test, y_test)

### Chapter 12

In [None]:
import tensorflow as tf

In [None]:
t = tf.constant([[1,2,3],[4,5,6]], dtype='float32') # constant as in immutable, returns a tf.Tensor object
t

In [None]:
t @ tf.transpose(t) # matrix mul

In [None]:
tf.reduce_mean(t)

In [None]:
tf.math.log(t)

In [None]:
# this will not work, tf.Tensor is immutable
# The failure is the point of the demo, so catch and display it instead of letting the
# exception stop a Restart & Run All.
try:
    t[1,2] = 10
except TypeError as err:
    print("TypeError:", err)

In [None]:
# Rebind t to a tf.Variable built from the constant tensor's values
t = tf.Variable(t) # returns a tf.Variable object
# t[1,2] = 10 # this will not work
# element updates go through .assign() on the indexed slice rather than plain item assignment
t[1,1].assign(20)