# Regularization example with IRIS dataset

## Model validation on the Iris dataset

### Intentions

We will implement validation, regularisation and callbacks on the Iris dataset.

In [None]:
from numpy.random import seed
seed(8)
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, model_selection 
%matplotlib inline

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras import initializers

IRIS pictures here..

#### The Iris dataset

In this assignment, you will use the [Iris dataset](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html). It consists of 50 samples from each of three species of Iris (Iris setosa, Iris virginica and Iris versicolor). Four features were measured from each sample: the length and the width of the sepals and petals, in centimeters. For reference, see the following paper:

- R. A. Fisher. "The use of multiple measurements in taxonomic problems". Annals of Eugenics. 7 (2): 179–188, 1936.

Our goal is to showcase a neural network that classifies each sample into the correct class, as well as applying validation and regularisation techniques.

#### Load and preprocess the data

First read in the Iris dataset using `datasets.load_iris()`, and split the dataset into training and test sets.

In [None]:
def read_in_and_split_data(iris_data, test_size=0.1, random_state=None):
    """Split a dataset object into training and test arrays.

    Args:
        iris_data: Object exposing ``data`` (features) and ``target``
            (labels) attributes, such as the result of
            ``datasets.load_iris()``.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.1,
            the value that used to be hard-coded here.
        random_state: Forwarded to ``train_test_split``. With the default
            ``None`` the shuffle draws from NumPy's global random state,
            exactly as before this parameter existed; pass an int to make
            the split reproducible on its own.

    Returns:
        A tuple ``(train_data, test_data, train_targets, test_targets)``.
    """
    features = iris_data.data
    labels = iris_data.target

    train_data, test_data, train_targets, test_targets = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    return train_data, test_data, train_targets, test_targets

In [None]:
# Load the Iris dataset and split it into training and test portions.
iris_data = datasets.load_iris()
train_data, test_data, train_targets, test_targets = read_in_and_split_data(iris_data)

We will now convert the training and test targets to one-hot encoded vectors.

In [None]:
# Fix the one-hot width from the dataset's class list instead of letting
# to_categorical infer it from the largest label present: a small test split
# that happens to contain no sample of the highest class would otherwise be
# encoded with fewer columns than the 3-unit softmax output expects.
num_classes = len(iris_data.target_names)
train_targets = tf.keras.utils.to_categorical(np.array(train_targets), num_classes=num_classes)
test_targets = tf.keras.utils.to_categorical(np.array(test_targets), num_classes=num_classes)

#### Build the neural network model


In [None]:
def get_model(input_shape, seed=None):
    """Build the unregularised fully connected classifier.

    The network is a Dense(64) input layer, four Dense(128) layers, four
    Dense(64) layers (all ReLU) and a 3-unit softmax output layer.

    Args:
        input_shape: Shape of a single input sample, passed to the first
            layer as ``input_shape``.
        seed: Optional seed for the He-uniform kernel initializer of the
            first layer. The default ``None`` leaves it unseeded, as before.
            Only that initializer is affected; the remaining layers use
            their default initializers, so fully reproducible weights also
            need TensorFlow's global seed to be set (the NumPy seed alone
            does not control TensorFlow's random ops).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    # Widths of the hidden layers that follow the input layer.
    hidden_units = [128] * 4 + [64] * 4

    layers = [
        Dense(64, activation='relu', input_shape=input_shape,
              kernel_initializer=tf.keras.initializers.he_uniform(seed=seed),
              bias_initializer=initializers.Ones()),
    ]
    layers.extend(Dense(units, activation='relu') for units in hidden_units)
    layers.append(Dense(3, activation='softmax'))

    return tf.keras.Sequential(layers)

In [None]:
model = get_model(train_data[0].shape)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

#### Compile the model


In [None]:
def compile_model(model):
    """Compile ``model`` with MSE loss, the Adam optimizer and three metrics.

    The metrics are tracked in the order mse, mae, accuracy. The value
    returned is whatever ``model.compile`` returns.

    NOTE(review): the models in this notebook end in a softmax layer and are
    trained on one-hot targets; categorical cross-entropy is the more usual
    loss for that setup. MSE is kept here to preserve behaviour.
    """
    tracked_metrics = ["mse", "mae", "accuracy"]
    outcome = model.compile(loss='mse', optimizer="adam", metrics=tracked_metrics)
    return outcome

In [None]:
# Configure the baseline model for training (calls model.compile on it).
compile_model(model)

#### Fit the model to the training data

In [None]:
def train_model(model, train_data, train_targets, epochs,
                batch_size=40, validation_split=0.40, **fit_kwargs):
    """Fit ``model`` on the training data and return the result of ``fit``.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        train_data: Training inputs.
        train_targets: Training targets.
        epochs: Number of epochs to train for.
        batch_size: Mini-batch size. Defaults to 40, the value that used to
            be hard-coded here.
        validation_split: Fraction of the training data held out for
            validation. Defaults to 0.40, the previously hard-coded value.
        **fit_kwargs: Any further keyword arguments (for example
            ``callbacks`` or ``verbose``) forwarded unchanged to
            ``model.fit``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    history = model.fit(
        train_data,
        train_targets,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        **fit_kwargs,
    )
    return history

In [None]:
# Train the baseline model for 800 epochs; the returned history is plotted below.
history = train_model(model, train_data, train_targets, epochs=800)

#### Plot the learning curves

We will now plot two graphs:
* Epoch vs accuracy
* Epoch vs loss


In [None]:
# Plot training and validation accuracy per epoch for the baseline model.
# If the 'accuracy' keys are missing, fall back to 'acc'/'val_acc'
# (presumably the names used by older Keras versions).
try:
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
except KeyError:
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')  # this figure shows accuracy, so label the axis accordingly
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.show()

In [None]:
# Plot training and validation loss per epoch for the baseline model.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

#### Reducing overfitting in the model

We update the above model by adding regularization:
two dropout layers and weight decay (L2 regularisation on the kernels of the hidden layers).

In [None]:
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout

def get_regularised_model(input_shape, dropout_rate, weight_decay, seed=None):
    """Build the classifier with L2 weight decay and two dropout layers.

    The dense layers have the same widths as in ``get_model``: a Dense(64)
    input layer, four Dense(128) layers, four Dense(64) layers and a 3-unit
    softmax output. Every ReLU layer carries an L2 kernel regulariser; the
    output layer does not. A Dropout layer follows the second Dense(128)
    layer and another follows the second Dense(64) hidden layer.

    Args:
        input_shape: Shape of a single input sample, passed to the first
            layer as ``input_shape``.
        dropout_rate: Rate used by both Dropout layers.
        weight_decay: Coefficient of the L2 kernel regulariser.
        seed: Optional seed for the He-uniform kernel initializer of the
            first layer. The default ``None`` leaves it unseeded, as before.
            Only that initializer is affected; the other layers use their
            default initializers, so fully reproducible weights also need
            TensorFlow's global seed to be set.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    def regularised_dense(units, **layer_kwargs):
        # Each layer gets its own L2 regulariser instance, as in the
        # original spelled-out layer list.
        return Dense(units, activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay),
                     **layer_kwargs)

    return tf.keras.Sequential([
        regularised_dense(
            64,
            input_shape=input_shape,
            kernel_initializer=tf.keras.initializers.he_uniform(seed=seed),
            bias_initializer=initializers.Ones(),
        ),
        regularised_dense(128),
        regularised_dense(128),
        Dropout(dropout_rate),
        regularised_dense(128),
        regularised_dense(128),

        regularised_dense(64),
        regularised_dense(64),
        Dropout(dropout_rate),
        regularised_dense(64),
        regularised_dense(64),

        Dense(3, activation='softmax'),
    ])
    

#### Instantiate, compile and train the model

In [None]:
# Build the regularised network with dropout rate 0.3 and L2 weight-decay coefficient 0.001.
reg_model = get_regularised_model(train_data[0].shape, 0.3, 0.001)

In [None]:
# Compile the regularised model with the same settings as the baseline model.
compile_model(reg_model)

In [None]:
# Train the regularised model for 800 epochs, the same number as the baseline run.
reg_history = train_model(reg_model, train_data, train_targets, epochs=800)

#### Plot the learning curves

In [None]:
# Plot training and validation accuracy per epoch for the regularised model.
# If the 'accuracy' keys are missing, fall back to 'acc'/'val_acc'
# (presumably the names used by older Keras versions).
try:
    plt.plot(reg_history.history['accuracy'])
    plt.plot(reg_history.history['val_accuracy'])
except KeyError:
    plt.plot(reg_history.history['acc'])
    plt.plot(reg_history.history['val_acc'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')  # this figure shows accuracy, so label the axis accordingly
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.show()

In [None]:
# Plot training and validation loss per epoch for the regularised model.
for curve_key in ('loss', 'val_loss'):
    plt.plot(reg_history.history[curve_key])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

We can see that the regularisation has helped to reduce the overfitting of the network.
Let's incorporate callbacks into a new training run that implements early stopping and learning rate reduction on plateaux.

We write a function such that:

* It creates an `EarlyStopping` callback object and a `ReduceLROnPlateau` callback object
* The early stopping callback is used and monitors validation loss with the mode set to `"min"` and patience of 30.
* The learning rate reduction on plateaux is used with a learning rate factor of 0.2 and a patience of 20.

In [None]:
def get_callbacks():
    """Create the two training callbacks used in this notebook.

    Returns:
        A tuple ``(early_stopping, learning_rate_reduction)``:
        an ``EarlyStopping`` callback monitoring ``val_loss`` in ``'min'``
        mode with a patience of 30, and a ``ReduceLROnPlateau`` callback
        monitoring ``val_loss`` with a factor of 0.2 and a patience of 20.
    """
    keras_callbacks = tf.keras.callbacks
    stopper = keras_callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=30,
    )
    lr_reducer = keras_callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=20,
    )
    return stopper, lr_reducer

In [None]:
# Train a fresh regularised model (dropout 0.3, weight decay 0.0001) with
# early stopping and learning-rate reduction on plateau. This run holds out
# 15% of the training data for validation and leaves batch_size at the
# Keras default, unlike the runs that go through train_model.
early_stopping, learning_rate_reduction = get_callbacks()
call_model = get_regularised_model(train_data[0].shape, 0.3, 0.0001)
compile_model(call_model)
call_history = call_model.fit(
    train_data,
    train_targets,
    epochs=800,
    validation_split=0.15,
    callbacks=[early_stopping, learning_rate_reduction],
    verbose=0,
)

In [None]:
# Show the patience configured on the ReduceLROnPlateau callback (set to 20 in get_callbacks).
print(learning_rate_reduction.patience)

Finally, let's replot the accuracy and loss graphs for our new model.

In [None]:
# Plot training and validation accuracy per epoch for the run with callbacks.
# If the 'accuracy' keys are missing, fall back to 'acc'/'val_acc'.
try:
    for curve_key in ('accuracy', 'val_accuracy'):
        plt.plot(call_history.history[curve_key])
except KeyError:
    for curve_key in ('acc', 'val_acc'):
        plt.plot(call_history.history[curve_key])
plt.title('Accuracy vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.show()

In [None]:
# Plot training and validation loss per epoch for the run with callbacks.
for curve_key in ('loss', 'val_loss'):
    plt.plot(call_history.history[curve_key])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Evaluate the model on the test set.
# 'results' holds the loss value followed by the metrics in the order given
# at compile time (["mse", "mae", "accuracy"]), so index 0 is the loss and
# index 3 is the accuracy.
results = call_model.evaluate(test_data, test_targets, verbose=0)

test_loss = results[0]
test_accuracy = results[3]
print("Test loss: {:.3f}\nTest accuracy: {:.2f}%".format(test_loss, 100 * test_accuracy))