In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from utils import data_shift
import hiplot as hip
from matplotlib import pyplot as plt
import sys
from copy import copy
import warnings
warnings.filterwarnings("ignore")

Before starting this exercise, you first need to install the following library:

- hiplot: conda install -c conda-forge hiplot

### Task 1: Neural Network Implementation and Optimization

In this exercise you need to implement a fully-connected neural network using PyTorch to forecast daily minimum temperatures.

**a)** Import temperature time-series dataset **daily-minimum-temperatures.csv** in the data folder. 

In [None]:
# Load the daily minimum temperature series: the first CSV row is used as the
# header and the first column becomes the index.
data = pd.read_csv(
    './data/daily-minimum-temperatures.csv',
    header=0,
    index_col=0,
)
data.head(10)

In [None]:
# Visualize the data
### Your Code Here ### 

**b)** Generate a **7**-lag input dataset manually using the given function **data_shift()**, i.e., use the data of the previous 7 days \[x(t-7), x(t-6), ..., x(t-1)\] as the input to predict the following day x(t).

In [None]:
# Number of previous days used as input features for each prediction.
lags = 7
# Hold out the last 50 rows as the test set; all earlier rows form the training set.
train, test = data[0:-50], data[-50:]

# Scale the train/test data using MinMaxScaler.
# The code added here must define `train_scaled` and `test_scaled`: the prints
# below call `.head()` on both, so they need to be pandas objects.
### Your Code Here ###

print(train_scaled.head(5))
print(test_scaled.head(5))

In [None]:
# Generate the shifted training/test datasets using the given function data_shift().
### Your Code Here ###

# Split the shifted train/test datasets into train_X, train_y / test_X, test_y.
### Your Code Here ###

# Convert the train/test datasets to tensors of dtype torch.float32.
# The code added here must define the four tensors printed below.
### Your Code Here ###

# Show the first three entries of each tensor as a quick sanity check.
print(train_X_tensor[:3])
print(train_y_tensor[:3])
print(test_X_tensor[:3])
print(test_y_tensor[:3])

**c)**  Complete the given class **dnn()** using PyTorch to implement a neural network.

Hints:

- [**nn.Sequential()**](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html)

In [None]:
class dnn(nn.Module):
    """
    Fully-connected neural network scaffold.

    The layer construction, the forward pass, the prediction and the activation
    look-up are left as exercise placeholders (marked with "Your Code Here").
    """

    def __init__(self, input_size=7, network_structure=[16, 4], output_size=1, activation_function='sigmoid',
                 dropout_rate=0.2, leaky_relu_slop=0.01, output_activation_function=None):
        """
        Initialization.
        input_size: int, the dimension of the input features.
        network_structure: list, the structure of the hidden layers in the network, each number corresponds to the
                           number of neurons in a hidden layer. The list is copied before the input size is
                           prepended, so neither the caller's list nor the shared default list is modified.
        output_size: int, the dimension of the prediction target.
        activation_function: string, the name of the selected activation function in hidden layers. Implement the
                             function str2act() to convert the string to an activation function.
        dropout_rate: float, the probability of an element to be zeroed.
        leaky_relu_slop: float, controls the angle of the negative slope for the function leakyrelu.
        output_activation_function: string or None, the name of the selected activation function in the output layer.
        """
        # The docstring has to come first in the function body; super().__init__() follows it.
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        # Stored before str2act() is called so that it is available to the conversion.
        self.leaky_relu_slop = leaky_relu_slop
        self.activation_function = self.str2act(activation_function)
        self.output_activation_function = self.str2act(output_activation_function)
        self.dropout = nn.Dropout(dropout_rate)
        # Work on a copy: after the insert the list reads [input_size, hidden_1, ..., hidden_n].
        self.network_structure = copy(network_structure)
        self.network_structure.insert(0, self.input_size)

        # Implement the neural network model using pytorch modules based on the given input/output size
        # and the network structure.

        ### Your Code Here ###

    def forward(self, x):
        '''
        x: torch.tensor, the input

        Feedforward propagation of the input to the output.
        '''
        ### Your Code Here ###

        return x

    def predict(self, x):
        '''
        x: torch.tensor, the input

        Make a prediction for the input. The module is switched to evaluation mode for the
        prediction and the previous mode is restored afterwards, even if the prediction raises.
        '''
        training_mode = self.training # store the current mode
        self.eval() # to evaluation mode
        try:
            # predict: the code added here must assign the prediction to `output`
            ### Your Code Here ###

            return output
        finally:
            # Runs on success and on error, so the module never stays stuck in evaluation mode.
            self.train(training_mode) # reset the saved mode

    def str2act(self, str):
        '''
        Convert the given string to the corresponding activation function.

        str: string or None, one of "sigmoid", "tanh", "relu", "leakyrelu" or None.
             (The parameter name shadows the built-in `str`; it is kept unchanged so that
             existing keyword calls keep working.)

        Returns None when str is None, i.e. no activation is applied and the caller has to
        leave its input unchanged.
        Raises ValueError for any other value, so that a misspelled name is not silently
        treated as "no activation".
        '''

        if str == "sigmoid":

            pass
            ### Your Code Here ###

        elif str == "tanh":

            pass
            ### Your Code Here ###

        elif str == "relu":

            pass
            ### Your Code Here ###


        elif str == "leakyrelu":

            pass
            ### Your Code Here ###

        elif str is None:
            # No activation function is required: None is returned, and the code using
            # the result has to pass its input through unchanged.
            return None

        else:
            raise ValueError(f"Unknown activation function: {str!r}")

In [None]:
# Instantiate the network with its default arguments and display it as the cell output.
model = dnn()
model

**d)** Train the **model** using mini-batch gradient descent.

Hints:

- Convert the training tensors to iterable objects using the data loading utility [**DataLoader()**](https://pytorch.org/docs/stable/data.html). 

- Use [MSE](https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html) as the loss function

- Use SGD with an optimal learning rate as the [optimizer](https://pytorch.org/docs/stable/optim.html).

In [None]:
# Training settings; these names are passed to dnn() and reused by the training cells below.
epochs = 256  # number of passes of the epoch loops (`for epoch in range(epochs)`)
batch_size = 32  # mini-batch size
input_size = train_X_tensor.shape[1]  # number of input features, taken from the training tensor
output_size = 1
# NOTE(review): the dnn docstring describes network_structure as the hidden layers only;
# with output_size = 1 the trailing 1 here would be a hidden layer of width 1 - confirm this is intended.
network_structure = [64, 32, 16, 1]
activation_function = 'leakyrelu'
leaky_relu_slop = 0.01
dropout_rate = 0  # a zeroing probability of 0 means no element is dropped
output_activation_function = None  # no activation on the output layer

# Convert the training tensors to iterable objects.
# The training cells below iterate over a variable named `loader`.
### Your Code Here ###

# Initialize the model (the training cells below expect it in `model`).
### Your Code Here ###

# Define loss function
### Your Code Here ###

# Define optimizer
### Your Code Here ###

In [None]:
train_losses = [] # average training loss, one entry per epoch
test_losses = [] # average test loss, one entry per epoch

# Training process
for epoch in range(epochs):
    # set training mode
    model.train()

    # Implement the training loop using mini-batches.
    # `loader` is expected to be defined in the settings cell above.
    for idx, batch in enumerate(loader):
        pass
        ### Your Code Here ###

    # Compute the test error in each epoch and append it to test_losses.
    # Do the same for the training loss (train_losses).
    # Print the test error every 10 epochs.
    ### Your Code Here ###

In [None]:
# Plot the learning curve
### Your Code Here ### 

In [None]:
# Plot the comparison between measurements and predictions of the training data
### Your Code Here ### 

In [None]:
# Plot the comparison between measurements and predictions of the test data
### Your Code Here ### 

In [None]:
# What problem can you observe in this figure? What could cause this problem?

**e)** Implement [**Early Stopping**](https://en.wikipedia.org/wiki/Early_stopping) in the training process.

In [None]:
# Re-initialize the model so that training starts from fresh weights.
### Your Code Here ###

# The training process is monitored and stopped when the test loss has not decreased for 30 epochs.
early_stop_patience = 30

train_losses = [] # average training loss, one entry per epoch
test_losses = [] # average test loss, one entry per epoch
best_epoch = 0 # the epoch of the lowest test loss
lowest_loss = sys.float_info.max # the current lowest test loss (starts at the largest float)
epochs_counter = 0 # number of epochs without improvement; compared against early_stop_patience

# Define loss function
### Your Code Here ###

# Define optimizer SGD
### Your Code Here ###

# Define optimizer Adam (for subtask f)
### Your Code Here ###

# Training process
for epoch in range(epochs):
    # set training mode
    model.train()
    running_losses = 0.0 # accumulator for the training loss of the current epoch

    # Implement the training loop using mini-batches.
    for idx, batch in enumerate(loader):
        pass
        ### Your Code Here ###

    # Compute the test error in each epoch and append it to test_losses.
    # Do the same for the training loss (train_losses).
    # Print the test error every 10 epochs.

    ### Your Code Here ###

    # Implement early stopping:
    # - When the newest test loss is smaller than lowest_loss, update lowest_loss
    #   (and best_epoch).
    # - Otherwise epochs_counter += 1.
    # - Stop training (break out of the epoch loop) and report the lowest test loss
    #   when epochs_counter is greater than or equal to early_stop_patience.

    ### Your Code Here ###

In [None]:
# Plot the comparison between measurements and predictions of the test data
### Your Code Here ###

**f)** Try the [**Adam**](https://pytorch.org/docs/stable/optim.html) optimizer based on the code of sub-task **e** and plot the learning curve. What differences can you observe between the learning curves of SGD and Adam?

**g)** Search for the optimal hyperparameters using **grid search** and visualize the search results with [HiPlot](https://github.com/facebookresearch/hiplot).

- Optimize these hyperparameters: **the structure of the neural network** (i.e., the number of neurons and layers), **activation functions**, **dropout rates**, **optimizers**, **learning rate**.

- The search range can be adjusted to suit your hardware.

In [None]:
# Search space: every combination of the values below is trained once.
nn_structures = [[32, 16, 1], [64, 32, 16, 1]]
activation_functions = ['tanh', 'sigmoid']
dropout_rates = [0.2, 0.05]
learning_rates = [0.05, 0.01]
optimizers = ['adam', 'sgd']
results = [] # one dict per hyperparameter combination, consumed by the HiPlot cell
param_idx = 1 # 1-based running index of the current combination (used for printing only)

# The training process is monitored and stopped when the test loss has not decreased for 30 epochs.
early_stop_patience = 30

# Search the optimal parameters using grid search
for nn_structure in nn_structures:
    for activation_function in activation_functions:
        for dropout_rate in dropout_rates:
            for learning_rate in learning_rates:
                for optimizer_str in optimizers:

                    # Training settings:
                    # declare the per-run variables, e.g. the loss lists and the lowest loss, and
                    # initialize the neural network, the optimizer and the corresponding loss function.
                    # `lowest_loss` has to be reset here for every combination; otherwise the value
                    # stored in `results` below is carried over from a previous run.
                    ### Your Code Here ###

                    print(f"The index of the hyparameter set: {param_idx}")

                    # Train for at most `epochs` epochs (`epochs` and `loader` come from earlier cells).
                    for epoch in range(epochs):
                        # set training mode
                        model.train()
                        running_losses = 0.0 # accumulator for the training loss of the current epoch

                        # Implement the training loop using mini-batches.
                        for idx, batch in enumerate(loader):
                            pass
                            ### Your Code Here ###

                        # Compute the test error in each epoch and append it to test_losses.
                        # Do the same for the training loss.
                        # Print the test error every 10 epochs.

                        ### Your Code Here ###

                        # Implement early stopping:
                        # - When the newest test loss is smaller than lowest_loss, update lowest_loss.
                        # - Otherwise increase the no-improvement counter by 1.
                        # - Stop training (break out of the epoch loop) when the counter is greater
                        #   than or equal to early_stop_patience.

                        ### Your Code Here ###

                    # Store the parameters and the result for displaying
                    results.append({"Structure": nn_structure,
                                    "activation function": activation_function,
                                    "dropout rate": dropout_rate,
                                    "learning rate": learning_rate,
                                    "optimizer": optimizer_str,
                                    "test error": lowest_loss
                                    })
                    param_idx += 1

In [None]:
# Visualize the results in parallel plots.
# `results` is the list of per-combination dicts collected by the grid-search cell above.
hip.Experiment.from_iterable(results).display()