In [None]:
import pandas as pd
import copy
import sys

from LAB3_1.utils import show_loss, show_result

from tqdm import tqdm
from typing import Tuple
from itertools import product

import numpy as np
from numpy import ndarray

import torch
from torch.optim import Adam
from torch import Tensor, no_grad, zeros
from torch.nn import Module, Sequential, Linear, MSELoss, ReLU, RNN

### Retrieve the dataset

In [None]:
# Fix the seed of PyTorch's random number generator: the network weights are randomly
# initialised, so without it every "Restart & Run All" selects/reports different numbers.
SEED = 42
torch.manual_seed(SEED)

# Device used for both the models and the data: the GPU when CUDA is available, else the CPU
gpu = 'cuda' if torch.cuda.is_available() else 'cpu'

# Both files are read without a header row and transposed before being turned into arrays
source1 = pd.read_csv("./sources/NARMA10.csv", header=None).T.to_numpy()
source2 = pd.read_csv("./sources/MG17.csv", header=None).T.to_numpy()

### Hold-out

In [None]:
# Arrange NARMA10 with a trailing feature axis of size 1. Along the first axis, index 0 is
# what RNN_trainer.fit / validate feed to the network and index 1 is what they use as target.
# The result is bound to a new name instead of overwriting `source1`: the original cell
# re-assigned `source1`, so running it twice transposed/expanded the array again.
narma10 = np.expand_dims(source1.T, axis=-1)

# Chronological hold-out along the time axis:
# first 4000 steps -> training, next 1000 -> validation, the remaining ones -> test
tr_dataset, dev_dataset, ts_dataset = narma10[:,:4000], narma10[:,4000:5000], narma10[:,5000:]

### Model

In [None]:
# Recurrent Neural network
class RecurrentNN(Module):
    """
    Recurrent network with a scalar input, followed by a read-out (ReLU + Linear) that maps
    the recurrent output of every time step to a scalar prediction.

    The module can remember the final hidden state of a forward pass (`last_hidden`) and
    use it as the initial state of the following passes.
    """

    def __init__(self, hidden:int, layers:int, no_linearity:str="relu"):
        """
        :param hidden: number of recurrent units per layer
        :param layers: number of stacked recurrent layers
        :param no_linearity: non-linearity of the recurrent layers, forwarded to torch's RNN
        """
        super().__init__()
        self.hidden_size, self.layers = hidden, layers

        # recurrent part (created first, then the read-out)
        self.rnn = RNN(
            input_size=1,
            hidden_size=hidden,
            num_layers=layers,
            nonlinearity=no_linearity,
            batch_first=True,
        )

        # read-out applied to the recurrent output, and the loss used when a target is given
        self.read_out = Sequential(ReLU(), Linear(hidden, 1))
        self.criteria = MSELoss()

        # initial hidden state of the next forward pass; None lets the RNN use its default
        self.last_hidden = None

    def forward(self, x:Tensor, y:Tensor=None, save_state:bool=False):
        """
        :param x: input sequence, [steps, 1]
        :param y: optional target; when given, the MSE between prediction and target is computed
        :param save_state: when True, the final hidden state (detached from the graph) is kept
                           and used as the initial state of subsequent calls
        :return: the prediction for every step when `y` is None, otherwise (loss, prediction)
        """
        # states: [steps, hidden]; final_state: [layers, hidden]
        states, final_state = self.rnn(x, self.last_hidden)

        if save_state:
            self.last_hidden = final_state.detach()

        # the read-out is applied to the output of every step
        y_pred = self.read_out(states)

        if y is None:
            return y_pred
        return self.criteria(y_pred, y), y_pred

In [None]:
class RNN_trainer:
    """Owns a RecurrentNN placed on the `gpu` device and offers fit / validate / predict."""

    def __init__(self, hidden:int, layers:int, no_linearity:str):
        # model
        network = RecurrentNN(hidden=hidden, layers=layers, no_linearity=no_linearity)
        self.model = network.to(gpu)

    def fit(self, df:ndarray, epochs:int=2, lr:float=0.001)->Tensor:
        """
        Train the model with Adam for `epochs` full-sequence updates.

        :param df: array whose index 0 is the input sequence and index 1 the target sequence
        :param epochs: number of optimisation steps (one per pass over the whole sequence)
        :param lr: learning rate given to Adam
        :return: tensor of length `epochs` with the training loss of every epoch
        """
        data = torch.from_numpy(df).float().to(gpu)

        # Note: no further mechanisms (early stopping, scheduler, etc.) are implemented
        optimizer = Adam(self.model.parameters(), lr)
        losses = zeros(epochs)
        self.model.train()

        for epoch in range(epochs):
            optimizer.zero_grad(set_to_none=True)

            # forward pass on the whole sequence, then back-propagation and update
            loss, _ = self.model(data[0], data[1])
            loss.backward()
            optimizer.step()

            losses[epoch] = loss.item()

        return losses

    def validate(self, df:ndarray, save_state:bool=False) -> Tuple:
        """
        Evaluate the model on `df` (index 0: input, index 1: target).

        :return: (loss, prediction, target), the target being the tensor moved to `gpu`
        """
        data = torch.from_numpy(df).float().to(gpu)
        inputs, targets = data[0], data[1]

        loss, outputs = self.predict(inputs, targets, save_state)
        return loss, outputs, targets

    def predict(self, x:Tensor, y:Tensor=None, save_state:bool=False):
        """
        Run the model in evaluation mode with gradients disabled.

        Returns whatever the model returns: only the output when no target is given,
        the loss together with the output otherwise.
        """
        self.model.eval()
        with no_grad():
            result = self.model(x, y, save_state)
        return result


In [None]:
# Hyperparameter grid read by GridSearch: every combination of the listed values is tried
ranges_to_explore = dict(
    units=[100, 200, 500],
    epochs=[500, 800],
    lr=[0.003, 0.008, 0.01],
    layers=[1],
    activ=["relu"],
)

class GridSearch:
    """
    Exhaustive model selection over a grid of hyperparameters.

    Every combination of the grid values is used to build and train an RNN_trainer on the
    training data; the trained model is then scored on the validation data. After
    construction the instance exposes:
      - best_config: the configuration (dict) with the lowest validation loss
      - best_model:  the trained RNN_trainer of that configuration
      - tr_loss:     the per-epoch training loss recorded while fitting that model
    """

    def __init__(self, tr:ndarray, dev:ndarray, ranges:dict=None):
        """
        :param tr: training data, handed to RNN_trainer.fit
        :param dev: validation data, handed to RNN_trainer.validate
        :param ranges: mapping hyperparameter name -> list of candidate values; when omitted
                       the module-level `ranges_to_explore` is used
        :raises RuntimeError: when no configuration can be selected (empty grid, or every
                              validation loss is NaN/inf)
        """
        if ranges is None:
            ranges = ranges_to_explore

        all_configs = [dict(zip(ranges.keys(), values)) for values in product(*ranges.values())]

        print("Number of configurations to try: ",len(all_configs))
        # returns the performance in each configuration, the best model and the history of the loss
        rank, best, loss = self.run(tr, dev, all_configs)

        if best is None:
            raise RuntimeError("No configuration could be selected: the grid is empty or "
                               "every validation loss is NaN/inf")

        # Sort by validation loss. NaN compares False with everything and would leave the
        # order undefined, so it is ranked last. The sort is stable and `run` keeps the first
        # configuration reaching the minimum, hence rank[0] is the configuration of `best`.
        rank = sorted(rank, key=lambda conf: float("inf") if conf[2] != conf[2] else conf[2])

        print("\nThe best solution in ", rank[0])
        self.best_config = rank[0][0]
        self.best_model = best
        self.tr_loss = loss

    @staticmethod
    def run(tr:ndarray, dev:ndarray, configs:list):
        """
        In the grid search, we explore all configurations provided and try to find the best
        hyperparameter configuration using the training set to train the model and the validation
        set to compare the performance among all models instantiated by configurations.

        :param tr: training data
        :param dev: validation data
        :param configs: list of dicts with the keys "units", "layers", "activ", "epochs", "lr"
        :return: (rank, best, loss) where rank is a list of
                 (config, last training loss, validation loss) with losses rounded to 6 digits,
                 best is a copy of the trainer with the lowest validation loss (None when no
                 configuration has a finite validation loss) and loss is its training history
        """

        rank = [] # keeps track of each configuration and the corresponding performance

        # we save the best trained model and the training loss during the epochs
        best, loss = None, None
        best_dev_loss = float("inf")

        for config in tqdm(configs):

            trainer = RNN_trainer(hidden=config["units"],
                                  layers=config["layers"],
                                  no_linearity=config["activ"])

            history = trainer.fit(tr, config["epochs"], config["lr"])

            # The model is compared on the same rounded value that is stored in the ranking,
            # so the selected model and the top-ranked configuration cannot disagree.
            vl_loss = round(trainer.validate(dev)[0].item(), 6)

            rank.append((config, round(history[-1].item(), 6), vl_loss))

            # we keep the best model (a NaN loss never satisfies the comparison)
            if vl_loss < best_dev_loss:
                best_dev_loss = vl_loss
                loss = copy.deepcopy(history)
                best = copy.deepcopy(trainer)

        return rank, best, loss

### Assignment 1.1: NARMA10 task with RNN

In [None]:
# Model selection on the NARMA10 splits; keep the winning configuration and its trained model
gs = GridSearch(tr=tr_dataset, dev=dev_dataset)
best_config, best_model = gs.best_config, gs.best_model

### Training loss

In [None]:
# gs.tr_loss is the per-epoch training loss of the model selected by the grid search.
# show_loss is imported from LAB3_1.utils; presumably it plots the curve (not visible here).
show_loss(gs.tr_loss)

### Train, Validation and Test errors in the best configuration

In [None]:
# Start from a clean hidden state so that re-running this cell reports the same numbers
best_model.model.last_hidden = None

# Training, validation and test are consecutive chunks of the same series: the hidden state
# reached at the end of a chunk is saved and used as the initial state of the next one.
tr_loss = best_model.validate(tr_dataset, save_state=True)[0]
print("Train Error", round(tr_loss.item(), 6))

# The state is saved here too; otherwise the test chunk would start from the state reached
# at the end of the training chunk, skipping the validation steps in between.
dev_loss = best_model.validate(dev_dataset, save_state=True)[0]
print("Validation Error", round(dev_loss.item(), 6))

test_loss = best_model.validate(ts_dataset)[0]
print("Test Error", round(test_loss.item(), 6))

### Final retrain with Training and Validation set (with the best configuration)

In [None]:
final_trainer = RNN_trainer(hidden=best_config["units"],
                            layers=best_config["layers"],
                            no_linearity=best_config["activ"])
# we use both training and validation as a training set, using the best parameters
# found in the previous model selection (the two splits are joined along the time axis)
final_tr = np.hstack([tr_dataset, dev_dataset])
tr_history = final_trainer.fit(final_tr, best_config["epochs"], lr=best_config["lr"])

# Error on the data the model was retrained on (training + validation), hence a training
# error. The final hidden state is saved so that the test sequence, which follows in time,
# starts from it, as done when evaluating the model chosen by the grid search.
tr_loss, tr_out, tr_y = final_trainer.validate(final_tr, save_state=True)
print("Train Error", round(tr_loss.item(), 6))

test_loss, test_out, test_y  = final_trainer.validate(ts_dataset)
print("Test Error", round(test_loss.item(), 6))

In [None]:
# tr_history is the per-epoch training loss returned by the final retrain's fit().
# show_loss is imported from LAB3_1.utils; presumably it plots the curve (not visible here).
show_loss(tr_history)

In [None]:
# Predictions and targets of the retrain data and of the test set, moved to the CPU first.
# show_result is imported from LAB3_1.utils; presumably it plots them (TODO confirm).
show_result(tr_out.cpu(), tr_y.cpu(), test_out.cpu(), test_y.cpu())

## Bonus-track Assignment 1: Mackey-Glass 17 task with RNN

### Hold-out

In [None]:
# One-step-ahead task: index 0 holds the series without its last value (input),
# index 1 the series without its first value (target, i.e. the input shifted by one step)
source_aligned = np.zeros((2, source2.shape[0]-1, 1))
source_aligned[0] = source2[:-1]
source_aligned[1] = source2[1:]

# Chronological split along the time axis: 4000 training steps, 1000 validation steps,
# the remaining ones for the test
tr_dataset = source_aligned[:, :4000]
dev_dataset = source_aligned[:, 4000:5000]
ts_dataset = source_aligned[:, 5000:]

In [None]:
# Model selection on the Mackey-Glass splits; keep the winning configuration and its trained model
gs = GridSearch(tr=tr_dataset, dev=dev_dataset)
best_config, best_model = gs.best_config, gs.best_model

### Training loss

In [None]:
# gs.tr_loss is the per-epoch training loss of the model selected by the grid search.
# show_loss is imported from LAB3_1.utils; presumably it plots the curve (not visible here).
show_loss(gs.tr_loss)

### Train, Validation and Test errors in the best configuration

In [None]:
# Start from a clean hidden state so that re-running this cell reports the same numbers
best_model.model.last_hidden = None

# Training, validation and test are consecutive chunks of the same series: the hidden state
# reached at the end of a chunk is saved and used as the initial state of the next one.
tr_loss = best_model.validate(tr_dataset, save_state=True)[0]
print("Train Error", round(tr_loss.item(), 6))

# The state is saved here too; otherwise the test chunk would start from the state reached
# at the end of the training chunk, skipping the validation steps in between.
dev_loss = best_model.validate(dev_dataset, save_state=True)[0]
print("Validation Error", round(dev_loss.item(), 6))

test_loss = best_model.validate(ts_dataset)[0]
print("Test Error", round(test_loss.item(), 6))

### Final retrain with Training and Validation set (with the best configuration)

In [None]:
final_trainer = RNN_trainer(hidden=best_config["units"],
                            layers=best_config["layers"],
                            no_linearity=best_config["activ"])
# we use both training and validation as a training set, using the best parameters
# found in the previous model selection (the two splits are joined along the time axis)
final_tr = np.hstack([tr_dataset, dev_dataset])
tr_history = final_trainer.fit(final_tr, best_config["epochs"], lr=best_config["lr"])

# Error on the data the model was retrained on (training + validation), hence a training
# error. The final hidden state is saved so that the test sequence, which follows in time,
# starts from it, as done when evaluating the model chosen by the grid search.
tr_loss, tr_out, tr_y = final_trainer.validate(final_tr, save_state=True)
print("Train Error", round(tr_loss.item(), 6))

test_loss, test_out, test_y  = final_trainer.validate(ts_dataset)
print("Test Error", round(test_loss.item(), 6))

In [None]:
# tr_history is the per-epoch training loss returned by the final retrain's fit().
# show_loss is imported from LAB3_1.utils; presumably it plots the curve (not visible here).
show_loss(tr_history)

In [None]:
# Predictions and targets of the retrain data and of the test set, moved to the CPU first.
# show_result is imported from LAB3_1.utils; presumably it plots them (TODO confirm).
show_result(tr_out.cpu(), tr_y.cpu(), test_out.cpu(), test_y.cpu())

In [None]:
# import torch
# from torch import nn
#
# rnn = nn.RNN(1, 20, 1, batch_first=True) # x , hidden-node , layer
# read_out = nn.Linear(20,1) # hidden-node , out-size
#
# input = torch.randn(3000, 1) # steps, dim_x
# output, hn = rnn(input)
# print(output.shape, hn.shape)
# output = read_out(output)
# print(output.shape, hn.shape)
#
#
# print(read_out(output).shape)
#
# print(h0.shape)
# print(output.shape)