In [None]:
import torch
import pandas as pd
from numpy import ndarray

from torch import Tensor, zeros, empty, tanh, eye
from torch.linalg import eigvals, pinv

from LAB3_1.utils import make_sequence
from tqdm import tqdm
import sys
from itertools import product


In [None]:
class EchoStateNetwork_base:
    """
    Leaky-integrator Echo State Network reservoir with fixed random weights.

    Attributes created by the constructor:
        Wx: input weights, shape (hidden_dim, input_dim), uniform in [-omega, omega)
        Wh: recurrent weights, shape (hidden_dim, hidden_dim), rescaled so that
            their spectral radius equals the requested one
        b:  bias, shape (hidden_dim,), uniform in [-omega, omega)
    """
    def __init__(self,input_dim:int, hidden_dim:int, leakage_rate:float,
                 spectral_radius:float, omega:float):
        """
        :param input_dim: size of each input vector
        :param hidden_dim: number of reservoir units
        :param leakage_rate: weight of the new activation in the state update
        :param spectral_radius: target spectral radius of the recurrent matrix
        :param omega: scaling of the input weights and of the bias
        """
        self.leakage_rate = leakage_rate
        self.hidden_dim = hidden_dim

        # torch.rand samples in [0, 1); the draw order (Wx, Wh, b) is kept so
        # that a seeded run produces the same weights as before.
        Wx = torch.rand((hidden_dim,input_dim))
        Wh = torch.rand((hidden_dim, hidden_dim))
        b = torch.rand(hidden_dim)

        # Map [0, 1) to [-1, 1) and scale by omega.
        self.Wx =  (Wx * 2 -1) * omega
        self.b = (b * 2 -1) * omega

        # Rescale the recurrent matrix to the desired spectral radius.
        Wh = Wh * 2 - 1
        self.Wh = Wh * spectral_radius / self.spectral_radius(Wh)

    def gpu(self, device:str):
        """
        Move the reservoir parameters to the given device.
        :param device: device identifier accepted by Tensor.to
        """
        self.Wx = self.Wx.to(device)
        self.Wh = self.Wh.to(device)
        self.b = self.b.to(device)

    @staticmethod
    def spectral_radius(matrix) -> Tensor:
        """
        Largest eigenvalue magnitude of a square matrix.
        :param matrix: square tensor
        :return: 0-dim tensor holding max |eigenvalue|
        """
        eigenvalues = eigvals(matrix)
        max_magnitude =torch.max(torch.abs(eigenvalues))
        return max_magnitude

    def resevoir(self, x:Tensor, h0:Tensor=None):
        """
        Run the reservoir over an input sequence.
        :param x: input sequence iterated along its first dimension; each item
                  is multiplied by Wx, so it must hold input_dim values
        :param h0: state preceding the first input (zeros when None)
        :return: tensor of shape (x.size(0), hidden_dim); row t is the state
                 obtained after consuming x[t]
        """
        # Allocate on the same device/dtype as the weights so the method keeps
        # working after gpu() has been called.
        device, dtype = self.Wx.device, self.Wx.dtype
        h_stack = empty(x.size(0), self.hidden_dim, dtype=dtype, device=device)

        if h0 is None:
            h = zeros(self.hidden_dim, dtype=dtype, device=device)
        else:
            h = h0.to(device=device, dtype=dtype).reshape(self.hidden_dim)

        for step, x_ in enumerate(x):
            # The update must read the PREVIOUS state. It is carried in `h`
            # because the rows of h_stack beyond the current one are still
            # uninitialised memory coming from empty().
            z = self.Wx @ x_ + self.Wh @ h + self.b
            h = (1 - self.leakage_rate) * h + self.leakage_rate * tanh(z)
            h_stack[step] = h

        return h_stack

class ESN_Seq2Seq(EchoStateNetwork_base):
    """
    Echo State Network with a linear readout fitted by ridge regression,
    producing one output per input time step.
    """
    def __init__(self, input_dim: int, hidden_dim: int, leakage_rate: float,
                 tikhonov:float, spectral_radius: float, omega: float):
        """
        :param input_dim: size of each input vector
        :param hidden_dim: number of reservoir units
        :param leakage_rate: weight of the new activation in the state update
        :param tikhonov: ridge (Tikhonov) regularisation coefficient of the readout
        :param spectral_radius: target spectral radius of the recurrent matrix
        :param omega: scaling of the input weights and of the bias
        """
        super().__init__(input_dim, hidden_dim, leakage_rate, spectral_radius, omega)

        self.Wo = None            # readout weights, set by fit()
        self.hidden_cache = None  # post-transient states used by the last fit()
        self.tikhonov = tikhonov

    @staticmethod
    def MSE(y: Tensor, y_pred: Tensor) -> Tensor:
        """
        Mean square error
        :param y: Target
        :param y_pred: Predicted target
        :return: 0-dim tensor with the mean of the squared differences
        """
        return torch.pow((y - y_pred), 2).mean()

    def fit(self, x:Tensor, y:Tensor, transient:int):
        """
        Fit the readout with ridge regression on the reservoir states.
        :param x: input sequence fed to the reservoir
        :param y: targets aligned with x along the first dimension
        :param transient: number of initial steps discarded before fitting
        :raises ValueError: if no state is left after discarding the transient
        """
        h_stack = self.resevoir(x)
        h_stack, y = h_stack[transient:], y[transient:]

        if h_stack.shape[0] == 0:
            # Without this check the readout would silently become all zeros.
            raise ValueError("transient discards the whole sequence: no samples left to fit the readout")

        # Build the identity with the dtype/device of the states so the sum
        # below is valid wherever the states live.
        I = eye(h_stack.shape[1], dtype=h_stack.dtype, device=h_stack.device)
        self.Wo = pinv(h_stack.T @ h_stack + self.tikhonov * I) @ h_stack.T @ y
        self.hidden_cache = h_stack


    def predict(self, x:Tensor, y:Tensor=None, h0:Tensor=None):
        """
        Compute the readout output for an input sequence.
        :param x: input sequence fed to the reservoir
        :param y: optional targets; when given, the MSE is also returned
        :param h0: optional reservoir state preceding the first input
        :return: (h_stack, y_pred) when y is None, otherwise (loss, h_stack, y_pred)
        :raises RuntimeError: if called before fit()
        """
        if self.Wo is None:
            raise RuntimeError("predict() called before fit(): the readout has not been trained")

        h_stack = self.resevoir(x, h0)
        y_pred = h_stack @ self.Wo

        loss = None
        if y is not None:
            loss = self.MSE(y, y_pred)

        output = h_stack, y_pred
        return  (loss,) + output if loss is not None else output


In [None]:
class GridSearch:
    """
    Exhaustive search over a hyperparameter grid.

    After construction:
        rank: list of (config, train_loss, dev_loss) sorted by dev_loss
        best_config: configuration with the lowest validation loss
    """

    def __init__(self, parameters_grid:dict, tr:ndarray, dev:ndarray, dt_type:str):
        """
        :param parameters_grid: maps each hyperparameter name to the list of values to try
        :param tr: training data, forwarded to make_sequence
        :param dev: validation data, forwarded to make_sequence
        :param dt_type: dataset identifier forwarded to make_sequence
        :raises ValueError: if the grid yields no configuration
        """
        # Cartesian product of all the value lists, one dict per combination.
        all_configs = [dict(zip(parameters_grid.keys(), configs)) for configs in product(*parameters_grid.values())]

        print("Number of configurations to try: ",len(all_configs))
        if not all_configs:
            raise ValueError("parameters_grid produces no configuration: every hyperparameter needs at least one value")

        # One (config, train_loss, dev_loss) entry per configuration.
        rank = self.run(tr, dev, all_configs, dt_type)

        # Sort by validation loss. NaN compares unequal to itself, so the first
        # key component pushes diverged configurations to the end instead of
        # leaving the ordering undefined.
        rank = sorted(rank, key=lambda conf: (conf[2] != conf[2], conf[2]))

        print("\nThe best solution in ", rank[0])
        self.rank = rank
        self.best_config = rank[0][0]

    @staticmethod
    def run(tr:ndarray, dev:ndarray, configs:list, dt_type:str):
        """
        In the grid search, we explore all configurations provided and try to find the best
        hyperparameter configuration using the training set to train the model and the validation
        set to compare the performance among all models instantiated by configurations.
        :param tr: training data, forwarded to make_sequence
        :param dev: validation data, forwarded to make_sequence
        :param configs: list of hyperparameter dictionaries to evaluate
        :param dt_type: dataset identifier forwarded to make_sequence
        :return: list of (config, train_loss, dev_loss), losses rounded to 6 decimals
        """

        rank = [] # keep in track the configuration and the corresponding performance

        tr = make_sequence(tr, 1, dt_type)
        dev = make_sequence(dev, 1, dt_type)

        for config in tqdm(configs): # try each configuration

            # multiple_train repeats the training with the same hyperparameters
            # and returns the averaged losses: index 0 is the training loss,
            # index 1 the loss on the validation set passed here.
            losses = multiple_train(tr, config, [dev])
            rank.append((config, round(losses[0].item(), 6), round(losses[1].item(), 6)))

        return rank

def multiple_train(tr:tuple[Tensor,Tensor], config:dict,
                   dts:list[tuple]=None, return_model:bool=False, n_runs:int=3):
    """
    Train several independently initialised models with the same
    hyperparameters and average their losses.
    :param tr: Training dataset (input, target) used for fitting the readout
    :param config: Hyperparameters; keys read here are "units", "omega",
                   "spectral", "lambda", "leakage" and "transient"
    :param dts: other (input, target) datasets to evaluate, in order; each one
                starts from the last reservoir state of the previous segment
    :param return_model: if true also return the model of the last run
    :param n_runs: number of independent trainings to average over
    :return: 1-D tensor [train_loss, loss(dts[0]), loss(dts[1]), ...] averaged
             over the runs, or (that tensor, last model) if return_model
    :raises ValueError: if n_runs is smaller than 1
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    trains, trainer = [], None
    for _ in range(n_runs):
        trainer = ESN_Seq2Seq(1, hidden_dim=config["units"],
                              omega=config["omega"],
                              spectral_radius=config["spectral"],
                              tikhonov=config["lambda"],
                              leakage_rate=config["leakage"])

        trainer.fit(*tr, transient=config["transient"])

        tr_loss, hs, _ = trainer.predict(*tr)
        losses = [tr_loss]

        # Each extra dataset continues from the final state of the previous
        # one, so no new transient is introduced between segments.
        for dt in dts or []:
            loss, hs, _ =  trainer.predict(*dt, hs[-1])
            losses.append(loss)

        trains.append(torch.stack(losses))

    # Rows are runs, columns are datasets: average over the runs.
    trains = torch.stack(trains).mean(dim=0)
    return (trains, trainer) if return_model else trains


In [None]:
# Read the MG17 CSV (no header row) and transpose it before converting to a
# NumPy array, then split along the first axis into train / validation / test.
source2 = pd.read_csv("./../sources/MG17.csv", header=None).T.to_numpy()
tr_dataset = source2[:4000]
dev_dataset = source2[4000:5000]
ts_dataset = source2[5000:]

In [None]:
# Hyperparameter grid (3 * 2 * 2 * 2 * 2 * 1 = 48 combinations).
ranges_to_explore = {
    "units": [50, 100, 150],   # reservoir size (hidden_dim)
    "omega": [0.5, 0.7],       # input/bias scaling
    "spectral": [0.8, 0.9],    # spectral radius of the recurrent matrix
    "lambda": [1e-4, 1e-5],    # Tikhonov regularisation of the readout
    "leakage": [0.3, 0.7],     # leakage rate of the state update
    "transient": [100],        # initial steps discarded when fitting
}

# Model selection: fit on the training split, rank on the validation split.
gs = GridSearch(
    parameters_grid=ranges_to_explore,
    tr=tr_dataset,
    dev=dev_dataset,
    dt_type="MG17",
)
best_config = gs.best_config

Number of configurations to try:  48


  z = self.Wx @ x_.T + self.Wh @ h_stack[step].T + self.b
  0%|          | 0/48 [00:00<?, ?it/s]


_LinAlgError: linalg.svd: The algorithm failed to converge because the input matrix is ill-conditioned or has too many repeated singular values (error code: 23).

In [None]:
# Build the sequences for each split.
tr_seq = make_sequence(tr_dataset, 1, "MG17")
dev_seq = make_sequence(dev_dataset, 1, "MG17")
ts_seq = make_sequence(ts_dataset, 1, "MG17")

# Averaged losses of the selected configuration, ordered as
# [train, validation, test] to mirror the order of `dts`.
losses = multiple_train(tr_seq, best_config, [dev_seq, ts_seq])

for idx, label in enumerate(("Train Error", "Validation Error", "Test Error")):
    print(label, round(losses[idx].item(), 6))

In [None]:
from numpy import vstack

# Final retraining: stack train and validation data, turn them into a single
# sequence and fit the selected configuration, evaluating on the test split.
final_tr = make_sequence(vstack([tr_dataset, dev_dataset]), 1, "MG17")

losses, model = multiple_train(
    final_tr,
    config=best_config,
    dts=[ts_seq],
    return_model=True,
)

In [None]:
# Predictions of the final model on the data it was fitted on.
_, final_tr_states, tr_pred = model.predict(*final_tr)

# Start the test prediction from the last reservoir state reached on the
# training data, as multiple_train does when it evaluates the test loss.
# Starting from a zero state would add a fresh transient at the beginning of
# the test predictions.
# NOTE(review): assumes ts_seq directly follows final_tr in time, as the
# slicing of source2 suggests — confirm against make_sequence.
_, _, ts_pred = model.predict(*ts_seq, final_tr_states[-1])

In [None]:
from LAB3_1.utils import show_result

# tr_pred is computed on final_tr (train + validation), so it must be paired
# with the targets of final_tr rather than with the train-only targets.
show_result(tr_pred, final_tr[1], ts_pred, ts_seq[1])