# Tutorial 1
Code adapted from the tutorial at:
https://towardsdatascience.com/building-rnn-lstm-and-gru-for-time-series-using-pytorch-a46e5b094e7b
The original tutorial uses a dataset of hourly energy consumption.

# Preparing the data

In [2]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import torch

## Read in data

In [18]:
# Load the bi-weekly dataset and discard the old index column in one step
df = pd.read_csv('Data/biweekly_imputed.csv').drop(columns=['Unnamed: 0'])
df.head()

# TODO Change data to timestep data
# TODO Make sure the data hasn't been imputed before test-train split

Unnamed: 0,total_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients_per_million,hosp_patients_per_million,total_tests_per_thousand,new_tests_smoothed_per_thousand,positive_rate,...,Eta,Delta,Alpha,non_who,Lambda,Mu,Omicron,date,iso_code,month_yr
0,79.741483,634.734734,274.677776,15.653383,1.030272,36.107112,593.984397,0.216,8.605749,0.184487,...,0.0,0.0,9.0,102.0,0.0,0.0,0.0,2021-02-08,ABW,02-2021
1,145.582004,634.517026,276.24793,15.659261,1.030004,36.116011,593.960639,0.216,8.603806,0.184433,...,0.0,0.0,37.0,101.0,0.0,0.0,0.0,2021-02-22,ABW,02-2021
2,3251.453033,652.915371,338.144384,15.346536,1.10048,39.05109,613.836104,0.216,9.105364,0.18076,...,0.0,0.0,65.0,36.0,0.0,0.0,0.0,2021-03-08,ABW,03-2021
3,3220.608962,652.271241,337.578018,15.347425,1.099897,39.009133,613.739417,0.216,9.107814,0.180558,...,0.0,0.0,66.0,11.0,0.0,0.0,0.0,2021-03-22,ABW,03-2021
4,48104.656055,192.597299,1043.073463,2.383964,0.958485,41.049494,308.813636,1568.504467,8.908849,0.145912,...,0.0,0.0,74.0,3.0,0.0,0.0,0.0,2021-04-05,ABW,04-2021


## Split into train, validation, and test datasets
Depending on what the .csv looks like, we might need to change this. We need to make sure that the time sequences are not affected by the split. 

Set the `shuffle` parameter of `train_test_split()` to `False`.

In [24]:
from sklearn.model_selection import train_test_split


# Split the outcome column from the rest of the features
def feature_outcome_split(df, outcome_col):
    """Separate the outcome column from the remaining feature columns.

    Args:
        df: DataFrame containing both the features and the outcome.
        outcome_col: Name of the column to predict.

    Returns:
        Tuple ``(features, outcome)``: ``features`` is ``df`` without
        ``outcome_col`` and ``outcome`` is a one-column DataFrame holding
        only ``outcome_col``. ``df`` itself is not modified.
    """
    features = df.drop(columns=[outcome_col])
    # Double brackets keep the outcome as a DataFrame rather than a Series
    outcome = df[[outcome_col]]
    return features, outcome


# Split the dataset into training, validation, and test datasets
def train_val_test_split(df, outcome_col, test_ratio):
    """Split a DataFrame into train, validation and test sets, in row order.

    Neither split shuffles, so the original row order is preserved. The
    validation set is sized to the same fraction of the full data as the
    test set.

    Args:
        df: DataFrame containing the features and the outcome column.
        outcome_col: Name of the outcome column.
        test_ratio: Fraction of all rows assigned to the test set (and,
            equivalently, to the validation set).

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # The validation split is taken from what remains after the test rows
    # are removed, so its share of that remainder has to be scaled up.
    val_ratio = test_ratio / (1 - test_ratio)

    features, outcome = feature_outcome_split(df, outcome_col)

    # Stage 1: hold out the test rows
    X_rest, X_test, y_rest, y_test = train_test_split(
        features, outcome, test_size=test_ratio, shuffle=False
    )

    # Stage 2: divide the remainder into training and validation rows
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_ratio, shuffle=False
    )

    return X_train, X_val, X_test, y_train, y_val, y_test
    

# New cases model
# Test ratio is 0.2
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(df, 'new_cases_smoothed_per_million', 0.2)

# New deaths model
# Test ratio is 0.2
# NOTE: this call rebinds the same six names as the call above, so once the
# cell has run they refer to the new-deaths split and the new-cases split is
# no longer reachable.
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(df, 'new_deaths_smoothed_per_million', 0.2)

## Scale the datasets
Use sklearn `MinMaxScaler`

X_new = (X_i - min(X)) / (max(X)-min(X))

Also includes function to get other scalers if we want to try other ones

In [25]:
# TODO: Currently doesn't work due to the dates being strings. 
# Not sure how we will deal with dates in the actual dataset, 
# so leaving this for now

from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler

def get_scaler(scaler):
    """Return a new, unfitted scikit-learn scaler selected by name.

    Args:
        scaler: Case-insensitive scaler name; one of 'minmax', 'standard',
            'maxabs' or 'robust'.

    Returns:
        A freshly constructed instance of the matching scaler class.

    Raises:
        ValueError: If ``scaler`` is not one of the supported names.
    """
    scalers = {
        'minmax': MinMaxScaler,
        'standard': StandardScaler,
        'maxabs': MaxAbsScaler,
        'robust': RobustScaler,
    }

    try:
        scaler_cls = scalers[scaler.lower()]
    except KeyError:
        # Report the valid names instead of the opaque
        # "'NoneType' object is not callable" that `.get(...)()` produced.
        raise ValueError(
            f"Unknown scaler {scaler!r}; expected one of {sorted(scalers)}"
        ) from None

    return scaler_cls()

# A single scaler instance is used for both the features and the outcome below
scaler = get_scaler('minmax')

# Fit on the training features only, then apply that same scaling to the
# validation and test features
X_train_arr = scaler.fit_transform(X_train)
X_val_arr = scaler.transform(X_val)
X_test_arr = scaler.transform(X_test)

# fit_transform re-fits the same `scaler` object on the training outcome, so
# from here on `scaler` holds the outcome's scaling rather than the features'
y_train_arr = scaler.fit_transform(y_train)
y_val_arr = scaler.transform(y_val)
y_test_arr = scaler.transform(y_test)

ValueError: could not convert string to float: '2021-02-08'

## Load datasets into DataLoaders for mini-batch training

Use TensorDataset. 

Convert the NumPy arrays that resulted from scaling into TensorDatasets. Then put the TensorDatasets into DataLoaders.

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# TODO might want to change batch size
batch_size = 64

# Convert all feature and outcome numpy arrays into tensors
train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)
test_features = torch.Tensor(X_test_arr)
test_targets = torch.Tensor(y_test_arr)

# Combine feature and outcome tensors into TensorDatasets
train = TensorDataset(train_features, train_targets)
val = TensorDataset(val_features, val_targets)
test = TensorDataset(test_features, test_targets)

# Load TensorDatasets into DataLoader
# shuffle=False keeps the rows in their original order.
# drop_last=True discards a final batch smaller than batch_size; the training
# loop reshapes every batch with a fixed batch_size, so a short batch would
# not fit that reshape.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False, drop_last=True)
# Extra DataLoader with batch size 1
# (with batch_size=1 every batch is complete, so drop_last removes nothing)
test_loader_one = DataLoader(test, batch_size=1, shuffle=False, drop_last=True)


# Models

## Vanilla RNN

Extend the `nn.Module` base class for NNs. 

The model has one or more RNN layers followed by a fully connected layer that converts the RNN output into the desired output shape.

In [None]:
import torch.nn as nn

class RNNModel(nn.Module):
    """Stacked vanilla RNN followed by a fully connected read-out layer.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state in each RNN layer.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the prediction produced for each sample.
        dropout_prob: Dropout probability passed to ``nn.RNN``.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super().__init__()

        # Kept on the instance because forward() needs them to size h0
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # RNN layers (batch_first=True: inputs are (batch, seq_len, input_dim))
        self.rnn = nn.RNN(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and predict from the last time step.

        Args:
            x: Batch-first input tensor of shape
                (batch_size, seq_length, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim).
        """
        # Fresh zero hidden state for every call, created on the same device
        # and with the same dtype as the input so the model also works when
        # it has been moved to a GPU or cast to another precision. A new
        # tensor carries no autograd history, so nothing needs detaching.
        h0 = torch.zeros(
            self.layer_dim,
            x.size(0),
            self.hidden_dim,
            device=x.device,
            dtype=x.dtype,
        )

        # out holds the top layer's hidden state at every time step
        out, _ = self.rnn(x, h0)

        # Keep only the last time step: (batch_size, hidden_dim)
        out = out[:, -1, :]

        # Convert the final state to our desired output shape (batch_size, output_dim)
        return self.fc(out)

## LSTM

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a fully connected read-out layer.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden and cell state in each LSTM layer.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Size of the prediction produced for each sample.
        dropout_prob: Dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super().__init__()

        # Kept on the instance because forward() needs them to size h0 / c0
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # LSTM layers (batch_first=True: inputs are (batch, seq_len, input_dim))
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )

        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the LSTM over ``x`` and predict from the last time step.

        Args:
            x: Batch-first input tensor of shape
                (batch_size, seq_length, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim).
        """
        state_shape = (self.layer_dim, x.size(0), self.hidden_dim)

        # Fresh zero hidden and cell states for every call, created on the
        # input's device and dtype so the model also works on a GPU or in
        # another precision. Because the states are rebuilt each call, no
        # gradient history is carried from one batch to the next and no
        # explicit detach is required.
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # out holds the top layer's hidden state at every time step
        out, _ = self.lstm(x, (h0, c0))

        # Keep only the last time step: (batch_size, hidden_dim)
        out = out[:, -1, :]

        # Convert the final state to our desired output shape (batch_size, output_dim)
        return self.fc(out)

Function to switch between models

In [None]:
def get_model(model, model_params):
    """Instantiate one of the recurrent models by name.

    Args:
        model: Case-insensitive model name: 'rnn', 'lstm' or 'gru'.
        model_params: Dict of keyword arguments forwarded to the model
            class constructor.

    Returns:
        A new instance of the selected model class.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
        NameError: If the class behind the requested name is not defined.
    """
    # Each entry looks its class up only when it is selected. Building the
    # dict from the classes directly raised NameError on every call whenever
    # one of them (e.g. GRUModel) had not been defined yet.
    models = {
        "rnn": lambda: RNNModel,
        "lstm": lambda: LSTMModel,
        "gru": lambda: GRUModel,
    }

    key = model.lower()
    if key not in models:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(models)}"
        )

    model_cls = models[key]()
    return model_cls(**model_params)

## Training Set-Up

`train_step` loops between forward propagation and backward propagation. 

`train` function: 
* Call `train_step` at each epoch. 
* After each training step, the network's weights are updated to minimize loss. 
* Validation step checks if there was any improvement.

We use mini-batch training. Each batch has dimensions (batch_size, sequence_length, input_dim).

In [None]:
class Optimization:
    """Bundle a model, loss function and optimizer with train/evaluate loops.

    Batches are moved to whichever device the model's parameters live on, so
    no module-level ``device`` variable is required.
    """

    def __init__(self, model, loss_fn, optimizer):
        """Store the training components and start with empty loss histories.

        Args:
            model: Network to train; called as ``model(x)``.
            loss_fn: Callable ``loss_fn(prediction, target)`` returning a
                scalar loss tensor.
            optimizer: Optimizer exposing ``step()`` and ``zero_grad()``.
        """
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        # Mean loss per epoch, appended by train()
        self.train_losses = []
        self.val_losses = []

    def _model_device(self):
        """Return the device of the model's first parameter (CPU if none)."""
        first_param = next(self.model.parameters(), None)
        if first_param is None:
            return torch.device("cpu")
        return first_param.device

    def train_step(self, x, y):
        """Run one forward/backward pass and parameter update.

        Args:
            x: Input batch, already shaped for the model.
            y: Target batch.

        Returns:
            The batch loss as a Python float.
        """
        # Sets model to train mode
        self.model.train()

        # Makes predictions
        yhat = self.model(x)

        # Computes loss; torch loss modules take (input, target), i.e. the
        # prediction first
        loss = self.loss_fn(yhat, y)

        # Computes gradients
        loss.backward()

        # Updates parameters and zeroes gradients
        self.optimizer.step()
        self.optimizer.zero_grad()

        # Returns the loss
        return loss.item()

    def train(self, train_loader, val_loader, batch_size=64, n_epochs=50, n_features=1):
        """Train for ``n_epochs`` and record the mean loss of every epoch.

        After the last epoch the model's ``state_dict`` is saved under the
        ``models/`` directory, which is created if it does not exist.

        Args:
            train_loader: Iterable of ``(x, y)`` training batches.
            val_loader: Iterable of ``(x, y)`` validation batches.
            batch_size: Number of samples in every batch; each batch is
                reshaped to ``(batch_size, -1, n_features)``, so it must
                match the loaders' batch size.
            n_epochs: Number of passes over ``train_loader``.
            n_features: Size of the last dimension fed to the model.
        """
        device = self._model_device()

        # Name the checkpoint after the model class rather than the full
        # multi-line module repr, and avoid ':' in the timestamp, which is
        # not a valid filename character on every platform.
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        model_path = os.path.join(
            "models", f"{type(self.model).__name__}_{timestamp}"
        )

        # Each epoch iterates over each mini-batch of training data,
        # executes train_step(), and calculates loss for validation batches
        for epoch in range(1, n_epochs + 1):
            batch_losses = []
            for x_batch, y_batch in train_loader:
                x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
                y_batch = y_batch.to(device)
                batch_losses.append(self.train_step(x_batch, y_batch))
            training_loss = np.mean(batch_losses)
            self.train_losses.append(training_loss)

            # Validation pass: eval mode and no gradient tracking
            self.model.eval()
            with torch.no_grad():
                batch_val_losses = []
                for x_val, y_val in val_loader:
                    x_val = x_val.view([batch_size, -1, n_features]).to(device)
                    y_val = y_val.to(device)
                    yhat = self.model(x_val)
                    batch_val_losses.append(self.loss_fn(yhat, y_val).item())
                validation_loss = np.mean(batch_val_losses)
                self.val_losses.append(validation_loss)

            # Report the first ten epochs, then every fiftieth
            if epoch <= 10 or epoch % 50 == 0:
                print(
                    f"[{epoch}/{n_epochs}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}"
                )

        os.makedirs("models", exist_ok=True)
        torch.save(self.model.state_dict(), model_path)

    def evaluate(self, test_loader, batch_size=1, n_features=1):
        """Predict every batch of ``test_loader`` without tracking gradients.

        Args:
            test_loader: Iterable of ``(x, y)`` test batches.
            batch_size: Number of samples in every batch (used to reshape).
            n_features: Size of the last dimension fed to the model.

        Returns:
            Tuple ``(predictions, values)``: two lists with one NumPy array
            per batch, holding the model outputs and the true targets.
        """
        device = self._model_device()
        predictions = []
        values = []

        self.model.eval()
        with torch.no_grad():
            for x_test, y_test in test_loader:
                x_test = x_test.view([batch_size, -1, n_features]).to(device)
                yhat = self.model(x_test)
                # .cpu() is required before .numpy() when the model is on a GPU
                predictions.append(yhat.detach().cpu().numpy())
                values.append(y_test.detach().cpu().numpy())

        return predictions, values

    def plot_losses(self):
        """Plot the recorded per-epoch training and validation losses."""
        # NOTE(review): `plt` is not imported in the visible notebook —
        # presumably matplotlib.pyplot; confirm it is imported before calling.
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
        plt.show()
        plt.close()

## Training

In [None]:
import torch.optim as optim

# Model and training hyperparameters
input_dim = len(X_train.columns)  # one model input per feature column
output_dim = 1
hidden_dim = 64
layer_dim = 3
batch_size = 64  # passed to opt.train below
dropout = 0.2
n_epochs = 100
learning_rate = 1e-3
weight_decay = 1e-6

# Keyword arguments for the model constructor (see get_model)
model_params = {'input_dim': input_dim,
                'hidden_dim' : hidden_dim,
                'layer_dim' : layer_dim,
                'output_dim' : output_dim,
                'dropout_prob' : dropout}

model = get_model('lstm', model_params)  # Change this line if you want a different model ('rnn')

loss_fn = nn.MSELoss(reduction="mean")
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# n_features=input_dim makes the training loop reshape each batch to
# (batch_size, -1, input_dim)
opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)
opt.train(train_loader, val_loader, batch_size=batch_size, n_epochs=n_epochs, n_features=input_dim)
opt.plot_losses()

# Evaluate one sample at a time using the batch-size-1 test loader
predictions, values = opt.evaluate(test_loader_one, batch_size=1, n_features=input_dim)

### Un-scale the predictions
Reduce the multi-dimensional tensors to one-dimensional vectors by flattening them, then apply `inverse_transform` to get real prediction values instead of scaled predictions.

In [None]:
def inverse_transform(scaler, df, columns):
    """Undo scaling on the selected columns of ``df``, in place.

    Args:
        scaler: Object exposing ``inverse_transform``.
        df: DataFrame whose columns are overwritten with un-scaled values.
        columns: Iterable of column selections; each one is used to index
            ``df`` and the result is handed to ``scaler.inverse_transform``.

    Returns:
        The same ``df`` object, after modification.
    """
    for selection in columns:
        restored = scaler.inverse_transform(df[selection])
        df[selection] = restored
    return df


def format_predictions(predictions, values, df_test, scaler):
    """Build a DataFrame of un-scaled true values and predictions.

    Args:
        predictions: Sequence of per-batch prediction arrays.
        values: Sequence of per-batch target arrays.
        df_test: DataFrame whose leading rows supply the result's index.
        scaler: Scaler used to map both columns back to original units.

    Returns:
        DataFrame with columns ``value`` and ``prediction``, sorted by index.
    """
    flat_truth = np.concatenate(values, axis=0).ravel()
    flat_preds = np.concatenate(predictions, axis=0).ravel()

    # Only as many index labels as there are values are taken from df_test
    row_index = df_test.head(len(flat_truth)).index
    frame = pd.DataFrame(
        data={"value": flat_truth, "prediction": flat_preds},
        index=row_index,
    ).sort_index()

    return inverse_transform(scaler, frame, [["value", "prediction"]])


# `scaler` was last fit on y_train in the scaling cell, so it maps the scaled
# targets and predictions back to the outcome's original units. X_test is
# passed only to supply the index of the result.
df_result = format_predictions(predictions, values, X_test, scaler)

### Calculate error metrics

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def calculate_metrics(df):
    """Compute MAE, RMSE and R^2 between ``df.value`` and ``df.prediction``.

    Args:
        df: DataFrame with a ``value`` column (truth) and a ``prediction``
            column.

    Returns:
        Dict with keys ``'mae'``, ``'rmse'`` and ``'r2'``.
    """
    actual, predicted = df.value, df.prediction

    mae = mean_absolute_error(actual, predicted)
    # RMSE is the square root of the mean squared error
    rmse = mean_squared_error(actual, predicted) ** 0.5
    r2 = r2_score(actual, predicted)

    return {'mae': mae, 'rmse': rmse, 'r2': r2}

# Error metrics for the un-scaled network predictions held in df_result
result_metrics = calculate_metrics(df_result)

## Baseline Predictions
We already have linear regression models, so maybe we'll just use those?

In [None]:
from sklearn.linear_model import LinearRegression

def build_baseline_model(df, test_ratio, target_col):
    """Fit a linear-regression baseline and return its test-set predictions.

    Args:
        df: DataFrame holding the feature columns and the target column.
        test_ratio: Fraction of rows held out as the test set; the split is
            not shuffled.
        target_col: Name of the column to predict.

    Returns:
        DataFrame indexed like the test rows, with the true target in
        ``target_col`` and the model output in ``prediction``, sorted by
        index.
    """
    # The splitter defined in this notebook is feature_outcome_split; the
    # previously referenced feature_label_split does not exist here.
    X, y = feature_outcome_split(df, target_col)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, shuffle=False
    )
    model = LinearRegression()
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)

    result = pd.DataFrame(y_test)
    # y is a one-column DataFrame, so predict() returns a column vector;
    # flatten it before storing it as a single column.
    result["prediction"] = np.ravel(prediction)
    result = result.sort_index()

    return result

# NOTE(review): `df_features` and a 'value' column are not defined anywhere in
# the visible notebook (they look carried over from the source tutorial) —
# substitute the real DataFrame and outcome column before running.
# calculate_metrics reads `df.value`, so the outcome column of df_baseline
# must be named 'value' (or be renamed) for the second line to work.
df_baseline = build_baseline_model(df_features, 0.2, 'value')
baseline_metrics = calculate_metrics(df_baseline)

# Tutorial 2

Regression Network Architecture from https://towardsdatascience.com/lstms-in-pytorch-528b0440244 