In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import csv



Set DataSet and reformate it a littlebit

In [24]:
device = "cpu"
dataset = 'dataset.csv'

df = pd.read_csv(dataset, delimiter=",")
df.rename(columns={"Day": "day", "Exchange rate": "value"}, inplace=True)
df['day'] = pd.to_datetime(df['day'])
df


Unnamed: 0,day,value
0,2022-09-17,60.0316
1,2022-09-16,59.6663
2,2022-09-15,59.7751
3,2022-09-14,60.0676
4,2022-09-13,60.4568
...,...,...
6926,1992-07-15,130.2000
6927,1992-07-10,130.3000
6928,1992-07-08,130.5000
6929,1992-07-03,134.8000


Generating time-lagged observations

In [26]:
from torch.utils.data import TensorDataset, DataLoader
def generate_time_lags(df, n_lags):
    df_n = df.copy()
    for n in range(1, n_lags + 1):
        df_n[f"lag{n}"] = df_n["value"].shift(n)
    df_n = df_n.iloc[n_lags:]
    return df_n


test_df = df.set_index('day')
test_df.index = pd.to_datetime(test_df.index)
test_df['value'] = pd.to_numeric(test_df['value'], errors='coerce')
test_df = test_df.dropna()
df_timelags = generate_time_lags(test_df, 100)
df_timelags = df_timelags.dropna()
df_timelags

  df_n[f"lag{n}"] = df_n["value"].shift(n)


Unnamed: 0_level_0,value,lag1,lag2,lag3,lag4,lag5,lag6,lag7,lag8,lag9,...,lag91,lag92,lag93,lag94,lag95,lag96,lag97,lag98,lag99,lag100
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-23,73.5050,73.3611,72.7089,72.8764,72.2953,71.0237,69.4160,66.2378,67.3843,68.8389,...,60.9033,60.8544,61.1814,60.8010,60.4696,60.4568,60.0676,59.7751,59.6663,60.0316
2022-04-22,74.9990,73.5050,73.3611,72.7089,72.8764,72.2953,71.0237,69.4160,66.2378,67.3843,...,60.3713,60.9033,60.8544,61.1814,60.8010,60.4696,60.4568,60.0676,59.7751,59.6663
2022-04-21,77.0809,74.9990,73.5050,73.3611,72.7089,72.8764,72.2953,71.0237,69.4160,66.2378,...,60.2370,60.3713,60.9033,60.8544,61.1814,60.8010,60.4696,60.4568,60.0676,59.7751
2022-04-20,79.0287,77.0809,74.9990,73.5050,73.3611,72.7089,72.8764,72.2953,71.0237,69.4160,...,60.2386,60.2370,60.3713,60.9033,60.8544,61.1814,60.8010,60.4696,60.4568,60.0676
2022-04-19,79.4529,79.0287,77.0809,74.9990,73.5050,73.3611,72.7089,72.8764,72.2953,71.0237,...,60.3677,60.2386,60.2370,60.3713,60.9033,60.8544,61.1814,60.8010,60.4696,60.4568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1992-07-15,130.2000,135.4000,151.1000,155.7000,161.1000,161.2000,161.4000,161.5000,161.7000,162.5000,...,1104.0000,1116.0000,1090.0000,1079.0000,1066.0000,1060.0000,1059.0000,1058.0000,1050.0000,1036.0000
1992-07-10,130.3000,130.2000,135.4000,151.1000,155.7000,161.1000,161.2000,161.4000,161.5000,161.7000,...,1072.0000,1104.0000,1116.0000,1090.0000,1079.0000,1066.0000,1060.0000,1059.0000,1058.0000,1050.0000
1992-07-08,130.5000,130.3000,130.2000,135.4000,151.1000,155.7000,161.1000,161.2000,161.4000,161.5000,...,1050.0000,1072.0000,1104.0000,1116.0000,1090.0000,1079.0000,1066.0000,1060.0000,1059.0000,1058.0000
1992-07-03,134.8000,130.5000,130.3000,130.2000,135.4000,151.1000,155.7000,161.1000,161.2000,161.4000,...,994.0000,1050.0000,1072.0000,1104.0000,1116.0000,1090.0000,1079.0000,1066.0000,1060.0000,1059.0000


Splitting the data into test, validation, and train sets

In [36]:
from sklearn.model_selection import train_test_split

def feature_label_split(df, target_col):
    y = df[[target_col]]
    X = df.drop(columns=[target_col])
    return X, y

def train_val_test_split(df, target_col, test_ratio):
    val_ratio = test_ratio / (1 - test_ratio)
    X, y = feature_label_split(df, target_col)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, shuffle=False)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio, shuffle=False)
    return X_train, X_val, X_test, y_train, y_val, y_test

X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(df_timelags, 'value', 0.2)

Applying scale transformation

In [37]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler

def get_scaler(scaler):
    scalers = {
        "minmax": MinMaxScaler,
        "standard": StandardScaler,
        "maxabs": MaxAbsScaler,
        "robust": RobustScaler,
    }
    return scalers.get(scaler.lower())()

In [38]:
scaler = get_scaler('minmax')
X_train_arr = scaler.fit_transform(X_train)
X_val_arr = scaler.transform(X_val)
X_test_arr = scaler.transform(X_test)

y_train_arr = scaler.fit_transform(y_train)
y_val_arr = scaler.transform(y_val)
y_test_arr = scaler.transform(y_test)

Loading the data into DataLoaders

In [41]:
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64

train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)
test_features = torch.Tensor(X_test_arr)
test_targets = torch.Tensor(y_test_arr)

train = TensorDataset(train_features, train_targets)
val = TensorDataset(val_features, val_targets)
test = TensorDataset(test_features, test_targets)

train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader_one = DataLoader(test, batch_size=1, shuffle=False, drop_last=True)

Vanilla RNN

In [42]:
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        """The __init__ method that initiates an RNN instance.

        Args:
            input_dim (int): The number of nodes in the input layer
            hidden_dim (int): The number of nodes in each layer
            layer_dim (int): The number of layers in the network
            output_dim (int): The number of nodes in the output layer
            dropout_prob (float): The probability of nodes being dropped out

        """
        super(RNNModel, self).__init__()

        # Defining the number of layers and the nodes in each layer
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # RNN layers
        self.rnn = nn.RNN(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """The forward method takes input tensor x and does forward propagation

        Args:
            x (torch.Tensor): The input tensor of the shape (batch size, sequence length, input_dim)

        Returns:
            torch.Tensor: The output tensor of the shape (batch size, output_dim)

        """
        # Initializing hidden state for first input with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        # Forward propagation by passing in the input and hidden state into the model
        out, h0 = self.rnn(x, h0.detach())

        # Reshaping the outputs in the shape of (batch_size, seq_length, hidden_size)
        # so that it can fit into the fully connected layer
        out = out[:, -1, :]

        # Convert the final state to our desired output shape (batch_size, output_dim)
        out = self.fc(out)
        return out
