In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
import torch
import tensorflow as tf

# Notebook-wide matplotlib defaults: 16x40 inch figures, background grid off.
mpl.rcParams.update({
    'figure.figsize': (16, 40),
    'axes.grid': False,
})

In [None]:
# Load the dataset; the first CSV column becomes the DataFrame index.
df = pd.read_csv('full.csv', index_col=0)
# Preview the first rows.
df.head()

In [None]:
# Draw every column of `df` in its own subplot.
df.plot(subplots=True)
plt.show()

In [None]:
class TimeSeriesDataset(torch.utils.data.Dataset):
    """Sliding-window dataset of lagged differences for sequence regression.

    For every row ``t`` of ``df`` and every used column, a window holds
    ``x[t] - x[t - i]`` for ``i = 0 .. input_horizon + output_horizon - 1``
    (i.e. ``Series.diff(periods=i)``).  The first ``input_horizon`` lags of each
    input column form the model input; the remaining ``output_horizon`` lags of
    ``target_col`` form the target.

    Only rows whose windows are complete (no NaN) in *every* used column are
    kept, so all channels and the target always describe the same rows.

    NOTE(review): lag 0 is zero for every finite value, and the target lags are
    *older* than the input lags, so the model learns to reconstruct earlier
    values from more recent ones.  That layout is preserved here unchanged;
    confirm it is what is intended.

    Args:
        df: DataFrame holding the series, one per column.
        input_cols: Columns to use as input channels.  Columns missing from
            ``df`` are skipped with a printed message.
        target_col: Column whose lags are predicted.  It does not have to be
            one of ``input_cols``.
        input_horizon: Number of lags fed to the model (>= 1).
        output_horizon: Number of lags to predict (>= 1).

    Attributes:
        inp: Array of shape ``(n_samples, n_used_cols, input_horizon)``.
        tar: Array of shape ``(n_samples, output_horizon)``.

    Raises:
        ValueError: If a horizon is smaller than 1, ``target_col`` is not in
            ``df``, or none of ``input_cols`` is in ``df``.
    """

    def __init__(self, df, input_cols, target_col, input_horizon, output_horizon):
        if input_horizon < 1 or output_horizon < 1:
            raise ValueError("input_horizon and output_horizon must both be >= 1.")
        if target_col not in df.columns:
            raise ValueError(f"Target column {target_col} is not found.")

        used_cols = []
        for col in input_cols:
            if col not in df.columns:
                print(f"Column {col} is not found, skipping.")
                continue
            used_cols.append(col)
        if not used_cols:
            raise ValueError("None of the input columns were found in the DataFrame.")

        total_horizon = input_horizon + output_horizon

        # Build each column's lag table once and track, positionally, which
        # rows are complete in all of them.  Dropping NaN rows per column
        # instead would let channels drift out of alignment when the columns
        # have gaps in different places.
        windows = {}
        keep = np.ones(len(df), dtype=bool)
        for col in used_cols + [target_col]:
            if col in windows:
                continue
            windows[col] = self._lagged_differences(df[col], total_horizon)
            keep &= ~pd.isna(windows[col]).any(axis=1)

        # Slice from `input_horizon` onwards rather than `-output_horizon:` so
        # the split point is explicit and never degenerates to "all columns".
        self.tar = windows[target_col][keep, input_horizon:]
        print(self.tar.shape)
        self.inp = np.stack(
            [windows[col][keep, :input_horizon] for col in used_cols],
            axis=1
        )
        print(self.inp.shape)

    @staticmethod
    def _lagged_differences(series, total_horizon):
        """Return an array whose column ``i`` is ``series.diff(periods=i)``."""
        lagged = pd.concat(
            [series.diff(periods=i) for i in range(total_horizon)],
            axis=1
        )
        return lagged.to_numpy()

    def __len__(self):
        """Number of complete windows."""
        return len(self.inp)

    def __getitem__(self, idx):
        """Return ``(input, target)`` for window ``idx`` as float32 tensors."""
        inp = torch.from_numpy(self.inp[idx]).float()
        tar = torch.from_numpy(self.tar[idx]).float()
        return inp, tar

In [None]:
# Window sizes, in rows of `df`: lags fed to the model / lags it predicts.
input_horizon = 24
output_horizon = 48
# Input channels; the target column is also used as an input channel.
input_cols = ['Lộ 173 | P', 'Lộ 173 | Q']
target_col = 'Lộ 173 | P'

single_dataset = TimeSeriesDataset(
    df,
    input_cols=input_cols,
    target_col=target_col,
    input_horizon=input_horizon,
    output_horizon=output_horizon
)
# Take the channel count from the array that was actually built: the dataset
# skips columns that are missing from `df`, so len(input_cols) could be too big.
n_channels = single_dataset.inp.shape[1]

train_size = int(0.8 * len(single_dataset))
test_size = len(single_dataset) - train_size
# Split by position instead of at random: neighbouring windows overlap almost
# entirely, so a random split would put near-copies of the training windows in
# the test set.  Assumes the rows of `df` are in time order -- TODO confirm.
# Windows right at the boundary still share some rows.
train_dataset = torch.utils.data.Subset(single_dataset, range(train_size))
test_dataset = torch.utils.data.Subset(
    single_dataset,
    range(train_size, train_size + test_size)
)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True
)
# Shuffling does not change averaged test metrics; it only varies which
# windows end up in the first batch drawn from this loader.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=True
)

In [None]:
def get_historical_attributes(series, input_timesteps, output_timesteps):
    """Split a series into tables of lagged values.

    A table with columns ``lag-0 .. lag-(n-1)`` is built, where
    ``n = input_timesteps + output_timesteps`` and ``lag-i`` is
    ``series.shift(i)``; rows containing NaN (incomplete windows or missing
    values) are dropped.

    Args:
        series: The ``pandas.Series`` to window.
        input_timesteps: Number of columns in the "past" table (>= 0).
        output_timesteps: Number of columns in the "future" table (>= 0).

    Returns:
        ``(past_df, future_df)``: ``past_df`` holds the ``input_timesteps``
        largest lags (the oldest values) and ``future_df`` holds
        ``lag-0 .. lag-(output_timesteps-1)`` (the most recent values).  Within
        each table the columns run from the most recent lag to the oldest.

    Raises:
        ValueError: If either timestep count is negative.
    """
    if input_timesteps < 0 or output_timesteps < 0:
        raise ValueError("input_timesteps and output_timesteps must be non-negative.")
    n_features = input_timesteps + output_timesteps
    # Build all lag columns in one go instead of growing a frame column by column.
    lagged = pd.DataFrame(
        {f'lag-{i}': series.shift(i) for i in range(n_features)}
    ).dropna()
    # Use an explicit start position: `iloc[:, -input_timesteps:]` would select
    # every column when input_timesteps is 0.
    past_df = lagged.iloc[:, n_features - input_timesteps:]
    future_df = lagged.iloc[:, :output_timesteps]
    return past_df, future_df

In [None]:
# Window sizes for the lag-table demo.
INPUT_TIMESTEPS = 7
OUTPUT_TIMESTEPS = 3
# Build the past/future lag tables from the first column of `df`.
past_df, future_df = get_historical_attributes(df.iloc[:, 0], INPUT_TIMESTEPS, OUTPUT_TIMESTEPS)

In [None]:
# First 14 values of the first column of `df`.
df.iloc[:14, 0]

In [None]:
# Preview the "past" lag table.  `past_df` was already built by the cell that
# defines INPUT_TIMESTEPS / OUTPUT_TIMESTEPS, so it is not recomputed here.
past_df.head()

In [None]:
# Preview the "future" lag table.
future_df.head()

In [None]:
class DilatedCNN(torch.nn.Module):
    """Three dilated 1-D convolutions followed by a two-layer MLP head.

    Each convolution uses ``kernel_size=2, dilation=2, padding=1``, which keeps
    the sequence length unchanged, and doubles the channel count
    (``C -> 2C -> 4C -> 8C``).  The padding is applied on both sides, so the
    convolutions are not causal.  The flattened feature map is mapped to
    ``output_horizon`` values through a 64-unit hidden layer.

    Args:
        input_horizon: Length of the input sequences.
        output_horizon: Number of values to predict per sample.
        in_channels: Number of input channels.  When omitted, the notebook-level
            variable ``n_channels`` is used, as before.

    Input shape is ``(batch, in_channels, input_horizon)``; output shape is
    ``(batch, output_horizon)``.
    """

    def __init__(self, input_horizon, output_horizon, in_channels=None):
        super().__init__()
        if in_channels is None:
            # Backward-compatible fallback to the notebook global.
            in_channels = n_channels

        # Channel widths of the conv stack: C, 2C, 4C, 8C.
        widths = [in_channels * (2 ** k) for k in range(4)]
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.append(
                torch.nn.Conv1d(
                    in_channels=c_in,
                    out_channels=c_out,
                    kernel_size=2,
                    dilation=2,
                    padding=1
                )
            )
            layers.append(torch.nn.ReLU())
        self.conv = torch.nn.Sequential(*layers)
        self.flatten = torch.nn.Flatten()
        self.fc = torch.nn.Sequential(
            # The convolutions preserve length, so the flattened size is
            # (last conv width) * input_horizon.
            torch.nn.Linear(widths[-1] * input_horizon, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, output_horizon)
        )

    def forward(self, x):
        """Map ``(batch, in_channels, input_horizon)`` to ``(batch, output_horizon)``."""
        x = self.conv(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [None]:
from tqdm import tqdm

# Evaluation metrics of every trained model, keyed by model name.
eval_dict = {}
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def eval_model_multi(model_cls: "type[torch.nn.Module]", model_name, epochs=5, **kwargs):
    """Train a model on ``train_dataloader`` and score it on ``test_dataloader``.

    Relies on the notebook-level names ``train_dataloader``, ``test_dataloader``,
    ``device``, ``eval_dict`` and ``tqdm``.

    Args:
        model_cls: The model *class* (not an instance); it is instantiated as
            ``model_cls(**kwargs)``.
        model_name: Key under which the metrics are stored in ``eval_dict``.
        epochs: Number of passes over the training loader.
        **kwargs: Constructor arguments for ``model_cls``.

    Returns:
        The trained model itself (never a ``DataParallel`` wrapper), in eval
        mode and moved to the CPU.

    Side effects:
        Sets ``eval_dict[model_name]`` to ``{'MAE': ..., 'MSE': ...}``, computed
        over all test samples, and prints progress and the metrics.
    """
    model = model_cls(**kwargs)
    model.to(device)
    # DataParallel only helps with several GPUs; with one it is pure overhead.
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        print("Let's use", n_gpus, "GPUs!")
        model = torch.nn.DataParallel(model)

    loss_fn = torch.nn.MSELoss()
    metr = torch.nn.L1Loss()
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=0.001
    )

    model.train()
    for epoch in range(epochs):
        print("Epoch {}/{}".format(epoch+1, epochs))
        loss_records = []
        for inp, tar in tqdm(train_dataloader):
            inp, tar = inp.to(device), tar.to(device)

            optimizer.zero_grad()
            pred = model(inp)
            loss = loss_fn(pred, tar)
            loss.backward()
            optimizer.step()

            loss_records.append(loss.item())

        # An empty loader yields no batches; report NaN instead of dividing by zero.
        epoch_loss = sum(loss_records) / len(loss_records) if loss_records else float("nan")
        print("loss: {}".format(epoch_loss))

    model.eval()
    abs_err_total = 0.0
    sq_err_total = 0.0
    n_samples = 0
    # No gradients are needed for scoring.
    with torch.no_grad():
        for inp, tar in tqdm(test_dataloader):
            inp, tar = inp.to(device), tar.to(device)
            pred = model(inp)

            # Weight each batch mean by its size so a smaller final batch does
            # not skew the dataset-level average.
            batch_size = inp.shape[0]
            sq_err_total += loss_fn(pred, tar).item() * batch_size
            abs_err_total += metr(pred, tar).item() * batch_size
            n_samples += batch_size

    eval_dict[model_name] = {
        'MAE': abs_err_total / n_samples if n_samples else float("nan"),
        'MSE': sq_err_total / n_samples if n_samples else float("nan"),
    }

    # Hand back the plain module: a DataParallel wrapper refuses to run once
    # its parameters have been moved off the GPU.
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model.to("cpu")
    print(eval_dict)
    print('Done')
    return model

In [None]:
# Train the dilated CNN for 10 epochs and record its test metrics.
model = eval_model_multi(
    DilatedCNN,
    'Dilated CNN for alpha forecasting',
    epochs=10,
    input_horizon=input_horizon,
    output_horizon=output_horizon,
)

In [None]:
# Iterator over test batches; kept in its own cell so the next cell can be
# re-run to draw further batches.
dl_iteration = iter(test_dataloader)

In [None]:
# Draw the next (input, target) batch from the test iterator.
sample = next(dl_iteration)

In [None]:
inp, tar = sample
# If `model` is a DataParallel wrapper, call the wrapped module directly: the
# wrapper raises when its parameters are on the CPU while GPUs are visible.
net = model.module if isinstance(model, torch.nn.DataParallel) else model
# Inference only: skip building the autograd graph, and call the module
# rather than `.forward` so hooks are honoured.
with torch.no_grad():
    pred = net(inp)

In [None]:
# Plot each window of the batch: the target column's input values first, then
# the true and predicted target values.
# Look up which input channel holds the target column instead of assuming it
# is channel 0 (requires target_col to be one of input_cols).
target_channel = input_cols.index(target_col)
for x_window, y_true, y_pred in zip(inp, tar, pred):
    df_1 = pd.DataFrame({
        'true value': x_window.detach().cpu().numpy()[target_channel]
    })
    df_2 = pd.DataFrame({
        'true value': y_true.detach().cpu().numpy(),
        'predicted value': y_pred.detach().cpu().numpy()
    })
    # ignore_index gives one continuous x axis across input and target steps.
    df_merged = pd.concat([df_1, df_2], ignore_index=True)
    ax = df_merged.plot(figsize=(20, 5), style='.-')
    ax.set_xlabel('window step')