# INSTALLATIONS, DATA ACQUISITION AND IMPORTS

In [None]:
! pip install -q kaggle
! kaggle datasets download -qd swaptr/bitcoin-historical-data
! mkdir btc && unzip -q bitcoin-historical-data.zip -d btc

In [None]:
# Install the Nixtla forecasting packages; neuralforecast and statsforecast are
# imported in the next cell (datasetsforecast is installed but not imported in
# the cells visible here).
! pip install -q neuralforecast statsforecast datasetsforecast

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

from utilsforecast.losses import smape, rmse, mae, mse
from utilsforecast.evaluation import evaluate
from statsforecast import StatsForecast

from neuralforecast import NeuralForecast
from neuralforecast.auto import TFT, LSTM, NBEATS, BiTCN, TimesNet, DeepAR
from neuralforecast.losses.pytorch import MAE

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

from ray import tune
import optuna


import time

from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; later cells copy the trained models
# and the plots folder to /content/drive/MyDrive/models/.

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the raw BTC price table, keep the rows up to (and including) the marker
# timestamp, scale the closing price and build the train/test frames with the
# columns (ds, y, unique_id) that the forecasting cells below consume.
data = pd.read_csv('btc/data.csv')

# Rows positioned after this timestamp in the file are discarded.
invalid_date = '2017-01-01 00:00:00'
index = data.index[data['Date'] == invalid_date].tolist()
if not index:
    # Fail with an explicit message instead of a bare IndexError on index[0].
    raise ValueError(
        f"Cutoff date {invalid_date!r} not found in column 'Date' of btc/data.csv"
    )

# truncate(after=...) is label-inclusive, so the marker row itself is kept.
df = data.truncate(after=index[0])

# Work on an explicit copy so the column assignments below never hit a view.
df = df[['Date', 'Close']].copy()

# Parse the timestamps first so the sort is chronological rather than lexical.
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values("Date", ascending=True)

scaler = StandardScaler()

train_size = 0.90
split_index = int(df.shape[0] * train_size)

# Fit the scaler on the training rows only and then apply it to the whole
# series: fitting on all rows would leak test-set statistics into training.
scaler.fit(df[['Close']].iloc[:split_index])
df['Close'] = scaler.transform(df[['Close']]).ravel()

# Number of rows left for the test split (everything after the training rows).
aa = len(df) - split_index

# Forecast horizon (number of steps ahead) shared by all models.
horizon = 5

# rename()/assign() return new frames, so train and test are independent
# objects and no SettingWithCopyWarning is raised.
nixtla_columns = {'Date': 'ds', 'Close': 'y'}
test = df.tail(aa).rename(columns=nixtla_columns).assign(unique_id=1)
train = df.head(split_index).rename(columns=nixtla_columns).assign(unique_id=1)

# NOTE(review): presumably makes the Nixtla libraries return `unique_id` as a
# regular column instead of the index - confirm against the installed version.
os.environ['NIXTLA_ID_AS_COL'] = '1'

StatsForecast.plot(test, engine='matplotlib')

# MODEL INITIALIZATION

In [None]:
# Fixed hyperparameters for the four forecasters. Every model predicts
# `horizon` steps ahead and is trained with an MAE loss.

# LSTM
lstm = LSTM(
    h=horizon,
    input_size=25,
    inference_input_size=-5,
    encoder_n_layers=1,
    encoder_hidden_size=50,
    context_size=5,
    decoder_hidden_size=64,
    loss=MAE(),
    valid_loss=None,
    max_steps=2000,
    learning_rate=0.00019058032335399208,
    batch_size=16,
    scaler_type='robust',
    random_seed=1,
)

# BiTCN
bitcn = BiTCN(
    h=horizon,
    input_size=25,
    hidden_size=32,
    dropout=0.12848839287344987,
    loss=MAE(),
    max_steps=2000,
    learning_rate=0.005652625486826798,
    batch_size=256,
    windows_batch_size=512,
    inference_windows_batch_size=512,
    step_size=5,
    scaler_type='standard',
    random_seed=18,
)

# N-BEATS
nbeats = NBEATS(
    h=horizon,
    input_size=15,
    loss=MAE(),
    valid_loss=None,
    max_steps=1000,
    learning_rate=0.007920715236497127,
    batch_size=32,
    windows_batch_size=256,
    step_size=5,
    scaler_type='standard',
    random_seed=1,
)

# TFT
tft = TFT(
    h=horizon,
    input_size=15,
    hidden_size=64,
    loss=MAE(),
    max_steps=1000,
    learning_rate=0.00538096845409797,
    batch_size=32,
    windows_batch_size=256,
    start_padding_enabled=False,
    step_size=5,
    scaler_type='robust',
    random_seed=12,
)


In [None]:
# Without hyperparameter optimization training is cheap enough
# to fit all four models in a single run.

models = [bitcn, nbeats, tft, lstm]

In [None]:
# Build the project file structure (safe to re-run):
#    -content
#      -models
#        -data
#        -plots
#          -<one folder per model>

pwd = '/content/models'
models_plots = os.path.join(pwd, 'plots')
models_data = os.path.join(pwd, 'data')

# makedirs(..., exist_ok=True) creates missing parents (including `pwd`) and
# does not raise FileExistsError when the cell is executed a second time,
# which plain os.mkdir did.
for directory in (models_plots, models_data):
    os.makedirs(directory, exist_ok=True)

# One plot folder per model, named after the model's string representation.
for model in models:
    path = os.path.join(models_plots, str(model))
    os.makedirs(path, exist_ok=True)

In [None]:
# Train all models with cross validation (10 windows) on the training split,
# then persist the fitted models to `models_data`.

# Release cached GPU memory left over from earlier runs before training.
torch.cuda.empty_cache()

# 'min' is the pandas alias for minute frequency; the previous 'T' spelling is
# deprecated since pandas 2.2, while 'min' is accepted by old and new versions.
nf = NeuralForecast(models=models, freq='min')

cv_df = nf.cross_validation(train, n_windows=10)

# or
# nf.fit(train, val_size=2*horizon)

# Only the models are stored (save_dataset=False); existing files are overwritten.
nf.save(models_data, overwrite=True, save_dataset=False)

In [None]:
# save trained models to gdrive

!cp /content/models/data/* /content/drive/MyDrive/models/data/

In [None]:
# Post-training evaluation: score the cross-validation forecasts, tag each row
# with the lowest-scoring model, store the table as CSV and print a summary of
# how often each model comes out best per metric.

df_path = f'{pwd}/evaluation.csv'

# The 'cutoff' column is dropped before the frame is handed to evaluate().
cv_without_cutoff = cv_df.loc[:, cv_df.columns != 'cutoff']
evaluation_df = evaluate(cv_without_cutoff, metrics=[rmse, mae, mse])

# Only the per-model score columns take part in the row-wise minimum.
score_columns = evaluation_df.drop(columns=['metric', 'unique_id'])
evaluation_df['best_model'] = score_columns.idxmin(axis=1)
print(evaluation_df)

evaluation_df.to_csv(df_path)

summary_df = (
    evaluation_df
    .groupby(['metric', 'best_model'])
    .size()
    .sort_values()
    .reset_index(name='num. of unique_ids')
    .rename(columns={'best_model': 'model'})
)
print(summary_df)

In [None]:
# save trained models to gdrive

!cp /content/models/plots/* /content/drive/MyDrive/models/plots/