<a href="https://colab.research.google.com/github/Yissan8/volatility/blob/main/Models%20spectification%20and%20training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First, we create classes for the architectures of all our models, starting with the transformer architecture with its positional encoding function

In [None]:
import math
import numpy as np
import torch.nn as nn
from torch import nn, Tensor
import torch.nn.functional as F

def position_encoding_init(n_position, d_pos_vec):
    """Build a cosine positional-encoding table.

    Row ``pos`` holds ``cos(pi * pos / (n_position - 1))`` repeated
    ``d_pos_vec`` times.  Row 0 is built from zeros (all ones after the
    cosine), which also sidesteps the 0/0 division when ``n_position`` is 1.

    Returns:
        A NumPy array with one row per position.
    """
    rows = []
    for pos in range(n_position):
        if pos == 0:
            rows.append(np.zeros(d_pos_vec))
        else:
            angle = math.pi * (pos / (n_position - 1))
            rows.append([angle] * d_pos_vec)
    return np.cos(np.array(rows))


class customtransformer(nn.Module):
    """Single-block attention model mapping one window of ``inp`` values to one value.

    ``forward`` adds a cosine positional encoding to the window, lifts every
    element to ``dim`` features, applies multi-head self-attention with a
    residual connection, projects back to one value per position, adds the
    position-encoded input again and finally maps the ``inp`` positions to a
    single output.

    Args:
        dim: Embedding width used by the attention layer.
        inp: Number of time steps in one input window.
        n_heads: Number of attention heads.
        drop: Dropout probability for the two dropout layers used in ``forward``.
        drop_att: Dropout probability inside ``nn.MultiheadAttention``.

    Several layers (``layernorm*``, ``ly*``, ``r``, ``r2``, ``sig2``, ``drop``)
    are created but not used by ``forward``.  They are kept, in the original
    creation order, so parameter names and random initialisation match
    previously saved checkpoints.
    """

    def __init__(self,dim = 256, inp = 32,n_heads = 8,drop = 0.1,drop_att = 0.1):
        super().__init__()

        self.drop_att = drop_att
        self.d = drop
        self.n_heads = n_heads
        self.inp = inp
        self.dim = dim

        self.attention_query = nn.Linear(in_features=dim, out_features=dim)

        self.drop1 = nn.Dropout(p=self.d)
        self.drop2 = nn.Dropout(p=self.d)

        self.attention_value = nn.Linear(in_features=dim, out_features=dim)
        self.attention_key = nn.Linear(in_features=dim, out_features=dim)

        self.layernorm1 = nn.LayerNorm([inp, dim])
        self.layernorm2 = nn.LayerNorm([inp,1])
        self.layernorm3 = nn.LayerNorm(1)

        self.drop = nn.Dropout(self.d)

        # scalar -> dim embedding applied to every time step
        self.linear_mapping_initial = nn.Linear(in_features=1, out_features=dim)
        # dim -> scalar projection applied to every time step
        self.linear_mapping1 = nn.Linear(in_features=dim, out_features=1)
        # collapses the inp time steps into the single prediction
        self.linear_mapping2 = nn.Linear(in_features=inp, out_features=1)

        self.r = nn.ReLU()
        self.r2 = nn.ReLU()
        self.sig = nn.Sigmoid()
        self.sig2 = nn.Sigmoid()

        self.ly1 = nn.LayerNorm([inp])
        self.ly2 = nn.LayerNorm([inp,dim])

        self.att = nn.MultiheadAttention(embed_dim = dim,num_heads=n_heads,dropout=self.drop_att)

    def forward(self, src: Tensor) -> Tensor:
        """Run the model on one window.

        Args:
            src: Input window; the layer sizes imply a 1-D tensor of length
                ``inp`` (it is unsqueezed to ``(inp, 1)`` before the embedding).

        Returns:
            The output of the final ``Linear(inp, 1)`` layer.
        """
        # Build the positional encoding on the same device as the input
        # instead of assuming CUDA, so the model also runs on CPU.
        positions = torch.as_tensor(
            position_encoding_init(self.inp, 1), dtype=torch.float32
        ).squeeze().to(src.device)

        src = src + positions
        encoded_input = src  # kept for the second residual connection
        embedded = self.linear_mapping_initial(src.unsqueeze(1))

        q = self.attention_query(embedded)
        k = self.attention_key(embedded)
        v = self.attention_value(embedded)

        attn_output, _ = self.att(q, k, v)

        x = self.drop1(attn_output)
        x = embedded + x  # residual around the attention block
        x = self.linear_mapping1(x)
        x = self.sig(x)
        x = self.drop2(x)
        x = encoded_input + x.squeeze()
        return self.linear_mapping2(x)

Now, we create a class and functions that will allow us to process a 1-D tensor input into a shape that is digestible by our transformer

In [None]:
import os
import torch
from torch.utils.data import Dataset
import numpy as np
from torch import nn, Tensor
from typing import Optional, Any, Union, Callable, Tuple
import pandas as pd
from pathlib import Path

class TransformerDataset(Dataset):
    """Dataset that cuts (src, trg, trg_y) triples out of one long series.

    ``indices`` holds one entry per sample; elements 0 and 1 of each entry are
    the start and end offsets of the sub-sequence sliced from ``data``.
    """

    def __init__(self,
        data: torch.tensor,
        indices: list,
        enc_seq_len: int,
        dec_seq_len: int,
        target_seq_len: int,
        wind,
        mean,
        std
        ) -> None:
        super().__init__()

        self.data = data
        self.indices = indices
        print("From get_src_trg: data size = {}".format(data.size()))

        # sequence-length settings
        self.enc_seq_len = enc_seq_len
        self.dec_seq_len = dec_seq_len
        self.target_seq_len = target_seq_len

        # gap setting and normalisation statistics used by get_src_trg
        self.wind = wind
        self.mean = mean
        self.std = std

    def __len__(self):
        """Number of samples, i.e. the number of index entries."""
        return len(self.indices)

    def __getitem__(self, index):
        """Slice sample ``index`` out of the data and split it into (src, trg, trg_y)."""
        bounds = self.indices[index]
        window = self.data[bounds[0]:bounds[1]]
        return self.get_src_trg(
            sequence=window,
            enc_seq_len=self.enc_seq_len,
            dec_seq_len=self.dec_seq_len,
            target_seq_len=self.target_seq_len,
            wind=self.wind,
            mean=self.mean,
            std=self.std,
        )

    def get_src_trg(
        self,
        sequence: torch.Tensor,
        enc_seq_len: int,
        dec_seq_len: int,
        target_seq_len: int,
        wind,
        mean,
        std
        ) -> Tuple[torch.tensor, torch.tensor, torch.tensor]:
        """Split one sub-sequence into encoder input, decoder input and target.

        Returns:
            src: the first ``enc_seq_len`` values, standardised with ``mean``/``std``.
            trg: the raw values from ``enc_seq_len - 1`` up to
                ``len(sequence) - 1 - wind`` (exclusive).
            trg_y: the last ``target_seq_len`` raw values, with a trailing
                size-1 dimension squeezed away.
        """
        encoder_input = (sequence[:enc_seq_len] - mean) / std

        decoder_stop = len(sequence) - 1 - wind
        decoder_input = sequence[enc_seq_len - 1:decoder_stop]

        target = sequence[-target_seq_len:].squeeze(-1)
        return encoder_input, decoder_input, target

def generate_square_subsequent_mask(dim1: int, dim2: int) -> Tensor:
    """Return a ``(dim1, dim2)`` mask: ``-inf`` strictly above the main diagonal, 0 elsewhere."""
    all_masked = torch.full((dim1, dim2), float('-inf'))
    return all_masked.triu(diagonal=1)


def get_indices_input_target(num_obs, input_len, step_size, forecast_horizon, target_len,wind:int):
    """Enumerate (input_start, input_end, target_start, target_end) index tuples.

    The input window starts at 0 and slides forward by ``step_size`` until the
    target end would pass ``num_obs``.

    NOTE(review): the first tuple is built without ``wind`` while every later
    tuple adds ``wind`` to both the target start and the target length.
    Confirm whether that asymmetry is intended.
    """
    input_len = round(input_len)  # guard against a float length
    stop_position = num_obs

    window_start = 0
    window_end = window_start + input_len
    target_start = window_end + forecast_horizon
    target_end = target_start + target_len
    print("target_last_idx is {}".format(target_end))
    print("stop_position is {}".format(stop_position))

    indices = []
    while True:
        if target_end > stop_position:
            break
        indices.append((window_start, window_end, target_start, target_end))
        window_start += step_size
        window_end += step_size
        target_start = window_end + forecast_horizon + wind
        target_end = target_start + target_len + wind

    return indices

def get_indices_entire_sequence(data: pd.DataFrame, window_size: int, step_size: int,wind:int) -> list:
    """List the (start, end) offsets of every sliding sub-sequence in ``data``.

    Each sub-sequence spans ``window_size + wind`` rows; the first starts at
    row 0 and each following one starts ``step_size`` rows later, for as long
    as the end offset does not exceed ``len(data)``.
    """
    total_rows = len(data)
    span = window_size + wind

    indices = []
    first = 0
    last = span
    while last <= total_rows:
        indices.append((first, last))
        first += step_size
        last += step_size
    return indices


def read_data(data_path,
    timestamp_col_name: str="Date") -> pd.DataFrame:
    """Load a CSV indexed by its timestamp column, downcast it and sort it by time.

    Args:
        data_path: Location of the CSV file.
        timestamp_col_name: Column parsed as dates and used as the index.

    Raises:
        ValueError: if any cell holds the string "n/e".
    """
    print("Reading file in {}".format(data_path))

    csv_options = dict(
        parse_dates=[timestamp_col_name],
        index_col=[timestamp_col_name],
        infer_datetime_format=True,
        low_memory=False,
    )
    frame = pd.read_csv(data_path, **csv_options)

    if is_ne_in_df(frame):
        raise ValueError("data frame contains 'n/e' values. These must be handled")

    frame = to_numeric_and_downcast_data(frame)
    return frame.sort_values(by=[timestamp_col_name])

def is_ne_in_df(df:pd.DataFrame):
    """Return True as soon as any column contains the string "n/e", else False."""
    return any(any(df[col] == "n/e") for col in df.columns)


def to_numeric_and_downcast_data(df: pd.DataFrame):
    """Downcast float and integer columns of ``df`` in place and return ``df``.

    Columns of other dtypes are left untouched.
    """
    float_cols = df.select_dtypes('float').columns
    int_cols = df.select_dtypes('integer').columns

    for columns, target in ((float_cols, 'float'), (int_cols, 'integer')):
        df[columns] = df[columns].apply(pd.to_numeric, downcast=target)

    return df

In [None]:
from torch.utils.data import DataLoader
import torch
import datetime
import numpy as np

def trans_data(data,horizon,enc_seq_len = 30,output_sequence_length = 1,step_size=1,dec_seq_len = 1):
    """Turn the first column of ``data`` into transformer training/validation samples.

    The series is split 85% / 15% by position.  Both parts are standardised
    with the mean and standard deviation of the whole series.

    Returns:
        ``(training, validation)``: two lists whose items have the form
        ``[(0, src), (2, trg_y)]``, so that ``item[0][1]`` is the encoder
        input and ``item[1][1]`` is the target.
    """
    series = data.iloc[:,0]
    mean = series.mean()
    std = series.std()
    window_size = enc_seq_len + output_sequence_length
    split_at = round(0.85*len(series))
    wind = horizon - 1

    def build_samples(frame):
        # Index every sliding window, wrap it in the dataset, then keep only
        # the encoder input and the target of each sample.
        window_indices = get_indices_entire_sequence(
            data=frame,
            window_size=window_size,
            step_size=step_size,
            wind=wind)
        dataset = TransformerDataset(
            data=torch.tensor(frame.iloc[:,0].values).float(),
            indices=window_indices,
            enc_seq_len=enc_seq_len,
            dec_seq_len=dec_seq_len,
            target_seq_len=output_sequence_length,
            mean=mean,
            std=std,
            wind=wind)
        return [[(0, src), (2, trg_y)] for src, _trg, trg_y in dataset]

    training = build_samples(pd.DataFrame(series[:split_at]))
    validation = build_samples(pd.DataFrame(series[split_at:]))

    return training,validation

Here we create a function to process the 1-D time series into a digestible format for the LSTM

In [None]:
def create_dataset(dataset, lookback,ahead):
    """Build (features, targets) tensors from a sequence.

    For every position ``i`` from ``lookback`` up to ``len(dataset) - ahead``
    (exclusive), the feature is ``dataset[i - lookback:i]`` and the target is
    ``dataset[i + ahead]``.
    """
    anchors = range(lookback, len(dataset) - ahead)
    features = [dataset[i - lookback:i] for i in anchors]
    targets = [dataset[i + ahead] for i in anchors]
    return torch.tensor(features), torch.tensor(targets)

Here we create the function to process a 1-D timeseries into a digestible format for the Boosting models

In [None]:
from pandas import DataFrame
from pandas import concat

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a series as a table of lagged and lead columns.

    Produces ``n_in`` lag columns (``t-n_in`` ... ``t-1``) followed by
    ``n_out + 1`` columns for ``t``, ``t+1``, ... ``t+n_out``.  Columns are
    named ``var<k>(t-i)``, ``var<k>(t)`` and ``var<k>(t+i)``.

    Args:
        data: A list (treated as one variable) or a 2-D table.
        n_in: Number of lag steps.
        n_out: Number of lead steps after ``t``.
        dropnan: Drop rows that contain NaN after shifting.
    """
    n_out = n_out + 1
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    var_numbers = range(1, n_vars + 1)

    lag_steps = range(n_in, 0, -1)
    lead_steps = range(0, n_out)

    cols = [df.shift(step) for step in lag_steps]
    cols += [df.shift(-step) for step in lead_steps]

    names = []
    for step in lag_steps:
        names.extend('var%d(t-%d)' % (v, step) for v in var_numbers)
    for step in lead_steps:
        if step == 0:
            names.extend('var%d(t)' % v for v in var_numbers)
        else:
            names.extend('var%d(t+%d)' % (v, step) for v in var_numbers)

    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def create_sequence(dataset, lookback,ahead):
    """Standardise ``dataset`` and return its sliding windows as tensors.

    The data is centred and scaled with its own ``mean()`` and ``std()``.
    One window ``dataset[i - lookback:i]`` is produced for every ``i`` from
    ``lookback`` up to ``len(dataset) - ahead`` (exclusive).
    """
    normalised = (dataset - dataset.mean()) / dataset.std()
    return [
        torch.from_numpy(np.array(normalised[end - lookback:end]))
        for end in range(lookback, len(normalised) - ahead)
    ]

Here, we create the function for importing historical daily prices for any stock ticker and converting them into returns and volatility time series

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd

def ds(Ticker,output='vol'):
    """Download daily prices for ``Ticker`` and return one derived series.

    Log returns are computed from consecutive ``Close`` prices; volatility is
    the rolling 5-observation standard deviation of those returns.

    Args:
        Ticker: Yahoo Finance ticker symbol.
        output: ``'vol'`` for the volatility series or ``'return'`` for the
            log-return series.

    Returns:
        A one-column DataFrame indexed by date, named ``<Ticker>_volatility``
        or ``<Ticker>_returns``.

    Raises:
        ValueError: if ``output`` is neither ``'vol'`` nor ``'return'``.
    """
    # Validate before downloading: an unknown value previously fell through
    # both branches and failed with UnboundLocalError after the network call.
    if output not in ('vol', 'return'):
        raise ValueError(f"output must be 'vol' or 'return', got {output!r}")

    prices = yf.Ticker(Ticker).history(start='1989-12-22').reset_index()
    prices['SP_returns'] = np.log(prices['Close']/prices['Close'].shift(1))
    prices['Volatility'] = prices['SP_returns'].rolling(5).std()
    prices['Date'] = pd.to_datetime(prices['Date']).dt.date

    if output == 'vol':
        source_col, suffix = 'Volatility', '_volatility'
    else:
        source_col, suffix = 'SP_returns', '_returns'

    dataset = prices[['Date', source_col]].dropna().set_index('Date')
    dataset.columns = [Ticker + suffix]
    return dataset



# Download one series per ticker when this cell runs; `ds` is called with its
# default `output='vol'`, so each result is the rolling-volatility series.
SP = ds("^GSPC")
NQ = ds("^IXIC")
RTY = ds("^RUT")
COM = ds("^DJCI")


Here we create the functions to evaluate and train our transformer

In [None]:
import time
def train(model: nn.Module,criterion,optimizer,training, batch_size: int = 256) -> None:
    """Run one training pass over ``training`` with mini-batch updates.

    Args:
        model: Network called as ``model(src=...)`` on one sample at a time.
        criterion: Loss called as ``criterion(predictions, targets)``.
        optimizer: Optimiser bound to ``model``'s parameters.
        training: Sequence of samples where ``sample[0][1]`` is the input
            tensor and ``sample[1][1]`` is the target tensor.
        batch_size: An update is made whenever the sample index is a non-zero
            multiple of this value, so the first batch holds
            ``batch_size + 1`` samples (unchanged from the original schedule).

    Samples are moved to the device of the model's parameters rather than
    unconditionally to CUDA.  Samples left over after the last full batch now
    get their own update; previously they were silently dropped.
    """
    model.train()  # turn on train mode

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    targets, outputs = [], []

    def _update():
        # One optimiser step on everything accumulated since the last step.
        loss = criterion(torch.cat(outputs, 0), torch.cat(targets, 0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        targets.clear()
        outputs.clear()

    for i, sample in enumerate(training):
        src = sample[0][1].to(device)
        trg_y = sample[1][1].to(device)

        outputs.append(model(src=src))
        targets.append(trg_y.unsqueeze(0))

        if i % batch_size == 0 and i != 0:
            _update()

    # Flush the final partial batch.
    if outputs:
        _update()


def evaluate(model: nn.Module,criterion,validation) :
    """Compute the loss of ``model`` over the whole validation set.

    Args:
        model: Network called as ``model(src=...)`` on one sample at a time.
        criterion: Loss called as ``criterion(predictions, targets)``.
        validation: Sequence of samples where ``sample[0][1]`` is the input
            tensor and ``sample[1][1]`` is the target tensor.

    Returns:
        The loss tensor computed over all samples at once.

    Raises:
        ValueError: if ``validation`` is empty.

    Samples are moved to the device of the model's parameters rather than
    unconditionally to CUDA.
    """
    if len(validation) == 0:
        raise ValueError("validation set is empty")

    model.eval()

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    targets, outputs = [], []
    with torch.no_grad():
        for sample in validation:
            src = sample[0][1].to(device)
            trg_y = sample[1][1].to(device)
            outputs.append(model(src=src))
            targets.append(trg_y.unsqueeze(0))

        total_loss = criterion(torch.cat(outputs, 0), torch.cat(targets, 0))

    last_index = len(validation) - 1  # same value the original loop variable ended on
    print(f"validation_loss: {total_loss:>7f},   batch n : {last_index:>7f}")
    return total_loss

Here is our LSTM model class

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a two-layer dense head.

    Args:
        num_classes: Size of the output vector.
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Stored on the instance; not used by ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, 128)  # first dense layer
        self.fc = nn.Linear(128, num_classes)    # output layer

        self.relu = nn.ReLU()

    def forward(self,x):
        """Map a batch of sequences ``(batch, seq, input_size)`` to ``(batch, num_classes)``."""
        # Zero initial states created on the input's device and dtype, so the
        # module works wherever it has been moved (the deprecated Variable
        # wrapper always produced CPU tensors).
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          dtype=x.dtype, device=x.device)
        c_0 = torch.zeros_like(h_0)

        _, (hn, _) = self.lstm(x, (h_0, c_0))

        # Use the last layer's final hidden state.  Flattening all layers with
        # view(-1, hidden_size) multiplied the batch dimension by num_layers;
        # for num_layers == 1 both forms give the same tensor.
        out = self.relu(hn[-1])
        out = self.fc_1(out)
        out = self.relu(out)
        return self.fc(out)


In [None]:
!pip install ngboost

Here is the single function used to train all 4 of our models. The function provides us with the flexibility to directly specify the ticker of the asset we want to train our models on as well as the desired forecasting horizon

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from ngboost import NGBRegressor,NGBSurvival
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
from sklearn.preprocessing import StandardScaler, MinMaxScaler



def train_models(n,ticker='^GSPC',epochs=30,l_epochs=30):
    """Train the transformer, LSTM, NGBoost and XGBoost models for one ticker.

    Args:
        n: Forecast horizon in observations.
        ticker: Ticker passed to ``ds`` to download the volatility series.
        epochs: The transformer loop runs ``epochs + 1`` passes.
        l_epochs: Number of full-batch LSTM updates.

    Returns:
        ``(xg, ng, lstm, trans)``: the fitted XGBoost regressor, NGBoost
        regressor, LSTM and transformer, in that order.

    The transformer is moved to CUDA, so a GPU is required.  Checkpoints are
    written to ``/content/transformer_params`` and ``/content/lstm_params``.
    """
    data = ds(ticker)
    cutoff = round(0.85*len(data))

    # ---- transformer -------------------------------------------------
    training,validation = trans_data(data,n)
    trans = customtransformer(dim = 128,inp = 30,n_heads = 8)
    trans.cuda()
    criterion = nn.L1Loss()
    lr = 0.05
    optimizer = torch.optim.Adam(trans.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=90,verbose=True)
    best_val_loss = 100000

    for epoch in range(epochs + 1):
        # Every pass after the first restarts from the best checkpoint so far.
        if epoch!=0:
            trans.load_state_dict(torch.load('/content/transformer_params'))
        train(trans,criterion,optimizer,training)
        val_loss = evaluate(trans,criterion, validation )
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(trans.state_dict(), '/content/transformer_params')

        scheduler.step()
        print(f"epoch n:  {epoch}")

    # ---- LSTM ----------------------------------------------------------
    ss = StandardScaler()
    train_l = np.array(data.iloc[:cutoff,0].values.astype('float32'))
    train_l = ss.fit_transform(train_l.reshape(len(train_l),1))
    # Use the horizon argument `n`; the original read a global `h` that only
    # exists while the notebook's driver loop is running.
    X_train, y_train = create_dataset(train_l,30,n - 1)
    X_train = X_train.float()
    y_train_tensors = y_train.float()

    lstm = LSTM(1, 1, 4, 1, 30)

    lr = 0.1
    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.0001, total_iters=1750,verbose=True)
    best_loss = float('inf')
    for epoch in range(l_epochs):
        outputs = lstm(X_train)
        loss = criterion(outputs, y_train_tensors)

        # Checkpoint the weights that produced this loss before updating them.
        # The original additionally reloaded this checkpoint at the start of
        # every epoch, which threw away each optimiser step and left the
        # network at its initial weights.
        loss_value = loss.item()
        if loss_value < best_loss:
            best_loss = loss_value
            torch.save(lstm.state_dict(), '/content/lstm_params')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        print("Epoch: %d, loss: %1.5f" % (epoch, loss_value))

    # Return the best weights seen rather than whatever the last step produced.
    if best_loss < float('inf'):
        lstm.load_state_dict(torch.load('/content/lstm_params'))

    # ---- boosting models -------------------------------------------------
    # 10 lag columns are the features; the last column (t+n) is the target.
    boost = series_to_supervised(data,10,n)
    ng = NGBRegressor().fit(boost.iloc[:cutoff,:10], boost.iloc[:cutoff,-1])
    xg = xgb.XGBRegressor(eval_metric='rmsle').fit(boost.iloc[:cutoff,:10], boost.iloc[:cutoff,-1])

    return xg,ng,lstm,trans

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import joblib
# Train the four base models for every asset and horizon and store them on Drive.
folder_path = '/content/drive/MyDrive/Colab Notebooks/model_parameters'
assets = ["^GSPC", "^IXIC", "^RUT","GC=F","CL=F"]
#assets = ["^RUT"]
# File-name prefixes, in the order train_models returns its models.
base_model_prefixes = ['/xgb_', '/ngb_', '/lstm_', '/transformer_']
for asset in assets:
  # A stray "25" after the colon on this header made the cell a syntax error.
  for h in [5,10,15,20,60]:
    mods = train_models(h,ticker = asset, epochs=125,l_epochs=2000)
    for model_prefix, fitted_model in zip(base_model_prefixes, mods):
      joblib.dump(fitted_model, folder_path + model_prefix + asset + '_' + str(h) + '_model')

In [None]:
class ensemble(nn.Module):
    """Linear blender: one ``Linear(feat, 1)`` layer over ``feat`` inputs."""

    def __init__(self,feat):
        super().__init__()
        self.feat = feat
        self.map = nn.Linear(feat, 1)

    def forward(self,x):
        """Apply the linear map to ``x`` and return the result."""
        blended = self.map(x)
        return blended

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.autograd import Variable
def predict(models,x:pd.DataFrame,n=20):
    """Collect the predictions of all five models next to the observed series.

    Args:
        models: Indexed as 0 and 1 = objects with ``.predict`` fed the 10 lag
            columns, 2 = transformer, 3 = LSTM, 4 = fitted model exposing
            ``.forecast``.
        x: The observed series, one column.
        n: Forecast horizon.

    Returns:
        A DataFrame with columns
        ``['NgBoost','XgBoost','LSTM','Transformer','GARCH','actual']``,
        restricted to index values present in every component.

    The transformer input is moved to CUDA, so a GPU is required.

    NOTE(review): the two boosting outputs are indexed from ``x.index[10:]``
    while the LSTM and transformer outputs start at ``x.index[30 + n - 1:]``.
    Confirm that both conventions line predictions up with the same
    ``actual`` row.
    """
    sequence_offset = 30 + n - 1

    # Boosting models: 10 lagged values per row, no lead columns.
    lagged = series_to_supervised(pd.DataFrame(x),10,-1)
    ng_frame = pd.DataFrame(models[0].predict(lagged))
    xg_frame = pd.DataFrame(models[1].predict(lagged))

    # Transformer: one forward pass per standardised 30-step window.
    windows = create_sequence(x.reset_index(drop=True),30,n-1)
    transformer_frame = pd.DataFrame([
        models[2](window.squeeze().float().cuda()).squeeze(0).item()
        for window in windows
    ])

    # Volatility model: keep the last horizon column of the simulated variance.
    simulated = models[4].forecast(start=0,horizon = n,method='simulation')
    scaled_variance = simulated.variance/100
    garch_frame = pd.DataFrame(scaled_variance.iloc[:-n,-1])

    # LSTM: standardise, predict, then undo the scaling.
    scaler = StandardScaler()
    standardised = scaler.fit_transform(np.array(x).reshape(len(x),1))
    lstm_inputs, _ = create_dataset(standardised,30,n-1)
    lstm_raw = models[3](lstm_inputs.float()).detach().numpy()
    lstm_frame = pd.DataFrame(scaler.inverse_transform(lstm_raw))

    lstm_frame.index = x.index[sequence_offset:]
    transformer_frame.index = x.index[sequence_offset:]
    ng_frame.index = x.index[10:]
    xg_frame.index = x.index[10:]

    # Inner-join everything on the index, in the column order named below.
    combined = ng_frame
    for part in (xg_frame, lstm_frame, transformer_frame, garch_frame, x):
        combined = combined.merge(part,left_index=True,right_index=True)
    combined.columns = ['NgBoost','XgBoost','LSTM','Transformer','GARCH','actual']
    return combined

#ds = predict([ng,regressor,trans,lstm,arch],data,n=20)

In [None]:
def train_ensemble(mod: nn.Module,optimizer,df) -> None:
    """Run one pass of per-row L1 training of ``mod`` over ``df``.

    Args:
        mod: Module called on a 1-D feature tensor.
        optimizer: Optimiser that updates ``mod``'s parameters.
        df: DataFrame whose last column is the target and whose other columns
            are the features.

    One optimiser step is taken per row, as before.  The frame is converted
    to a float32 tensor once up front; building tensors straight from a
    label-indexed row Series depended on positional integer lookups, which
    pandas has deprecated.
    """
    mod.train()  # turn on train mode
    criterion = nn.L1Loss()

    rows = torch.tensor(df.to_numpy(dtype='float32'))
    for row in rows:
        features, target = row[:-1], row[-1:]
        loss = criterion(mod(features), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:
!pip install arch
from arch.univariate import GARCH, ARCH, FIGARCH,EGARCH,APARCH, EWMAVariance
from arch import arch_model

In [None]:
import joblib
def horizon_result(n,ticker='^GSPC',epochs=30):
    """Fit the linear ensembles on top of the saved base models for one ticker/horizon.

    Loads the four base models saved for ``ticker`` and ``n``, fits an EGARCH
    model, gathers all predictions with ``predict`` and trains six linear
    blenders on rows 35%-85% of that table.

    Args:
        n: Forecast horizon.
        ticker: Ticker symbol used for the download and in the file names.
        epochs: Training passes for the four two-input (model + GARCH) blenders;
            the two large blenders always run 50 passes.

    Returns:
        ``[XG_GARCH, NG_GARCH, LSTM_GARCH, TRANS_GARCH, ens_s, ens]``.
    """
    data = ds(ticker)
    # NOTE(review): the original also downloaded ds(ticker, 'return') into an
    # unused variable; the EGARCH model below is fitted on the volatility
    # series. Confirm which series the volatility model was meant to use.
    cutoff = round(0.85*len(data))

    folder_path = '/content/drive/MyDrive/Colab Notebooks/model_parameters'
    file_suffix = '_' + ticker + '_' + str(n) + '_model'
    xg_path = folder_path + '/xgb' + file_suffix
    ng_path = folder_path + '/ngb' + file_suffix
    lstm_path = folder_path + '/lstm' + file_suffix
    tr_path = folder_path + '/transformer' + file_suffix

    arch = arch_model(100*data,mean = 'AR',lags=1,dist='ged')
    arch.volatility = EGARCH(p=1, o=1, q=1)
    arch = arch.fit(last_obs=cutoff)

    ng = joblib.load(ng_path)
    xg = joblib.load(xg_path)
    lstm = joblib.load(lstm_path)
    trans = joblib.load(tr_path)

    td = predict([ng,xg,trans,lstm,arch],data,n=n)
    train_rows = td[round(0.35*len(td)):round(0.85*len(td))]
    test = td[round(0.85*len(td)):]

    lr = 0.01

    def _optimizer_for(module):
        # Each blender gets its own Adam optimiser and step-decay schedule.
        opt = torch.optim.Adam(module.parameters(), lr=lr, weight_decay=1e-5)
        return opt, torch.optim.lr_scheduler.StepLR(opt, 1.0, gamma=0.4)

    def _predict_rows(module, frame):
        # Evaluate a blender on every row of `frame`, returning plain floats.
        features = torch.tensor(frame.to_numpy(dtype='float32'))
        with torch.no_grad():
            return [module(row).squeeze(0).item() for row in features]

    # ---- blenders over all base models, with and without GARCH ---------
    ens = ensemble(5)
    ens_s = ensemble(4)
    optimizer, scheduler = _optimizer_for(ens)
    # ens_s needs its own optimiser: it used to be trained with the one bound
    # to `ens`, so its weights never moved from their random initialisation.
    optimizer_s, scheduler_s = _optimizer_for(ens_s)
    for epoch in range(50):
        train_ensemble(ens,optimizer,train_rows)
        train_ensemble(ens_s,optimizer_s,train_rows[['NgBoost','XgBoost','LSTM','Transformer','actual']])
        scheduler.step()
        scheduler_s.step()
        print(f"epoch n:  {epoch}")

    Ens_pred = pd.DataFrame(_predict_rows(ens, test.iloc[:, :-1]))
    Ens_pred.index = test.index
    Ens_pred.columns = ['X-N-L-T GARCH']

    # ---- one blender per base model paired with GARCH --------------------
    XG_GARCH = ensemble(2)
    NG_GARCH = ensemble(2)
    LSTM_GARCH = ensemble(2)
    TRANS_GARCH = ensemble(2)

    XG_optimizer, XG_scheduler = _optimizer_for(XG_GARCH)
    NG_optimizer, NG_scheduler = _optimizer_for(NG_GARCH)
    LSTM_optimizer, LSTM_scheduler = _optimizer_for(LSTM_GARCH)
    TRANS_optimizer, TRANS_scheduler = _optimizer_for(TRANS_GARCH)

    for epoch in range(epochs):
        train_ensemble(XG_GARCH,XG_optimizer,train_rows[['XgBoost','GARCH','actual']])
        train_ensemble(NG_GARCH,NG_optimizer,train_rows[['NgBoost','GARCH','actual']])
        train_ensemble(LSTM_GARCH,LSTM_optimizer,train_rows[['LSTM','GARCH','actual']])
        train_ensemble(TRANS_GARCH,TRANS_optimizer,train_rows[['Transformer','GARCH','actual']])
        XG_scheduler.step()
        NG_scheduler.step()
        LSTM_scheduler.step()
        TRANS_scheduler.step()
        print(f"epoch n:  {epoch}")

    G_ens = pd.DataFrame({
        'XG GARCH': _predict_rows(XG_GARCH, test[['XgBoost','GARCH']]),
        'NG GARCH': _predict_rows(NG_GARCH, test[['NgBoost','GARCH']]),
        'LSTM GARCH': _predict_rows(LSTM_GARCH, test[['LSTM','GARCH']]),
        'TRANS GARCH': _predict_rows(TRANS_GARCH, test[['Transformer','GARCH']]),
    }, index=test.index)

    # Combined prediction table. It is built but not returned by this function.
    final = td.merge(G_ens,left_index=True,right_index=True).merge(Ens_pred,left_index=True,right_index=True)
    # Strip only a leading '^'; slicing off the first character also removed
    # the first letter of tickers such as "GC=F".
    final.columns = [i + '_' + ticker.lstrip('^') + '_' + str(n) for i in final.columns]

    return [XG_GARCH,NG_GARCH,LSTM_GARCH,TRANS_GARCH,ens_s,ens]

In [None]:
import joblib
# Fit the ensembles for each asset/horizon pair and store them on Drive.
folder_path = '/content/drive/MyDrive/Colab Notebooks/all_model_parameters'
assets = ["^RUT","GC=F","CL=F"]
# File-name prefixes, listed in the order horizon_result returns its models.
ensemble_prefixes = [
    '/xgb_EGARCH_',
    '/ngb_EGARCH_',
    '/lstm_EGARCH_',
    '/transformer_EGARCH_',
    '/X-N-L-T_',
    '/X-N-L-T_EGARCH_',
]
for asset in assets:
  for h in [60]:
    mods = horizon_result(h,asset,15)

    for position, ensemble_prefix in enumerate(ensemble_prefixes):
      joblib.dump(mods[position], folder_path + ensemble_prefix + asset + '_' + str(h) + '_model')
