# Libraries and Drive

In [None]:
import os

import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive (forcing a fresh mount) and make the project folder the
# working directory, so relative paths resolve inside it.
drive.mount("/content/drive", force_remount=True)
os.chdir('/content/drive/My Drive/Volatility_forecasting/')

In [None]:
# Shell escape: install neuralforecast and dask's dataframe extra into the
# notebook runtime.
!pip install neuralforecast dask[dataframe]
import os
# NOTE(review): presumably tells the Nixtla libraries to return `unique_id` as a
# regular column instead of the index -- confirm against the neuralforecast docs.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Modelling original (20 years total, 14 train, 2 val, 4 test)



In [None]:
from neuralforecast.core import NeuralForecast
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.losses.pytorch import DistributionLoss
from neuralforecast.auto import AutoTiDE

In [None]:
def _downcast_dtype(dtype):
    """Map 64-bit int/float dtypes to their 32-bit counterpart; keep the rest."""
    if dtype == 'int64':
        return 'int32'
    if dtype == 'float64':
        return 'float32'
    return dtype


# Read the S&P 500 CSV and parse the timestamp column.
df = pd.read_csv('Data/S&P500/3ProSP500.csv')
df['ds'] = pd.to_datetime(df['ds'])

# Shrink the numeric columns to 32 bit.
df = df.astype({name: _downcast_dtype(kind) for name, kind in df.dtypes.items()})

# Column '840E' becomes the target column 'y'.
df = df.rename(columns={'840E': 'y'})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008545 entries, 0 to 2008544
Data columns (total 21 columns):
 #   Column     Dtype         
---  ------     -----         
 0   unique_id  object        
 1   ds         datetime64[ns]
 2   y          float32       
 3   MA(2,9)    int32         
 4   MA(3,9)    int32         
 5   MA(1,12)   int32         
 6   MA(2,12)   int32         
 7   MA(3,12)   int32         
 8   MOM(9)     int32         
 9   MOM(12)    int32         
 10  RSI(7)     int32         
 11  RSI(14)    int32         
 12  EMA(3,9)   int32         
 13  EMA(5,9)   int32         
 14  EMA(5,12)  int32         
 15  DY         float32       
 16  PTBV       float32       
 17  P          float32       
 18  PO         float32       
 19  VO         float32       
 20  PE         float32       
dtypes: datetime64[ns](1), float32(7), int32(12), object(1)
memory usage: 176.2+ MB


In [None]:
# Inclusive calendar bounds of the two windows.
start_2020_2023, end_2020_2023 = '2020-01-01', '2023-12-31'
start_2018_2019, end_2018_2019 = '2018-01-01', '2019-12-31'

# Distinct calendar dates present in the data inside each window.
unique_days_2020_2023 = df.loc[
    df['ds'].between(start_2020_2023, end_2020_2023), 'ds'
].dt.date.unique()
unique_days_2018_2019 = df.loc[
    df['ds'].between(start_2018_2019, end_2018_2019), 'ds'
].dt.date.unique()

# Number of distinct dates per window.
count_unique_days_2020_2023 = len(unique_days_2020_2023)
count_unique_days_2018_2019 = len(unique_days_2018_2019)

# One row per window; shown as the cell result.
summary_counts = pd.DataFrame({
    'Period': ['2020-2023', '2018-2019'],
    'Unique Days Covered': [count_unique_days_2020_2023, count_unique_days_2018_2019],
})
summary_counts

Unnamed: 0,Period,Unique Days Covered
0,2020-2023,1043
1,2018-2019,522


In [None]:
# Every column except the timestamp, the target and the series id is used as
# an exogenous feature.
exog_list = df.columns.tolist()
for reserved in ('ds', 'y', 'unique_id'):
    exog_list.remove(reserved)

# Run settings: validation/test sizes are the day counts of the 2018-2019 and
# 2020-2023 windows.
levels = [90]
horizons = [1, 5, 10, 20]
val_size, test_size = count_unique_days_2018_2019, count_unique_days_2020_2023

In [None]:
import os
from time import time

class AutoTiDETrainer:
    """Tune, cross-validate and persist two AutoTiDE models per forecast horizon.

    Model 0 is configured without ``hist_exog_list``; model 1 receives
    ``exog_list`` as ``hist_exog_list``. For each horizon the fitted models, the
    hyperparameter-search tables and the cross-validation predictions are
    written to relative paths (see ``_paths``).

    Args:
        horizons: Forecast horizons to train for.
        levels: Levels passed to ``DistributionLoss(level=...)``.
        exog_list: Column names used as ``hist_exog_list`` for model 1.
        df: Data frame passed to ``NeuralForecast.cross_validation``.
        val_size: ``val_size`` passed to ``cross_validation``.
        test_size: ``test_size`` passed to ``cross_validation``.
        num_samples: Number of hyperparameter samples per model.
        cpus: ``cpus`` argument of each ``AutoTiDE`` search.
        gpus: ``gpus`` argument of each ``AutoTiDE`` search.
    """

    # Candidate learning rates. Each value is listed once so that
    # ``tune.choice`` samples all of them with equal probability.
    LEARNING_RATES = (0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001)

    def __init__(self, horizons, levels, exog_list, df, val_size, test_size,
                 *, num_samples=30, cpus=12, gpus=1):
        self.horizons = horizons
        self.levels = levels
        self.exog_list = exog_list
        self.df = df
        self.val_size = val_size
        self.test_size = test_size
        self.num_samples = num_samples
        self.cpus = cpus
        self.gpus = gpus

    @staticmethod
    def _paths(horizon):
        """Return ``(model_dir, predictions_csv, hpo_dir)`` for ``horizon``.

        Single source of truth, so the skip check and the save step can never
        disagree about where the artefacts live.
        """
        model_path = f'Trained Models/AutoTiDE/horizon_{horizon}/'
        output_csv = f'Data/Test/horizon_{horizon}/tide_model0_1_horizon_{horizon}.csv'
        hpo_dir = f'HPO/horizon_{horizon}'
        return model_path, output_csv, hpo_dir

    def check_existing_files(self, horizon):
        """Return True if both the model directory and the prediction CSV exist."""
        model_path, output_csv, _ = self._paths(horizon)
        return os.path.exists(model_path) and os.path.exists(output_csv)

    def save_results(self, nf, horizon, Y_hat_df):
        """Save the search tables, the trained models and the predictions.

        Args:
            nf: Fitted ``NeuralForecast`` object; ``nf.models[i].results`` must
                provide ``get_dataframe()``.
            horizon: Horizon the artefacts belong to.
            Y_hat_df: Cross-validation output. Its float32 columns are cast to
                float16 in place before the CSV is written.
        """
        model_path, output_csv, hpo_dir = self._paths(horizon)

        # Create every target directory up front: a missing folder must not
        # discard the outcome of a long hyperparameter search.
        os.makedirs(model_path, exist_ok=True)
        os.makedirs(hpo_dir, exist_ok=True)
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)

        # One hyperparameter-search table per model.
        for idx, model in enumerate(nf.models):
            hpo = f'{hpo_dir}/tide_model{idx}_horizon_{horizon}_hpo.csv'
            model.results.get_dataframe().to_csv(hpo, index=False)

        nf.save(path=model_path, model_index=None, overwrite=True, save_dataset=False)

        # NOTE(review): float16 keeps roughly 3 significant digits and
        # overflows above 65504 -- confirm this precision is sufficient.
        for col in Y_hat_df.select_dtypes(include='float32').columns:
            Y_hat_df[col] = Y_hat_df[col].astype('float16')

        # Written last: its presence marks the horizon as complete for
        # ``check_existing_files``.
        Y_hat_df.to_csv(output_csv, index=False)

    def _build_model(self, horizon, use_exog):
        """Build one AutoTiDE search; ``use_exog`` adds ``hist_exog_list``."""
        config = AutoTiDE.get_default_config(h=horizon, backend="ray")
        if use_exog:
            config['hist_exog_list'] = self.exog_list
        config['random_seed'] = 42
        config['learning_rate'] = tune.choice(list(self.LEARNING_RATES))

        return AutoTiDE(h=horizon,
                        config=config,
                        search_alg=HyperOptSearch(), num_samples=self.num_samples,
                        backend='ray',
                        loss=DistributionLoss(distribution='StudentT', level=self.levels),
                        cpus=self.cpus, gpus=self.gpus)

    def configure_models(self, horizon):
        """Return ``(model0, model1)``: without and with exogenous features."""
        tide_model0 = self._build_model(horizon, use_exog=False)
        tide_model1 = self._build_model(horizon, use_exog=True)
        return tide_model0, tide_model1

    def run_training(self):
        """Train, cross-validate and save every horizon that is not yet on disk."""
        for horizon in self.horizons:
            if self.check_existing_files(horizon):
                print(f"Horizon {horizon}: Model and CSV already exist. Skipping this run.")
                continue

            tide_model0, tide_model1 = self.configure_models(horizon)

            # Time the cross-validation together with the save step.
            init = time()
            nf = NeuralForecast(models=[tide_model0, tide_model1], freq='B')
            Y_hat_df = nf.cross_validation(df=self.df,
                                           val_size=self.val_size,
                                           test_size=self.test_size,
                                           n_windows=None)

            self.save_results(nf, horizon, Y_hat_df)

            end = time()
            print(f'Horizon {horizon} CV Minutes: {(end - init) / 60}')

In [None]:
# Build the trainer from the settings defined above and run every horizon.
trainer = AutoTiDETrainer(
    horizons=horizons,
    levels=levels,
    exog_list=exog_list,
    df=df,
    val_size=val_size,
    test_size=test_size,
)
trainer.run_training()

Output hidden; open in https://colab.research.google.com to view.

# Modelling half (12 years total, 6 train, 2 val, 4 test)

In [None]:
from neuralforecast.core import NeuralForecast
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.losses.pytorch import DistributionLoss
from neuralforecast.auto import AutoTiDE

In [None]:
def _downcast_dtype(dtype):
    """Map 64-bit int/float dtypes to their 32-bit counterpart; keep the rest."""
    if dtype == 'int64':
        return 'int32'
    if dtype == 'float64':
        return 'float32'
    return dtype


# Read the S&P 500 CSV and parse the timestamp column.
df = pd.read_csv('Data/S&P500/3ProSP500.csv')
df['ds'] = pd.to_datetime(df['ds'])

# Shrink the numeric columns to 32 bit.
df = df.astype({name: _downcast_dtype(kind) for name, kind in df.dtypes.items()})

# Column '840E' becomes the target column 'y'.
df = df.rename(columns={'840E': 'y'})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008545 entries, 0 to 2008544
Data columns (total 21 columns):
 #   Column     Dtype         
---  ------     -----         
 0   unique_id  object        
 1   ds         datetime64[ns]
 2   y          float32       
 3   MA(2,9)    int32         
 4   MA(3,9)    int32         
 5   MA(1,12)   int32         
 6   MA(2,12)   int32         
 7   MA(3,12)   int32         
 8   MOM(9)     int32         
 9   MOM(12)    int32         
 10  RSI(7)     int32         
 11  RSI(14)    int32         
 12  EMA(3,9)   int32         
 13  EMA(5,9)   int32         
 14  EMA(5,12)  int32         
 15  DY         float32       
 16  PTBV       float32       
 17  P          float32       
 18  PO         float32       
 19  VO         float32       
 20  PE         float32       
dtypes: datetime64[ns](1), float32(7), int32(12), object(1)
memory usage: 176.2+ MB


In [None]:
# First and last calendar year in the data, then the number of years spanned
# (both ends included).
first_year = df['ds'].min().year
last_year = df['ds'].max().year
print(first_year, last_year)
print(last_year - first_year + 1)

2004 2023
20


In [None]:
# New start date: same month and day as the earliest timestamp, eight years later.
earliest = df['ds'].min()
starting_year = earliest.year + 8
new_start_date = pd.Timestamp(year=starting_year, month=earliest.month, day=earliest.day)

# Keep only the rows on or after the new start date.
df = df.loc[df['ds'] >= new_start_date]

# Report the remaining year range and the number of years it spans.
first_year, last_year = df['ds'].min().year, df['ds'].max().year
print(first_year, last_year)
print(last_year - first_year + 1)

2012 2023
12


In [None]:
# Inclusive calendar bounds of the two windows.
start_2020_2023, end_2020_2023 = '2020-01-01', '2023-12-31'
start_2018_2019, end_2018_2019 = '2018-01-01', '2019-12-31'

# Distinct calendar dates present in the data inside each window.
unique_days_2020_2023 = df.loc[
    df['ds'].between(start_2020_2023, end_2020_2023), 'ds'
].dt.date.unique()
unique_days_2018_2019 = df.loc[
    df['ds'].between(start_2018_2019, end_2018_2019), 'ds'
].dt.date.unique()

# Number of distinct dates per window.
count_unique_days_2020_2023 = len(unique_days_2020_2023)
count_unique_days_2018_2019 = len(unique_days_2018_2019)

# One row per window; shown as the cell result.
summary_counts = pd.DataFrame({
    'Period': ['2020-2023', '2018-2019'],
    'Unique Days Covered': [count_unique_days_2020_2023, count_unique_days_2018_2019],
})
summary_counts

Unnamed: 0,Period,Unique Days Covered
0,2020-2023,1043
1,2018-2019,522


In [None]:
# Every column except the timestamp, the target and the series id is used as
# an exogenous feature.
exog_list = df.columns.tolist()
for reserved in ('ds', 'y', 'unique_id'):
    exog_list.remove(reserved)

# Run settings: validation/test sizes are the day counts of the 2018-2019 and
# 2020-2023 windows.
levels = [90]
horizons = [1, 5, 10, 20]
val_size, test_size = count_unique_days_2018_2019, count_unique_days_2020_2023

In [None]:
import os
from time import time

class AutoTiDETrainer:
    """Tune, cross-validate and persist two AutoTiDE models per forecast horizon.

    Model 0 is configured without ``hist_exog_list``; model 1 receives
    ``exog_list`` as ``hist_exog_list``. For each horizon the fitted models, the
    hyperparameter-search tables and the cross-validation predictions are
    written to relative paths carrying the ``8TY`` prefix (see ``_paths``).

    Args:
        horizons: Forecast horizons to train for.
        levels: Levels passed to ``DistributionLoss(level=...)``.
        exog_list: Column names used as ``hist_exog_list`` for model 1.
        df: Data frame passed to ``NeuralForecast.cross_validation``.
        val_size: ``val_size`` passed to ``cross_validation``.
        test_size: ``test_size`` passed to ``cross_validation``.
        num_samples: Number of hyperparameter samples per model.
        cpus: ``cpus`` argument of each ``AutoTiDE`` search.
        gpus: ``gpus`` argument of each ``AutoTiDE`` search.
    """

    # Candidate learning rates. Each value is listed once so that
    # ``tune.choice`` samples all of them with equal probability.
    LEARNING_RATES = (0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001)

    def __init__(self, horizons, levels, exog_list, df, val_size, test_size,
                 *, num_samples=30, cpus=12, gpus=1):
        self.horizons = horizons
        self.levels = levels
        self.exog_list = exog_list
        self.df = df
        self.val_size = val_size
        self.test_size = test_size
        self.num_samples = num_samples
        self.cpus = cpus
        self.gpus = gpus

    @staticmethod
    def _paths(horizon):
        """Return ``(model_dir, predictions_csv, hpo_dir)`` for ``horizon``.

        Single source of truth, so the skip check and the save step can never
        disagree about where the artefacts live.
        """
        model_path = f'Trained Models/AutoTiDE/8TYhorizon_{horizon}/'
        output_csv = f'Data/Test/horizon_{horizon}/8TYtide_model0_1_horizon_{horizon}.csv'
        hpo_dir = f'HPO/horizon_{horizon}'
        return model_path, output_csv, hpo_dir

    def check_existing_files(self, horizon):
        """Return True if both the model directory and the prediction CSV exist."""
        model_path, output_csv, _ = self._paths(horizon)
        return os.path.exists(model_path) and os.path.exists(output_csv)

    def save_results(self, nf, horizon, Y_hat_df):
        """Save the search tables, the trained models and the predictions.

        Args:
            nf: Fitted ``NeuralForecast`` object; ``nf.models[i].results`` must
                provide ``get_dataframe()``.
            horizon: Horizon the artefacts belong to.
            Y_hat_df: Cross-validation output. Its float32 columns are cast to
                float16 in place before the CSV is written.
        """
        model_path, output_csv, hpo_dir = self._paths(horizon)

        # Create every target directory up front: a missing folder must not
        # discard the outcome of a long hyperparameter search.
        os.makedirs(model_path, exist_ok=True)
        os.makedirs(hpo_dir, exist_ok=True)
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)

        # One hyperparameter-search table per model.
        for idx, model in enumerate(nf.models):
            hpo = f'{hpo_dir}/8TYtide_model{idx}_horizon_{horizon}_hpo.csv'
            model.results.get_dataframe().to_csv(hpo, index=False)

        nf.save(path=model_path, model_index=None, overwrite=True, save_dataset=False)

        # NOTE(review): float16 keeps roughly 3 significant digits and
        # overflows above 65504 -- confirm this precision is sufficient.
        for col in Y_hat_df.select_dtypes(include='float32').columns:
            Y_hat_df[col] = Y_hat_df[col].astype('float16')

        # Written last: its presence marks the horizon as complete for
        # ``check_existing_files``.
        Y_hat_df.to_csv(output_csv, index=False)

    def _build_model(self, horizon, use_exog):
        """Build one AutoTiDE search; ``use_exog`` adds ``hist_exog_list``."""
        config = AutoTiDE.get_default_config(h=horizon, backend="ray")
        if use_exog:
            config['hist_exog_list'] = self.exog_list
        config['random_seed'] = 42
        config['learning_rate'] = tune.choice(list(self.LEARNING_RATES))

        return AutoTiDE(h=horizon,
                        config=config,
                        search_alg=HyperOptSearch(), num_samples=self.num_samples,
                        backend='ray',
                        loss=DistributionLoss(distribution='StudentT', level=self.levels),
                        cpus=self.cpus, gpus=self.gpus)

    def configure_models(self, horizon):
        """Return ``(model0, model1)``: without and with exogenous features."""
        tide_model0 = self._build_model(horizon, use_exog=False)
        tide_model1 = self._build_model(horizon, use_exog=True)
        return tide_model0, tide_model1

    def run_training(self):
        """Train, cross-validate and save every horizon that is not yet on disk."""
        for horizon in self.horizons:
            if self.check_existing_files(horizon):
                print(f"Horizon {horizon}: Model and CSV already exist. Skipping this run.")
                continue

            tide_model0, tide_model1 = self.configure_models(horizon)

            # Time the cross-validation together with the save step.
            init = time()
            nf = NeuralForecast(models=[tide_model0, tide_model1], freq='B')
            Y_hat_df = nf.cross_validation(df=self.df,
                                           val_size=self.val_size,
                                           test_size=self.test_size,
                                           n_windows=None)

            self.save_results(nf, horizon, Y_hat_df)

            end = time()
            print(f'Horizon {horizon} CV Minutes: {(end - init) / 60}')

In [None]:
# Build the trainer from the settings defined above and run every horizon.
trainer = AutoTiDETrainer(
    horizons=horizons,
    levels=levels,
    exog_list=exog_list,
    df=df,
    val_size=val_size,
    test_size=test_size,
)
trainer.run_training()

Output hidden; open in https://colab.research.google.com to view.

# Modelling Technical Indicators (20 years total, 14 train, 2 val, 4 test)

In [None]:
from neuralforecast.core import NeuralForecast
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.losses.pytorch import DistributionLoss
from neuralforecast.auto import AutoTiDE

In [None]:
def _downcast_dtype(dtype):
    """Map 64-bit int/float dtypes to their 32-bit counterpart; keep the rest."""
    if dtype == 'int64':
        return 'int32'
    if dtype == 'float64':
        return 'float32'
    return dtype


# Read the S&P 500 CSV and parse the timestamp column.
df = pd.read_csv('Data/S&P500/3ProSP500.csv')
df['ds'] = pd.to_datetime(df['ds'])

# Shrink the numeric columns to 32 bit.
df = df.astype({name: _downcast_dtype(kind) for name, kind in df.dtypes.items()})

# Column '840E' becomes the target column 'y'.
df = df.rename(columns={'840E': 'y'})

# Keep the identifier, timestamp and target columns plus the MA / MOM / RSI /
# EMA indicator columns.
selected_columns = ['unique_id', 'ds', 'y']
selected_columns += ['MA(2,9)', 'MA(3,9)', 'MA(1,12)', 'MA(2,12)', 'MA(3,12)']
selected_columns += ['MOM(9)', 'MOM(12)']
selected_columns += ['RSI(7)', 'RSI(14)']
selected_columns += ['EMA(3,9)', 'EMA(5,9)', 'EMA(5,12)']
df = df[selected_columns]

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008545 entries, 0 to 2008544
Data columns (total 15 columns):
 #   Column     Dtype         
---  ------     -----         
 0   unique_id  object        
 1   ds         datetime64[ns]
 2   y          float32       
 3   MA(2,9)    int32         
 4   MA(3,9)    int32         
 5   MA(1,12)   int32         
 6   MA(2,12)   int32         
 7   MA(3,12)   int32         
 8   MOM(9)     int32         
 9   MOM(12)    int32         
 10  RSI(7)     int32         
 11  RSI(14)    int32         
 12  EMA(3,9)   int32         
 13  EMA(5,9)   int32         
 14  EMA(5,12)  int32         
dtypes: datetime64[ns](1), float32(1), int32(12), object(1)
memory usage: 130.3+ MB


In [None]:
# Inclusive calendar bounds of the two windows.
start_2020_2023, end_2020_2023 = '2020-01-01', '2023-12-31'
start_2018_2019, end_2018_2019 = '2018-01-01', '2019-12-31'

# Distinct calendar dates present in the data inside each window.
unique_days_2020_2023 = df.loc[
    df['ds'].between(start_2020_2023, end_2020_2023), 'ds'
].dt.date.unique()
unique_days_2018_2019 = df.loc[
    df['ds'].between(start_2018_2019, end_2018_2019), 'ds'
].dt.date.unique()

# Number of distinct dates per window.
count_unique_days_2020_2023 = len(unique_days_2020_2023)
count_unique_days_2018_2019 = len(unique_days_2018_2019)

# One row per window; shown as the cell result.
summary_counts = pd.DataFrame({
    'Period': ['2020-2023', '2018-2019'],
    'Unique Days Covered': [count_unique_days_2020_2023, count_unique_days_2018_2019],
})
summary_counts

Unnamed: 0,Period,Unique Days Covered
0,2020-2023,1043
1,2018-2019,522


In [None]:
# Every column except the timestamp, the target and the series id is used as
# an exogenous feature.
exog_list = df.columns.tolist()
for reserved in ('ds', 'y', 'unique_id'):
    exog_list.remove(reserved)
exog_list

['MA(2,9)',
 'MA(3,9)',
 'MA(1,12)',
 'MA(2,12)',
 'MA(3,12)',
 'MOM(9)',
 'MOM(12)',
 'RSI(7)',
 'RSI(14)',
 'EMA(3,9)',
 'EMA(5,9)',
 'EMA(5,12)']

In [None]:
# Run settings: validation/test sizes are the day counts of the 2018-2019 and
# 2020-2023 windows.
levels = [90]
horizons = [1, 5, 10, 20]
val_size, test_size = count_unique_days_2018_2019, count_unique_days_2020_2023

In [None]:
import os
from time import time

class AutoTiDETrainer:
    """Tune, cross-validate and persist one AutoTiDE model per forecast horizon.

    The model receives ``exog_list`` as ``hist_exog_list``. For each horizon the
    fitted model, the hyperparameter-search table and the cross-validation
    predictions are written to relative paths carrying the ``TECH`` prefix
    (see ``_paths``).

    Args:
        horizons: Forecast horizons to train for.
        levels: Levels passed to ``DistributionLoss(level=...)``.
        exog_list: Column names used as ``hist_exog_list``.
        df: Data frame passed to ``NeuralForecast.cross_validation``.
        val_size: ``val_size`` passed to ``cross_validation``.
        test_size: ``test_size`` passed to ``cross_validation``.
        num_samples: Number of hyperparameter samples.
        cpus: ``cpus`` argument of the ``AutoTiDE`` search.
        gpus: ``gpus`` argument of the ``AutoTiDE`` search.
    """

    # Candidate learning rates offered to the hyperparameter search.
    LEARNING_RATES = (0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001)

    def __init__(self, horizons, levels, exog_list, df, val_size, test_size,
                 *, num_samples=30, cpus=12, gpus=1):
        self.horizons = horizons
        self.levels = levels
        self.exog_list = exog_list
        self.df = df
        self.val_size = val_size
        self.test_size = test_size
        self.num_samples = num_samples
        self.cpus = cpus
        self.gpus = gpus

    @staticmethod
    def _paths(horizon):
        """Return ``(model_dir, predictions_csv, hpo_csv)`` for ``horizon``."""
        model_path = f'Trained Models/AutoTiDE/TECHhorizon_{horizon}/'
        output_csv = f'Data/Test/horizon_{horizon}/TECHtide_model1_horizon_{horizon}.csv'
        hpo_csv = f'HPO/horizon_{horizon}/TECHtide_model1_horizon_{horizon}_hpo.csv'
        return model_path, output_csv, hpo_csv

    def check_existing_files(self, horizon):
        """Return True if both the model directory and the prediction CSV exist."""
        model_path, output_csv, _ = self._paths(horizon)
        return os.path.exists(model_path) and os.path.exists(output_csv)

    def save_results(self, nf, horizon, Y_hat_df):
        """Save the search table, the trained model and the predictions.

        Args:
            nf: Fitted ``NeuralForecast`` object; ``nf.models[0].results`` must
                provide ``get_dataframe()``.
            horizon: Horizon the artefacts belong to.
            Y_hat_df: Cross-validation output. Its float32 columns are cast to
                float16 in place before the CSV is written.
        """
        model_path, output_csv, hpo_csv = self._paths(horizon)

        # Create every target directory up front: a missing folder must not
        # discard the outcome of a long hyperparameter search.
        os.makedirs(model_path, exist_ok=True)
        os.makedirs(os.path.dirname(hpo_csv), exist_ok=True)
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)

        # Hyperparameter-search table of the single model.
        nf.models[0].results.get_dataframe().to_csv(hpo_csv, index=False)

        nf.save(path=model_path, model_index=None, overwrite=True, save_dataset=False)

        # NOTE(review): float16 keeps roughly 3 significant digits and
        # overflows above 65504 -- confirm this precision is sufficient.
        for col in Y_hat_df.select_dtypes(include='float32').columns:
            Y_hat_df[col] = Y_hat_df[col].astype('float16')

        # Written last: its presence marks the horizon as complete for
        # ``check_existing_files``.
        Y_hat_df.to_csv(output_csv, index=False)

    def configure_model(self, horizon):
        """Build the AutoTiDE search (with exogenous features) for ``horizon``."""
        tide_config = AutoTiDE.get_default_config(h=horizon, backend="ray")
        tide_config['hist_exog_list'] = self.exog_list
        tide_config['random_seed'] = 42
        tide_config['learning_rate'] = tune.choice(list(self.LEARNING_RATES))

        return AutoTiDE(h=horizon,
                        config=tide_config,
                        search_alg=HyperOptSearch(), num_samples=self.num_samples,
                        backend='ray',
                        loss=DistributionLoss(distribution='StudentT', level=self.levels),
                        cpus=self.cpus, gpus=self.gpus)

    def run_training(self):
        """Train, cross-validate and save every horizon that is not yet on disk."""
        for horizon in self.horizons:
            if self.check_existing_files(horizon):
                print(f"Horizon {horizon}: Model and CSV already exist. Skipping this run.")
                continue

            tide_model = self.configure_model(horizon)

            # Time the cross-validation together with the save step.
            init = time()
            nf = NeuralForecast(models=[tide_model], freq='B')
            Y_hat_df = nf.cross_validation(df=self.df,
                                           val_size=self.val_size,
                                           test_size=self.test_size,
                                           n_windows=None)

            self.save_results(nf, horizon, Y_hat_df)

            end = time()
            print(f'Horizon {horizon} CV Minutes: {(end - init) / 60}')

In [None]:
# Build the trainer from the settings defined above and run every horizon.
trainer = AutoTiDETrainer(
    horizons=horizons,
    levels=levels,
    exog_list=exog_list,
    df=df,
    val_size=val_size,
    test_size=test_size,
)
trainer.run_training()

# Modelling Firm Indicators (20 years total, 14 train, 2 val, 4 test)

In [None]:
from neuralforecast.core import NeuralForecast
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.losses.pytorch import DistributionLoss
from neuralforecast.auto import AutoTiDE

In [None]:
def _downcast_dtype(dtype):
    """Map 64-bit int/float dtypes to their 32-bit counterpart; keep the rest."""
    if dtype == 'int64':
        return 'int32'
    if dtype == 'float64':
        return 'float32'
    return dtype


# Read the S&P 500 CSV and parse the timestamp column.
df = pd.read_csv('Data/S&P500/3ProSP500.csv')
df['ds'] = pd.to_datetime(df['ds'])

# Shrink the numeric columns to 32 bit.
df = df.astype({name: _downcast_dtype(kind) for name, kind in df.dtypes.items()})

# Column '840E' becomes the target column 'y'.
df = df.rename(columns={'840E': 'y'})

# Keep the identifier, timestamp and target columns plus the six float
# indicator columns.
selected_columns = ['unique_id', 'ds', 'y']
selected_columns += ['DY', 'PTBV', 'P', 'PO', 'VO', 'PE']
df = df[selected_columns]

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008545 entries, 0 to 2008544
Data columns (total 9 columns):
 #   Column     Dtype         
---  ------     -----         
 0   unique_id  object        
 1   ds         datetime64[ns]
 2   y          float32       
 3   DY         float32       
 4   PTBV       float32       
 5   P          float32       
 6   PO         float32       
 7   VO         float32       
 8   PE         float32       
dtypes: datetime64[ns](1), float32(7), object(1)
memory usage: 84.3+ MB


In [None]:
# First and last calendar year in the data, then the number of years spanned
# (both ends included).
first_year = df['ds'].min().year
last_year = df['ds'].max().year
print(first_year, last_year)
print(last_year - first_year + 1)

2004 2023
20


In [None]:
# Inclusive calendar bounds of the two windows.
start_2020_2023, end_2020_2023 = '2020-01-01', '2023-12-31'
start_2018_2019, end_2018_2019 = '2018-01-01', '2019-12-31'

# Distinct calendar dates present in the data inside each window.
unique_days_2020_2023 = df.loc[
    df['ds'].between(start_2020_2023, end_2020_2023), 'ds'
].dt.date.unique()
unique_days_2018_2019 = df.loc[
    df['ds'].between(start_2018_2019, end_2018_2019), 'ds'
].dt.date.unique()

# Number of distinct dates per window.
count_unique_days_2020_2023 = len(unique_days_2020_2023)
count_unique_days_2018_2019 = len(unique_days_2018_2019)

# One row per window; shown as the cell result.
summary_counts = pd.DataFrame({
    'Period': ['2020-2023', '2018-2019'],
    'Unique Days Covered': [count_unique_days_2020_2023, count_unique_days_2018_2019],
})
summary_counts

Unnamed: 0,Period,Unique Days Covered
0,2020-2023,1043
1,2018-2019,522


In [None]:
# Every column except the timestamp, the target and the series id is used as
# an exogenous feature.
exog_list = df.columns.tolist()
for reserved in ('ds', 'y', 'unique_id'):
    exog_list.remove(reserved)
exog_list

['DY', 'PTBV', 'P', 'PO', 'VO', 'PE']

In [None]:
# Run settings: validation/test sizes are the day counts of the 2018-2019 and
# 2020-2023 windows.
levels = [90]
horizons = [1, 5, 10, 20]
val_size, test_size = count_unique_days_2018_2019, count_unique_days_2020_2023

In [None]:
import os
from time import time

class AutoTiDETrainer:
    """Tune, cross-validate and persist one AutoTiDE model per forecast horizon.

    The model receives ``exog_list`` as ``hist_exog_list``. For each horizon the
    fitted model, the hyperparameter-search table and the cross-validation
    predictions are written to relative paths carrying the ``FIRM`` prefix
    (see ``_paths``).

    Args:
        horizons: Forecast horizons to train for.
        levels: Levels passed to ``DistributionLoss(level=...)``.
        exog_list: Column names used as ``hist_exog_list``.
        df: Data frame passed to ``NeuralForecast.cross_validation``.
        val_size: ``val_size`` passed to ``cross_validation``.
        test_size: ``test_size`` passed to ``cross_validation``.
        num_samples: Number of hyperparameter samples.
        cpus: ``cpus`` argument of the ``AutoTiDE`` search.
        gpus: ``gpus`` argument of the ``AutoTiDE`` search.
    """

    # Candidate learning rates offered to the hyperparameter search.
    LEARNING_RATES = (0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001)

    def __init__(self, horizons, levels, exog_list, df, val_size, test_size,
                 *, num_samples=30, cpus=12, gpus=1):
        self.horizons = horizons
        self.levels = levels
        self.exog_list = exog_list
        self.df = df
        self.val_size = val_size
        self.test_size = test_size
        self.num_samples = num_samples
        self.cpus = cpus
        self.gpus = gpus

    @staticmethod
    def _paths(horizon):
        """Return ``(model_dir, predictions_csv, hpo_csv)`` for ``horizon``."""
        model_path = f'Trained Models/AutoTiDE/FIRMhorizon_{horizon}/'
        output_csv = f'Data/Test/horizon_{horizon}/FIRMtide_model1_horizon_{horizon}.csv'
        hpo_csv = f'HPO/horizon_{horizon}/FIRMtide_model1_horizon_{horizon}_hpo.csv'
        return model_path, output_csv, hpo_csv

    def check_existing_files(self, horizon):
        """Return True if both the model directory and the prediction CSV exist."""
        model_path, output_csv, _ = self._paths(horizon)
        return os.path.exists(model_path) and os.path.exists(output_csv)

    def save_results(self, nf, horizon, Y_hat_df):
        """Save the search table, the trained model and the predictions.

        Args:
            nf: Fitted ``NeuralForecast`` object; ``nf.models[0].results`` must
                provide ``get_dataframe()``.
            horizon: Horizon the artefacts belong to.
            Y_hat_df: Cross-validation output. Its float32 columns are cast to
                float16 in place before the CSV is written.
        """
        model_path, output_csv, hpo_csv = self._paths(horizon)

        # Create every target directory up front: a missing folder must not
        # discard the outcome of a long hyperparameter search.
        os.makedirs(model_path, exist_ok=True)
        os.makedirs(os.path.dirname(hpo_csv), exist_ok=True)
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)

        # Hyperparameter-search table of the single model.
        nf.models[0].results.get_dataframe().to_csv(hpo_csv, index=False)

        nf.save(path=model_path, model_index=None, overwrite=True, save_dataset=False)

        # NOTE(review): float16 keeps roughly 3 significant digits and
        # overflows above 65504 -- confirm this precision is sufficient.
        for col in Y_hat_df.select_dtypes(include='float32').columns:
            Y_hat_df[col] = Y_hat_df[col].astype('float16')

        # Written last: its presence marks the horizon as complete for
        # ``check_existing_files``.
        Y_hat_df.to_csv(output_csv, index=False)

    def configure_model(self, horizon):
        """Build the AutoTiDE search (with exogenous features) for ``horizon``."""
        tide_config = AutoTiDE.get_default_config(h=horizon, backend="ray")
        tide_config['hist_exog_list'] = self.exog_list
        tide_config['random_seed'] = 42
        tide_config['learning_rate'] = tune.choice(list(self.LEARNING_RATES))

        return AutoTiDE(h=horizon,
                        config=tide_config,
                        search_alg=HyperOptSearch(), num_samples=self.num_samples,
                        backend='ray',
                        loss=DistributionLoss(distribution='StudentT', level=self.levels),
                        cpus=self.cpus, gpus=self.gpus)

    def run_training(self):
        """Train, cross-validate and save every horizon that is not yet on disk."""
        for horizon in self.horizons:
            if self.check_existing_files(horizon):
                print(f"Horizon {horizon}: Model and CSV already exist. Skipping this run.")
                continue

            tide_model = self.configure_model(horizon)

            # Time the cross-validation together with the save step.
            init = time()
            nf = NeuralForecast(models=[tide_model], freq='B')
            Y_hat_df = nf.cross_validation(df=self.df,
                                           val_size=self.val_size,
                                           test_size=self.test_size,
                                           n_windows=None)

            self.save_results(nf, horizon, Y_hat_df)

            end = time()
            print(f'Horizon {horizon} CV Minutes: {(end - init) / 60}')

In [None]:
# Build the trainer from the settings defined above and run every horizon.
trainer = AutoTiDETrainer(
    horizons=horizons,
    levels=levels,
    exog_list=exog_list,
    df=df,
    val_size=val_size,
    test_size=test_size,
)
trainer.run_training()