# ARIMA - Individual benchmark
### Load packages

In [None]:
import time as time
import pandas as pd
import numpy as np
import warnings
import os

from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

### Load data

In [None]:
# Folder that contains the prepared CSV input files
Directory = 'C:/.../TFT_for_Stock_Movement_Prediction/data'

# Target and return feature
CCR = pd.read_csv(os.path.join(Directory, 'CCR.csv'), index_col = [0])

## Features
# Time features - Categorical
# 'time_features.csv' holds the feature names in its '0' column. For every name,
# '<name>.csv' is read and bound to a variable carrying that same name.
# NOTE: writing through locals() only creates variables when this code runs at
# module / notebook-cell level, not inside a function.
time_features = pd.read_csv(os.path.join(Directory, 'time_features.csv'), index_col = [0])['0'].tolist()
for feature_name in time_features:
    feature_path = os.path.join(Directory, feature_name + '.csv')
    locals()[feature_name] = pd.read_csv(feature_path, index_col = [0])

### Model preparation
#### Define variables

In [None]:
# Study periods: first (inclusive) and last (exclusive) row position of each
# period. Ten periods of 1000 rows each, shifted by 250 rows.
period_b = tuple(range(0, 2500, 250))
period_e = tuple(range(1000, 3500, 250))

# Split of each period into training and test set (number of rows)
training_size = 750
test_size = 250

# Target
Target_feature = CCR

# Exogenous variables: the calendar feature frames placed side by side
Dates = pd.concat(
    [Weekday, Week, Month, Monthday],
    axis = 1,
)

# Ignore convergence warning
warnings.simplefilter('ignore', ConvergenceWarning)

# File path to save results
File_name_results = 'results/ARIMA/Results_ARIMA.csv'

### Model with exogenous variables
#### Auto-parameter selection

In [None]:
# Walk-forward ARIMA benchmark WITH exogenous regressors (calendar features plus
# a running time index).
# For every study period and every column of Target_feature:
#   1. the (p, d, q) order is selected once by auto_arima on the training window;
#   2. for each of the test_size steps an ARIMA model with that order is refitted
#      on all data seen so far and a one-step-ahead forecast is stored;
#   3. the realised test value (and its exogenous row) is appended afterwards,
#      so the fitting window expands by one observation per step.
start = time.time()

# Create the output folder before the long-running loop: otherwise a missing
# directory is only detected by to_csv at the very end and all results are lost.
results_dir = os.path.dirname(File_name_results)
if results_dir:
    os.makedirs(results_dir, exist_ok = True)

# NOTE(review): the "Model without exogenous variables" cell saves to the same
# File_name_results, so running both cells keeps only the last one's output.

# One row per test observation over all periods, one column per target series.
Results = pd.DataFrame(index = range(period_b[0], period_e[-1] - training_size), columns = Target_feature.columns)
for i, (begin, end) in enumerate(zip(period_b, period_e)):
    start_period = time.time()
    # Time index restarts at 1 in every period; rows are aligned with Dates via
    # the shared integer index before being flattened to a list of rows.
    time_idx = pd.DataFrame(range(period_b[0] + 1, period_e[0] + 1), index = range(begin, end), columns = ['Time_idx'])
    Exogenous = pd.concat([Dates[begin : end], time_idx], axis = 1).reset_index(drop = True).values.tolist()
    for j in range(len(Target_feature.columns)):
        start_stock = time.time()
        training = Target_feature.iloc[begin : begin + training_size, j].to_list()
        test = Target_feature.iloc[end - test_size : end, j].reset_index(drop = True)
        # Exogenous is re-indexed from 0 within the period, so the training rows
        # are simply its first training_size entries (the slice is a new list,
        # so appending below does not modify Exogenous).
        exogenous = Exogenous[:training_size]
        with np.errstate(divide='ignore'):
            arima_model = auto_arima(training, exogenous, start_p = 0, start_q = 0, seasonal = False)
        # The selected order is fixed for the whole test window.
        order = arima_model.get_params().get("order")
        predictions = []
        for k in range(test_size):
            model = ARIMA(training, exogenous, order = order)
            model_fit = model.fit()
            predictions.append(model_fit.forecast(exog = Exogenous[k + training_size])[0])
            # Reveal the realised observation before forecasting the next step.
            training.append(test[k])
            exogenous.append(Exogenous[k + training_size])
        Results.iloc[begin : begin + test_size, j] = predictions
        print(f'Compilation time - Period {i + 1} - Stock {j + 1}: {round(time.time() - start_stock)} seconds')
    print(f'Compilation time - Period {i + 1}: {round(time.time() - start_period)} seconds')
Results.to_csv(File_name_results)
print(f'Compilation time: {round(time.time() - start)} seconds')

### Model without exogenous variables
#### Auto-parameter selection

In [None]:
# Walk-forward ARIMA benchmark WITHOUT exogenous regressors.
# For every study period and every column of Target_feature:
#   1. the (p, d, q) order is selected once by auto_arima on the training window;
#   2. for each of the test_size steps an ARIMA model with that order is refitted
#      on all data seen so far and a one-step-ahead forecast is stored;
#   3. the realised test value is appended afterwards, so the fitting window
#      expands by one observation per step.
start = time.time()

# Create the output folder before the long-running loop: otherwise a missing
# directory is only detected by to_csv at the very end and all results are lost.
results_dir = os.path.dirname(File_name_results)
if results_dir:
    os.makedirs(results_dir, exist_ok = True)

# NOTE(review): the "Model with exogenous variables" cell saves to the same
# File_name_results, so running both cells keeps only the last one's output.

# One row per test observation over all periods, one column per target series.
Results = pd.DataFrame(index = range(period_b[0], period_e[-1] - training_size), columns = Target_feature.columns)
for i, (begin, end) in enumerate(zip(period_b, period_e)):
    start_period = time.time()
    for j in range(len(Target_feature.columns)):
        start_stock = time.time()
        training = Target_feature.iloc[begin : begin + training_size, j].to_list()
        test = Target_feature.iloc[end - test_size : end, j].reset_index(drop = True)
        arima_model = auto_arima(training, start_p = 0, start_q = 0, seasonal = False)
        # The selected order is fixed for the whole test window.
        order = arima_model.get_params().get("order")
        predictions = []
        for k in range(test_size):
            model = ARIMA(training, order = order)
            model_fit = model.fit()
            predictions.append(model_fit.forecast()[0])
            # Reveal the realised observation before forecasting the next step.
            training.append(test[k])
        Results.iloc[begin : begin + test_size, j] = predictions
        print(f'Compilation time - Period {i + 1} - Stock {j + 1}: {round(time.time() - start_stock)} seconds')
    print(f'Compilation time - Period {i + 1}: {round(time.time() - start_period)} seconds')
Results.to_csv(File_name_results)
print(f'Compilation time: {round(time.time() - start)} seconds')