In [1]:

import matplotlib.pyplot as plt
import numpy as np
import xlrd
from arch.bootstrap import SPA

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.tsa.arima.model import ARIMA
import pandas as pd
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.diagnostic import het_arch
from statsmodels.stats.diagnostic import het_arch
from arch import arch_model


In [2]:
df = pd.read_csv('/home/vishi/bolinger/OHLC_New.csv', parse_dates=['DateTime'], index_col='DateTime')
df.index = pd.to_datetime(df.index)

In [3]:
len(df)

43875

In [4]:
def func_of_arima_1_1_1(t_1_data, t_2_data, ar_param, ma_param, prev_residual):
    """
    Calculate the ARIMA(1,1,1) value based on previous data and parameters.
    
    :param t_1_data: Previous time series data point (t-1)
    :param t_2_data: Previous time series data point (t-2)
    :param ar_param: AR parameter
    :param ma_param: MA parameter
    :param prev_residual: Previous residual
    :return: Computed value for the current time step
    """
    return ar_param * (t_1_data - t_2_data) + ma_param * prev_residual + t_1_data

def func_of_garch_1_1(omega, alpha, beta, prev_volatility, prev_residual):
    """
    Calculate the GARCH(1,1) value based on previous volatility and residuals.
    :param omega: Constant term
    :param alpha: Coefficient for lagged squared residuals
    :param beta: Coefficient for lagged volatility
    :param prev_volatility: Previous volatility
    :param prev_residual: Previous residual
    :return: Computed volatility for the current time step
    """
    return np.sqrt(omega + alpha * prev_residual ** 2 + beta * prev_volatility**2)


def forcast_closing_prices_forxgboost(df, prediction_interval = 250, renewal_interval=500, order_arima= (1,1,1), order_garch=(1,1)):
    """
    Forecast closing prices using ARIMA and GARCH models.
    
    :param df: DataFrame containing the time series data
    :param predction_interval: Number of periods to forecast
    :param renewal_interval: Interval for renewing the model
    :param order_arima: Order of the ARIMA model (p,d,q)
    :param order_garch: Order of the GARCH model (p,q)
    :return: DataFrame with forecasted values
    """
    #   take first 500 closing prices and then fit an arima model to the solsing prices and fit the residuals to a garchmodel
    # after that use arima and garch to predict the next 250 closing pricesand then repeat the process
      # for the next 500 closing prices that is from 250 to 750 and then predict the next 250 closing prices
      # and so on until the end of the dataframe

    no_of_arima_models = int(len(df) / prediction_interval)
    no_of_garch_models = int(len(df) / prediction_interval)
    arima_predictions = []
    garch_vol_predictions = []
    for i in range(no_of_arima_models-1):    
        start_index = i * prediction_interval
        end_index = start_index + renewal_interval
        start_index_for_prediction = start_index + renewal_interval + 1
        current_index_for_prediction = start_index_for_prediction
        end_index_for_prediction = min(start_index_for_prediction + prediction_interval, len(df))
        arima_data = df['close_scaled_down'][start_index:end_index]
        arima_model = ARIMA(arima_data, order=order_arima)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            arima_fit = arima_model.fit()
        arima_residuals = np.array(arima_fit.resid)
        garch_model = arch_model(arima_residuals, vol='Garch', p=order_garch[0], q=order_garch[1])
        garch_fit = garch_model.fit(disp="off")

        ma_param = arima_fit.maparams[0]
        ar_param = arima_fit.arparams[0]

        predition_of_arima = []
        residual_prediction = []

        predition_of_arima.append(func_of_arima_1_1_1(arima_data.iloc[-1],
                                                    arima_data.iloc[-2],
                                                    ar_param,
                                                    ma_param, 
                                                    arima_residuals[-1]))

        residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
        current_index_for_prediction += 1
        predition_of_arima.append(func_of_arima_1_1_1(predition_of_arima[-1],
                                                    arima_data.iloc[-1],
                                                    ar_param,
                                                    ma_param, 
                                                    residual_prediction[-1]))
        residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
        current_index_for_prediction += 1
        for i in range(start_index_for_prediction + 2, end_index_for_prediction):
            predition_of_arima.append(func_of_arima_1_1_1(predition_of_arima[-1],
                                                        predition_of_arima[-2],
                                                        ar_param,
                                                        ma_param, 
                                                        residual_prediction[-1]))
            residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
            current_index_for_prediction += 1
        arima_predictions.extend(predition_of_arima)
        garch_params = garch_fit.params
        omega = garch_params['omega']
        alpha = garch_params['alpha[1]']
        beta = garch_params['beta[1]']

        vol_garch = garch_fit.conditional_volatility
        vol_prediction = []

        vol_prediction.append(func_of_garch_1_1(omega, alpha, beta, vol_garch[-1], residual_prediction[-1]))

        for i in range(start_index_for_prediction + 1, end_index_for_prediction):
            vol_prediction.append(func_of_garch_1_1(omega, alpha, beta, vol_prediction[-1], residual_prediction[i - start_index_for_prediction - 1]))
        garch_vol_predictions.extend(vol_prediction)

    return arima_predictions, garch_vol_predictions



In [5]:
df.head()

Unnamed: 0_level_0,open,high,low,close,close_scaled_down
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-31 09:16:00,2355950,2356430,2354005,2355135,2355.135
2024-12-31 09:17:00,2355070,2356150,2354500,2355915,2355.915
2024-12-31 09:18:00,2355980,2356025,2355100,2355405,2355.405
2024-12-31 09:19:00,2355400,2357195,2355215,2356905,2356.905
2024-12-31 09:20:00,2356960,2357015,2355475,2355475,2355.475


In [6]:
df['close_scaled_down'] = df['close_scaled_down']/10
arima_predictions, garch_vol_predictions = forcast_closing_prices_forxgboost(df)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  residual_prediction.append(df['close_scaled_down'][current_index_for_prediction] - predition_of_arima[-1])
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  residual_prediction.append(df['close_scaled_down'][curre

In [7]:
(len(arima_predictions)),len(df)
# i need to appen the arima predictions and garch vol predictions in df 
df['arima_predictions'] = np.nan
df['garch_vol_predictions'] = np.nan
df['arima_predictions'].iloc[500:500+len(arima_predictions)] = arima_predictions
df['garch_vol_predictions'].iloc[500:500+len(garch_vol_predictions)] = garch_vol_predictions


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['arima_predictions'].iloc[500:500+len(arima_predictions)] = arima_predictions
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-vie

In [8]:
# save the df to a csv file
df.to_csv('/home/vishi/bolinger/mid_vol.csv', index=True)

In [9]:
# i have a csv file with the header like time,"RI=150, TW=300","RI=150, TW=400","RI=150, TW=500","RI=150, TW=600","RI=200, TW=300","RI=200, TW=400","RI=200, TW=500","RI=200, TW=600","RI=250, TW=300","RI=250, TW=400","RI=250, TW=500","RI=250, TW=600","RI=300, TW=300","RI=300, TW=400","RI=300, TW=500","RI=300, TW=600","RI=350, TW=300","RI=350, TW=400","RI=350, TW=500","RI=350, TW=600","RI=400, TW=300","RI=400, TW=400","RI=400, TW=500","RI=400, TW=600"
# path /home/vishi/bolinger/_SPA_DifferentIntervals.csv
# i want to store each column in a list in a numpy array
# read the csv file
df_spa = pd.read_csv('/home/vishi/bolinger/_SPA_DifferentIntervals.csv', parse_dates=['time'], index_col='time')

# convert the dataframe to a numpy array
df_numpy = df_spa.to_numpy()
df_numpy.shape

(1225, 24)

In [None]:
# convert like every row has profit series and there are 24 rows
df_test = df_numpy.T
returns_numpy = []
for i in range(len(df_test)):
    returns_numpy.append(np.diff(df_test[i]))
returns_numpy = np.array(returns_numpy).T
for i in range(returns_numpy.shape[1]):
    benchmark_losses = -returns_numpy[:, i]
    # remove the i th column from returns_numpy
    stratergy_losses = -np.delete(returns_numpy, i, axis=1)
    spa = SPA(benchmark=benchmark_losses,
        models=stratergy_losses,
        reps=1000,
        block_size=100,  
        bootstrap="stationary",
        studentize=True,
        nested=False,
        seed=42
    )
    spa.compute()
    pvals = spa.pvalues
    print(f"---- SPA Test Results -for {i}---")
    print("Lower     p-value (liberal test):   ", round(pvals["lower"], 4))
    print("Consistent p-value (recommended):   ", round(pvals["consistent"], 4))
    print("Upper     p-value (conservative):   ", round(pvals["upper"], 4))

    # Interpretation
    if pvals["lower"] < 0.01:
        print("✅ At least one model is significantly better than the benchmark (at 5% level).")
    else:
        print("❌ No model significantly outperforms the benchmark (at 5% level).")

    # Which models are better?
    better_model_indices = spa.better_models(pvalue=0.01, pvalue_type="lower")
    print(f"Indices of better models {i} (at 1% level):", better_model_indices)
    





    
    

---- SPA Test Results -for 0---
Lower     p-value (liberal test):    0.528
Consistent p-value (recommended):    0.775
Upper     p-value (conservative):    0.806
❌ No model significantly outperforms the benchmark (at 5% level).
Indices of better models 0 (at 5% level): []
---- SPA Test Results -for 1---
Lower     p-value (liberal test):    0.205
Consistent p-value (recommended):    0.277
Upper     p-value (conservative):    0.279
❌ No model significantly outperforms the benchmark (at 5% level).
Indices of better models 1 (at 5% level): []
---- SPA Test Results -for 2---
Lower     p-value (liberal test):    0.093
Consistent p-value (recommended):    0.1
Upper     p-value (conservative):    0.1
❌ No model significantly outperforms the benchmark (at 5% level).
Indices of better models 2 (at 5% level): []
---- SPA Test Results -for 3---
Lower     p-value (liberal test):    0.424
Consistent p-value (recommended):    0.654
Upper     p-value (conservative):    0.665
❌ No model significantly ou

In [None]:
benchmark_losses = -np.array(returns_numpy[0]).T
stratergy_losses = -np.array(returns_numpy[1:]).T
spa = SPA(benchmark=benchmark_losses,
        models=stratergy_losses,
        reps=1000,
        block_size=100,  
        bootstrap="stationary",
        studentize=True,
        nested=False,
        seed=42
    )
spa.compute()
pvals = spa.pvalues
print("---- SPA Test Results ----")
print("Lower     p-value (liberal test):   ", round(pvals["lower"], 4))
print("Consistent p-value (recommended):   ", round(pvals["consistent"], 4))
print("Upper     p-value (conservative):   ", round(pvals["upper"], 4))

# Interpretation
if pvals["lower"] < 0.05:
    print("✅ At least one model is significantly better than the benchmark (at 5% level).")
else:
    print("❌ No model significantly outperforms the benchmark (at 5% level).")

# Which models are better?
better_model_indices = spa.better_models(pvalue=0.05, pvalue_type="lower")
print("Indices of better models (at 5% level):", better_model_indices)

