## Time series prediction

In [10]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from utils import MACD, RSI, Drawdown_current, volatility_rolling, extract, Close_price, log_return, WFCV
import statsmodels.api as sm

In [11]:
# Load the returns table from a CSV located relative to the notebook's
# working directory; all pandas parsing defaults are used.
returns_all = pd.read_csv(filepath_or_buffer='230216_returns.csv')

In [12]:
# Build the working frame for the 'HU1 Comdty' instrument.
# `extract`, `Close_price` and `log_return` are helpers from the local `utils`
# module; presumably they select the instrument's rows and derive a price level
# and its log return -- confirm against utils.
hu1_comdty = extract(returns_all, 'HU1 Comdty')

# "Close" is attached before "log_return" on purpose: the second helper
# receives the frame that already carries the first column.
for column_name, builder in (("Close", Close_price), ("log_return", log_return)):
    hu1_comdty[column_name] = builder(hu1_comdty)

In [13]:
# Indicator features, added one column at a time in a fixed order. Each helper
# (from the local `utils` module) receives the frame as it stands at that point,
# so the order of these assignments is kept exactly.
# NOTE(review): these indicators sit on the same row as the `log_return` target;
# if the helpers use same-day data this would leak the target -- confirm in utils.
for feature_name, indicator in (
    ("MACD", MACD),
    ("RSI", RSI),
    ("Drawdown", Drawdown_current),
):
    hu1_comdty[feature_name] = indicator(hu1_comdty)

# Rolling volatility over two window lengths.
for feature_name, span in (("Volatility_20", 20), ("Volatility_60", 60)):
    hu1_comdty[feature_name] = volatility_rolling(hu1_comdty, window=span)

# Autoregressive features: the log return shifted by 1..10 rows.
lags = list(range(1, 11))
log_returns = hu1_comdty['log_return']
for lag in lags:
    hu1_comdty[f'lag_{lag}'] = log_returns.shift(lag)

In [14]:
# Keep only rows where every column is populated (the lag and rolling-window
# features leave NaNs in some rows), then split into target and features.
model_rows = hu1_comdty.dropna()
y = model_rows['log_return']
X = model_rows.drop(columns=['log_return', 'Close', 'return'])

# Shallow random forest; the seed is fixed so repeated runs give the same fit.
rf = RandomForestRegressor(random_state=42, max_depth=5, n_estimators=100)

# `WFCV` is a helper from the local `utils` module -- presumably walk-forward
# cross-validation returning predictions, realised values, MSE and R^2
# (confirm against utils).
rf_pred, rf_truth, rf_mse, rf_r2 = WFCV(X, y, rf)

In [None]:
# Regress the realised values on the model's predictions (with an intercept)
# and print the fit summary; the slope and R^2 indicate how much of the
# realised variation the predictions explain.
rf_pred_with_const = sm.add_constant(rf_pred)
reg = sm.OLS(endog=rf_truth, exog=rf_pred_with_const).fit()
print(reg.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     6.311
Date:                Mon, 26 Jan 2026   Prob (F-statistic):             0.0121
Time:                        15:25:03   Log-Likelihood:                 3069.3
No. Observations:                1450   AIC:                            -6135.
Df Residuals:                    1448   BIC:                            -6124.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0001      0.001      0.177      0.8

: 