In [28]:
import yfinance as yf
import pandas as pd
import numpy as np
from typing import Callable
from functools import lru_cache

from statsforecast import StatsForecast
from statsforecast.models import (
    GARCH, 
    ARCH, 
    Naive
)

from lib.utils import get_sp500, log_returns

In [2]:
sp500 = get_sp500()
# get random stock from each sector
tickers = sp500.groupby('GICS Sector').apply(lambda x: x.sample(1)).reset_index(drop=True).loc[:, 'Symbol'].tolist()

In [29]:
@lru_cache(maxsize=1)
def yf_data_cache():
    df = yf.download(tickers, start = '2018-01-01', end = '2022-12-31', interval='1d') # use daily prices
    return df

In [54]:
df = yf_data_cache()

In [68]:
def statsforecast_preprocess_yf(df: pd.DataFrame, y_func: Callable[[pd.Series], pd.Series]):
    """
    Takes a yfinance price dataframe and returns a dataframe with columns:
    """
    df = df.loc[:, (['Adj Close'], tickers)] # just get adj close
    df.columns = df.columns.droplevel() # drop MultiIndex
    df = df.reset_index() # reset index
    ds_colname = df.columns[0] # get ds column name
    df.rename(columns={ds_colname: 'ds'}, inplace=True) # rename ds column
    prices = df.melt(id_vars='ds') # melt by ds index
    prices['y_func'] = prices.groupby('variable')['value'].transform(y_func) # apply y_func to value (Adj Close)
    prices.rename(columns={'variable': 'unique_id', 'y_func' : 'y'}, inplace=True)
    prices.drop(columns='value', inplace=True) # rename columns
    return prices

In [69]:
df = yf_data_cache()
data = statsforecast_preprocess_yf(df, log_returns)

In [71]:
models = [
    ARCH(1),
    GARCH(1, 1),
    Naive()
]

sf = StatsForecast(
    df = data,
    models = models,
    freq = 'MS',
    n_jobs = -1 # -1 selects all available cores
)

In [74]:
crossvalidation_df = sf.cross_validation(
    df = data,
    h = 3, # forecast horizon
    step_size = 3, # step size between forecasts
    n_windows = 4 # number of windows to use
)

In [83]:
def calc_error(cv_df:pd.DataFrame, models: list, func='mae'):
    def _mae(y_true: pd.Series, y_pred: pd.Series):
        return np.mean(np.abs(y_true - y_pred))
    def _mse(y_true: pd.Series, y_pred: pd.Series):
        return np.mean((y_true - y_pred)**2)
    def _rmse(y_true: pd.Series, y_pred: pd.Series):
        return np.sqrt(_mse(y_true, y_pred))
    def _mape(y_true: pd.Series, y_pred: pd.Series):
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
    funcs = {
        'mae': _mae,
        'mse': _mse,
        'rmse': _rmse,
        'mape': _mape
    }

    res = {f"{mod}_{func}": funcs[func](cv_df['y'], cv_df[str(mod)]) for mod in models} # calculate MAE for each model
    return pd.Series(res) # return as Series

In [87]:
crossvalidation_df

Unnamed: 0_level_0,ds,cutoff,y,ARCH(1),"GARCH(1,1)",Naive
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CBRE,2022-01-01,2021-12-01,-0.005651,0.189406,0.036289,0.042984
CBRE,2022-02-01,2021-12-01,-0.025121,-0.072137,-0.015133,0.042984
CBRE,2022-03-01,2021-12-01,-0.021405,-0.061656,-0.012472,0.042984
CBRE,2022-04-01,2022-03-01,-0.010477,0.051408,0.149528,-0.021405
CBRE,2022-05-01,2022-03-01,-0.001868,-0.020846,-0.056315,-0.021405
...,...,...,...,...,...,...
VRTX,2022-08-01,2022-06-01,-0.014496,-0.039085,-0.031745,-0.009182
VRTX,2022-09-01,2022-06-01,-0.008215,-0.033642,-0.027408,-0.009182
VRTX,2022-10-01,2022-09-01,-0.005444,0.127356,0.031324,-0.008215
VRTX,2022-11-01,2022-09-01,0.011551,-0.047965,-0.013122,-0.008215


In [84]:
errors_df = crossvalidation_df.groupby(['unique_id', 'cutoff']).agg(lambda x: calc_error(x, models)).reset_index()