### Imports

In [219]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from arch import arch_model
from sklearn.metrics import mean_squared_error as mse

plt.rcParams["figure.figsize"] = (12, 7)

### Getting the data

In [4]:
def get_sp500_ticker_list():
    """
    Returns a list with all SP500 tickers
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    table = tables[0]
    ticker_list = table['Symbol']
    return ticker_list

In [28]:
def get_sample_ticker(ticker_list, n=5):
    sample = ticker_list.sample(n).to_list()
    return sample

In [11]:
def get_adj_close(ticker_list, start, end, interval):
    """
    Returns the adjusted close for a unique ticker as string or a list of tickers.
    Format of dates: 'yyyy-mm-dd'
    Possible intervals: '1d', '5d', '1mo' 
    or intraday measures but limited to max a week's worth: '1m', '2m', '5m', '15m', '30m'
    """
    full_df = yf.download(ticker_list, start=start, end=end, interval=interval)
    adj_close_df = full_df['Adj Close']
    return adj_close_df

In [60]:
def get_adj_close_df(n=5, start='2020-01-01', end='2021-12-31', interval='1d'):
    """
    Returns the returns df and adj close df for a unique ticker as string or a list of tickers.
    n = sample size
    Format of dates: 'yyyy-mm-dd'
    Possible intervals: '1d', '5d', '1mo' 
    or intraday measures but limited to max a week's worth: '1m', '2m', '5m', '15m', '30m'
    Returns a dataframe of a random sample of the sp500 adj closes over a certain period of time
    """
    sp500_tickers = get_sp500_ticker_list()
    sample_tickers = get_sample_ticker(sp500_tickers, n)
    full_df = yf.download(sample_tickers, start=start, end=end, interval=interval)
    adj_close_df = full_df['Adj Close']
    return adj_close_df

In [64]:
adj_close_df = get_adj_close_df()

[*********************100%***********************]  5 of 5 completed


In [63]:
def get_returns(adj_close_df):
    df_returns = (adj_close_df.pct_change())*100
    df_returns.dropna(axis=0,inplace=True)
    return df_returns

In [65]:
returns_df = get_returns(adj_close_df)

In [67]:
def get_volatility(returns_df):
    realized_vol = returns_df.rolling(5).std()
    realized_vol.dropna(inplace=True)
    return realized_vol

In [68]:
volatility_df = get_volatility(returns_df)

### Splitting the dataset by observations

In [158]:
def split_df(df, n=50):
    df_test = df.iloc[-n:]
    df_train = df.iloc[:-n]
    split_date = df.iloc[-n:].index
    return df_train, df_test, split_date

In [159]:
df_train_vol, df_test_vol, split_date = split_df(volatility_df)
df_train_ret, df_test_ret, split_date = split_df(returns_df)

### GARCH

In [280]:
def garch(returns, n):

    aic_garch = []

    for p in range(1, 2): 
        for q in range(1, 2):
            garch = arch_model(returns, mean='zero', vol='GARCH', p=p, q=q)\
                .fit(disp='off') 
            aic_garch.append(garch.aic) 

            if garch.aic == np.min(aic_garch): 
                best_param = (p,q) 
    
    #fitting the best GARCH model
    garch = arch_model(returns, mean='zero', vol='GARCH', p=best_param[0], q=best_param[1]).fit(disp='off')

    #forecasts
    forecasts = garch.forecast(horizon=n, reindex=False)
    #forecasts = garch.forecast(horizon=50, start=split_date[0], reindex=True)
    return forecasts, forecasts.residual_variance.dropna().transpose()

In [281]:
forecasts, residuals = garch(df_train_ret.iloc[:, :1], 50)

In [282]:
def get_rmse(residuals, df_test):
    rmse = np.sqrt(mse(df_test.iloc[:, :1]/100, np.sqrt(residuals/100)))
    return rmse

In [284]:
get_rmse(residuals, df_test)

0.2205656102776126

### ARCH

In [292]:
def arch(returns, n):
    aic_arch = []

    for p in range(1, 2): # Iterating ARCH parameter p
        arch = arch_model(returns, mean='zero', vol='ARCH', p=p)\
             .fit(disp='off') # Running ARCH(p)
        aic_arch.append(arch.aic) # Storing aic for the ARCH(p)

        if arch.aic == np.min(aic_arch): 
             best_param = p # Finding the minimum AIC score
                
    # Fitting best arch
    arch = arch_model(returns, mean='zero', vol='ARCH', p=best_param)\
         .fit(disp='off')
    
    forecasts = arch.forecast(horizon=n, reindex=False)
    
    return forecasts, forecasts.residual_variance.dropna().transpose()

In [293]:
forecasts, residuals = garch(df_train_ret.iloc[:, :1], 50)

In [294]:
get_rmse(residuals, df_test)

0.2205656102776126

### GJR garch

In [295]:
def gjr_garch(returns, n):
    aic_gjr_garch = []

    for p in range(1, 2): 
        for q in range(1, 2):
            gjr_garch = arch_model(returns, mean='zero', vol='GARCH', p=p, o=1, q=q)\
                 .fit(disp='off') 
            aic_gjr_garch.append(gjr_garch.aic) 

            if gjr_garch.aic == np.min(aic_gjr_garch): 
                 best_param = p, q # Finding the minimum AIC score
    
    gjr_garch = arch_model(returns, mean='zero', vol='ARCH', p=best_param[0], o=1,
                       q=best_param[1]).fit(disp='off')
    
    forecasts = gjr_garch.forecast(horizon=n, reindex=True)
    
    return forecasts, forecasts.residual_variance.dropna().transpose()

In [304]:
forecasts, residuals = gjr_garch(df_train_ret.iloc[:, :1], 50)

In [297]:
get_rmse(residuals, df_test)

0.20397548607626004

### EGARCH

In [326]:
def egarch(returns, n):
    aic_egarch = []

    for p in range(1, 2):
        for q in range(1, 2):
            egarch = arch_model(returns, mean='zero', vol='EGARCH', p=p, q=q)\
                  .fit(disp='off')
            aic_egarch.append(egarch.aic)
            if egarch.aic == np.min(aic_egarch):
                best_param = (p, q)
    
    egarch = arch_model(returns, mean='zero', vol='EGARCH',
                        p=best_param[0], q=best_param[1], dist="skewt").fit(disp='off')
    
    forecasts = egarch.forecast(horizon=50, method='simulation', reindex=False)
    
    return forecasts, forecasts.residual_variance.dropna().transpose()

In [327]:
forecasts, residuals = egarch(df_train_ret.iloc[:, :1], 50)

In [328]:
get_rmse(residuals, df_test)

0.23406873545174414

### Neural nets

In [None]:
def prepare_data()