### Imports

In [2]:
import yfinance as yf
import pandas as pd

### Getting the data

In [4]:
def get_sp500_ticker_list():
    """
    Returns a list with all SP500 tickers
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    table = tables[0]
    ticker_list = table['Symbol']
    return ticker_list

In [8]:
def get_sample_ticker(ticker_list, n=5):
    sample = ticker_list.sample(n)
    return sample

In [11]:
def get_adj_close(ticker_list, start, end, interval):
    """
    Returns the adjusted close for a unique ticker as string or a list of tickers.
    Format of dates: 'yyyy-mm-dd'
    Possible intervals: '1d', '5d', '1mo' 
    or intraday measures but limited to max a week's worth: '1m', '2m', '5m', '15m', '30m'
    """
    full_df = yf.download(ticker_list, start=start, end=end, interval=interval)
    adj_close_df = full_df['Adj Close']
    return adj_close_df

In [None]:
df = get_adj_close(['aapl', 'nvda', 'ftnt'], '2018-12-3')

In [17]:
def get_df(n=5, start='2018-01-01', end='2021-12-31', interval='1mo'):
    """
    Returns a dataframe of a random sample of the sp500 adj closes over a certain period of time
    """
    sp500_tickers = get_sp500_ticker_list()
    sample_tickers = get_sample_ticker(sp500_tickers, n)
    df = get_adj_close(sample_tickers, start, end, interval)
    return df

In [18]:
df_test = get_df()

AttributeError: 'Series' object has no attribute 'split'

In [3]:
# def sp500_closes_to_csv(start, end, interval):
#     """
#     Saves adjusted closes for all  SP500 companies in a csv file
#     Format of dates: 'yyyy-mm-dd'
#     Possible intervals: '1d', '5d', '1mo' 
#     or intraday measures but limited to max a week's worth: '1m', '2m', '5m', '15m', '30m'
#     """
#     ticker_list = get_sp500_ticker_list()
#     adj_closes = get_adj_close(ticker_list, start, end, interval)
#     adj_closes.to_csv('sp500_ajdclose.csv')

### Garch

In [None]:
#finding the best parameters
split_date = dt.datetime(2020,12,20)
aapl_ret = returns["AAPL"]

def garch(returns,split_date):

    aic_garch = []

    for p in range(1, 5): 
        for q in range(1,5):
            garch = arch_model(returns, mean='zero', vol='GARCH', p=p, q=q)\
                .fit(disp='off') 
            aic_garch.append(garch.aic) 

            if garch.aic == np.min(aic_garch): 
                best_param = (p,q) 
    print("GARCH(",best_param,")")
    
    #fitting the GARCH model on the train data set
    garch = arch_model(returns, mean='zero', vol='GARCH', p=best_param[0], q=best_param[1])
    garch_results = garch.fit(disp='off', last_obs=split_date)
    print(garch_results.summary)
    
    #forecasts
    forecasts_aapl = garch_results.forecast(horizon=1, start=split_date, reindex=False)
    
    #plotting realized volatility vs volatility prediction with GARCH
    plt.plot(realized_vol['AAPL'] /100, label="realized volatility")
    plt.plot(forecasts_aapl.variance/100, label="Volatility prediction GARCH")
    plt.legend()
    plt.show()