This notebook was created by Donna Faith Go.

In [1]:
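# import sys
# !{sys.executable} -m pip install dtw-python
# NOTE: the import cell below uses `dtaidistance` for dynamic time warping,
# which is a different package from `dtw-python`; to install it, run:
# %pip install dtaidistance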

In [2]:
# standard imports
import matplotlib.pyplot as plt
import pandas as pd 
import pickle
import numpy as np
import seaborn as sns
from typing import Tuple

# webscraping
import requests
from bs4 import BeautifulSoup

# data gathering
import yfinance as yf
import time
import pandas_datareader.data as web
from datetime import datetime, timedelta

# statsmodels
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss

# GARCH model
from arch import arch_model

# dynamic time warping
from dtaidistance import dtw
from sklearn.preprocessing import StandardScaler

# ignore warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## Experiments with the VIX

In this notebook, I run experiments on the VIX (the CBOE Volatility Index) to see whether it shows anything interesting for our research project.

### Data Gathering

In [3]:
# Default date window for downloads; used whenever the caller does not pass
# explicit `start` / `end` values to download_stocks_in_batches.
start_date = '2000-01-01'
end_date = '2026-01-01'

def download_stocks_in_batches(tickers, batch_size=5, delay=1, start=None, end=None):
    """
    Download closing prices in batches to avoid rate limiting.

    Parameters
    ----------
    tickers : sequence of str
        Symbols handed to ``yf.download``, ``batch_size`` at a time.
    batch_size : int, default 5
        Number of tickers per request. Must be at least 1.
    delay : int or float, default 1
        Seconds to sleep between consecutive batches (no sleep after the
        last batch).
    start, end : optional
        Date bounds forwarded to ``yf.download``. When left as ``None`` the
        notebook-level ``start_date`` / ``end_date`` are used.

    Returns
    -------
    pd.DataFrame
        One column per ticker for which a 'Close' column came back; an empty
        frame if nothing was stored.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    A batch that raises is reported with ``print`` and skipped (best effort),
    so the result can hold fewer columns than tickers requested.
    """
    # A negative step would make range() empty and silently return no data,
    # and a zero step raises an opaque ValueError; fail early and clearly.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # Resolve the window at call time so later edits to the notebook-level
    # dates are picked up by subsequent calls.
    start = start_date if start is None else start
    end = end_date if end is None else end

    all_data = {}

    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        print(f"Downloading batch {i//batch_size + 1}: {batch}")

        try:
            # Download the batch
            batch_data = yf.download(
                batch,
                start=start,
                end=end,
                progress=False
            )

            # Extract closing prices for this batch
            if not batch_data.empty and 'Close' in batch_data.columns:
                closes = batch_data['Close']
                if isinstance(closes, pd.Series):
                    # Flat columns: a single close series, labelled with the
                    # first ticker of the batch.
                    all_data[batch[0]] = closes
                    received = 1
                else:
                    for ticker in closes.columns:
                        all_data[ticker] = closes[ticker]
                    received = len(closes.columns)
                # Report the number of columns actually stored, not the
                # number of tickers requested.
                print(f"Successfully downloaded {received} stocks")
            else:
                print(f"No data returned for batch: {batch}")

        except Exception as e:
            # Best effort: report the failed batch and carry on with the rest.
            print(f"Error downloading batch {batch}: {e}")

        # Add delay to avoid rate limiting (skipped after the final batch)
        if i + batch_size < len(tickers):
            print(f"Waiting {delay} seconds before next batch...")
            time.sleep(delay)

    return pd.DataFrame(all_data) if all_data else pd.DataFrame()

In [4]:
# # Download the volatility indices
# closing_df = download_stocks_in_batches(
#     ['^VIX'], 
#     batch_size=5, 
#     delay=5
# )

# if not closing_df.empty:
#     closing_df.to_pickle('data/volatility indices.pkl')

In [5]:
# Load the pickled volatility data from disk.
# NOTE(review): presumably this file is the one written by the commented-out
# download cell above — confirm. pickle can execute arbitrary code while
# loading, so only point this at files produced by this project.
filepath = r'data/volatility indices.pkl'
with open(filepath, mode='rb') as f:
    volatility_data = pickle.loads(f.read())

### Data Preprocessing

### GARCH

### Hidden Markov Model

### GARCH