This notebook was created by Donna Faith Go.

In [1]:
import sys
!{sys.executable} -m pip install dtw-python

Collecting dtw-python
  Downloading dtw_python-1.7.2-cp312-cp312-win_amd64.whl.metadata (7.6 kB)
Downloading dtw_python-1.7.2-cp312-cp312-win_amd64.whl (366 kB)
Installing collected packages: dtw-python
Successfully installed dtw-python-1.7.2




In [1]:
# standard library
import pickle
import time
import warnings
from pathlib import Path
from typing import Tuple

# data handling and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# webscraping
import requests
from bs4 import BeautifulSoup

# data gathering
import yfinance as yf

# statsmodels
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss

# ignore warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## Step 1: Data Gathering

### world indices

In [5]:
# setting up
url = "https://finance.yahoo.com/markets/world-indices/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"
}

# fetch and parse
# the timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops the cell on an HTTP error instead of quietly
# parsing an error page into an empty table
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "lxml")

# get rows: keep the text of the first column of every data row and skip
# rows that have no such cell (they used to be stored as [None])
rows = soup.select('[class*="data-row"]')
stock_indices = []
for item in rows:
    symbol_cell = item.select_one('.data-col0')
    if symbol_cell is None:
        continue
    symbol = symbol_cell.get_text(strip=True)
    stock_indices.append([symbol])

### yfinance

In [2]:
# default date range for the closing prices downloaded in batches
start_date = '2013-01-01'# '2019-01-01'
end_date = '2026-01-01' #'2025-08-01'

def download_stocks_in_batches(tickers, batch_size=5, delay=1, start=None, end=None):
    """
    Download closing prices in batches to avoid rate limiting.

    Parameters
    ----------
    tickers : sequence of str
        Symbols to request from `yf.download`.
    batch_size : int
        Number of symbols requested per call (must be at least 1).
    delay : float
        Seconds to sleep between two consecutive batches.
    start, end : str, optional
        Date range forwarded to `yf.download`. When omitted, the
        module-level `start_date` / `end_date` are used.

    Returns
    -------
    pd.DataFrame
        One column of closing prices per ticker that returned at least one
        price; an empty frame when nothing could be downloaded.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # fall back to the notebook-wide date range when none is given
    start = start_date if start is None else start
    end = end_date if end is None else end

    tickers = list(tickers)
    all_data = {}

    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        print(f"Downloading batch {i//batch_size + 1}: {batch}")

        try:
            # Download the batch
            batch_data = yf.download(
                batch,
                start=start,
                end=end,
                progress=False
            )

            # Extract closing prices for this batch
            if not batch_data.empty and 'Close' in batch_data.columns:
                closes = batch_data['Close']
                if isinstance(closes, pd.Series):
                    # a single ticker may come back as a Series; give it
                    # the same one-column-per-ticker layout as a batch
                    closes = closes.to_frame(name=batch[0])

                downloaded = []
                for ticker in closes.columns:
                    # a column without a single price carries no data and
                    # would wipe every row in a later dropna(how='any')
                    if closes[ticker].isna().all():
                        continue
                    all_data[ticker] = closes[ticker]
                    downloaded.append(ticker)

                # report what actually arrived, not what was requested
                print(f"Successfully downloaded {len(downloaded)} stocks")
                missing = [ticker for ticker in batch if ticker not in downloaded]
                if missing:
                    print(f"No data returned for: {missing}")
            else:
                print(f"No data returned for batch: {batch}")

        except Exception as e:
            # best effort: a failing batch must not abort the other batches
            print(f"Error downloading batch {batch}: {e}")

        # Add delay to avoid rate limiting
        if i + batch_size < len(tickers):
            print(f"Waiting {delay} seconds before next batch...")
            time.sleep(delay)

    if all_data:
        return pd.DataFrame(all_data)
    return pd.DataFrame()

In [3]:
# # Download the volatility indices
# closing_df = download_stocks_in_batches(
#     ['^VIX'], 
#     batch_size=5, 
#     delay=5
# )

# if not closing_df.empty:
#     closing_df.to_pickle('data/volatility indices.pkl')

In [4]:
# major_stocks = [
#     'PSEI.PS', # philippines
#     '^N225', '000001.SS', '^NSEI','^HSI',# asia
#     '^DJI', '^GSPC', '^IXIC', # united states
#     '^GDAXI', '^FTSE', '^STOXX50E' # europe
# ]

# # Download the major stock indices
# closing_df = download_stocks_in_batches(
#     major_stocks, 
#     batch_size=5, 
#     delay=5
# )

# if not closing_df.empty:
#     closing_df.to_pickle('data/major stock indices.pkl')

In [5]:
# get the major stock indices data
# NOTE: unpickling can execute arbitrary code, so only point this at the file
# produced by the download cell of this notebook (written with `to_pickle`);
# `pd.read_pickle` is the matching reader for that writer
filepath = r'data/major stock indices.pkl'
indices_data = pd.read_pickle(filepath)

In [6]:
# get the volatility data
# NOTE: unpickling can execute arbitrary code, so only point this at the file
# produced by the download cell of this notebook (written with `to_pickle`);
# `pd.read_pickle` is the matching reader for that writer
filepath = r'data/volatility indices.pkl'
volatility_data = pd.read_pickle(filepath)

## Step 2: EDA

In [7]:
# count the missing closing prices in each index column
print(indices_data.isna().sum(axis=0))

000001.SS    233
PSEI.PS      222
^DJI         117
^N225        210
^NSEI        192
^FTSE        103
^GDAXI        93
^GSPC        117
^HSI         190
^STOXX50E    129
dtype: int64


There are many nulls in the dataset (between 93 and 233 per index), presumably because the exchanges do not all trade on the same days.
These nulls have to be excluded from the L1 and L2 norm analysis of each index when it is compared to the VIX.

## Step 3: Data Preprocessing

### null handling

In [8]:
# keep only the dates on which every index has a closing price
# (rebinding instead of inplace=True so the loaded frame is not mutated in
# place and re-running this cell is harmless)
indices_data = indices_data.dropna(how='any', axis=0)

### real closing prices

### stationarity

In [9]:
# making data stationary
# tau_val is the lag, in rows, between the two prices of one return; both
# return series use it so they stay comparable when the lag is changed
tau_val = 1
log_returns = np.log(indices_data).diff(tau_val).dropna()
simple_returns = 100 * indices_data.pct_change(periods=tau_val).dropna()

## Step 4: Gidea and Katz

In [10]:
class methodology:
    """Holder for the index data and the window sizes of the analysis.

    The class is still a skeleton: `get_l1_l2_norms` has no implementation
    yet and returns None.
    """

    def __init__(self, indices_data, windows):
        """Store the two inputs unchanged on the instance."""
        self.windows = windows
        self.indices_data = indices_data

    def get_l1_l2_norms(self):
        """Placeholder; computes nothing and returns None."""
        return None

### Embedding data

In [11]:
def create_xy(series: pd.Series, lookback: int = 12, horizon: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Slice a 1-D time series into lagged feature rows and forecast targets.

    Row ``i`` of X holds the ``lookback`` consecutive values starting at
    position ``i``; the ``horizon`` values that follow it are its targets.

    Parameters:
    ----------
    series (pd.Series): the 1-D time series to be sliced into X and y
    lookback (int): the number of consecutive values per row of X (default=12)
    horizon (int): the number of points to forecast for each row in X (default=1)

    Returns:
    -------
    (X, y) (Tuple): X has shape (n_samples, lookback) and y is the flattened
        (n_samples * horizon,) array of targets, where
        n_samples = len(series) - lookback - horizon + 1. When the series is
        too short for a single sample, X is (0, lookback) and y is (0,).

    Raises:
    ------
    ValueError: if `lookback` or `horizon` is smaller than 1, or if `series`
        is not one-dimensional.
    """
    if lookback < 1 or horizon < 1:
        raise ValueError(
            f"lookback and horizon must be at least 1, got {lookback} and {horizon}"
        )

    values = np.asarray(series)
    if values.ndim != 1:
        raise ValueError(f"series must be one-dimensional, got {values.ndim} dimensions")

    # every sample needs `lookback` inputs followed by `horizon` targets
    n_samples = values.shape[0] - lookback - horizon + 1
    if n_samples <= 0:
        # too short for one full sample: return empty, correctly shaped arrays
        # instead of failing while stacking an empty list
        return (
            np.empty((0, lookback), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    sliding_window_view = np.lib.stride_tricks.sliding_window_view

    # the trailing windows have no complete target, so only the first
    # n_samples are kept; copy() detaches the result from the read-only view
    x = sliding_window_view(values, lookback)[:n_samples].copy()

    # targets start right after each window; flatten() returns a fresh array
    y = sliding_window_view(values[lookback:], horizon).flatten()

    return x, y

In [12]:
# window length used to embed every index
dim_size = 25

# ticker labels, in the column order of the price table
stocks = np.array(list(indices_data.columns))

# one matrix of lagged log returns per index; the forecast targets that
# create_xy also returns are not needed here
embedded_data = np.array([
    create_xy(log_returns[col], dim_size)[0]
    for col in indices_data.columns
])

In [13]:
# sanity check of the embedding layout: (n_indices, n_windows, dim_size)
embedded_data.shape

(10, 2481, 25)

### Volatility Index

In [14]:
def get_l1_l2_norms(data):
    """Return the L1 and L2 norm of every entry of `data`, ignoring NaNs.

    `data` is indexed by position (`data[0]` ... `data[len(data) - 1]`);
    each entry is stripped of its NaN values before the norms are taken.

    Returns a tuple `(l1_norms, l2_norms)` of two lists with one value per
    entry of `data`.
    """
    # drop the NaNs of each entry once, then reuse the cleaned vectors
    cleaned = []
    for position in range(len(data)):
        entry = data[position]
        cleaned.append(entry[~np.isnan(entry)])

    l1_norms = [np.linalg.norm(vector, ord=1) for vector in cleaned]
    l2_norms = [np.linalg.norm(vector, ord=2) for vector in cleaned]
    return l1_norms, l2_norms

In [15]:
# get lp norms
# label every window with the date of its last return; indexing the returns
# (not the prices) keeps the labels aligned for any value of tau_val and
# gives the same dates as before when tau_val is 1
dates = log_returns.index[dim_size - 1:]
vix_data = volatility_data[['^VIX']]

# savefig does not create folders, so make sure the target exists first
figure_dir = Path('figures')
figure_dir.mkdir(parents=True, exist_ok=True)

for idx in range(embedded_data.shape[0]):
    l1, l2 = get_l1_l2_norms(embedded_data[idx])
    stock = stocks[idx]
    plot_dates = dates[:len(l1)]

    # plot the norms (left axis) and the VIX (right axis)
    fig, ax1 = plt.subplots(figsize=(15, 5))
    ax1.plot(plot_dates, l1, label="L1 Norms")
    ax1.plot(plot_dates, l2, label="L2 Norms")
    ax1.set_xlabel("Date")
    ax1.set_ylabel("Lp Norms")
    # ax1.set_ylim(0, tau_val + 1)

    ax2 = ax1.twinx()
    ax2.plot(
        vix_data.index, vix_data.values, label="VIX",
        alpha=0.3, color='green', linestyle='-.'
    )
    ax2.set_ylabel("VIX")

    # Combine legends
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc="upper right")

    # work on the figure and axes objects directly instead of pyplot's
    # implicit "current figure" state
    title = f'Lp Norms and VIX of the {stock} with Lag {tau_val} and Window {dim_size}'
    ax1.set_title(title)
    fig.tight_layout()
    fig.savefig(figure_dir / f'{title}.png')
    # close each figure so the loop does not accumulate open figures
    plt.close(fig)

## Dynamic Time Warping

## Results and Discussion

## Conclusion