This notebook was created by Donna Faith Go.

In [1]:
# standard imports
import matplotlib.pyplot as plt
import pandas as pd 
import pickle
import numpy as np
import seaborn as sns
from typing import Tuple

# webscraping
import requests
from bs4 import BeautifulSoup

# data gathering
import yfinance as yf
import time
import pandas_datareader.data as web
from datetime import datetime, timedelta

# statsmodels
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss

# GARCH model
from arch import arch_model

# dynamic time warping
from dtaidistance import dtw
from sklearn.preprocessing import StandardScaler

# ignore warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## Comparison of Chinese Stocks and Philippine Stocks

## Data Gathering

In [2]:
# getting closing prices for the 30 stocks with batching
start_date = '2000-01-01'
end_date = '2026-01-01'

def download_stocks_in_batches(tickers, batch_size=5, delay=1):
    """
    Download stock data in batches to avoid rate limiting
    """
    all_data = {}
    
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        print(f"Downloading batch {i//batch_size + 1}: {batch}")
        
        try:
            # Download the batch
            batch_data = yf.download(
                batch,
                start=start_date,
                end=end_date,
                progress=False
            )
            
            # Extract closing prices for this batch
            if not batch_data.empty and 'Close' in batch_data.columns:
                closes = batch_data['Close']
                if isinstance(closes, pd.Series):
                    all_data[batch[0]] = closes
                else:
                    for ticker in closes.columns:
                        all_data[ticker] = closes[ticker]
                print(f"Successfully downloaded {len(batch)} stocks")
            else:
                print(f"No data returned for batch: {batch}")
            
        except Exception as e:
            print(f"Error downloading batch {batch}: {e}")
        
        # Add delay to avoid rate limiting
        if i + batch_size < len(tickers):
            print(f"Waiting {delay} seconds before next batch...")
            time.sleep(delay)
    
    if all_data:
        return pd.DataFrame(all_data)
    else:
        return pd.DataFrame()

In [6]:
# Download the philippine index
closing_df = download_stocks_in_batches(
    ['PSEI.PS'], 
    batch_size=5, 
    delay=5
)

if not closing_df.empty:
    closing_df.to_pickle('data/philippine index.pkl')

Downloading batch 1: ['PSEI.PS']
Successfully downloaded 1 stocks


In [7]:
# Download the chinese and philippine indices
chinese_indices = [
    # 中国内地 (China Mainland)
    '000001.SS', '399001.SZ', '399006.SZ', '000300.SS',
    '000905.SS', '000852.SS', '399673.SZ', '000016.SS',
    '000688.SS',
    
    # 香港 (Hong Kong)
    '^HSI', '^HSCE', '^HSCC', '^HSTECH', 'GOVT.HK',
    
    # 台湾 (Taiwan)
    '^TWII', '^TPEx', '0050.TW', '006208.TW',
    
    # 新加坡 (Singapore) - 与中国相关指数
    'STI.SI', 'F3E.SI', 'XINA50.SI',
    
    # 美国 (US) - 中国相关ETF
    'FXI', 'KWEB', 'CQQQ', 'MCHI', 'GXC',
]

closing_df = download_stocks_in_batches(
    chinese_indices, 
    batch_size=5, 
    delay=5
)

if not closing_df.empty:
    closing_df.to_pickle('data/chinese indices.pkl')

Downloading batch 1: ['000001.SS', '399001.SZ', '399006.SZ', '000300.SS', '000905.SS']



2 Failed downloads:
['399006.SZ', '000905.SS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2000-01-01 -> 2026-01-01)')


Successfully downloaded 5 stocks
Waiting 5 seconds before next batch...
Downloading batch 2: ['000852.SS', '399673.SZ', '000016.SS', '000688.SS', '^HSI']



4 Failed downloads:
['000852.SS', '000016.SS', '399673.SZ', '000688.SS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2000-01-01 -> 2026-01-01)')


Successfully downloaded 5 stocks
Waiting 5 seconds before next batch...
Downloading batch 3: ['^HSCE', '^HSCC', '^HSTECH', 'GOVT.HK', '^TWII']



2 Failed downloads:
['GOVT.HK', '^HSTECH']: YFTzMissingError('possibly delisted; no timezone found')


Successfully downloaded 5 stocks
Waiting 5 seconds before next batch...
Downloading batch 4: ['^TPEx', '0050.TW', '006208.TW', 'STI.SI', 'F3E.SI']



3 Failed downloads:
['F3E.SI', '^TPEX', 'STI.SI']: YFTzMissingError('possibly delisted; no timezone found')


Successfully downloaded 5 stocks
Waiting 5 seconds before next batch...
Downloading batch 5: ['XINA50.SI', 'FXI', 'KWEB', 'CQQQ', 'MCHI']



1 Failed download:
['XINA50.SI']: YFTzMissingError('possibly delisted; no timezone found')


Successfully downloaded 5 stocks
Waiting 5 seconds before next batch...
Downloading batch 6: ['GXC']
Successfully downloaded 1 stocks


In [9]:
# store data in variables
filepath = r'data/chinese indices.pkl'
with open(filepath, 'rb') as f:
    chinese_data = pickle.load(f)

filepath = r'data/philippine index.pkl'
with open(filepath, 'rb') as f:
    psei_data = pickle.load(f)

# remove the invalid tickers
chinese_data.dropna(how='all', axis=1, inplace=True)
psei_data.dropna(how='all', axis=1, inplace=True)

## Data Preprocessing

## Dynamic Time Warping