In [167]:
pip install -r requirements.txt

Collecting openpyxl (from -r requirements.txt (line 7))
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl->-r requirements.txt (line 7))
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [openpyxl]1/2[0m [openpyxl]
[1A[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
from urllib.request import urlopen
import certifi
import json
from config import FMP_API_KEY

import warnings
warnings.filterwarnings("ignore")

# Data Collection

In [None]:
def get_jsonparsed_data(url):
    """Download ``url`` and return its body parsed as JSON.

    Parameters
    ----------
    url : str
        Address to request.

    Returns
    -------
    object
        The value produced by ``json.loads`` for the UTF-8 decoded response
        body (typically a list or dict).

    Notes
    -----
    Exceptions raised by ``urlopen``, the UTF-8 decode or ``json.loads`` are
    not caught here and propagate to the caller.
    """
    import ssl  # local import so the function does not depend on another cell

    # `urlopen(..., cafile=...)` was deprecated in Python 3.6 and removed in
    # 3.13. The supported way to verify against certifi's CA bundle is to pass
    # an SSLContext built from it.
    context = ssl.create_default_context(cafile=certifi.where())

    # The context manager closes the connection even if read/parse fails.
    with urlopen(url, context=context) as response:
        payload = response.read().decode("utf-8")
    return json.loads(payload)

# Hand-maintained symbol universes; both files carry an 'FMP API Symbol' column.
idx = pd.read_csv('data/index_symbols.csv')
comm = pd.read_csv('data/commodity_symbols.csv')

# Equity indices: keep only the FMP listings whose symbol is in our universe.
url = f"https://financialmodelingprep.com/stable/index-list?apikey={FMP_API_KEY}"
index_listing = pd.DataFrame(get_jsonparsed_data(url))
in_index_universe = index_listing['symbol'].isin(idx['FMP API Symbol'])
fmp_idx = index_listing.loc[in_index_universe].reset_index(drop=True)
# FX pair needed to express the index in USD; None when it is already in USD.
fmp_idx['fx_symbol'] = fmp_idx['currency'].apply(
    lambda ccy: None if ccy == 'USD' else ccy + 'USD'
)

# Commodities: same filtering against the commodity universe.
url = f"https://financialmodelingprep.com/stable/commodities-list?apikey={FMP_API_KEY}"
commodity_listing = pd.DataFrame(get_jsonparsed_data(url))
in_commodity_universe = commodity_listing['symbol'].isin(comm['FMP API Symbol'])
fmp_comm = commodity_listing.loc[in_commodity_universe].reset_index(drop=True)

### Equity Index Data

In [None]:
# Download daily close/volume history for every equity index in `fmp_idx`,
# append the locally stored MSCI China series and save everything to
# data/index_data.csv with (symbol, field) MultiIndex columns.
index_frames = []
for symbol in fmp_idx['symbol']:
    url = f"https://financialmodelingprep.com/stable/historical-price-eod/full?symbol={symbol}&from=1990-01-01&to=2025-09-26&apikey={FMP_API_KEY}"
    temp = pd.DataFrame(get_jsonparsed_data(url))[['symbol', 'date', 'close', 'volume']]
    temp['date'] = pd.to_datetime(temp['date'])
    # Columns are labelled with the symbol echoed back by the API.
    label = temp['symbol'].iloc[0]
    temp = temp.drop(columns='symbol').set_index('date').sort_index()
    temp.columns = pd.MultiIndex.from_product([[label], temp.columns])
    index_frames.append(temp)

# A single concat avoids re-copying the growing frame on every iteration;
# sort_index guarantees chronological rows whatever the union order was.
df = pd.concat(index_frames, axis=1).sort_index()

# The last row of the spreadsheet is dropped (presumably a footer - TODO confirm).
msci = pd.read_excel('data/MSCI_China_Index.xlsx')[:-1]
msci['Date'] = pd.to_datetime(msci['Date'])
msci = msci.set_index('Date')
msci.columns = pd.MultiIndex.from_product([['MSCI_China (USD)'], ['close']])
# No volume is available for MSCI China. The placeholder column uses the same
# top-level label as the close column so the series is not split across two
# different symbols in the saved file.
msci[('MSCI_China (USD)', 'volume')] = np.nan

df.join(msci, how='left').to_csv('data/index_data.csv')

### Commodity Data

In [None]:
# Download daily close/volume history for every commodity in `fmp_comm`,
# append the locally stored nickel futures series and save everything to
# data/commodity_data.csv with (symbol, field) MultiIndex columns.
commodity_frames = []
for symbol in fmp_comm['symbol']:
    url = f"https://financialmodelingprep.com/stable/historical-price-eod/full?symbol={symbol}&from=1990-01-01&to=2025-09-26&apikey={FMP_API_KEY}"
    temp = pd.DataFrame(get_jsonparsed_data(url))[['symbol', 'date', 'close', 'volume']]
    temp['date'] = pd.to_datetime(temp['date'])
    # Columns are labelled with the symbol echoed back by the API.
    label = temp['symbol'].iloc[0]
    temp = temp.drop(columns='symbol').set_index('date').sort_index()
    temp.columns = pd.MultiIndex.from_product([[label], temp.columns])
    commodity_frames.append(temp)

# A single concat avoids re-copying the growing frame on every iteration;
# sort_index guarantees chronological rows whatever the union order was.
df = pd.concat(commodity_frames, axis=1).sort_index()

# Nickel comes from a local CSV with day-first dates; only its 'Price' column
# is kept, renamed to 'close' to match the API-sourced series.
nickel = pd.read_csv('data/Nickel_futures.csv', parse_dates=['Date'], dayfirst=True, index_col='Date')\
    .rename_axis('date').sort_index().rename(columns={'Price': 'close'})[['close']]
nickel.columns = pd.MultiIndex.from_product([['Nickel'], nickel.columns])
nickel[('Nickel', 'volume')] = np.nan  # placeholder so Nickel has the same fields
df.join(nickel, how='left').to_csv('data/commodity_data.csv')

### FX Data

In [244]:
# Download the daily close of every FX pair needed to convert the equity
# indices to USD and save one column per pair to data/fx_data.csv.
#
# Distinct pairs in order of first appearance. USD-denominated indices have no
# pair (None) and are dropped here, so a USD index in the first row can no
# longer trigger a request for symbol "None".
# (The variable keeps its original spelling in case other cells refer to it.)
fx_symbols_retieved = list(fmp_idx['fx_symbol'].dropna().unique())

fx_columns = []
for symbol in fx_symbols_retieved:
    url = f"https://financialmodelingprep.com/stable/historical-price-eod/full?symbol={symbol}&from=1990-01-01&to=2025-09-26&apikey={FMP_API_KEY}"
    temp = pd.DataFrame(get_jsonparsed_data(url))[['symbol', 'date', 'close']]
    temp['date'] = pd.to_datetime(temp['date'])
    # Each column is named after the pair symbol echoed back by the API.
    pair = temp['symbol'].iloc[0]
    fx_columns.append(temp.set_index('date')['close'].rename(pair).sort_index())

# A single concat avoids re-copying the growing frame on every iteration;
# sort_index guarantees chronological rows whatever the union order was.
df = pd.concat(fx_columns, axis=1).sort_index()

df.to_csv('data/fx_data.csv')

# Data Preprocessing

### Read Equity Index, Commodity, and FX data from saved csv files

In [261]:
# Every commodity series is denominated in USD.
# The file has two header rows (symbol, field) and dates in the first column.
commodity_data = pd.read_csv('data/commodity_data.csv', header=[0, 1], index_col=0)
commodity_data = commodity_data.set_axis(pd.to_datetime(commodity_data.index), axis=0)
commodity_data.tail()

Unnamed: 0_level_0,ALIUSD,ALIUSD,GCUSD,GCUSD,KEUSX,KEUSX,ZCUSX,ZCUSX,HGUSD,HGUSD,...,LEUSX,LEUSX,NGUSD,NGUSD,KCUSX,KCUSX,BZUSD,BZUSD,Nickel,Nickel
Unnamed: 0_level_1,close,volume,close,volume,close,volume,close,volume,close,volume,...,close,volume,close,volume,close,volume,close,volume,close,volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-09-22,2530.25,2612.0,3775.1,219170.0,502.25,31994.0,421.75,189769.0,4.632,22730.0,...,242.35,42757.0,2.806,163184.0,367.35,25299.0,66.57,24400.0,15197.13,
2025-09-23,2522.0,44.0,3815.7,291564.0,511.5,31994.0,426.25,189769.0,4.644,23732.0,...,241.125,26537.0,2.853,163184.0,350.15,25338.0,67.63,27893.0,15335.38,
2025-09-24,2533.5,2113.0,3768.1,236560.0,506.75,24625.0,424.25,145931.0,4.8135,72671.0,...,238.975,23186.0,2.858,70892.0,367.75,25338.0,69.31,28459.0,15413.13,
2025-09-25,2551.0,1.0,3771.1,262183.0,512.25,20671.0,425.75,146851.0,4.758,65306.0,...,236.05,28829.0,2.904,74214.0,371.35,18066.0,69.42,28459.0,15252.88,
2025-09-26,2544.75,1914.0,3809.0,214083.0,505.5,20671.0,422.0,146851.0,4.7715,35205.0,...,236.5,18874.0,3.206,168752.0,378.05,14512.0,70.13,21388.0,15150.38,


In [262]:
# The file has two header rows (symbol, field) and dates in the first column.
equity_index_data = pd.read_csv('data/index_data.csv', header=[0, 1], index_col=0)
equity_index_data = equity_index_data.set_axis(pd.to_datetime(equity_index_data.index), axis=0)
equity_index_data.tail()

Unnamed: 0_level_0,^GSPTSE,^GSPTSE,^TWII,^TWII,^AXJO,^AXJO,^GSPC,^GSPC,^N225,^N225,...,^NSEI,^NSEI,^NDX,^NDX,^GDAXI,^GDAXI,^FCHI,^FCHI,MSCI_China (USD),MSCI_China
Unnamed: 0_level_1,close,volume,close,volume,close,volume,close,volume,close,volume,...,close,volume,close,volume,close,volume,close,volume,close,volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-09-22,29959.0,352957619.0,25880.6,4316700.0,8810.9,611200.0,6693.74,5642620000.0,45493.66,116600000.0,...,25202.35,254509892.0,24761.07,1429210000.0,23527.05,53681300.0,7830.11,42599900.0,190.769091,
2025-09-23,29815.6,296641424.0,26247.37,4775000.0,8845.9,742600.0,6656.93,5633620000.0,,,...,25169.5,299204338.0,24580.17,1294683000.0,23611.33,48153500.0,7872.02,48991400.0,189.082017,
2025-09-24,29757.0,312079721.0,26196.73,4309500.0,8764.5,715300.0,6637.98,5459180000.0,45630.31,131700000.0,...,25056.9,244382281.0,24503.57,1223742000.0,23666.81,54163200.0,7827.45,52936900.0,192.635829,
2025-09-25,29732.0,293274800.0,26023.85,5318500.0,8773.0,837400.0,6604.73,5874670000.0,45754.93,127200000.0,...,24890.85,342534768.0,24397.31,1435051000.0,23534.83,55308500.0,7795.42,51878400.0,192.769367,
2025-09-26,29761.3,297599800.0,25580.32,4370400.0,8787.7,832800.0,6643.71,5103110000.0,45354.99,147500000.0,...,24654.7,291537949.0,24503.85,1235069000.0,23739.47,45110800.0,7870.68,52920700.0,189.959098,


In [263]:
# One column per FX pair; dates are in the first column.
fx_data = pd.read_csv('data/fx_data.csv', index_col=0)
fx_data = fx_data.set_axis(pd.to_datetime(fx_data.index), axis=0)
fx_data.tail()

Unnamed: 0_level_0,CADUSD,TWDUSD,AUDUSD,JPYUSD,KRWUSD,EURUSD,GBPUSD,CHFUSD,INRUSD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-09-22,0.723,0.033099,0.6598,0.006767,0.000715,1.18025,1.35123,1.2607,0.011323
2025-09-23,0.7224,0.032954,0.65978,0.006773,0.000714,1.18148,1.35242,1.2629,0.011258
2025-09-24,0.7191,0.032913,0.65818,0.006719,0.000708,1.1738,1.3446,1.2575,0.011256
2025-09-25,0.717,0.032698,0.654,0.006675,0.000705,1.1666,1.33405,1.2492,0.011261
2025-09-26,0.7167,0.032815,0.65441,0.006687,0.000706,1.17002,1.33985,1.2525,0.011272


### Converting all Equity Indices to USD
Note: MSCI China Index is already in USD as it was downloaded directly from MSCI's website

In [265]:
# Convert all close prices to USD for indices not already in USD.
# Work on a copy so re-running this cell never compounds the conversion.
usd_equity_index_data = equity_index_data.copy()

# Iterate over the two needed columns directly. Using `idx` as the loop
# variable would overwrite the notebook-level `idx` symbol table and break a
# re-run of the data-collection cell.
for symbol, fx_symbol in zip(fmp_idx['symbol'], fmp_idx['fx_symbol']):
    close_col = (symbol, 'close')
    # No fx_symbol means the index is already quoted in USD.
    if not fx_symbol:
        continue
    # Skip indices or FX pairs that are missing from the loaded data.
    if close_col not in usd_equity_index_data.columns or fx_symbol not in fx_data.columns:
        continue
    # Align the FX rate to the index dates; dates without a rate become NaN.
    fx_series = fx_data[fx_symbol].reindex(usd_equity_index_data.index)
    usd_equity_index_data[close_col] = usd_equity_index_data[close_col] * fx_series

usd_equity_index_data.tail()

Unnamed: 0_level_0,^GSPTSE,^GSPTSE,^TWII,^TWII,^AXJO,^AXJO,^GSPC,^GSPC,^N225,^N225,...,^NSEI,^NSEI,^NDX,^NDX,^GDAXI,^GDAXI,^FCHI,^FCHI,MSCI_China (USD),MSCI_China
Unnamed: 0_level_1,close,volume,close,volume,close,volume,close,volume,close,volume,...,close,volume,close,volume,close,volume,close,volume,close,volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-09-22,21660.357,352957619.0,856.621979,4316700.0,5813.43182,611200.0,6693.74,5642620000.0,307.855597,116600000.0,...,285.366209,254509892.0,24761.07,1429210000.0,27767.800762,53681300.0,9241.487327,42599900.0,190.769091,
2025-09-23,21538.78944,296641424.0,864.955831,4775000.0,5836.347902,742600.0,6656.93,5633620000.0,,,...,283.358231,299204338.0,24580.17,1294683000.0,27896.314168,48153500.0,9300.63419,48991400.0,189.082017,
2025-09-24,21398.2587,312079721.0,862.212974,4309500.0,5768.61861,715300.0,6637.98,5459180000.0,306.590053,131700000.0,...,282.040466,244382281.0,24503.57,1223742000.0,27780.101578,54163200.0,9187.86081,52936900.0,192.635829,
2025-09-25,21317.844,293274800.0,850.927847,5318500.0,5737.542,837400.0,6604.73,5874670000.0,305.414158,127200000.0,...,280.295862,342534768.0,24397.31,1435051000.0,27455.732678,55308500.0,9094.136972,51878400.0,192.769367,
2025-09-26,21329.92371,297599800.0,839.418201,4370400.0,5750.758757,832800.0,6643.71,5103110000.0,303.288818,147500000.0,...,277.907778,291537949.0,24503.85,1235069000.0,27775.654689,45110800.0,9208.853014,52920700.0,189.959098,


### Computing Derived Data Fields