In [3]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt

# Functions
# Add the directory containing the module to sys.path
sys.path.append(os.path.abspath('functions'))
from preprocessing import read_csv_files, check_missing_values, forward_fill, backward_fill, linear_interpolation, calculate_volatility, adf_test, check_date_range, extract_date_range, map_date_range, create_volatility_df, stationary_transformation
from spillover import calculate_avg_spillover_table, calculate_net_pairwise_spillover_table

## Data Preprocessing

In [4]:
# Retrieve a list of DataFrames
dataframes = read_csv_files("data")

In [5]:
# Convert each column in every DataFrame to lowercase
for key in dataframes:
    dataframes[key].columns = map(str.lower, dataframes[key].columns)

Converting the `date` column to datetime object, this process has to be handled individually since each DataFrame has a different date format. We cannot let Pandas infers the date format for each DataFrame since it can be prone to infer the wrong format. Below are the format of each DataFrame:
- Philippines: MM/DD/YYYY
- Singapore: MM/DD/YYYY
- India: YYYY-MM-DD
- United Kingdom: DD/MM/YYYY
- Mexico: YYYY-MM-DD
- Japan: YYYY-MM-DD
- Vietnam: DD/MM/YYYY
- Korea: YYYY-MM-DD
- Thailand: YYYY-MM-DD
- Brazil: YYYY-MM-DD
- Malaysia: DD/MM/YYYY
- Switzerland: YYYY-MM-DD
- China: DD/MM/YYYY
- Russia: YYYY-MM-DD
- United States: YYYY-MM-DD

In [10]:
date_format_mapping = {
  'philippines': '%m/%d/%Y',
  'singapore': '%m/%d/%Y',
  'india': '%Y-%m-%d',
  'uk': '%d/%m/%Y',
  'mexico': '%Y-%m-%d',
  'japan': '%Y-%m-%d',
  'vietnam': '%d/%m/%Y',
  'korea': '%Y-%m-%d',
  'thailand': '%Y-%m-%d',
  'brazil': '%Y-%m-%d',
  'malaysia': '%d/%m/%Y',
  'switzerland': '%Y-%m-%d',
  'china': '%d/%m/%Y',
  'russia': '%Y-%m-%d',
  'us': '%Y-%m-%d',
}

# Convert the date columns to datetime objects
for key in dataframes:
    try:
      dataframes[key]['date'] = pd.to_datetime(
        dataframes[key]['date'], 
        format=date_format_mapping[key]
      )
    except Exception as e:
      print(f"Error occurred for country: {key}")
      print(f"Error message: {str(e)}")

In [6]:
# Sort the dataframes by date in ascending order
for key in dataframes:
    dataframes[key] = dataframes[key].sort_values(by='date')

In [7]:
# Reset the index of the dataframes
for key in dataframes:
    dataframes[key] = dataframes[key].reset_index(drop=True)

In [8]:
# Extract only open, high, low, close columns
for key in dataframes:
    dataframes[key] = dataframes[key][['date', 'open', 'high', 'low', 'close']]

In [9]:
for df in dataframes.values():
    print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3055 entries, 0 to 3054
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    3055 non-null   object
 1   open    3055 non-null   object
 2   high    3055 non-null   object
 3   low     3055 non-null   object
 4   close   3055 non-null   object
dtypes: object(5)
memory usage: 119.5+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3181 entries, 0 to 3180
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    3181 non-null   object
 1   open    3181 non-null   object
 2   high    3181 non-null   object
 3   low     3181 non-null   object
 4   close   3181 non-null   object
dtypes: object(5)
memory usage: 124.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4101 entries, 0 to 4100
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date 

Since the columns open, high, low, close have different dtypes (`float64` and `object`) for different DataFrame, they should be converted to `float64`.

In [11]:
# Convert open, high, low, close columns to float
for key in dataframes:
  for col in ['open', 'high', 'low', 'close']:
    if dataframes[key][col].dtype == 'object':
      dataframes[key][col] = dataframes[key][col].str.replace(',', '').astype(float)


In [12]:
# Check whether the columns have been converted to float
for df in dataframes.values():
    print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3055 entries, 0 to 3054
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    3055 non-null   datetime64[ns]
 1   open    3055 non-null   float64       
 2   high    3055 non-null   float64       
 3   low     3055 non-null   float64       
 4   close   3055 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 119.5 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3181 entries, 0 to 3180
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    3181 non-null   datetime64[ns]
 1   open    3181 non-null   float64       
 2   high    3181 non-null   float64       
 3   low     3181 non-null   float64       
 4   close   3181 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 124.4 KB
None
<class 'pandas.core.frame.DataFrame'>
Ra

##### Data imputation

This section aims to handle the missing values present in each DataFrame. Since these stock datasets will eventually be fed into a Vector Auto-regressive (VAR) model, it is quite important to choose a data imputation method that will preserve the temporal and cross-sectional relationships among countries.

Data imputation will be performed in two separate sections since one of our following step will create more missing values. A suitable method is chosen for each step, details are below:

1. Using linear interpolation to fill in the NA values originally presented in each dataset.
2. Dividing the imputed datasets from step 1 into 5 windows as defined in the Window Extraction step, resulting in 5 dictionaries containing DataFrames of countries with available data in each period.
3. The goal of this step is to normalize the date range in each time window since every country has their own public holidays, making the number of available daily trading data different for each country. Performing this step will ensure that holidays data will be included and the number of daily data available for each country within a time window is the same. For each DataFrame in a time window, map the available data into a new empty DataFrame with a date column containing every business days (excluding public holidays) of the associated period. Then, missing data from the mapped DataFrame will be imputed using forward filling method.
4. Calculate daily volatility for each DataFrame in every time window.
5. Create a single DataFrame for each time window with each column representing a country's volatility calculation.


In [13]:
check_missing_values(dataframes)

Missing values for philippines:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for singapore:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for india:
date      0
open     39
high     39
low      39
close    39
dtype: int64


Missing values for uk:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for mexico:
date      0
open     49
high     49
low      49
close    49
dtype: int64


Missing values for japan:
date      0
open     93
high     93
low      93
close    93
dtype: int64


Missing values for vietnam:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for korea:
date      0
open     66
high     66
low      66
close    66
dtype: int64


Missing values for thailand:
date      0
open     73
high     73
low      73
close    73
dtype: int64


Missing values for brazil:
date       0
open     462
high     462
low      462
close    462
dtype:

In [14]:
# Impute missing values in the dataframes using linear interpolation method
dataframes = linear_interpolation(dataframes)

In [15]:
# Check whether the missing values have been imputed
check_missing_values(dataframes)

Missing values for philippines:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for singapore:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for india:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for uk:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for mexico:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for japan:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for vietnam:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for korea:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for thailand:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for brazil:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for malaysi

##### Window extraction

In this section, I will perform window extraction for the following time period:
- **Window 1:** 02.01.2002 to 17.09.2007
- **Window 2:** 18.09.2007 to 27.10.2011 (India's dataset starts from 18.09.2007)
- **Window 3:** 28.10.2011 to 31.12.2018 (Philippines's dataset starts from 28.10.2011)
- **Window 4:** 02.01.2019 to 31.12.2022 
- **Window 5:** 02.01.2023 to 30.04.2024 

##### 02.01.2002 - 17.09.2007: Recovery period from Dot-com Bubble

In [16]:
# Check whether each DataFrame contains data from 02.01.2002 to 17.09.2007
window_1_start_date = '2002-01-02'
window_1_end_date = '2007-09-17'
check_date_range(dataframes, window_1_start_date, window_1_end_date)

{'philippines': False,
 'singapore': False,
 'india': False,
 'uk': True,
 'mexico': True,
 'japan': True,
 'vietnam': False,
 'korea': True,
 'thailand': True,
 'brazil': False,
 'malaysia': False,
 'switzerland': True,
 'china': True,
 'russia': False,
 'us': True}

The following countries do not have data available for this time period:
1. Philippine
2. Singapore
3. India
4. Vietnam
5. Brazil
6. Malaysia
7. Russia

These 7 countries will be be considered in VAR experiment for this time period, with the addition of China since it only has data from 2005.

In [17]:
dfs_window_1 = extract_date_range(dataframes, window_1_start_date, window_1_end_date)

# Check whether each DataFrame contains data from 02.01.2002 to 17.09.2007
dfs_window_1

{'philippines': Empty DataFrame
 Columns: [date, open, high, low, close]
 Index: [],
 'singapore': Empty DataFrame
 Columns: [date, open, high, low, close]
 Index: [],
 'india': Empty DataFrame
 Columns: [date, open, high, low, close]
 Index: [],
 'uk':            date    open    high     low   close
 0    2002-02-01  5164.8  5227.4  5164.8  5189.7
 1    2005-02-01  4852.3  4906.2  4852.3  4906.2
 2    2006-02-01  5760.3  5816.0  5746.2  5801.6
 3    2007-02-01  6203.1  6300.3  6203.1  6282.2
 17   2002-03-01  5101.0  5172.3  5101.0  5169.0
 ...         ...     ...     ...     ...     ...
 5568 2005-10-31  5213.4  5318.4  5213.4  5317.3
 5569 2006-10-31  6126.8  6149.9  6110.9  6129.2
 5582 2002-12-31  3900.6  3949.1  3890.7  3940.4
 5583 2003-12-31  4470.4  4491.8  4470.4  4476.9
 5584 2004-12-31  4820.1  4822.3  4801.1  4814.3
 
 [1443 rows x 5 columns],
 'mexico':            date          open          high           low         close
 0    2002-01-02   6386.180176   6415.850098   6

In [20]:
# Remove countries that do not have data for the specified date range (02.01.2002 to 17.09.2007) from the dictionary
for key in list(dfs_window_1.keys()):
  if dfs_window_1[key].empty:
    del dfs_window_1[key]

# Remove the 'china' DataFrame from the dictionary
del dfs_window_1['china']

In [18]:
len(dfs_window_1)

15

In [19]:
# Map dfs_window_1 to a new date range
dfs_window_1 = map_date_range(dfs_window_1, window_1_start_date, window_1_end_date)

In [21]:
# Check for missing values in the dfs_window_1
check_missing_values(dfs_window_1)

Missing values for philippines:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for singapore:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for india:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for uk:
date      0
open     46
high     46
low      46
close    46
dtype: int64


Missing values for mexico:
date     0
open     5
high     5
low      5
close    5
dtype: int64


Missing values for japan:
date      0
open     13
high     13
low      13
close    13
dtype: int64


Missing values for vietnam:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for korea:
date      0
open     21
high     21
low      21
close    21
dtype: int64


Missing values for thailand:
date      0
open     27
high     27
low      27
close    27
dtype: int64


Missing values for brazil:
date        0
op

In [22]:
# Impute missing values in dfs_window_1 using forward fill method
dfs_window_1 = forward_fill(dfs_window_1)

  df.fillna(method="ffill", inplace=True)


In [23]:
# Check for missing values in the dfs_window_1
check_missing_values(dfs_window_1)

Missing values for philippines:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for singapore:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for india:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for uk:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for mexico:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for japan:
date     0
open     2
high     2
low      2
close    2
dtype: int64


Missing values for vietnam:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for korea:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for thailand:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for brazil:
date        0
open     1489
high    

Japan still has 2 missing values. These missing values are probably the first two days of the period. Let's use backward fill to handle these two missing values.

In [24]:
dfs_window_1 = backward_fill(dfs_window_1)

  df.fillna(method="bfill", inplace=True)


In [25]:
# Check for missing values in the dfs_window_1 again to make sure the missing values have been imputed
check_missing_values(dfs_window_1)

Missing values for philippines:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for singapore:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for india:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for uk:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for mexico:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for japan:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for vietnam:
date        0
open     1489
high     1489
low      1489
close    1489
dtype: int64


Missing values for korea:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for thailand:
date     0
open     0
high     0
low      0
close    0
dtype: int64


Missing values for brazil:
date        0
open     1489
high    

In [26]:
# Calculate the volatility for each country
dfs_window_1 = calculate_volatility(dfs_window_1)

In [27]:
# Create volatility DataFrame for window 1
df_window_1 = create_volatility_df(dfs_window_1)

In [28]:
# Print out the first 5 rows of the volatility DataFrame
df_window_1.head()

Unnamed: 0,date,philippines,singapore,india,uk,mexico,japan,vietnam,korea,thailand,brazil,malaysia,switzerland,russia,us
0,2002-01-02,,,,17.955641,5.943942,19.791884,,51.994083,12.228153,,,20.881106,,21.664973
1,2002-01-03,,,,18.456913,21.957613,19.791884,,25.862243,17.854374,,,12.890345,,8.171472
2,2002-01-04,,,,12.755441,10.922138,19.791884,,21.446678,16.412695,,,11.019224,,13.93611
3,2002-01-07,,,,19.064627,14.29972,16.900705,,41.70036,18.717009,,,17.311078,,14.120694
4,2002-01-08,,,,15.443547,10.420974,17.203246,,22.994618,12.864248,,,12.207484,,11.487229


Performing ADF Stationarity Test

In [30]:
for country in df_window_1.drop(columns=['date']).columns:
    adf_test(df_window_1, country)

MissingDataError: exog contains inf or nans

In [None]:
df_window_1 = stationary_transformation(df_window_1, 'switzerland')

In [None]:
df_window_1 = df_window_1.dropna()

In [None]:
df_window_1.head()

In [None]:
# Peform ADF test again to make sure that the data is stationary
for country in df_window_1.drop(columns=['date']).columns:
    adf_test(df_window_1, country)

##### 18.09.2007 - 27.10.2011: Global Financial Crisis

In [None]:
# Check whether each DataFrame contains data from 18.09.2007 to 27.10.2011
window_2_start_date = '2007-09-18'
window_2_end_date = '2011-10-27'
check_date_range(dataframes, window_2_start_date, window_2_end_date)

Most countries have data from 18.09.2007 to 27.10.2011, with the exception of Philippines and Russia. In addition, there are Singapore, Vietnam, Brazil, and Malaysia which only has data from 07.03.2011, 05.01.2009, 24.02.2010, and 20.05.2010 respectively. These 4 countries will be remove from this time window.

In [None]:
dfs_window_2 = extract_date_range(dataframes, window_2_start_date, window_2_end_date)

# Check whether each DataFrame contains data from 18.09.2007 to 27.10.2011
dfs_window_2

In [None]:
# Remove countries that do not have data for the specified date range (02.01.2002 to 17.09.2007) from the dictionary
addtional_columns = ['singapore', 'vietnam', 'brazil', 'malaysia']

for key in list(dfs_window_2.keys()):
  if dfs_window_2[key].empty or key in addtional_columns:
    del dfs_window_2[key]

In [None]:
len(dfs_window_2)

In [None]:
# Map dfs_window_2 to a new date range
dfs_window_2 = map_date_range(dfs_window_2, window_2_start_date, window_2_end_date)

In [None]:
# Check for missing values in the dfs_window_2
check_missing_values(dfs_window_2)

In [None]:
# Perform forward filling to fill in the missing values in dfs_window_2
dfs_window_2 = forward_fill(dfs_window_2)

In [None]:
# Check for missing values in the dfs_window_2
check_missing_values(dfs_window_2)

In [None]:
# Calculate the volatility for each country
dfs_window_2 = calculate_volatility(dfs_window_2)

In [None]:
# Create volatility DataFrame for window 2
df_window_2 = create_volatility_df(dfs_window_2)

In [None]:
df_window_2.head()

Performing ADF test

In [None]:
for country in df_window_2.drop(columns=['date']).columns:
    adf_test(df_window_2, country)

Since all the variables are stationary, no transformation is needed.

##### 28.10.2011 - 31.12.2018: Recovery from Global Financial Crisis

In [None]:
# Check whether each DataFrame contains data from 28.10.2011 to 31.12.2018
window_3_start_date = '2011-10-28'
window_3_end_date = '2018-12-31'
check_date_range(dataframes, window_3_start_date, window_3_end_date)

Most DataFrame appears to have data from 28.10.2011 to 31.12.2018, except from Russia, which only has data from 2013. Russia will be removed from this time window.

In [None]:
dfs_window_3 = extract_date_range(dataframes, window_3_start_date, window_3_end_date)

# Check whether each DataFrame contains data from 28.10.2011 - 31.12.2018
dfs_window_3

In [None]:
del dfs_window_3['russia']

In [None]:
len(dfs_window_3)

In [None]:
# Map dfs_window_3 to a new date range
dfs_window_3 = map_date_range(dfs_window_3, window_3_start_date, window_3_end_date)

In [None]:
# Check for missing values in the dfs_window_3
check_missing_values(dfs_window_3)

In [None]:
# Peform forward fill to handle the missing values
dfs_window_3 = forward_fill(dfs_window_3)

In [None]:
# Check for missing values in the dfs_window_3 to make sure there is no missing values left
check_missing_values(dfs_window_3)

In [None]:
# Calculate the volatility for each country
dfs_window_3 = calculate_volatility(dfs_window_3)

In [None]:
# Create volatility DataFrame for window 3
df_window_3 = create_volatility_df(dfs_window_3)

In [None]:
df_window_3.head()

Performing ADF test

In [None]:
for country in df_window_3.drop(columns=['date']).columns:
    adf_test(df_window_3, country)

All of the variables are stationary. Therefore, no transformation is needed.

##### 02.01.2019 - 31.12.2022: COVID-19 pandemic

In [None]:
# Check whether each DataFrame contains data from 02.01.2019 to 31.12.2022
window_4_start_date = '2019-01-02'
window_4_end_date = '2022-12-31'
check_date_range(dataframes, window_4_start_date, window_4_end_date)

In [None]:
dfs_window_4 = extract_date_range(dataframes, window_4_start_date, window_4_end_date)

# Check whether each DataFrame contains data from 02.01.2019 to 31.12.2022
dfs_window_4

In [None]:
# Map dfs_window_4 to a new date range
dfs_window_4 = map_date_range(dfs_window_4, window_4_start_date, window_4_end_date)

In [None]:
# Check for missing values in the dfs_window_4
check_missing_values(dfs_window_4)

In [None]:
# Peforming forward filling imputation to handle the missing values
dfs_window_4 = forward_fill(dfs_window_4)

In [None]:
# Check for missing values in the dfs_window_4 to make sure that there is no missing data left
check_missing_values(dfs_window_4)

In [None]:
# Perform backward filling imputation to handle the remaining missing values
dfs_window_4 = backward_fill(dfs_window_4)

In [None]:
# Check for missing values in the dfs_window_4 to make sure that there is no missing data left
check_missing_values(dfs_window_4)

In [None]:
# Calculate the volatility for each country
dfs_window_4 = calculate_volatility(dfs_window_4)

In [None]:
# Create volatility DataFrame for window 4
df_window_4 = create_volatility_df(dfs_window_4)

Perform ADF test

In [None]:
for country in df_window_4.drop(columns=['date']).columns:
    adf_test(df_window_4, country)

All of the variables are stationary. Therefore, no transformation is needed.

##### 02.01.2023 - 30.04.2024: Recovery from COVID-19 pandemic

In [None]:
# Check whether each DataFrame contains data from 02.01.2023 to 30.04.2024
window_5_start_date = '2023-01-02'
window_5_end_date = '2024-04-30'
check_date_range(dataframes, window_5_start_date, window_5_end_date)

In [None]:
dfs_window_5 = extract_date_range(dataframes, window_5_start_date, window_5_end_date)

# Check whether each DataFrame contains data from 02.01.2023 to 30.04.2024
dfs_window_5

In [None]:
# Map dfs_window_4 to a new date range
dfs_window_5 = map_date_range(dfs_window_5, window_5_start_date, window_5_end_date)

In [None]:
# Check for missing values in the dfs_window_5
check_missing_values(dfs_window_5)

In [None]:
# Perform forward filling imputation
dfs_window_5 = forward_fill(dfs_window_5)

In [None]:
# Check for missing values in the dfs_window_5 again to make sure there is no missing values left
check_missing_values(dfs_window_5)

In [None]:
# Perform backward filling to handle the remaining missing values
dfs_window_5 = backward_fill(dfs_window_5)

In [None]:
# Check for missing values in the dfs_window_5 again to make sure there is no missing values left
check_missing_values(dfs_window_5)

In [None]:
# Calculate the volatility for each country
dfs_window_5 = calculate_volatility(dfs_window_5)

In [None]:
# Create volatility DataFrame for window 5
df_window_5 = create_volatility_df(dfs_window_5)

Peform ADF test

In [None]:
for country in df_window_5.drop(columns=['date']).columns:
    adf_test(df_window_5, country)

All of the variables are stationary. Therefore, no transformation is needed.

## VAR Modelling

In [None]:
# Create a list of windowed volatility DataFrames
windowed_vol_dfs = [df_window_1, df_window_2, df_window_3, df_window_4, df_window_5]

In [None]:
for i, df_volatility in enumerate(windowed_vol_dfs):
    window = f"window_{i+1}"
    print(f"Window: {window}")
    print(df_volatility.drop(columns=['date']).corr(method='pearson'))
    print()

##### Average Spillover Table

In [None]:
for i, df_volatility in enumerate(windowed_vol_dfs):
  window = f"window_{i+1}"
  print(f"Calculating spillover for {window}...")
  # Calculate the average spillover for each window
  spillovers_table, lag_order, forecast_horizon = calculate_avg_spillover_table(
      df_volatility.drop(columns=["date"])
  )
  print(f"Finished calculating spillover for {window}!")
  print(f"Lag order: {lag_order}")
  print(f"Forecast horizon: {forecast_horizon}")
  print(f"Saving spillover table for {window}...")
  # Save the spillover table to a CSV file
  spillovers_table.to_csv(
      f"output/var/{window}_spillover_table.csv", index=True
  )
  print(f"Finished saving spillover table for {window}!")
  print()

##### Net Pair-wise Spillover Table

In [None]:
for i in range(5):
  window = f"window_{i+1}"
  print(f"Calculating net pair-wise spillover for {window}...")
  # Read the spillover table for the current window
  spillover_table = pd.read_csv(f"output/var/{window}_spillover_table.csv", index_col=0)
  # Calculate the net pair-wise spillover for the current window
  normalized_spillover_table = spillover_table.drop(index=['Directional TO others', 'Total Spillover Index']).drop(columns=['Directional FROM others'])
  net_pairwise_spillover_table = calculate_net_pairwise_spillover_table(normalized_spillover_table)
  print(f"Finished calculating net pair-wise spillover for {window}!")

  print(net_pairwise_spillover_table)
  print()
  # print(f"Saving net pair-wise spillover table for {window}...")
  # # Save the net pair-wise spillover table to a CSV file
  # net_pairwise_spillover_table.to_csv(f"output/var/{window}_net_pairwise_spillover_table.csv", index=True)
  # print(f"Finished saving net pair-wise spillover table for {window}!")

  