In [44]:
import pandas as pd 
import numpy as np
import pyreadr

In [45]:
daily_data = pd.read_csv('data/daily_returns.csv')
weekly_data = pd.read_csv('data/weekly_returns.csv')

In [46]:
# set the date as the index
daily_data['Date'] = pd.to_datetime(daily_data['Date'])

daily_data = daily_data.set_index('Date')


# do the same for weekly
weekly_data['Date'] = pd.to_datetime(weekly_data['Date'])

weekly_data = weekly_data.set_index('Date')



In [47]:
daily_data.isna().sum() / daily_data.shape[0]

BSESN    0.054585
BVSP     0.049901
FTSE     0.031886
GDAXI    0.025581
GSPC     0.033507
HSCE     0.061070
IBEX     0.025221
JKSE     0.063412
MXX      0.036390
N225     0.058548
TWII     0.056386
VIX      0.033507
VLIC     0.033507
dtype: float64

In [48]:
weekly_data.isna().sum() / weekly_data.shape[0]

BSESN    0.229917
BVSP     0.229224
FTSE     0.229224
GDAXI    0.229224
GSPC     0.229224
HSCE     0.229224
IBEX     0.229224
JKSE     0.236842
MXX      0.229224
N225     0.230609
TWII     0.239612
VIX      0.229224
VLIC     0.229224
dtype: float64

In [49]:
# impute data with the mean of the previous and next day

daily_data_ffill = daily_data.ffill()
daily_data_bfill = daily_data.bfill()

weekly_data_ffill = weekly_data.ffill()
weekly_data_bfill = weekly_data.bfill()

weekly_data = (weekly_data_ffill + weekly_data_bfill) / 2
daily_data = (daily_data_ffill + daily_data_bfill) / 2

find and remove outliers

In [50]:
# find outliers with iqr
def remove_outliers(data):
    for col in data.columns:
        threshold = 2

        Q1 = data[col].quantile(0.25)
        Q3 = data[col].quantile(0.75)
        IQR = Q3 - Q1

        # replace outliers with NA
        data.loc[(data[col] < (Q1 - threshold * IQR)) | (data[col] > (Q3 + threshold * IQR)), col] = np.nan
    return data 

daily_data = remove_outliers(daily_data)
weekly_data = remove_outliers(weekly_data)

replace the outliers with the mean of previous and next values

In [51]:
# impute data with the mean of the previous and next day

daily_data_ffill = daily_data.ffill()
daily_data_bfill = daily_data.bfill()

weekly_data_ffill = weekly_data.ffill()
weekly_data_bfill = weekly_data.bfill()

weekly_data = (weekly_data_ffill + weekly_data_bfill) / 2
daily_data = (daily_data_ffill + daily_data_bfill) / 2

In [52]:
print(daily_data.shape)

# select subset of data
dI="2001-01-01"
dF="2003-01-01"

daily_data = daily_data.loc[dI:dF]
weekly_data = weekly_data.loc[dI:dF]

print(daily_data.shape)

(5551, 13)
(520, 13)


check na

In [53]:
daily_data.isna().sum() / daily_data.shape[0]

BSESN    0.0
BVSP     0.0
FTSE     0.0
GDAXI    0.0
GSPC     0.0
HSCE     0.0
IBEX     0.0
JKSE     0.0
MXX      0.0
N225     0.0
TWII     0.0
VIX      0.0
VLIC     0.0
dtype: float64

### Calculating EWMA Variance from EMA formula

In [None]:
# start with EMA formula
# EMA = Price(t) * k + EMA(y) * (1 – k)
# where:
# t = today
# y = yesterday
# N = number of days in EMA
# k = 2/(N+1)

# then calculate EWMA based on the EMA
# EWMA = Price(t) * k + EWMA(y) * (1 – k)
# where:
# t = today
# y = yesterday
# N = number of days in EWMA
# k = 2/(N+1)

In [55]:
# Function to calculate EWMA-based variance using recursion
def calculate_ewma_variance(returns, lambda_value):
    ewma_variances = [returns[0]**2]  # Initialize with the variance of the first return
    for i in range(1, len(returns)):
        ewma_variances.append(lambda_value * ewma_variances[-1] + (1 - lambda_value) * returns[i-1]**2)
    return np.array(ewma_variances)

In [56]:
# Set lambda value for EWMA
lambda_value = 0.94

# Calculate EWMA-based variance
ewma_variances = calculate_ewma_variance(daily_data, lambda_value)

# Calculate regular historical volatility estimation (e.g., using cumulative sum of square returns)
historical_volatility = np.sqrt(np.cumsum(daily_data**2))

KeyError: 0