In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from datetime import date
from yahooquery import Ticker
from utils_VaR import plot_scatter, plot_time_series_histogram, Security
from scipy.stats import ttest_ind
from statsmodels.stats.diagnostic import acorr_ljungbox

In [None]:
""" visualization """
# Settings for the S&P 500 visualization.
# T: time horizon (in days) for computing risk metrics
# p: level for VaR, ES (confidence level is 1-p)
T = 120
p = 5

# Sample window: from t0 up to the day the notebook is run.
t0 = '1920-01-01'
t1 = date.today()

sp500 = Security('^GSPC')
# Load prices with set_df_hist, the loader name used by the multi-asset cell
# of this notebook. The previous call here was set_df_historical_data, which
# no other cell uses.
# NOTE(review): utils_VaR is not visible from this notebook - confirm that
# set_df_historical_data is just the old name of set_df_hist.
sp500.set_df_hist(t0=t0, t1=t1)
sp500.set_df_pct_change()
# to generate plot, set rolling T to 1. re-sampling is done in plot function for now
sp500.set_df_risk(T=T, p=p, rolling_T=1)
df = sp500.get_df_risk()

# read start and end date of past US recessions (dates classified by NBER)
recession_periods = pd.read_csv('../data/recession_periods_NBER.csv')

In [None]:
# Scatter plot of the S&P 500 risk frame between 1960 and mid-2010,
# with the recession periods passed along to the plotting helper.
plot_scatter(
    df,
    t0='1960-1-1',
    t1='2010-6-30',
    T=1,
    recession_periods=recession_periods,
)

# Time-series histogram between 2005 and mid-2010, using the horizon T set
# in the setup cell; dates are formatted as full month name and year.
plot_time_series_histogram(
    df,
    t0='2005-1-1',
    t1='2010-6-30',
    T=T,
    recession_periods=recession_periods,
    date_format='%B-%Y',
)

In [2]:
""" collect analytic results for different assets """
# Read recession periods (dates classified by NBER) and parse both columns
# as timestamps. The unit is spelled out because pandas >= 2.0 refuses to
# cast to the unit-less 'datetime64' dtype.
recession_periods = pd.read_csv('../data/recession_periods_NBER.csv')
recession_periods = recession_periods.astype({'Peak': 'datetime64[ns]', 'Trough': 'datetime64[ns]'})
peaks = recession_periods['Peak'].values
troughs = recession_periods['Trough'].values

# Ticker symbol for each asset label. Kept separate from `securities` so that
# `securities` only ever holds Security objects, even if a download fails
# part-way through the loop.
tickers = dict(sp500='^GSPC', agg='agg', gold='GC=F', oil='CL=F', GBPUSD='GBPUSD=X', tech_sec='XLK', energy_sec='XLE', russell_2k='^RUT')
# Time horizon per asset used for computing VaR, ES.
dict_Ts = dict(sp500=100, agg=50, gold=50, oil=50, GBPUSD=50, tech_sec=50, energy_sec=50, russell_2k=50)
# p level for computing VaR, ES
p = 5
# overall time horizon we want to consider
t0 = '1920-1-1'
t1 = date.today()

# compute VaR, ES for all securities
securities = {}
for key, ticker in tickers.items():
    # length of time period for computing VaR, ES; the rolling step is set to
    # the same length as the horizon
    T = dict_Ts[key]
    rolling_T = T
    # get historical data, compute VaR, ES
    security = Security(ticker)
    security.set_df_hist(t0=t0, t1=t1)
    security.set_df_pct_change()
    security.set_df_risk(T=T, p=p, rolling_T=rolling_T)
    # register the asset only once all steps above have succeeded
    securities[key] = security

In [3]:
""" by recession """
# Layout of the summary table: every asset contributes two rows, one for
# non-recession periods ('F') and one for recession periods ('T').
columns = [
    'asset', 'VaR', 'ES', 'VaR/ES', 'recession',
    'sample size', 't-statistic', 'p-value',
]
output = {}
for idx, (key, security) in enumerate(securities.items()):
    # add recession (True/False) column to the risk frame, then fetch it
    security.label_recession_df(peaks=peaks, troughs=troughs, df='risk')
    df = security.get_df_risk()

    # split the VaR / ES observations by the recession flag
    in_recession = df['recession'].eq(True)
    out_of_recession = df['recession'].eq(False)
    r = df.loc[in_recession, ['VaR', 'ES']]
    nr = df.loc[out_of_recession, ['VaR', 'ES']]

    # VaR to ES ratio of every observation in each group
    r_VaR_to_ESs = r['VaR'].div(r['ES']).values
    nr_VaR_to_ESs = nr['VaR'].div(nr['ES']).values

    # two-sample t-test on the ratios, without assuming equal variances
    ttest_res = ttest_ind(a=r_VaR_to_ESs, b=nr_VaR_to_ESs, equal_var=False)

    # the test statistic and p-value are stored on the recession row only;
    # the non-recession row carries empty strings in those two columns
    output[2 * idx] = [
        key,
        np.mean(nr['VaR'].values),
        np.mean(nr['ES'].values),
        np.mean(nr_VaR_to_ESs),
        'F',
        len(nr),
        '',
        '',
    ]
    output[2 * idx + 1] = [
        key,
        np.mean(r['VaR'].values),
        np.mean(r['ES'].values),
        np.mean(r_VaR_to_ESs),
        'T',
        len(r),
        ttest_res.statistic,
        ttest_res.pvalue,
    ]

# assemble the rows into a frame and save it next to the notebook
df_tally = pd.DataFrame.from_dict(output, orient='index', columns=columns)
df_tally.to_csv(f'tally_by_recession_[{t0} - {t1}).csv', index=False)

In [4]:
""" by market vol levels """
# Layout of the summary table: every asset contributes two rows, one for the
# 'low' volatility level and one for the 'high' volatility level.
columns = [
    'asset', 'VaR', 'ES', 'VaR/ES', 'volatility level',
    'sample size', 't-statistic', 'p-value',
]
output = {}
for idx, (key, security) in enumerate(securities.items()):
    # add vol level column to the risk frame, then fetch it
    security.label_vol_level_df(T=dict_Ts[key])
    df = security.get_df_risk()

    # split the VaR / ES observations by volatility level
    is_low = df['vol level'].eq('low')
    is_high = df['vol level'].eq('high')
    low = df.loc[is_low, ['VaR', 'ES']]
    high = df.loc[is_high, ['VaR', 'ES']]

    # VaR to ES ratio of every observation in each group
    low_VaR_to_ESs = low['VaR'].div(low['ES']).values
    high_VaR_to_ESs = high['VaR'].div(high['ES']).values

    # two-sample t-test on the ratios, without assuming equal variances
    ttest_res = ttest_ind(a=low_VaR_to_ESs, b=high_VaR_to_ESs, equal_var=False)

    # the test statistic and p-value are stored on the 'High' row only;
    # the 'Low' row carries empty strings in those two columns
    output[2 * idx] = [
        key,
        np.mean(low['VaR'].values),
        np.mean(low['ES'].values),
        np.mean(low_VaR_to_ESs),
        'Low',
        len(low),
        '',
        '',
    ]
    output[2 * idx + 1] = [
        key,
        np.mean(high['VaR'].values),
        np.mean(high['ES'].values),
        np.mean(high_VaR_to_ESs),
        'High',
        len(high),
        ttest_res.statistic,
        ttest_res.pvalue,
    ]

# assemble the rows into a frame and save it next to the notebook
df_tally = pd.DataFrame.from_dict(output, orient='index', columns=columns)
df_tally.to_csv(f'tally_by_vol_[{t0} - {t1}).csv', index=False)

In [4]:
# Show the tally frame built by the preceding cell as this cell's output.
df_tally

Unnamed: 0,asset,VaR,ES,VaR/ES,volatility level,sample size,t-statistic,p-value
0,sp500,0.009551,0.013698,0.708926,Low,116,,
1,sp500,0.021823,0.031304,0.714947,High,116,-0.385158,0.700479
2,agg,0.002542,0.003223,0.797191,Low,42,,
3,agg,0.00488,0.006913,0.747781,High,42,2.03401,0.0452974
4,gold,0.010506,0.013981,0.758077,Low,62,,
5,gold,0.018412,0.025874,0.723223,High,62,1.62246,0.107561
6,oil,0.023141,0.030137,0.774727,Low,62,,
7,oil,0.043456,0.084452,0.753011,High,62,0.978506,0.330005
8,GBPUSD,0.006709,0.008522,0.793667,Low,43,,
9,GBPUSD,0.010873,0.014144,0.79172,High,43,0.0861109,0.931587


Taking non-overlapping time periods leaves us with fewer data points, but it is largely in keeping with the t-test's assumption that
the two samples are independent. However, we are left with a small sample size (<30), so the assumption that the sample mean follows a normal distribution may not hold. 

Taking overlapping time periods significantly violates the t-test's assumptions because of strong autocorrelation. Currently, historical prices for all securities except sp500 are only available from 2000 to today on Yahoo Finance. For some securities, such as GBPUSD, more data seems to be available elsewhere and can be fed into the program manually. 

In [None]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(df_tally)