In [1]:
from itertools import product

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from download_data import get_price, get_mpd, get_fx, get_commodity, get_bond, get_inflation
from data_analysis.explore_data import (
    calc_rolling_stat,
    merge_prob_stat,
    rename_stat_df,
    eval_large_change_prob,
    plot_change_scatter,
    prep_regression_stat,
    compare_regressions,
    compare_regression_eval
)

In [2]:
mpd = get_mpd()
start_date, end_date = mpd["idt"].min(), mpd["idt"].max()

equity = get_price(use_cache=False, tickers=["BAC", "C", "IYR", "SPY"], 
                  start_date=start_date, 
                  end_date=end_date, 
                  )

fx = get_fx(use_cache=True, tickers=["GBP", "EUR", "JPY"],
            start_date=start_date,
            end_date=end_date,
            )

commodity = get_commodity(use_cache=True, 
                          start_date=start_date, 
                          end_date=end_date,)

inflation = get_inflation()
bond = get_bond(use_cache=True)

In [3]:
mpd["market"].unique()

array(['bac', 'citi', 'corn', 'euro', 'gold', 'infl1y', 'infl2y',
       'infl5y', 'iyr', 'LR3y3m', 'LR5y3m', 'oil', 'pound', 'silver',
       'soybns', 'sp12m', 'sp6m', 'tr10yr', 'tr5yr', 'wheat', 'yen'],
      dtype=object)

In [4]:
stat = calc_rolling_stat(
    pd.concat([fx, equity, commodity, bond, inflation], axis=1), 
    month=6
)
stat

Unnamed: 0,Unnamed: 1,ret,vol,ret_max,ret_min
10 YR,2003-01-02,0.005420,0.219611,0.218786,-0.197656
10YR_Price,2003-01-02,-0.000904,1.993647,0.029876,-0.040575
5 YR,2003-01-02,0.009852,0.224761,0.434735,-0.352441
5YR_Price,2003-01-02,-0.000477,1.115278,0.014309,-0.026163
7 YR,2003-01-02,-0.032790,0.219328,0.239779,-0.281251
...,...,...,...,...,...
corn2,2023-09-11,-0.074382,4.636325,0.013710,-0.074604
gold,2023-09-11,0.055265,70.753250,0.084631,-0.050744
silver,2023-09-11,-0.010733,0.954580,0.096936,-0.103269
soybean,2023-09-11,-0.088117,31.870263,0.030203,-0.088117


In [5]:
market_list = ["bac", "citi", "iyr", "sp6m", "sp12m", "euro", "pound", "yen"] + ['silver','corn', 'soybns','gold','wheat'] + ["infl1y", "infl2y", "infl5y", "tr5yr", "tr10yr"] 
ticker_list = ["BAC", "C", "IYR", "SPY", "SPY", "GBP", "EUR", "JPY"] + ['silver','corn1', 'soybean','gold','wheat'] + ['EXPINF1YR', 'EXPINF2YR', 'EXPINF5YR', '5YR_Price', '10YR_Price']
stat_rename = rename_stat_df(stat, dict(zip(market_list, ticker_list)))

In [6]:
df = merge_prob_stat(stat_rename, mpd)

In [7]:
import numpy as np
from scipy.stats import chi2

def kupiec_pof_test(total_observations, exceedances, confidence_level: float = 0.99):
    """
    Calculate the Kupiec POF test statistic.

    Parameters:
    total_observations (int): Total number of observations in the dataset.
    exceedances (int): Number of observed exceedances (VaR failures).
    confidence_level (float): The confidence level used for the VaR calculation (e.g., 0.99 for 99%).

    Returns:
    float: The Kupiec POF test statistic.
    float: The p-value from the chi-squared distribution.
    """
    # Probability of exceedance
    p = 1 - confidence_level

    # Expected number of exceedances
    expected_exceedances = total_observations * p

    # Actual proportion of exceedances
    proportion_exceedances = exceedances / total_observations

    # Kupiec test statistic
    LR_POF = -2 * np.log(((1 - p) ** (total_observations - exceedances)) * (p ** exceedances)) + 2 * np.log(((1 - proportion_exceedances) ** (total_observations - exceedances)) * (proportion_exceedances ** exceedances))

    # Calculate the p-value
    p_value = 1 - chi2.cdf(LR_POF, 1)

    return LR_POF, p_value

# Example usage
total_observations = 250  # Total number of days in the dataset
exceedances = 5  # Number of days when the loss exceeded the VaR
confidence_level = 0.99  # 99% confidence level

LR_POF, p_value = kupiec_pof_test(total_observations, exceedances, confidence_level)

print(f"Kupiec POF Test Statistic: {LR_POF}")
print(f"P-Value: {p_value}")

Kupiec POF Test Statistic: 1.956809788230622
P-Value: 0.1618549171960425


In [8]:
def calc_hit_rate(df: pd.DataFrame, idx: list[str] = market_list):
    p_col = ["p10", "p50", "p90"]
    for c in p_col:
        df.loc[:, f"{c}_hit"] = df[c] < df["ret"]
        
    hit_ratio = {}
    k_test = {}
    k_pvalue = {}
    for name, g in df.filter(regex="_hit$").groupby("market"):
        hit_ratio[name] = g.mean()
        k_test[name], k_pvalue[name] = kupiec_pof_test(len(g), g.sum())
    
    
    df = pd.concat([pd.DataFrame(hit_ratio), 
                    pd.DataFrame(k_test), 
                    pd.DataFrame(k_pvalue)], keys=["hit_ratio", "kupiec_stat", "kupiec_pvalue"]).T
    return df.reindex(idx)

calc_hit_rate(df)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0_level_0,hit_ratio,hit_ratio,hit_ratio,kupiec_stat,kupiec_stat,kupiec_stat,kupiec_pvalue,kupiec_pvalue,kupiec_pvalue
Unnamed: 0_level_1,p10_hit,p50_hit,p90_hit,p10_hit,p50_hit,p90_hit,0,1,2
bac,0.877076,0.586379,0.209302,inf,inf,552.38168,0.0,0.0,0.0
citi,0.901993,0.528239,0.162791,inf,inf,377.844635,0.0,0.0,0.0
iyr,0.941924,0.627949,0.161525,inf,inf,341.714277,0.0,0.0,0.0
sp6m,0.977712,0.619614,0.020802,inf,inf,6.049118,0.0,0.0,0.01391325
sp12m,0.991085,0.566122,0.0,inf,inf,13.527752,0.0,0.0,0.0002350614
euro,0.903169,0.503521,0.213028,inf,inf,535.054949,0.0,0.0,0.0
pound,0.943662,0.552817,0.160211,inf,inf,347.866225,0.0,0.0,0.0
yen,0.917254,0.577465,0.198944,inf,inf,483.124634,0.0,0.0,0.0
silver,0.957935,0.443595,0.072658,inf,inf,87.300521,0.0,0.0,0.0
corn,0.967619,0.481905,0.04381,inf,inf,33.067444,0.0,0.0,8.90168e-09


In [11]:
calc_hit_rate(df).to_csv("unconditional_test.csv")

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [10]:
df.index.get_level_values("idt").unique()

DatetimeIndex(['2010-01-15', '2010-01-29', '2010-02-12', '2010-02-26',
               '2010-03-15', '2010-03-31', '2010-04-15', '2010-04-30',
               '2010-05-14', '2010-05-28',
               ...
               '2009-07-15', '2009-07-31', '2009-08-14', '2009-08-31',
               '2009-09-15', '2009-09-30', '2009-10-30', '2009-11-13',
               '2009-11-30', '2009-12-15'],
              dtype='datetime64[ns]', name='idt', length=877, freq=None)

In [12]:
calc_hit_rate(df)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0_level_0,hit_ratio,hit_ratio,hit_ratio,kupiec_stat,kupiec_stat,kupiec_stat,kupiec_pvalue,kupiec_pvalue,kupiec_pvalue
Unnamed: 0_level_1,p10_hit,p50_hit,p90_hit,p10_hit,p50_hit,p90_hit,0,1,2
bac,0.877076,0.586379,0.209302,inf,inf,552.38168,0.0,0.0,0.0
citi,0.901993,0.528239,0.162791,inf,inf,377.844635,0.0,0.0,0.0
iyr,0.941924,0.627949,0.161525,inf,inf,341.714277,0.0,0.0,0.0
sp6m,0.977712,0.619614,0.020802,inf,inf,6.049118,0.0,0.0,0.01391325
sp12m,0.991085,0.566122,0.0,inf,inf,13.527752,0.0,0.0,0.0002350614
euro,0.903169,0.503521,0.213028,inf,inf,535.054949,0.0,0.0,0.0
pound,0.943662,0.552817,0.160211,inf,inf,347.866225,0.0,0.0,0.0
yen,0.917254,0.577465,0.198944,inf,inf,483.124634,0.0,0.0,0.0
silver,0.957935,0.443595,0.072658,inf,inf,87.300521,0.0,0.0,0.0
corn,0.967619,0.481905,0.04381,inf,inf,33.067444,0.0,0.0,8.90168e-09
