In [1]:
from statsmodels.sandbox import distributions
import pandas as pd
import numpy  as np
from utils import *
from math import sqrt

def create_bins(max_val, min_val, n_bins=100):
    """ Create bins for the given data and return the bin edges. """
    bins = np.linspace(min_val, max_val, n_bins + 1)  # +1 for inclusive edge
    return bins

def bin_data(data, bins):
    """ Bin the data and return the count in each bin. """
    bin_counts = np.histogram(data, bins)[0]
    return bin_counts

def find_bin_count(input_value, bins, bin_counts):
    """ Find the count of the bin that the input value belongs to. """
    bin_index = np.digitize(input_value, bins) - 1  # -1 because np.digitize starts from 1
    return bin_counts[bin_index]

def get_ret_vol(df):
    df["return"] = (df["Adj Close"].shift(periods=-1)/df["Adj Close"]).apply(np.log)
    df["return_t"] = (df["Adj Close"].shift(periods=-21*6)/df["Adj Close"]).apply(np.log)
    df["vol_20d"] = df["return"].rolling(window=20).std()*sqrt(252)
    df["vol_30d"] = df["return"].rolling(window=30).std()*sqrt(252)
    df["vol_1y"] = df["return"].rolling(window=252).std()*sqrt(252)
    df["vol_3y"] = df["return"].rolling(window=756).std()*sqrt(252)
    df = df.dropna()
    return df

  return jit(*jit_args, **jit_kwargs)(fun)


In [3]:
r = pd.read_csv("data/risk_free.csv", index_col=0)
r.index = pd.to_datetime(r.index)
r["DTB3"] = r["DTB3"].replace(".", np.nan)
r["DTB3"] = r["DTB3"].ffill()
r["DTB3"] = pd.to_numeric(r["DTB3"])


data = pd.read_csv('mpd_stats.csv')
data['maturity_target'] = data['maturity_target'].ffill()
num_sample = 100000

hist_vols = ["vol_20d", "vol_30d", "vol_1y", "vol_3y"]
sets = data.groupby(['market']).indices


result = {}
for i in sets:
    if i in ["LR3y3m", "LR5y3m", "infl1y", "infl2y", "infl5y"]:
        continue

    underlying = pd.read_csv(f"data/{i}.csv", index_col=0)
    underlying.index = pd.to_datetime(underlying.index)
    underlying = get_ret_vol(underlying)
    
    underlying = pd.merge(underlying, r, how='left', left_index=True, right_index=True)
    underlying.rename(columns={"DTB3":"risk_free_rate"}, inplace=True)

    
    maturity = data.loc[data['market']==i].groupby(['maturity_target']).indices
    for m in maturity:
        rnp = pd.read_csv(f"data/{i}_{int(m)}.csv", index_col=0)
        rnp.rename(columns={"idt":"Date"}, inplace=True)
        rnp["Date"] = pd.to_datetime(rnp["Date"])
        rnp.set_index("Date", inplace=True)
        rnp.head()


        count = {"vol_20d":0, "vol_30d":0, "vol_1y":0, "vol_3y":0}
        for j in rnp.index:
            rnp_j = rnp.loc[j]
            mean = rnp_j["mu"]
            std = rnp_j["sd"]
            skew = rnp_j["skew"]
            kurt = rnp_j["kurt"]


            try:
                coeff = fit_fleishman_from_sk(skew, kurt)
                sim = (generate_fleishman(-coeff[1],*coeff,N=num_sample))*std+mean

                underlying_j = underlying.loc[j]
                s0 = underlying_j["Close"]
                alpha = underlying_j["risk_free_rate"]
                for vol in hist_vols:
                    sigma = underlying_j[vol]
                    true_retun = underlying_j["return_t"]
                    T = m/12
                    stratified = True
                    gbm = SimuStockPath(s0, alpha, sigma, T, num_sample, stratified)
                    gbm = np.log(gbm/s0)

                    
                    bins = create_bins(max(max(sim), max(gbm)), min(min(sim), min(gbm)))

                    rnp_counts = bin_data(sim, bins)
                    gbm_counts = bin_data(gbm, bins)

                    count[vol] += find_bin_count(true_retun, bins, rnp_counts)-find_bin_count(true_retun, bins, gbm_counts)
            except:
                print(f"Error at {i}_{m}_{j}_{vol}")
                print(f"{i} contains data: {j in underlying.index}")
                print(f"Simulation success: {coeff != None}")
        for vol in hist_vols:
            count[vol] = count[vol]/num_sample
        result[f"{i}_{int(m)}"] = count

Error at bac_3.0_2018-11-22 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2020-03-18 00:00:00_vol_3y
bac contains data: True
Simulation success: False
Error at bac_3.0_2023-08-09 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-08-16 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-08-23 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-08-30 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-09-06 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-09-07 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-09-13 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-09-20 00:00:00_vol_3y
bac contains data: False
Simulation success: True
Error at bac_3.0_2023-09-27 00:00:00_vol_3y
bac contains dat

In [4]:
result

{'bac_3': {'vol_20d': 7.11975,
  'vol_30d': 7.28233,
  'vol_1y': 7.52585,
  'vol_3y': 9.09717},
 'citi_3': {'vol_20d': 7.63537,
  'vol_30d': 7.8795,
  'vol_1y': 8.555,
  'vol_3y': 10.08878},
 'corn_6': {'vol_20d': 10.06134,
  'vol_30d': 10.21221,
  'vol_1y': 10.09689,
  'vol_3y': 10.93411},
 'euro_3': {'vol_20d': 16.22853,
  'vol_30d': 16.15229,
  'vol_1y': 16.644,
  'vol_3y': 16.56084},
 'gold_6': {'vol_20d': 14.46521,
  'vol_30d': 14.63813,
  'vol_1y': 15.06105,
  'vol_3y': 15.36124},
 'iyr_3': {'vol_20d': 12.05981,
  'vol_30d': 12.14495,
  'vol_1y': 12.29549,
  'vol_3y': 12.9595},
 'oil_6': {'vol_20d': 5.84923,
  'vol_30d': 5.69202,
  'vol_1y': 5.49247,
  'vol_3y': 5.60135},
 'pound_3': {'vol_20d': 15.23059,
  'vol_30d': 15.25123,
  'vol_1y': 15.2959,
  'vol_3y': 15.63555},
 'silver_6': {'vol_20d': 12.54711,
  'vol_30d': 12.6388,
  'vol_1y': 13.00386,
  'vol_3y': 13.05192},
 'soybns_6': {'vol_20d': 14.65328,
  'vol_30d': 14.68087,
  'vol_1y': 14.68139,
  'vol_3y': 14.89508},
 'sp12m

In [5]:
df = pd.DataFrame.from_dict(result)
df

Unnamed: 0,bac_3,citi_3,corn_6,euro_3,gold_6,iyr_3,oil_6,pound_3,silver_6,soybns_6,sp12m_12,sp6m_6,tr10yr_3,tr10yr_6,tr5yr_3,wheat_6,yen_3
vol_20d,7.11975,7.63537,10.06134,16.22853,14.46521,12.05981,5.84923,15.23059,12.54711,14.65328,27.11729,24.81248,4.5715,-0.03311,7.01815,9.71357,13.2247
vol_30d,7.28233,7.8795,10.21221,16.15229,14.63813,12.14495,5.69202,15.25123,12.6388,14.68087,27.66513,25.0402,4.85035,-0.02986,8.48942,9.75437,13.48564
vol_1y,7.52585,8.555,10.09689,16.644,15.06105,12.29549,5.49247,15.2959,13.00386,14.68139,30.13512,26.98827,4.95281,-0.04049,5.99997,9.42112,13.81908
vol_3y,9.09717,10.08878,10.93411,16.56084,15.36124,12.9595,5.60135,15.63555,13.05192,14.89508,31.49978,27.60585,6.39953,-0.01575,10.40611,9.7597,14.04458


In [6]:
df.to_csv("Result.csv")