In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pysabr import Hagan2002LognormalSABR
from pysabr import hagan_2002_lognormal_sabr as sabr
import xgboost as xgb
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Prepare the data used for modeling on the IR dataframe.

In [2]:
def create_features(df):
    """Return a copy of ``df`` extended with lag, rolling-mean and calendar columns.

    The frame must provide the columns ``rho``, ``volvol`` and
    ``Forward Swap Rate`` and an index exposing datetime attributes
    (``hour``, ``dayofweek``, ``isocalendar()`` ...). The caller's frame is
    not modified.
    """
    out = df.copy()

    # (new column, source column, lag in rows); the order fixes the column layout.
    lag_specs = (
        ('rhol1', 'rho', 1),
        ('volvoll1', 'volvol', 1),
        ('Forward Swap Ratel1', 'Forward Swap Rate', 1),
        ('rhol2', 'rho', 2),
        ('volvoll2', 'volvol', 2),
        ('Forward Swap Ratel2', 'Forward Swap Rate', 2),
    )
    for target, source, lag in lag_specs:
        out[target] = out[source].shift(lag)

    # Mean over the two preceding rows; the shift keeps the current row out of it.
    rolling_specs = (
        ('rholrm2', 'rho'),
        ('volvollrm2', 'volvol'),
        ('Forward Swap Ratelrm2', 'Forward Swap Rate'),
    )
    for target, source in rolling_specs:
        out[target] = out[source].shift(1).rolling(2).mean()

    # Calendar attributes read straight from the index.
    stamps = out.index
    calendar_columns = (
        ('hour', stamps.hour),
        ('dayofweek', stamps.dayofweek),
        ('quarter', stamps.quarter),
        ('month', stamps.month),
        ('year', stamps.year),
        ('dayofyear', stamps.dayofyear),
        ('dayofmonth', stamps.day),
        ('weekofyear', stamps.isocalendar().week),
    )
    for target, values in calendar_columns:
        out[target] = values

    return out
def create_featuresa(df):
    """Return a copy of ``df`` extended with the feature columns for ``alpha``.

    The frame must provide the columns ``alpha`` and ``Forward Swap Rate`` and
    an index exposing datetime attributes. Lags of one, two and three rows are
    produced for ``alpha``; the three-row lag is added last. The caller's
    frame is not modified.
    """
    out = df.copy()

    # (new column, source column, lag in rows); the order fixes the column layout.
    lag_specs = (
        ('alphal1', 'alpha', 1),
        ('Forward Swap Ratel1', 'Forward Swap Rate', 1),
        ('alphal2', 'alpha', 2),
        ('Forward Swap Ratel2', 'Forward Swap Rate', 2),
    )
    for target, source, lag in lag_specs:
        out[target] = out[source].shift(lag)

    # Mean over the two preceding rows; the shift keeps the current row out of it.
    rolling_specs = (
        ('alphalrm2', 'alpha'),
        ('Forward Swap Ratelrm2', 'Forward Swap Rate'),
    )
    for target, source in rolling_specs:
        out[target] = out[source].shift(1).rolling(2).mean()

    # Calendar attributes read straight from the index.
    stamps = out.index
    calendar_columns = (
        ('hour', stamps.hour),
        ('dayofweek', stamps.dayofweek),
        ('quarter', stamps.quarter),
        ('month', stamps.month),
        ('year', stamps.year),
        ('dayofyear', stamps.dayofyear),
        ('dayofmonth', stamps.day),
        ('weekofyear', stamps.isocalendar().week),
    )
    for target, values in calendar_columns:
        out[target] = values

    # Added after the calendar block so it remains the final column.
    out['alphal3'] = out['alpha'].shift(3)
    return out

In [3]:
# Load the SABR table, discard the 'Unnamed: 0' column and parse 'Date' (YYYY-MM-DD).
swaption = pd.read_csv("sabr.csv").drop(columns='Unnamed: 0')
swaption['Date'] = pd.to_datetime(swaption['Date'], format='%Y-%m-%d')

# Working frame: same data without the two error columns, indexed by 'Date'.
# `swaption` itself keeps those columns and its default integer index.
swaption_u = swaption.drop(columns=['error', 'abs(e_size)>0.005']).set_index('Date')

In [4]:
# IR trade data; `irdate` keeps the first occurrence of every 'Value Date'
# (values as read from the CSV, not parsed) on a fresh 0..n-1 index.
ir = pd.read_csv('../data/trade-price-ir-vegas.csv')
irdate = ir.loc[~ir['Value Date'].duplicated(), 'Value Date'].reset_index(drop=True)

# One row per combination to process; the 'Unnamed: 0' column is discarded.
unique_combos = pd.read_csv('unique_combo.csv').drop(columns='Unnamed: 0')

In [5]:
# One sub-frame per (Expiry, Tenor) pair, keyed by that tuple.
datasets1 = dict(iter(swaption_u.groupby(['Expiry', 'Tenor'])))
# Shallow copy: `datasets` entries get replaced later while `datasets1` keeps the raw groups.
datasets = dict(datasets1)

In [6]:
# Restrict every dataset listed in `unique_combos` to the IR value dates, keeping
# a few earlier rows as history for the lag features, then build the features.

# Parse the value dates once so they can be compared with the datetime index
# explicitly instead of relying on implicit string casting inside `isin`.
ir_dates = pd.DatetimeIndex(pd.to_datetime(irdate))

# Number of pre-IR rows kept as history; 3 matches the longest lag built by
# create_featuresa (shift(3)).
N_WARMUP_ROWS = 3

for i in range(len(unique_combos)):
    a, b = unique_combos.loc[i, :]
    ddf = datasets1[a, b].drop_duplicates('Forward Swap Rate')

    # Boolean mask over ddf rows whose date is an IR value date.
    on_ir_date = ddf.index.isin(ir_dates)
    if not on_ir_date.any():
        raise ValueError(f"no IR value date found in the dataset for combination ({a}, {b})")
    filtered_ddf = ddf[on_ir_date]

    # First row (in ddf order) that falls on an IR value date.
    first_filtered_index = filtered_ddf.index[0]

    # Rows strictly before that date that are NOT IR dates. The previous
    # `idx not in irdate` tested the Series' index labels (0..n-1) rather than
    # its values, so it never excluded anything.
    # NOTE(review): taking the last N rows assumes ddf is in chronological order - confirm.
    is_history = (ddf.index < first_filtered_index) & ~on_ir_date
    before_indices = ddf.index[is_history][-N_WARMUP_ROWS:]

    combined_indices = before_indices.append(filtered_ddf.index)
    reindexed_ddf = ddf.reindex(combined_indices)
    reindexed_ddf = create_features(reindexed_ddf)
    datasets[a, b] = create_featuresa(reindexed_ddf)

In [21]:
# Predict the SABR parameters for every combination with the saved XGBoost
# models and turn them into a volatility column.

# Model input columns, in the order the original cell passed them.
# NOTE(review): presumably this is the order used when the models were trained - confirm.
ALPHA_FEATURES = ['Forward Swap Rate','alphal1','Forward Swap Ratel1','alphal2','Forward Swap Ratel2','alphalrm2','Forward Swap Ratelrm2','hour','dayofweek','quarter','month','year','dayofyear','dayofmonth','weekofyear','alphal3']
RHO_VOLVOL_FEATURES = ['Forward Swap Rate','rhol1','volvoll1','Forward Swap Ratel1','rhol2','volvoll2','Forward Swap Ratel2','rholrm2','volvollrm2','Forward Swap Ratelrm2','hour','dayofweek','quarter','month','year','dayofyear','dayofmonth','weekofyear']

for i in range(len(unique_combos)):
    a, b = unique_combos.loc[i, :]
    df = datasets[a, b]

    # Single-output model -> column 'pre' (used as alpha below).
    alpha_model = xgb.XGBRegressor()
    alpha_model.load_model(f'model/{a}_{b}_alphamodel.json')
    df['pre'] = alpha_model.predict(df[ALPHA_FEATURES])

    # Two-output model: predict once and split the columns (rho first, volvol
    # second, going by how they are passed to lognormal_vol below). The
    # previous code ran the same prediction twice.
    rho_volvol_model = xgb.XGBRegressor()
    rho_volvol_model.load_model(f'model/{a}_{b}_rhovolmodel.json')
    rho_volvol_pred = rho_volvol_model.predict(df[RHO_VOLVOL_FEATURES])
    df['prer'] = rho_volvol_pred[:, 0]
    df['prev'] = rho_volvol_pred[:, 1]

    # Strike and forward are both the forward swap rate; `a` is passed as the
    # third argument and 0.5 as the fifth (time to expiry and beta in pysabr's
    # lognormal_vol(k, f, t, alpha, beta, rho, volvol) - TODO confirm units of `a`).
    # The result is scaled by 100.
    df['vol'] = 100*df.apply(lambda row: sabr.lognormal_vol(row['Forward Swap Rate'],row['Forward Swap Rate'],a,row['pre'],0.5, row['prer'],row['prev']), axis=1)
    datasets[a, b] = df

In [24]:
# Write every dataset to model/<Expiry>_<Tenor>.csv, index included.
for name, frame in datasets.items():
    frame.to_csv(f"model/{name[0]}_{name[1]}.csv", index=True)
