In [2]:
import pandas as pd
import numpy as np
import mibian
import math
from scipy.stats import norm
import matplotlib.pyplot as plt
import statsmodels.api as sm 
import matplotlib.pyplot as plt
from datetime import datetime
import datetime

### Training Model

In [3]:
def pre_train_process(df,startIdx,windowLength,isNormalized):
    """Build the regression training set (y, x1, x2) from a window of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'ts', 'opt_price', 'fut_price', 'delta',
        'vega' and 'days_to_expiry'.
    startIdx : int
        Position of the first row of the window.
    windowLength : int
        The window is the positional slice
        ``[startIdx, startIdx + windowLength - 1)``, i.e. it holds
        ``windowLength - 1`` rows.
    isNormalized : bool
        ``False`` -> price changes are first differences and rows whose
        futures change is exactly 0 are discarded.
        Otherwise -> price changes are ratios to the previous row and rows
        whose futures ratio is exactly 1 are discarded.

    Returns
    -------
    pandas.DataFrame
        Columns 'y', 'x1', 'x2' where, per remaining row,
        ``coeff = vega * fut_price_chg / fut_price / sqrt(days_to_expiry / 252)``,
        ``y = opt_price_chg - delta * fut_price_chg``,
        ``x1 = coeff * delta`` and ``x2 = coeff * delta ** 2``.
        The first row of the window (which has no previous row and therefore
        a NaN change) is removed. An empty window yields an empty frame
        instead of raising.
    """
    window_end = startIdx + windowLength - 1
    # Explicit copy: new columns are added below and must not be written
    # onto a view of the caller's frame.
    window = df.reset_index(drop=True).iloc[startIdx:window_end].copy()

    if isNormalized == False:
        # absolute changes; "no move" is a difference of 0
        window['opt_price_chg'] = window['opt_price'].diff()
        window['fut_price_chg'] = window['fut_price'].diff()
        no_move = 0
    else:
        # relative changes; "no move" is a ratio of 1
        window['opt_price_chg'] = window['opt_price'] / window['opt_price'].shift()
        window['fut_price_chg'] = window['fut_price'] / window['fut_price'].shift()
        no_move = 1

    # The first row's change is NaN, and NaN != no_move is True, so that row
    # survives this filter and is removed positionally further down.
    keep = window['fut_price_chg'] != no_move
    wanted = ['ts','opt_price_chg','fut_price_chg','delta','vega','days_to_expiry','fut_price']
    window = window.loc[keep, wanted].reset_index(drop=True)

    coeff = (window['vega'] * window['fut_price_chg'] / window['fut_price']
             / np.sqrt(window['days_to_expiry'] / 252))
    train_set = pd.DataFrame({
        'y': window['opt_price_chg'] - window['delta'] * window['fut_price_chg'],
        'x1': coeff * window['delta'],
        'x2': coeff * window['delta'] ** 2,
    })
    # Positional drop of the leading NaN row; unlike drop(index=0) this does
    # not raise when the window turned out to be empty.
    return train_set.iloc[1:]

In [4]:
def train_model(train_set):
    """Fit an ordinary-least-squares model on a prepared training set.

    The response is the 'y' column; the regressors are every column after
    the first one, with a constant term added by ``sm.add_constant``.
    Returns the fitted statsmodels results object.
    """
    regressors = train_set.iloc[:, 1:]
    design = sm.add_constant(regressors)
    response = train_set['y']
    return sm.OLS(response, design).fit()

In [5]:
def predict_MV_delta(df,startIdx,windowLength,result):
    """Compute the adjusted ("MV") delta for a window of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'ts', 'delta', 'vega', 'days_to_expiry', 'fut_price'
        and 'IV'.
    startIdx : int
        Position of the first row of the window.
    windowLength : int
        The window is the positional slice
        ``[startIdx, startIdx + windowLength - 1)``.
    result : object
        Fitted model exposing ``params`` with at least three entries, read
        by position as b0, b1, b2.

    Returns
    -------
    pandas.DataFrame
        The selected columns plus
        ``y_hat = b0 + b1 * delta + b2 * delta ** 2`` and
        ``MV_delta = delta + y_hat * vega / fut_price / sqrt(days_to_expiry / 252)``.
        The previous index is moved into a leading column by ``reset_index``.

    NOTE(review): ``train_model`` fits b0 as a plain OLS intercept while the
    regressors are ``coeff * delta`` and ``coeff * delta ** 2``; here b0 is
    used as the constant term of the quadratic in delta. Confirm that this
    is the intended model specification.
    """
    window_end = startIdx + windowLength - 1
    wanted = ['ts','delta','vega','days_to_expiry','fut_price','IV']
    # Copy so the new columns are not written onto a slice of the input.
    tmp = df.iloc[startIdx:window_end][wanted].copy()

    # Read the coefficients strictly by position. Integer __getitem__ on a
    # label-indexed Series (e.g. index ['const', 'x1', 'x2']) is deprecated
    # in pandas, so go through a plain array instead.
    coefficients = np.asarray(result.params)
    b0, b1, b2 = coefficients[0], coefficients[1], coefficients[2]

    tmp['y_hat'] = b0 + b1 * tmp['delta'] + b2 * tmp['delta'] ** 2
    tmp['MV_delta'] = (tmp['delta']
                       + tmp['y_hat'] * tmp['vega'] / tmp['fut_price']
                       / np.sqrt(tmp['days_to_expiry'] / 252))
    return tmp.reset_index()

In [6]:
def generate_regression_model(df,Date,trainPeriod,isNormalized):
    """Fit the regression model on the rows of ``df`` preceding ``Date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a 'ts' column plus the columns required by
        ``pre_train_process``.
    Date : datetime.datetime
        End of the training window.
    trainPeriod : int or float
        Length of the training window in days (passed to
        ``datetime.timedelta``).
    isNormalized : bool
        Forwarded to ``pre_train_process``.

    Returns
    -------
    Fitted model from ``train_model``, or ``None`` when no model can be
    trained: no row at or before either window boundary, both boundaries
    resolving to the same row, or an empty training set.

    Notes
    -----
    'ts' is compared against ``strftime("%Y-%m-%d, %H:%M:%S")`` strings.
    This assumes 'ts' holds strings that order correctly against that
    format -- TODO confirm against the CSV's timestamp format.
    """
    contract = df.reset_index(drop=True)
    ts_format = "%Y-%m-%d, %H:%M:%S"
    startDate = Date - datetime.timedelta(trainPeriod)

    end_rows = contract.index[contract['ts'] <= Date.strftime(ts_format)]
    # Look the window start up in the frame that was passed in, not in a
    # notebook-level global, so the function works for any input frame.
    start_rows = contract.index[contract['ts'] <= startDate.strftime(ts_format)]
    if len(end_rows) == 0 or len(start_rows) == 0:
        # Nothing at or before one of the boundaries: no window to train on.
        return None

    idx1 = end_rows[-1]
    idx0 = start_rows[-1]
    if idx1 == idx0:
        return None

    train_set = pre_train_process(contract, idx0, idx1 - idx0, isNormalized)
    if train_set.empty:
        return None
    return train_model(train_set)

In [7]:
def generate_MV_delta3(allcontracts,contractName,Date,result):
    """Predict the MV delta of one contract at the last row not after ``Date``.

    Filters ``allcontracts`` to rows whose 'opt_symbol' equals
    ``contractName``, finds the last row with ``ts <= Date``, runs
    ``predict_MV_delta`` from that row with a window length of 2, and adds
    the model's R-squared as column 'R_square'.
    """
    is_target = allcontracts['opt_symbol'] == contractName
    contract = allcontracts[is_target].copy().reset_index(drop=True)

    not_after = contract['ts'] <= Date
    candidates = contract.index[not_after].tolist()
    idx = candidates[-1]

    predict_set = predict_MV_delta(contract, idx, 2, result)
    predict_set['R_square'] = result.rsquared
    return predict_set

In [8]:
def generate_deltas2(allcontracts,contractName,startDate,nDays,result):
    """Collect the predicted deltas of a contract for ``nDays`` consecutive days.

    For day offsets 0 .. nDays-1 from ``startDate`` the date is formatted as
    "%Y-%m-%d, %H:%M:%S" and passed to ``generate_MV_delta3``; the per-day
    frames are stacked in order, each keeping its own index.

    Returns a DataFrame whose leading columns are 'index', 'ts', 'delta',
    'vega', 'days_to_expiry', 'fut_price', 'y_hat', 'MV_delta', followed by
    any additional columns produced by the prediction step. With
    ``nDays == 0`` an empty frame with those leading columns is returned.
    """
    base_columns = ['index','ts','delta','vega','days_to_expiry','fut_price','y_hat','MV_delta']

    # DataFrame.append was removed in pandas 2.0: gather the daily frames
    # and concatenate once.
    daily = []
    for offset in range(0, nDays):
        day = startDate + datetime.timedelta(offset)
        daily.append(generate_MV_delta3(allcontracts, contractName,
                                        day.strftime("%Y-%m-%d, %H:%M:%S"), result))
    if not daily:
        return pd.DataFrame(columns=base_columns)

    stacked = pd.concat(daily)
    # Keep the historical column layout: base columns first, extras after.
    extras = [name for name in stacked.columns if name not in base_columns]
    return stacked.reindex(columns=base_columns + extras)

In [9]:
def calculate_PnL(df):
    """Per-step PnL of holding the previous row's delta / MV delta.

    For every row after the first,
    ``Delta_PnL = (fut_price - previous fut_price) * previous delta`` and
    ``MV_delta_PnL = (fut_price - previous fut_price) * previous MV_delta``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'fut_price', 'delta' and 'MV_delta'; rows are taken in their
        stored order.

    Returns
    -------
    pandas.DataFrame
        Columns 'Delta_PnL' and 'MV_delta_PnL' with ``len(df) - 1`` rows on
        a fresh 0..n-2 index (empty when ``df`` has fewer than two rows).
    """
    columns = ['Delta_PnL','MV_delta_PnL']
    if len(df) < 2:
        return pd.DataFrame(columns=columns)

    # Vectorised: one price move per consecutive pair of rows, multiplied by
    # the hedge ratio that was in place at the start of the move. This
    # replaces row-by-row DataFrame.append, which pandas 2.0 removed.
    fut_move = np.diff(df['fut_price'].to_numpy(dtype=float))
    prev_delta = df['delta'].to_numpy(dtype=float)[:-1]
    prev_mv_delta = df['MV_delta'].to_numpy(dtype=float)[:-1]
    return pd.DataFrame({'Delta_PnL': fut_move * prev_delta,
                         'MV_delta_PnL': fut_move * prev_mv_delta})

In [10]:
def SingleNumericalExperiment2(df,contractName,testDate,
                              holdingPeriod,trainPeriod,isNormalized):
    """Run one hedging experiment for a contract starting at ``testDate``.

    A regression model is fitted on the ``trainPeriod`` days of ``df``
    before ``testDate``; deltas are then predicted for ``holdingPeriod + 1``
    consecutive days and the PnL of both hedge ratios is summed.

    Returns
    -------
    dict or None
        ``None`` when no model could be fitted. Otherwise a dict with keys
        'contract name', 'start date', 'delta', 'MV delta',
        'days_to_expiry', 'iv', 'BS delta PnL', 'MV delta PnL',
        'holding period' and 'r square'; the per-row values come from the
        first predicted row.
    """
    result = generate_regression_model(df=df, Date=testDate,
                                       trainPeriod=trainPeriod,
                                       isNormalized=isNormalized)
    if result is None:
        return None

    deltas = generate_deltas2(allcontracts=df, contractName=contractName,
                              startDate=testDate, nDays=holdingPeriod+1,
                              result=result)
    # Renumber rows 0..n-1 for the PnL step; the old (repeated) index is
    # not needed afterwards, so it is dropped rather than kept as a column.
    deltas = deltas.reset_index(drop=True)
    pnls = calculate_PnL(deltas)

    first_row = deltas.iloc[0]
    return {'contract name':contractName,
            'start date':testDate.strftime("%Y-%m-%d, %H:%M:%S"),
            'delta':first_row['delta'],
            'MV delta':first_row['MV_delta'],
            'days_to_expiry':first_row['days_to_expiry'],
            'iv':first_row['IV'],
            'BS delta PnL':pnls['Delta_PnL'].sum(),
            'MV delta PnL':pnls['MV_delta_PnL'].sum(),
            'holding period':holdingPeriod,
            'r square':result.rsquared}

In [11]:
def BatchNumericalExperiment2(df,contractName,startDate,endDate,holdingPeriod,
                             trainPeriod=1,isNormalized=True):
    """Run ``SingleNumericalExperiment2`` once per day in [startDate, endDate).

    The test date advances one day at a time; days for which the single
    experiment returns ``None`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per successful experiment with columns 'contract name',
        'start date', 'delta', 'MV delta', 'days_to_expiry', 'iv',
        'BS delta PnL', 'MV delta PnL', 'holding period', 'r square'
        (an empty frame with these columns when nothing succeeded).
    """
    columns = ['contract name','start date','delta','MV delta',
               'days_to_expiry','iv',
               'BS delta PnL','MV delta PnL','holding period',
               'r square']
    one_day = datetime.timedelta(1)

    # Collect plain dict records and build the frame once at the end;
    # DataFrame.append was removed in pandas 2.0.
    records = []
    currentDate = startDate
    while currentDate < endDate:
        outcome = SingleNumericalExperiment2(df=df, contractName=contractName,
                                             testDate=currentDate,
                                             holdingPeriod=holdingPeriod,
                                             trainPeriod=trainPeriod,
                                             isNormalized=isNormalized)
        if outcome is not None:
            records.append(outcome)
        currentDate += one_day
    return pd.DataFrame(records, columns=columns)

In [130]:
# Single-contract sanity run.
# The option data is loaded here so that this cell also works on a fresh
# kernel when the notebook is run top to bottom; otherwise `Calls` is only
# defined by a cell further down.
Calls = pd.read_csv('nifty_311221_190122_1min_calls_py_vollib.csv')
contractName = 'NIFTY22JAN17450CE'
startDate = datetime.datetime(2022,1,1,15,25)
endDate = datetime.datetime(2022,1,20,15,25)
# holdingPeriod=1 day, trainPeriod=1 day, isNormalized=True
tmp_res = BatchNumericalExperiment2(Calls,contractName,startDate,endDate,1,1,True)
tmp_res

Unnamed: 0,contract name,start date,delta,MV delta,days_to_expiry,iv,BS delta PnL,MV delta PnL,holding period,r square
0,NIFTY22JAN17450CE,"2022-01-03, 15:25:00",0.642103,27.144925,24.001042,0.129904,105.080209,4442.266996,1,0.225913
1,NIFTY22JAN17450CE,"2022-01-04, 15:25:00",0.733483,24.973931,23.001042,0.125969,66.160157,2252.648584,1,0.146364
2,NIFTY22JAN17450CE,"2022-01-05, 15:25:00",0.750206,30.832068,22.001042,0.145166,-113.431076,-4661.808646,1,0.154852
3,NIFTY22JAN17450CE,"2022-01-06, 15:25:00",0.688125,26.098533,21.001042,0.144385,39.360744,1492.83607,1,0.195785
4,NIFTY22JAN17450CE,"2022-01-07, 15:25:00",0.722373,20.271645,20.001042,0.141923,0.0,0.0,1,0.118766
5,NIFTY22JAN17450CE,"2022-01-10, 15:25:00",0.824863,38.518525,17.001042,0.143329,24.828376,1159.4076,1,0.116677
6,NIFTY22JAN17450CE,"2022-01-11, 15:25:00",0.853086,41.774753,16.001042,0.137399,121.0956,5929.926144,1,0.16222
7,NIFTY22JAN17450CE,"2022-01-12, 15:25:00",0.849328,27.108632,15.001042,0.176432,53.167935,1697.000375,1,0.105315
8,NIFTY22JAN17450CE,"2022-01-13, 15:25:00",0.910208,50.349024,14.001042,0.150546,-6.325944,-349.925714,1,0.162218
9,NIFTY22JAN17450CE,"2022-01-14, 15:25:00",0.929887,73.621876,13.001042,0.14066,0.0,0.0,1,0.157874


In [12]:
# Load the call-option dataset from CSV; the functions in this notebook read
# its 'ts', 'opt_symbol', 'opt_price', 'fut_price', 'delta', 'vega',
# 'days_to_expiry' and 'IV' columns.
Calls = pd.read_csv('nifty_311221_190122_1min_calls_py_vollib.csv')
# Distinct option symbols present in the data (one entry per contract).
contractList = Calls['opt_symbol'].unique()

In [14]:
%%time
contractList1 = contractList[0:70]
contractList2 = contractList[71:]
#result = pd.DataFrame(columns=['contract name','start date','delta','days_to_expiry','iv','BS delta PnL','MV delta PnL','holding period','r square'])
result = pd.DataFrame(columns=['contract name','start date','delta','MV delta','days_to_expiry','iv',
                                       'BS delta PnL','MV delta PnL','holding period','r square'])
startDate = datetime.datetime(2022,1,1,15,25)
endDate = datetime.datetime(2022,1,20,15,25)
for contractName in contractList1:
    #print(contractName)
    contract_res = BatchNumericalExperiment2(Calls,contractName,startDate,endDate,1,1,True)
    result = result.append(contract_res,ignore_index=True)
result.to_csv('generalize regression model result.csv')

Wall time: 2min 26s
