In [1]:
# Required Python packages
from datetime import date, datetime, timedelta
import time
import calendar
import pandas as pd
import numpy as np
import itertools
import copy
from numpy.linalg import multi_dot
from scipy.stats import norm
from scipy.stats import bernoulli
from scipy.optimize import fmin_slsqp as min

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
from statsmodels.api import OLS

from sklearn.linear_model import LassoCV
from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import AlphaSelection

from scipy.interpolate import CubicSpline

import matplotlib.pyplot as plt
import math
import pickle

#Import date-operation user-defined functions
from ipynb.fs.full.user_defined_vik_functions import get_all_monthly_option_expiries, \
                                                     find_last_thurs_date_of_month, \
                                                     prev_workday_if_holiday, find_wkly_expries,\
                                                     date_of_prev_thurs

# Import dataframe naming functions 
from ipynb.fs.full.user_defined_vik_functions import get_mthly_df_name_from_expiry

#Import data loading functions
from ipynb.fs.full.user_defined_vik_functions import load_all_mthly_data

#import traded options parameters and info
from ipynb.fs.full.user_defined_vik_functions import generate_weekly_strikes, generate_mthly_strikes

#Import pricing functions
from ipynb.fs.full.user_defined_vik_functions import BSM_call_vec_with_div, BSM_put_vec_with_div

#Import implied volatility processing functions
from ipynb.fs.full.user_defined_vik_functions import generate_weekly_iv, gen_interpolated_iv

#Import Risk-free interest rate generation functions
from ipynb.fs.full.user_defined_vik_functions import generate_weekly_ir

#Import stochastic process simulation functions
from ipynb.fs.full.user_defined_vik_functions import gbm1W_simulation, generate_covariance_from_correlation

# Import risk free interest rate function
from ipynb.fs.full.user_defined_vik_functions import get_risk_free_rate_from_exact_date


In [2]:
# ---------------------------------------------------------------------------
# Run configuration: everything a reader may want to tune lives in this cell.
# ---------------------------------------------------------------------------

# Underlying index whose options are hedged
stock_ident = "BANKNIFTY"
# stock_ident = "NIFTY"

# Moneyness region of the hedged option; the option with the nearest
# moneyness match is selected.
# ATM - at the money, ITM - in the money, OTM - out of the money
prod_moneyness = "ATM"

# Type of the hedged product: "CE" or "PE"
prod_type = "CE"
# prod_type = "PE"

# Only a single option is hedged for now; a portfolio is planned later,
# hence one asset and a 1x1 correlation matrix.
no_of_assets = 1
cor_mat = [[1]]

# Data locations.
# NOTE(review): source_path is an absolute path on one specific machine;
# it has to be edited before the notebook is run anywhere else.
source_path = "/home/jupyter-partha/Vikranth - Chapter 2/"
input_sub_path = "Input Data/mkt_data_covid_region/"
output_sub_path = "Output Data/"
input_data_path = "".join((source_path, input_sub_path))
output_data_path = "".join((source_path, output_sub_path))

# Periods of interest: {year: [months]}, months numbered 1 (Jan) .. 12 (Dec).
# E.g. periods_of_interest = {2020: [3], 2019: [11, 12]}
periods_of_interest = {
    2019: list(range(8, 13)),   # Aug .. Dec 2019
    2020: list(range(1, 8)),    # Jan .. Jul 2020
}

# Holidays, written as (year, month, day) and grouped by month
holidays_list = [
    date(year, month, day)
    for year, month, day in (
        (2019, 3, 4), (2019, 3, 21),
        (2019, 4, 17), (2019, 4, 19), (2019, 4, 29),
        (2019, 5, 1),
        (2019, 6, 5),
        (2019, 8, 12), (2019, 8, 15),
        (2019, 9, 2), (2019, 9, 10),
        (2019, 10, 2), (2019, 10, 8), (2019, 10, 21), (2019, 10, 28),
        (2019, 11, 12),
        (2019, 12, 25),
        (2020, 2, 21),
        (2020, 3, 10),
        (2020, 4, 2), (2020, 4, 6), (2020, 4, 10), (2020, 4, 14),
        (2020, 5, 1), (2020, 5, 25),
        (2020, 10, 2), (2020, 11, 16), (2020, 11, 30), (2020, 12, 25),
    )
]

# Number of simulated paths
no_of_paths = 5000

# Number of options
no_opt = 1


In [3]:
#########################
#User-defined functions #
#########################

def generate_weekly_static_pfl_weights(no_of_paths, mthly_expiries_list, \
                                       dict_wkly_expiries_each_month, \
                                       dict_ce_wkly_strikes, dict_pe_wkly_strikes, \
                                       dict_wkly_spots, dict_wkly_iv, \
                                       prod_type, prod_moneyness, output_path, \
                                       option_type = "CE", no_of_assets=1, cor_mat=[[1]],\
                                       batch_size_divisor=10, no_of_epochs = 1000, stock_ident="BANKNIFTY", \
                                       dict_mthly_strikes=None, dict_wkly_ir=None):
    """Fit, week by week, static hedge weights for one monthly option per expiry.

    For every monthly expiry and every weekly date listed under it:

    1. the spot is simulated with ``gbm1W_simulation`` from the weekly date to
       the next weekly date (or to the monthly expiry in the final week), using
       the shortest-tenor implied vol at moneyness 1.0;
    2. the hedged monthly option is valued on the simulated spots: with the
       ``BSM_*_vec_with_div`` pricers before the final week (longest-tenor vol
       at the option's own spot/strike moneyness), and with its intrinsic
       payoff in the final week;
    3. that value is regressed with ``Lasso(alpha=1.0)`` on the payoffs of the
       weekly calls and puts. The coefficients are the option weights and the
       intercept is the cash weight.

    Progress, R-square and the fitted coefficients are printed for every week.

    Parameters
    ----------
    no_of_paths : number of simulated paths, forwarded to ``gbm1W_simulation``.
    mthly_expiries_list : iterable of monthly expiry dates (must support
        ``strftime`` and subtraction).
    dict_wkly_expiries_each_month : {"%d-%b-%Y" monthly expiry: [weekly dates]}.
    dict_ce_wkly_strikes, dict_pe_wkly_strikes : {"%d-%b-%Y" monthly expiry:
        per-week collections of call / put strikes}.
    dict_wkly_spots : {"%d-%b-%Y" monthly expiry: per-week spot}.
    dict_wkly_iv, prod_type, prod_moneyness, batch_size_divisor, no_of_epochs :
        not used by the current implementation; kept so existing callers keep
        working.
    output_path : directory prefix holding
        ``"A3_" + stock_ident + "_implied_vol_surface.csv"``; the file must
        provide the columns Date, T, Tenor, Moneyness and Impl_Vol.
    option_type : "CE" or "PE", the type of the hedged monthly option.
    no_of_assets, cor_mat : forwarded unchanged to ``gbm1W_simulation``.
    stock_ident : index identifier used in the vol-surface file name.
    dict_mthly_strikes : {"%d-%b-%Y" monthly expiry: strike of hedged option}.
        When None, the notebook-level variable of the same name is used
        (the historical behaviour).
    dict_wkly_ir : {"%d-%b-%Y" monthly expiry: per-week interest rate}.
        When None, the notebook-level variable of the same name is used
        (the historical behaviour).

    Returns
    -------
    (df_ce_pfl_weights, df_pe_pfl_weights, df_cash_pfl_weights)
        Call and put frames have columns Date, Strike, Pfl_weights; the cash
        frame has columns Date, Pfl_weights. Empty frames with those columns
        are returned when there is nothing to process.

    Raises
    ------
    ValueError : ``option_type`` is neither "CE" nor "PE".
    NameError : a strikes/rates dictionary was neither passed nor defined at
        notebook level.
    """
    # Previously any unknown option_type silently fell through to the put payoff.
    if option_type not in ("CE", "PE"):
        raise ValueError('option_type must be "CE" or "PE", got %r' % (option_type,))

    # Backward compatibility: older callers rely on these two dictionaries
    # being notebook globals rather than arguments.
    if dict_mthly_strikes is None or dict_wkly_ir is None:
        notebook_globals = globals()
        try:
            if dict_mthly_strikes is None:
                dict_mthly_strikes = notebook_globals["dict_mthly_strikes"]
            if dict_wkly_ir is None:
                dict_wkly_ir = notebook_globals["dict_wkly_ir"]
        except KeyError as missing:
            raise NameError("name %s is not defined; pass it as an argument" % missing) from None

    ce_pfl_weights = []
    pe_pfl_weights = []
    cash_weights = []

    # The vol surface file is the same for every month and week, so it is read
    # only once, on first use (no file access at all when there is nothing to do).
    df_full_iv = None

    # The simulation always starts at the money.
    sim_moneyness = 1.0

    for each_month in mthly_expiries_list:
        month_key = each_month.strftime("%d-%b-%Y")
        strike = dict_mthly_strikes[month_key]
        week_list = dict_wkly_expiries_each_month[month_key]
        no_of_weeks = len(week_list)

        for week in range(0, no_of_weeks):
            is_last_week = (week == no_of_weeks - 1)

            s_t = dict_wkly_spots[month_key][week]
            r = np.array([dict_wkly_ir[month_key][week]])

            # Simulate from this weekly date to the next one; the final week
            # runs up to the monthly expiry itself.
            sim_start_date = week_list[week]
            sim_end_date = each_month if is_last_week else week_list[week + 1]
            start_label = sim_start_date.strftime("%d-%b-%Y")

            if df_full_iv is None:
                df_full_iv = pd.read_csv(output_path + "A3_" + stock_ident + "_implied_vol_surface.csv")
            df_date = df_full_iv[df_full_iv['Date'] == start_label]

            # Moneyness of the hedged option, used to pick its pricing vol.
            price_moneyness = float(s_t/float(strike))

            # The surface is already sorted when created in code A3; sorting
            # again keeps Moneyness increasing within a tenor for np.interp.
            df_date = df_date.sort_values(['T', 'Moneyness'], ascending=[False, True]).reset_index()

            # Tenors in ascending order: first = shortest, last = longest.
            # The final week has a single tenor, so first and last coincide.
            t_list = np.sort(np.unique(np.array(df_date['T'])))
            tenor_list = [str(int(round(t* 365))) + "D" for t in t_list]
            print(each_month, " Week: ", week)
            print(tenor_list)

            if (len(tenor_list) > 2):
                print("There are more than 2 tenors in the vol surface - Please check!")

            # Simulation vol: shortest tenor (vol as of one week), at the money.
            vol_list = [_interp_vol_at_moneyness(df_date, tenor_list[0], sim_moneyness)]

            sim_stock_mat = gbm1W_simulation(no_of_paths, sim_start_date, sim_end_date, \
                                             no_of_assets, s_t, r, vol_list, cor_mat)

            call_strikes = np.sort(np.array(dict_ce_wkly_strikes[month_key][week])).reshape(1,-1)
            put_strikes = np.sort(np.array(dict_pe_wkly_strikes[month_key][week])).reshape(1,-1)
            no_of_calls = call_strikes.size
            no_of_puts = put_strikes.size
            no_ind_vars = no_of_calls + no_of_puts

            # Column 1 of the simulated matrix is taken as the spot at sim_end_date
            # (presumably column 0 is the starting spot - confirm in gbm1W_simulation).
            stock_vec = sim_stock_mat[:, 1]
            r_f = get_risk_free_rate_from_exact_date(sim_start_date)
            # Time left on the monthly option at the end of the simulated week.
            dt = float((each_month - sim_end_date).days) / 365

            # Pricing vol: longest tenor, at the hedged option's own moneyness.
            vol = _interp_vol_at_moneyness(df_date, tenor_list[-1], price_moneyness)

            if is_last_week:
                # Simulation ends at the monthly expiry: value = intrinsic payoff.
                if option_type == "CE":
                    option_value = np.maximum(stock_vec - strike, 0)
                else:
                    option_value = np.maximum(strike - stock_vec, 0)
            elif option_type == "CE":
                option_value = BSM_call_vec_with_div(stock_vec, strike, r, r_f, vol, dt)
            else:
                option_value = BSM_put_vec_with_div(stock_vec, strike, r, r_f, vol, dt)

            # Regression target: value of the hedged option on every path.
            target = np.asarray(option_value).reshape(-1, 1)

            # Regressors: payoff of every weekly call, then every weekly put,
            # one row per simulated path.
            stock_col = stock_vec.reshape(-1,1)
            payoffs = np.concatenate((np.maximum(stock_col-call_strikes,0), np.maximum(put_strikes-stock_col,0)), axis=1)

            print("SKLearn Linear regression - Lasso")
            lin_model_lasso = Lasso(alpha=1.0).fit(payoffs, target)
            pfl_weights = lin_model_lasso.coef_
            pfl_weights = pfl_weights.reshape(-1)
            cash_weight = lin_model_lasso.intercept_
            print("R-Square: ", lin_model_lasso.score(payoffs, target))
            print("Coefficients:")
            print(pfl_weights)
            print(cash_weight)

            ############################################################################
            ### Portfolio weights of the shorter-term options from the Lasso regression ###
            ############################################################################

            # Call portfolio weights: the first no_of_calls coefficients
            call_strikes = call_strikes.reshape(-1)
            ce_dates_list = np.array([start_label for i in range(0, no_of_calls)]).reshape(-1)
            ce_wt_list = pfl_weights[0:no_of_calls]
            df_ce = pd.DataFrame({"Date": ce_dates_list, "Strike": call_strikes, "Pfl_weights": ce_wt_list})
            ce_pfl_weights.append(df_ce)

            # Put portfolio weights: the remaining coefficients
            put_strikes = put_strikes.reshape(-1)
            pe_dates_list = np.array([start_label for i in range(0, no_of_puts)]).reshape(-1)
            pe_wt_list = pfl_weights[no_of_calls:no_ind_vars]
            df_pe = pd.DataFrame({"Date": pe_dates_list, "Strike":put_strikes, "Pfl_weights": pe_wt_list})
            pe_pfl_weights.append(df_pe)

            # Cash component: the regression intercept
            df_cash = pd.DataFrame({"Date":[start_label], "Pfl_weights":cash_weight})
            cash_weights.append(df_cash)

    # Nothing was processed: return empty, correctly shaped frames instead of
    # failing on unbound names or on pd.concat of an empty list.
    if not cash_weights:
        return (pd.DataFrame(columns=["Date", "Strike", "Pfl_weights"]),
                pd.DataFrame(columns=["Date", "Strike", "Pfl_weights"]),
                pd.DataFrame(columns=["Date", "Pfl_weights"]))

    # Concatenate once, after all months are done.
    df_ce_pfl_weights = pd.concat(ce_pfl_weights, axis=0)
    df_pe_pfl_weights = pd.concat(pe_pfl_weights, axis=0)
    df_cash_pfl_weights = pd.concat(cash_weights, axis=0)

    # Writing the results to disk is currently disabled:
#     df_ce_pfl_weights.to_csv(output_path  + "B1A_LR_" + stock_ident + "_" + prod_moneyness + "_" + prod_type + "_CE_Pfl_weights.csv", index = False)
#     df_pe_pfl_weights.to_csv(output_path + "B1A_LR_" + stock_ident + "_" + prod_moneyness + "_" + prod_type + "_PE_Pfl_weights.csv", index = False)
#     df_cash_pfl_weights.to_csv(output_path  + "B1A_LR_" + stock_ident + "_" + prod_moneyness + "_" + prod_type + "_Cash_Pfl_weights.csv", index = False)

    return df_ce_pfl_weights, df_pe_pfl_weights, df_cash_pfl_weights


def _interp_vol_at_moneyness(df_date, tenor, moneyness):
    """Linearly interpolate Impl_Vol against Moneyness over the rows of one tenor."""
    df_tenor = df_date[df_date['Tenor'] == tenor]
    return np.interp(moneyness, np.array(df_tenor["Moneyness"]), np.array(df_tenor["Impl_Vol"]))

def output_strikes_hedged(prod_type, prod_moneyness, dict_mthly_strikes, stock_ident, output_path):
    """Tabulate the strike of the hedged option for every monthly expiry.

    Parameters
    ----------
    prod_type : value repeated in the ``prod_type`` column of every row.
    dict_mthly_strikes : mapping of expiry key -> strike; one output row per
        key, in the mapping's iteration order.
    prod_moneyness, stock_ident, output_path : only needed by the CSV export,
        which is currently disabled; they do not affect the returned frame.

    Returns
    -------
    pandas.DataFrame with columns ``Date``, ``prod_type`` and ``Strike``.
    """
    expiry_keys = list(dict_mthly_strikes.keys())
    hedged_table = pd.DataFrame({
        "Date": expiry_keys,
        "prod_type": [prod_type for _ in expiry_keys],
        "Strike": [dict_mthly_strikes[key] for key in expiry_keys],
    })
    # CSV export is currently disabled:
#     hedged_table.to_csv(output_path + "B1A_LR_" + stock_ident + "_" + prod_moneyness + "_" + prod_type + "_strikes_hedged.csv", index = False)
    return hedged_table


In [5]:

# Monthly expiries inside the configured periods; one monthly option is hedged per expiry
mthly_expiries_list = get_all_monthly_option_expiries(periods_of_interest, holidays_list)

# Weekly expiry dates grouped under each monthly expiry
dict_wkly_expiries_each_month = find_wkly_expries(mthly_expiries_list, holidays_list)

# Monthly market data (futures, calls and puts)
mthly_mkt_data = load_all_mthly_data(
    mthly_expiries_list,
    input_data_path,
    holidays_list,
    prod_type_lists=["FUT", "CE", "PE"],
    stock_ident=stock_ident,
)

# Weekly strikes (and spots) of the short-term options used to hedge the monthly option
dict_ce_wkly_strikes, dict_pe_wkly_strikes, dict_wkly_spots = generate_weekly_strikes(
    dict_wkly_expiries_each_month,
    input_data_path,
    stock_ident = stock_ident,
)

# Monthly strikes of the hedged option, chosen for the required moneyness at the
# beginning of the month.
# NOTE: the name must stay `dict_mthly_strikes`; generate_weekly_static_pfl_weights
# picks it up from the notebook namespace.
dict_mthly_strikes = generate_mthly_strikes(
    mthly_mkt_data,
    prod_moneyness,
    prod_type,
    holidays_list,
    stock_ident=stock_ident,
)

# Weekly implied vols for the Black-Scholes inputs and the Monte-Carlo simulation of stock paths
dict_wkly_iv = generate_weekly_iv(
    dict_wkly_expiries_each_month,
    dict_wkly_spots,
    dict_mthly_strikes,
    prod_type,
    output_path=output_data_path,
    atm_ind=0,
    stock_ident=stock_ident,
)

# Weekly risk-free interest rates from futures.
# Author's note: this function is to be revisited when the risk-free-rate
# treatment changes; for now the rate is kept constant.
# NOTE: the name must stay `dict_wkly_ir`; generate_weekly_static_pfl_weights
# picks it up from the notebook namespace.
dict_wkly_ir = generate_weekly_ir(dict_wkly_expiries_each_month, mthly_mkt_data, stock_ident)

# Fit the static portfolio weights (Lasso regression inside
# generate_weekly_static_pfl_weights) and time the run
start_time = time.time()

ce_pfl_wts, pe_pfl_wts, cash_pfl_wts = generate_weekly_static_pfl_weights(
    no_of_paths,
    mthly_expiries_list,
    dict_wkly_expiries_each_month,
    dict_ce_wkly_strikes,
    dict_pe_wkly_strikes,
    dict_wkly_spots,
    dict_wkly_iv,
    prod_type,
    prod_moneyness,
    output_data_path,
    option_type = prod_type,
    no_of_assets=no_of_assets,
    cor_mat=cor_mat,
    stock_ident=stock_ident,
)
print("--- %s seconds ---" % (time.time() - start_time))

# Strike of the option that is hedged, per monthly expiry
df_hedged_prod = output_strikes_hedged(
    prod_type,
    prod_moneyness,
    dict_mthly_strikes,
    stock_ident,
    output_data_path,
)



2019-08-29  Week:  0
['7D', '35D']
SKLearn Linear regression - Lasso
R-Square:  0.9999847677675474
Coefficients:
[ 0.67396978 -0.02639166  0.0064575   0.00208379  0.01118675  0.02804569
  0.02964511  0.02775923  0.02569051  0.02393743  0.02243571  0.02067181
  0.01936691  0.01488105  0.02213806  0.03462052  0.08580527  0.0964854
  0.00908345  0.0063352   0.04959398  0.04652432 -0.          0.02173328
  0.15317369 -0.15602126 -0.48152423 -0.00982884  0.03976916  0.03026668
  0.02256979]
[444.30492767]
2019-08-29  Week:  1
['7D', '28D']
SKLearn Linear regression - Lasso
R-Square:  0.9999567932296343
Coefficients:
[ 0.43754168 -0.03199484  0.01526065  0.03510589  0.0359873   0.03394218
  0.03354802  0.03314216  0.03294181  0.03217784  0.02985931  0.02766049
  0.03101306  0.03041672  0.          0.12142074  0.06995949  0.
  0.00278676  0.          0.0312068   0.04867064  0.01256232 -0.01724649
  0.02705551  0.09561982  0.04591224 -0.13940134 -0.22172981 -0.01445725
  0.02675201]
[243.87629

R-Square:  0.9999745075649601
Coefficients:
[ 0.59973377 -0.03590902  0.00356661  0.00434592  0.01065953  0.02059369
  0.02377754  0.02294177  0.02198459  0.0208217   0.02059352  0.02194419
  0.02265233  0.0171899   0.00720625  0.00727484  0.06080598  0.04255954
  0.04662093  0.01506261  0.0668904   0.06918282  0.00167934  0.03292753
  0.06576973  0.02752168 -0.02967626 -0.02377788  0.06622551  0.11794066
  0.01393758 -0.2182944  -0.30368672 -0.05564606  0.03471607  0.03136516
  0.02115459  0.01962888]
[470.49095068]
2019-11-28  Week:  1
['7D', '21D']
SKLearn Linear regression - Lasso
R-Square:  0.9999919639093521
Coefficients:
[ 7.02794596e-01 -2.51399120e-02  2.44377008e-02  3.15246873e-02
  2.66565213e-02  2.32245072e-02  2.15216809e-02  2.03208803e-02
  1.92265544e-02  1.79660741e-02  1.57244800e-02  1.30601446e-02
  1.33084283e-02  2.05325400e-02  7.52245603e-03  4.13631315e-02
  1.12358848e-01  2.71482696e-02  7.10975180e-02  1.33281475e-03
  4.25457130e-03  6.57646693e-02  5.733

R-Square:  0.9999857796936934
Coefficients:
[ 8.56319970e-01 -3.53216943e-02  2.75802099e-03  0.00000000e+00
  0.00000000e+00  6.62334531e-03  1.98995359e-02  2.14303130e-02
  1.88379401e-02  1.63782127e-02  1.45087225e-02  1.25954104e-02
  1.12792830e-02  9.84443463e-03  2.35520221e-02  1.50482486e-02
  0.00000000e+00  0.00000000e+00  9.17211800e-02  5.08946192e-02
  1.97834020e-02  5.54821208e-02  3.74310855e-02  4.62465906e-04
  5.29420993e-02  8.02864628e-02  1.66290948e-02 -3.59058224e-02
  1.07199441e-01  1.55527422e-01 -2.49832231e-01 -5.64486063e-01
 -4.22632740e-02  4.27510216e-02  3.51520793e-02  2.50279241e-02
  2.18743476e-02]
[609.2556442]
2020-02-27  Week:  2
['7D', '14D']
SKLearn Linear regression - Lasso
R-Square:  0.9999789457103263
Coefficients:
[ 0.94097098 -0.03956079  0.00809012  0.00185653  0.00257308  0.00800176
  0.01752753  0.01591472  0.01185091  0.00889094  0.00690738  0.00498368
  0.00305281  0.00418259  0.00244358  0.          0.17543489  0.03773645
  0.028

  positive)


R-Square:  0.9996871092269858
Coefficients:
[-1.17334331e-04 -3.83330336e-05 -3.62210458e-05  5.16350566e-05
  1.83125834e-04  4.75016368e-05  1.36553056e-04  5.06647117e-05
  7.29603246e-06 -7.81375426e-06 -1.65865504e-04 -1.55880611e-04
 -3.41078153e-04 -7.63567019e-05  7.32316401e-04  7.97267704e-04
 -4.64642460e-04 -2.51110181e-03 -8.07139440e-04  5.59101825e-04
  1.19869430e-03  4.24664002e-03  1.76009212e-03 -9.55130844e-04
 -7.39145480e-03 -4.06817436e-03 -1.50847069e-03  1.49127694e-02
  1.52336553e-02  1.68267808e-03 -1.16570317e-02 -1.18767077e-01
  6.51550842e-01  6.05002394e-01 -1.49586674e-01 -0.00000000e+00
 -0.00000000e+00 -0.00000000e+00  0.00000000e+00 -8.61287323e-05
 -2.96032718e-05  1.51651788e-04  1.08538579e-04  2.02475946e-05
  1.60338141e-05  1.43928538e-06 -4.70367704e-05 -5.77799989e-05
 -2.97007600e-05 -2.53483181e-05 -1.95902933e-05 -1.04351368e-05
 -7.75932190e-07]
[0.1094059]
2020-04-30  Week:  0
['6D', '35D']
SKLearn Linear regression - Lasso
R-Square:  0

R-Square:  0.9999445106594039
Coefficients:
[ 9.94483803e-01 -9.62735098e-02 -3.43499071e-02 -3.88844231e-03
  7.38516661e-03  1.26757391e-02  1.35820241e-02  1.27998007e-02
  1.12969942e-02  9.51310564e-03  7.90917653e-03  6.93588314e-03
  6.35783073e-03  5.94539355e-03  5.83652356e-03  5.65592289e-03
  9.15191061e-03  8.08803723e-03  2.85032425e-03  6.80167455e-03
  3.37066790e-03  1.66241007e-03  6.89139861e-04  6.88256637e-02
  6.36872021e-02  1.19859143e-01  8.79824253e-02  2.19208528e-01
 -2.74380391e-02 -4.24068986e-02  5.24011323e-02  1.19324721e-01
  2.31967622e-01  2.02745921e-02 -1.03960344e-01 -2.22512269e-01
 -2.78427928e-01 -2.30368428e-01 -9.08292433e-02 -1.28046439e-02]
[1439.99983813]
2020-06-25  Week:  3
['7D']
SKLearn Linear regression - Lasso
R-Square:  0.9999416291527571
Coefficients:
[ 1.16965277e+00 -1.26072780e-01 -4.17148412e-02 -1.08240492e-02
  1.06966039e-03  2.84221973e-03  2.86725681e-03  1.76420111e-03
  5.77300685e-04  0.00000000e+00 -0.00000000e+00 -0.0