# **Option Event Study**
Code Written by: `Aman Agrawal` <br>
Email: aagrawal2@babson.edu

## Imports

In [2]:
import pandas as pd
from numpy import log as ln
from numpy import NaN as nan
from statsmodels.formula.api import ols
import os
import glob
from numpy import sqrt
# Disable pandas' chained-assignment check (SettingWithCopyWarning) for the whole notebook.
# NOTE(review): this also hides genuine assignments to temporary slices, so cells that write to a filtered frame are not warned about.
pd.options.mode.chained_assignment = None
source = os.path.abspath(os.getcwd())                                           #Absolute path of the current working directory; the data folders below are resolved relative to it

In [2]:
# Market log returns: load the workbook, align the date column name with the
# other data sets and keep only the days inside the 2012-12-30 .. 2017-12-31 window.
marketdata = (
    pd.read_excel("MktData_MLM.xlsx")
    .rename(columns={"date": "DataDate"})
    .loc[lambda frame: frame["DataDate"].isin(
        pd.date_range(start="2012-12-30", end="2017-12-31"))]
)

In [3]:
# Bank loan rate: the sheet has five leading rows to skip and a few helper
# columns that are dropped; "Date.1"/"rate" become "DataDate"/"blr".
blr = (
    pd.read_excel("BB BLR Index.xlsx", skiprows=5)
    .drop(columns=["Date", "PX_LAST.1", "PX_LAST", "Unnamed: 2"])
    .rename(columns={"Date.1": "DataDate", "rate": "blr"})
)

# FRED DGS1 series: skip the ten leading rows, rename the columns and divide
# the rate by 100 (presumably converting percent to a decimal fraction so it
# matches blr - confirm against the workbook).
fred = (
    pd.read_excel("DGS1 1Yr Constant Mat Treasury.xlsx", skiprows=10)
    .rename(columns={"observation_date": "DataDate", "DGS1": "fred"})
    .assign(fred=lambda frame: frame["fred"] / 100)
)

## Synthetic Stock Filters

In [4]:
# Switches read by synthetic_stock() (and by the DEBUG cell) when building the synthetic price.
# When True, rows whose Volume equals 0 are dropped.
volume_filter = False
# When True, rows whose OpenInterest equals 0 are dropped.
oi_filter = False
# When True, call/put pairs with a zero CallBid, PutBid, CallAsk or PutAsk are dropped.
zero_price_filter = True

These filters are applied while building the synthetic stock price. To enable a filter, set its value to `True`.
| Filter | Description|
|---------|-----------|
|`volume_filter`     | Removes zero volume contracts from the database |
|`oi_filter`         | Removes zero open interest contracts from the database |
|`zero_price_filter` | Removes call/put pairs in which any bid or ask quote is zero. This happens when the exchange provides no data for a specific contract |

## Functions

### Regression

In [5]:
def regression(df, dtype):
    """Fit a yearly market-model regression and flag significant residuals.

    For every calendar year present in the index of ``df`` (2012 is skipped)
    an OLS regression ``<dtype>LogRet ~ Mkt_Lnrtn`` is fitted on that year's
    rows, and the rows are returned with four extra columns:

    * ``ExpectedReturn`` - the regression's prediction for the row,
    * ``ResRtn``         - ``<dtype>LogRet`` minus ``ExpectedReturn``,
    * ``Tstat``          - ``ResRtn`` divided by ``sqrt(fitted.scale)``,
    * ``Sig``            - ``True`` where ``abs(Tstat) > 1.96``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by date (the index must expose ``.year``) holding the
        columns ``<dtype>LogRet`` and ``Mkt_Lnrtn``. It is not modified.
    dtype : str
        Prefix of the return column, e.g. ``"Company"`` or ``"Synth"``.

    Returns
    -------
    pandas.DataFrame
        The yearly results stacked in the order the years first appear in
        ``df``. When no year qualifies, an empty frame is returned that still
        carries the input columns, the four result columns and the input's
        index name, so callers can select the result columns without a KeyError.
    """
    data_type = dtype + "LogRet"                                                #Adding a suffix to signify Log Returns
    formula = data_type + " ~  Mkt_Lnrtn"                                       #Creating the formula for Regression
    result_columns = ["ExpectedReturn", "ResRtn", "Tstat", "Sig"]

    # 2012 is excluded from the yearly regressions (presumably it only holds
    # the trailing days needed to compute the first 2013 return - confirm).
    years = [y for y in df.index.year.unique().tolist() if y != 2012]

    yearly_frames = []                                                          #Collected per-year results, concatenated once at the end
    for y in years:                                                             #Running a for loop for yearly regressions
        df_year = df.loc[f"{y}"]                                                #Filtering for a specific year
        # NOTE(review): a year with exactly two rows passes this check but
        # leaves no residual degrees of freedom for a two-parameter fit.
        if len(df_year) <= 1:
            continue
        fitted = ols(formula, data=df_year).fit()                               #Running the regression on the filtered year
        expected = fitted.predict(exog=df_year)                                 #Calculating the expected returns
        residual = df_year[data_type] - expected                                #Calculating Residual Returns
        tstat = residual / sqrt(fitted.scale)                                   #Calculating Tstat
        # assign() builds a new frame, so the slice taken from df is never written to.
        yearly_frames.append(df_year.assign(
            ExpectedReturn=expected,
            ResRtn=residual,
            Tstat=tstat,
            Sig=abs(tstat) > 1.96,                                              #Checking significance
        ))

    if not yearly_frames:
        # Nothing to regress: keep the schema instead of returning a column-less frame.
        return df.iloc[:0].reindex(columns=[*df.columns, *result_columns])

    return pd.concat(yearly_frames)                                             #Joining the yearly regressions

### Common Stock Cleaning

In [6]:
def common_stock(com_df):
    """Build daily common-stock log returns and run the yearly regressions.

    Parameters
    ----------
    com_df : pandas.DataFrame
        Daily common-stock data with the columns ``PRC``, ``CFACPR`` and
        ``DataDate``. The frame is only read; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Output of ``regression(..., dtype="Company")`` for the rows that have
        a return and market data, indexed by ``DataDate``.
    """
    # The absolute value of the price is used (original note: DECISION PENDING).
    # Working on a separate Series keeps the caller's frame untouched.
    price = com_df.PRC.abs()
    split_factor = com_df.CFACPR.shift(1) / com_df.CFACPR

    returns = pd.DataFrame({
        "CompanyLogRet": ln((price * split_factor) / price.shift(1)),           #Calculating Split Adjusted Log-returns
        "DataDate": pd.to_datetime(com_df.DataDate),                            #Converting dates from str to datetime
    })

    merged = (
        returns
        .merge(marketdata, on="DataDate", how="left")                           #Joining common stock returns with market returns
        .rename(columns={"vwretd_ln": "Mkt_Lnrtn"})
        .set_index("DataDate")                                                  #Making DataDate as an index column
        .dropna()                                                               #Drops every row that has a missing value in any column
    )

    return regression(merged, dtype="Company")                                  #Running yearly regressions

### Syn Cleaning

In [None]:
def synthetic_stock(syn_df):
    """Build a daily synthetic stock price from option quotes and regress it.

    Each call is paired with the put that shares its expiration, date, strike
    and underlying. For every pair the synthetic quotes are

    * ``SynthAsk = CallAsk - PutBid + Strike / BuyDiscount``
    * ``SynthBid = CallBid - PutAsk + Strike / SellDiscount``

    where ``BuyDiscount = (1 + fred) ** YearFrac`` and
    ``SellDiscount = (1 + blr) ** YearFrac``. ``SynthPrice`` is the midpoint
    of the two, the daily synthetic price is the mean ``SynthPrice`` over all
    pairs of that day, and its log return is regressed on the market return.

    The module-level switches ``volume_filter``, ``oi_filter`` and
    ``zero_price_filter`` control the optional filters; the ``fred``, ``blr``
    and ``marketdata`` frames are read from module scope.

    Parameters
    ----------
    syn_df : pandas.DataFrame
        Option quotes with (at least) the columns ``DataDate``, ``Expiration``,
        ``Type`` ("call"/"put"), ``Strike``, ``UnderlyingSymbol``,
        ``UnderlyingPrice``, ``Bid``, ``Ask``, ``Volume`` and ``OpenInterest``.
        The frame is only read; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Output of ``regression(..., dtype="Synth")``, indexed by ``DataDate``.
    """
    if volume_filter:
        syn_df = syn_df[syn_df.Volume != 0]                                     #Filtering for volume if volume_filter is True

    if oi_filter:
        syn_df = syn_df[syn_df.OpenInterest != 0]                               #Filtering for open interest if oi_filter is True

    # assign() returns a new frame, so the caller's data is never written to.
    syn_df = syn_df.assign(
        DataDate=pd.to_datetime(syn_df.DataDate),                               #Converting dates from str to datetime
        Expiration=pd.to_datetime(syn_df.Expiration),                           #Converting dates from str to datetime
    )

    syn_df = syn_df.merge(fred, on="DataDate", how="left")                      #Joining fred data with df
    syn_df = syn_df.merge(blr, on="DataDate", how="left")                       #Joining blr data with df

    syn_df["Tau"] = (syn_df.Expiration - syn_df.DataDate).dt.days               #Calculating Tau
    syn_df["YearFrac"] = syn_df.Tau / 365                                       #Converting Tau into years
    syn_df["fred_nan"] = syn_df.fred / syn_df.fred                              #NaN where fred is missing (or zero), 1 otherwise

    # Without fred_nan a Tau of 0 would give a discount of 1 even when fred is
    # missing; multiplying by fred_nan forces NaN in that case.
    syn_df["BuyDiscount"] = ((1 + syn_df.fred) ** syn_df.YearFrac) * syn_df.fred_nan
    syn_df["SellDiscount"] = (1 + syn_df.blr) ** syn_df.YearFrac                #Calculating Sell Discount

    # Columns that describe one side of the pair and get a Call/Put prefix.
    side_columns = ["Last", "Bid", "Ask", "Volume", "OpenInterest",
                    "IV", "Delta", "Gamma", "Theta", "Vega"]
    call = syn_df[syn_df.Type == "call"].drop(columns="Type")\
        .rename(columns={name: "Call" + name for name in side_columns})         #Call side with prefixed columns
    put = syn_df[syn_df.Type == "put"].drop(columns="Type")\
        .rename(columns={name: "Put" + name for name in side_columns})          #Put side with prefixed columns

    pair_keys = ["Expiration", "DataDate", "Strike", "UnderlyingSymbol",
                 "UnderlyingPrice", "BuyDiscount", "SellDiscount", "YearFrac",
                 "Tau", "blr", "fred", "fred_nan"]
    syn_df = call.merge(put, how="left", on=pair_keys)                          #Merging the call and put df to make them parallel

    quote_columns = ["CallBid", "PutBid", "CallAsk", "PutAsk"]
    if zero_price_filter:                                                       #Filtering for zero price quotes if zero_price_filter is True
        syn_df = syn_df[(syn_df[quote_columns] != 0).all(axis=1)]

    # 9999 quotes are always removed (looks like a placeholder value - TODO confirm with the data vendor).
    syn_df = syn_df[(syn_df[quote_columns] != 9999).all(axis=1)].copy()

    syn_df["Buy"] = syn_df.Strike / syn_df.BuyDiscount                          #Calculating Buy price for the bond
    syn_df["Sell"] = syn_df.Strike / syn_df.SellDiscount                        #Calculating Sell price for the bond
    syn_df["SynthAsk"] = syn_df.CallAsk - syn_df.PutBid + syn_df.Buy            #Calculating Synthetic stock's ask price
    syn_df["SynthBid"] = syn_df.CallBid - syn_df.PutAsk + syn_df.Sell           #Calculating Synthetic stock's sell price
    syn_df["SynthPrice"] = (syn_df.SynthAsk + syn_df.SynthBid) / 2              #Synthetic price for one strike/expiration on a day

    # Only SynthPrice is used downstream. Averaging every column would fail on
    # pandas >= 2.0 as soon as one of them is non-numeric.
    daily = syn_df.groupby("DataDate", as_index=False)["SynthPrice"].mean()     #Calculating Synthetic stock's price on a day

    daily = marketdata.merge(daily, on="DataDate", how="left")\
        .rename(columns={"vwretd_ln": "Mkt_Lnrtn"})                             #Joining Synthetic stock prices with market returns

    # The return uses the previous market-data row, so a day without a
    # synthetic price also leaves the following day without a return.
    daily["SynthLogRet"] = ln(daily.SynthPrice / daily.SynthPrice.shift(1))     #Calculating Synthetic Stock's Log returns

    daily = daily[["DataDate", "SynthLogRet", "Mkt_Lnrtn", "SynthPrice"]]\
        .set_index("DataDate").dropna(subset=["SynthLogRet"])                   #Dropping the NA values based upon Synth Log returns

    return regression(daily, dtype="Synth")                                     #Running yearly regressions

## Summary

In [None]:
# Option files to process: every "*_ind.csv" inside <source>/ind_1.
# os.path.join keeps the pattern valid on any operating system, and sorting
# makes the processing order deterministic.
ivol_paths = sorted(glob.glob(os.path.join(source, "ind_1", "*_ind.csv")))

In [None]:
# For every option file: find the matching common-stock file, run both
# regressions, compare their significance day by day and export
# <source>/summary/<tic>_summary.csv. Tickers that cannot be processed are
# recorded in `error` as "<tic> : <reason>" strings.
error = []
for ivol_path in ivol_paths:
    tic = os.path.basename(ivol_path).split("_")[0]                             #Ticker = file name up to the first underscore
    print(tic)

    # The pattern is a prefix match, so ticker "A" also matches "AAPL..."
    # files. Prefer a file whose own ticker part equals `tic`; fall back to
    # the first match otherwise.
    crsp_matches = sorted(glob.glob(f"{source}/crsp_ind/{tic}*_ind.csv"))
    if not crsp_matches:
        error.append(f"{tic} : No crsp data")
        continue
    exact_matches = [path for path in crsp_matches
                     if os.path.basename(path).split("_")[0] == tic]
    com_path = (exact_matches or crsp_matches)[0]

    ivol = pd.read_csv(ivol_path).drop(columns="Unnamed: 0")                    #Importing option data
    com_return = pd.read_csv(com_path).drop(columns="Unnamed: 0")\
        .rename(columns={"date": "DataDate"})                                   #Importing common stock daily data, date column renamed for consistency

    common_regression = common_stock(com_return).reset_index()                  #Cleaning common stock data and returning regression
    synthetic_regression = synthetic_stock(ivol).reset_index()                  #Cleaning synthetic stock data and returning regression

    if len(synthetic_regression) == 0:
        error.append(f"{tic} : Insufficient Syn Data.")
        continue

    # An empty common regression may not carry the result columns at all.
    if len(common_regression) == 0:
        error.append(f"{tic} : Insufficient Com Data.")
        continue

    result_columns = ["DataDate", "ResRtn", "Tstat", "Sig"]                     #Filtering for required data
    synthetic_regression = synthetic_regression[result_columns].rename(
        columns={"ResRtn": "SynResRtn", "Tstat": "SynTstat", "Sig": "SynSig"})  #Renaming columns for joining the dfs
    common_regression = common_regression[result_columns].rename(
        columns={"ResRtn": "ComResRtn", "Tstat": "ComTstat", "Sig": "ComSig"})  #Renaming columns for joining the dfs

    summary = synthetic_regression.merge(common_regression,
        on="DataDate", how="right")                                             #Keeps every common-stock day; synthetic columns are NaN where missing

    summary["Equal"] = summary.SynSig == summary.ComSig                         #Checking if syn and common agree on significance
    summary["Direction"] = (summary.SynTstat / summary.ComTstat) >= 0           #Checking if the direction of both Tstats are similar

    # Redflag is True when
    #   * the significance flags differ, or
    #   * both are significant but the Tstats point in opposite directions.
    # When Equal is True both flags have the same value, so ComSig alone tells
    # whether both are significant.
    # NOTE(review): days without a synthetic observation have SynSig = NaN, so
    # Equal is False and they are flagged as well.
    both_significant = summary.Equal & summary.ComSig.astype(bool)
    summary["Redflag"] = ~summary.Equal | (both_significant & ~summary.Direction)

    print(tic, "Done", len(summary))

    summary_path = f"{source}/summary"                                          #File path for export of summary
    os.makedirs(summary_path, exist_ok=True)                                    #Creating the folder summary if it doesn't exist in the filepath

    summary.to_csv(f"{source}/summary/{tic}_summary.csv")                       #Exporting a csv file for the summary

In [None]:
# Split every "<tic> : <reason>" entry collected by the summary loop into its
# two parts and export them as error.csv with a 1-based index.
error_tic = [entry.split(":")[0].strip() for entry in error]
error_reason = [entry.split(":")[1].strip() for entry in error]

error_df = pd.DataFrame(
    {"tic": error_tic, "reason": error_reason},
    index=range(1, 1 + len(error)),
)
error_df.to_csv("error.csv")

# DEBUG

In [None]:
# DEBUG: step-by-step copy of synthetic_stock() for a single ticker (ACUR).
# It stops after the daily synthetic price has been merged onto the market
# data, so the intermediate frame can be inspected in the following cells.
syn_df = pd.read_csv(os.path.join(source, "ind_1", "ACUR_ind.csv"))\
    .drop(columns="Unnamed: 0")                                             #Importing option data (path built with os.path.join so it works on any OS)

if volume_filter:
    syn_df = syn_df[syn_df.Volume != 0]                                     #Filtering for volume if volume_filter is True

if oi_filter:
    syn_df = syn_df[syn_df.OpenInterest != 0]                               #Filtering for open interest if oi_filter is True

# assign() returns a new frame, so a filtered slice is never written to.
syn_df = syn_df.assign(
    DataDate=pd.to_datetime(syn_df.DataDate),                               #Converting dates from str to datetime
    Expiration=pd.to_datetime(syn_df.Expiration),                           #Converting dates from str to datetime
)

syn_df = syn_df.merge(fred, on="DataDate", how="left")                      #Joining fred data with df
syn_df = syn_df.merge(blr, on="DataDate", how="left")                       #Joining blr data with df

syn_df["Tau"] = (syn_df.Expiration - syn_df.DataDate).dt.days               #Calculating Tau
syn_df["YearFrac"] = syn_df.Tau / 365                                       #Converting Tau into years
syn_df["fred_nan"] = syn_df.fred / syn_df.fred                              #NaN where fred is missing (or zero), 1 otherwise

# Without fred_nan a Tau of 0 would give a discount of 1 even when fred is
# missing; multiplying by fred_nan forces NaN in that case.
syn_df["BuyDiscount"] = ((1 + syn_df.fred) ** syn_df.YearFrac) * syn_df.fred_nan
syn_df["SellDiscount"] = (1 + syn_df.blr) ** syn_df.YearFrac                #Calculating Sell Discount

call = syn_df[syn_df.Type == "call"].drop(columns="Type")                   #Splitting the df into calls and puts
call = call.rename(columns={
    "Last": "CallLast", "Bid": "CallBid", "Ask": "CallAsk",
    "Volume": "CallVolume", "OpenInterest": "CallOpenInterest",
    "IV": "CallIV", "Delta": "CallDelta", "Gamma": "CallGamma",
    "Theta": "CallTheta", "Vega": "CallVega"})                              #Renaming the columns

put = syn_df[syn_df.Type == "put"].drop(columns="Type")                     #Splitting the df into calls and puts
put = put.rename(columns={
    "Last": "PutLast", "Bid": "PutBid", "Ask": "PutAsk",
    "Volume": "PutVolume", "OpenInterest": "PutOpenInterest",
    "IV": "PutIV", "Delta": "PutDelta", "Gamma": "PutGamma",
    "Theta": "PutTheta", "Vega": "PutVega"})                                #Renaming the columns

syn_df = call.merge(put, how="left", on=[
    "Expiration", "DataDate", "Strike", "UnderlyingSymbol",
    "UnderlyingPrice", "BuyDiscount", "SellDiscount", "YearFrac",
    "Tau", "blr", "fred", "fred_nan"])                                      #Merging the call and put df to make them parallel

if zero_price_filter:                                                       #Filtering for zero price quotes if zero_price_filter is True
    syn_df = syn_df[syn_df["CallBid"] != 0]
    syn_df = syn_df[syn_df["PutBid"] != 0]
    syn_df = syn_df[syn_df["CallAsk"] != 0]
    syn_df = syn_df[syn_df["PutAsk"] != 0]

# 9999 quotes are always removed (looks like a placeholder value - TODO confirm with the data vendor).
syn_df = syn_df[syn_df["CallBid"] != 9999]
syn_df = syn_df[syn_df["PutBid"] != 9999]
syn_df = syn_df[syn_df["CallAsk"] != 9999]
syn_df = syn_df[syn_df["PutAsk"] != 9999].copy()

syn_df["Buy"] = syn_df.Strike / syn_df.BuyDiscount                          #Calculating Buy price for the bond
syn_df["Sell"] = syn_df.Strike / syn_df.SellDiscount                        #Calculating Sell price for the bond
syn_df["SynthAsk"] = syn_df.CallAsk - syn_df.PutBid + syn_df.Buy            #Calculating Synthetic stock's ask price
syn_df["SynthBid"] = syn_df.CallBid - syn_df.PutAsk + syn_df.Sell           #Calculating Synthetic stock's sell price
syn_df["SynthPrice"] = (syn_df.SynthAsk + syn_df.SynthBid) / 2              #Synthetic price for one strike/expiration on a day

# numeric_only=True keeps the daily means of all numeric columns for
# inspection while skipping text columns, which pandas >= 2.0 refuses to average.
syn_df = syn_df.groupby("DataDate").mean(numeric_only=True)                 #Calculating Synthetic stock's price on a day

syn_df = marketdata.merge(syn_df, on="DataDate", how="left")\
    .rename(columns={"vwretd_ln": "Mkt_Lnrtn"})                             #Joining Synthetic stock prices with market returns


In [None]:
# Show the days on which the averaged synthetic price is negative.
syn_df.loc[syn_df["SynthPrice"] < 0]

In [None]:

# Log return of the synthetic price relative to the previous row of syn_df.
syn_df["SynthLogRet"] = ln(syn_df["SynthPrice"] / syn_df["SynthPrice"].shift(1))


In [None]:

# Keep only the regression inputs, index them by date and drop the rows
# without a synthetic log return. This cell expects DataDate to still be a
# column, so it cannot be re-run without re-running the cells above it.
syn_df = (
    syn_df.loc[:, ["DataDate", "SynthLogRet", "Mkt_Lnrtn", "SynthPrice"]]
    .set_index("DataDate")
    .dropna(subset="SynthLogRet")
)

# Yearly market-model regressions on the synthetic log returns.
syn_df_reg = regression(syn_df, dtype="Synth")
