# **Option Event Study**
Code Written by: `Aman Agrawal` <br>
Email: aagrawal2@babson.edu

## Imports

In [None]:
import pandas as pd
from numpy import log as ln
import numpy as np
from statsmodels.formula.api import ols
import os
import glob
from numpy import sqrt
from subprocess import Popen
#pd.options.mode.chained_assignment = None
# Absolute path of the current working directory; the data sub-folders are located relative to it.
source = os.path.abspath(os.curdir)

In [None]:
# Market log returns: load the workbook, rename the date column so it matches
# the other data sets, and keep only the rows whose DataDate is one of the
# calendar days from 2012-12-30 through 2017-12-31.
marketdata = (
    pd.read_excel("MktData_MLM.xlsx")
    .rename(columns={"date": "DataDate"})
    .loc[lambda frame: frame["DataDate"].isin(
        pd.date_range(start="2012-12-30", end="2017-12-31"))]
)

In [None]:
# Bank loan rate: skip the 5 header rows of the export, drop the columns that
# are not needed and rename the remaining ones to DataDate / blr.
blr = (
    pd.read_excel("BB BLR Index.xlsx", skiprows=5)
    .drop(columns=["Date", "PX_LAST.1", "PX_LAST", "Unnamed: 2"])
    .rename(columns={"Date.1": "DataDate", "rate": "blr"})
)

# 1-year constant-maturity treasury rate: skip the 10 header rows of the
# export and rename the columns to DataDate / fred.
fred = (
    pd.read_excel("DGS1 1Yr Constant Mat Treasury.xlsx", skiprows=10)
    .rename(columns={"observation_date": "DataDate", "DGS1": "fred"})
)
# Scale by 1/100 (presumably percent -> decimal, so it is comparable to blr -- confirm).
fred["fred"] = fred["fred"] / 100
# Carry the last available observation forward over missing values.
# `ffill()` replaces the deprecated `fillna(method="ffill")` spelling.
fred = fred.ffill()

## Counters ##

In [None]:
# Running row counts for the synthetic-stock (syn_*) and common-stock (com_*)
# data at each stage of the pipeline; all start at zero.
syn_before_cleaning = com_before_cleaning = 0
syn_after_cleaning = com_after_cleaning = 0
syn_before_regression = com_before_regression = 0
syn_after_regression = com_after_regression = 0

## Synthetic Stock Filters

In [None]:
# Switches for the option-data filters applied before the synthetic price is built.
volume_filter, oi_filter = False, False          # zero-volume / zero-open-interest filters (off)
zero_price_filter, filter_9999 = True, True      # zero bid/ask and 9999 bid/ask filters (on)

These filters are applied to the option data before the synthetic stock price is created. To enable a filter, set its value to `True`.

| Filter | Description |
|---------|-----------|
|`volume_filter`     | Removes zero volume contracts from the database |
|`oi_filter`         | Removes zero open interest contracts from the database |
|`zero_price_filter` | Removes contracts with a zero bid or ask price. This happens when the exchange provides no data for a specific contract |
|`filter_9999`       | Removes contracts whose bid or ask price equals 9999 |


## Functions

In [None]:
def regression(df, dtype, *, min_obs=252, critical_value=1.96):
    """Run a yearly market-model regression and flag significant residual returns.

    For every calendar year present in ``df`` (2012 is always excluded) that
    has at least ``min_obs`` rows, ``<dtype>LogRet`` is regressed on
    ``Mkt_Lnrtn`` with OLS. The rows of that year are returned with four
    extra columns:

    * ``ExpectedReturn`` -- fitted value of the regression
    * ``ResRtn``         -- actual log return minus ``ExpectedReturn``
    * ``Tstat``          -- ``ResRtn`` divided by ``sqrt(fitted.scale)``
    * ``Sig``            -- ``True`` where ``abs(Tstat) > critical_value``

    Parameters
    ----------
    df : pandas.DataFrame
        Must be indexed by a ``DatetimeIndex`` and contain the columns
        ``<dtype>LogRet`` and ``Mkt_Lnrtn``. It is not modified.
    dtype : str
        Prefix of the log-return column, e.g. ``"Synth"`` for ``SynthLogRet``.
    min_obs : int, keyword-only
        Minimum number of rows a year needs in order to be regressed.
        Years with fewer rows are skipped. Defaults to 252.
    critical_value : float, keyword-only
        Threshold applied to ``abs(Tstat)``. Defaults to 1.96.

    Returns
    -------
    pandas.DataFrame
        The yearly results stacked in year order, or an empty DataFrame when
        no year qualifies.
    """
    data_type = dtype + "LogRet"                                                #Adding a suffix to signify Log Returns
    formula = data_type + " ~  Mkt_Lnrtn"                                       #Creating the formula for Regression

    yearly_frames = []                                                          #Per-year results, concatenated once at the end
    for y in df.index.year.unique().tolist():
        if y == 2012:                                                           #2012 is never regressed
            continue
        df_year = df.loc[f"{y}"]                                                #Filtering for a specific year
        if len(df_year) < min_obs:                                              #Removing incomplete years
            continue
        # Work on a copy: assigning columns to a slice of `df` triggers
        # SettingWithCopyWarning and could write through to the caller's frame.
        df_year = df_year.copy()
        fitted = ols(formula, data=df_year).fit()                               #Running the regression on the filtered year
        expected = fitted.predict(exog=df_year)                                 #Calculating the expected returns
        df_year["ExpectedReturn"] = expected
        df_year["ResRtn"] = df_year[data_type] - expected                       #Calculating Residual Returns
        # statsmodels documents `scale` as the estimated residual variance of the fit.
        df_year["Tstat"] = df_year["ResRtn"] / sqrt(fitted.scale)               #Calculating Tstat
        df_year["Sig"] = abs(df_year["Tstat"]) > critical_value                 #Checking significance
        yearly_frames.append(df_year)

    if not yearly_frames:                                                       #pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(yearly_frames)

In [9]:
# Per-ticker pipeline. For every option file in <source>/ind_1:
#   1. load the option data and the matching CRSP file from <source>/crsp,
#   2. compute the common stock's log returns from the CRSP RET column,
#   3. filter the option quotes and build a synthetic stock price per
#      strike/expiration from put-call parity (call - put + discounted strike),
#   4. average the synthetic price per day and compute its split-adjusted
#      log returns.
# Tickers that cannot be processed are recorded in `error` / `c_error` and skipped.
ivol_paths = glob.glob(os.path.join(source, "ind_1", "*_ind.csv"))
c_error = []                                    #Tickers whose RET column could not be converted to log returns
error = []                                      #Tickers skipped because the CRSP file is missing
for i in ivol_paths:
    tic = os.path.basename(i).split("_")[0]     #File names look like <ticker>_ind.csv
    print(tic)

    #Importing ivol and crsp data
    syn_df = pd.read_csv(i).drop(columns="Unnamed: 0")

    try:
        com_df = pd.read_csv(os.path.join(source, "crsp", f"{tic}_ind.csv")).drop(columns="Unnamed: 0")
    except FileNotFoundError:
        print("Not found ",tic)
        error.append(f"{tic} : No crsp data")
        continue

    #Keeping count
    com_before_cleaning += len(com_df)

    #Cleaning common stock data
    com_df.PRC = abs(com_df.PRC)
    com_df = com_df.rename(columns={'date': 'DataDate'})
    com_df.DataDate = pd.to_datetime(com_df.DataDate)
    com_df.DISTCD = com_df.DISTCD.fillna(value="nan")

    #Duplicate date check (only reported, the rows are kept)
    dupe = com_df[com_df.DataDate.duplicated(keep=False)]
    if not dupe.empty:
        print("                        ", tic)

    cfacpr = com_df[['DataDate', 'CFACPR']]     #Price adjustment factor, used for the synthetic returns below

    #Calculating Common Stock's Log returns from the CRSP return column.
    #The column is stored as "CompanyLogRet" so it does not get confused with
    #the synthetic stock's "SynthLogRet".
    try:
        # A "C" in the first row (presumably a CRSP "no return available"
        # code -- confirm) is treated as a zero return.
        if com_df.loc[0, "RET"] == "C":
            com_df.loc[0, "RET"] = 0
        # RET is read as text whenever it contains a letter code, so convert
        # it explicitly; any remaining non-numeric value raises ValueError.
        com_df["CompanyLogRet"] = ln(pd.to_numeric(com_df["RET"]) + 1)
    except (KeyError, TypeError, ValueError):
        # Record the ticker for manual inspection and move on to the next one.
        print("error")
        c_error.append(tic)
        continue

    #Keeping count
    syn_before_cleaning += len(syn_df)

    #filtering out the dataframe
    if volume_filter:
        syn_df = syn_df[syn_df.Volume != 0]

    if oi_filter:
        syn_df = syn_df[syn_df.OpenInterest != 0]

    if zero_price_filter:
        syn_df = syn_df[syn_df["Bid"] != 0]
        syn_df = syn_df[syn_df["Ask"] != 0]

    if filter_9999:
        syn_df = syn_df[syn_df["Bid"] != 9999]
        syn_df = syn_df[syn_df["Ask"] != 9999]

    #Converting dates from str to datetime
    syn_df.DataDate = pd.to_datetime(syn_df.DataDate)
    syn_df.Expiration = pd.to_datetime(syn_df.Expiration)

    #Joining fred and blr data with df
    syn_df = syn_df.merge(fred, on='DataDate', how="left")
    syn_df = syn_df.merge(blr, on='DataDate', how="left")

    #keeping count
    syn_after_cleaning += len(syn_df)

    #Calculating Buy and Sell Discount with the help of Tau and Fraction of years
    syn_df["Tau"] = (syn_df.Expiration - syn_df.DataDate).dt.days          #Days to expiration
    syn_df["YearFrac"] = syn_df.Tau / 365
    syn_df["BuyDiscount"] = (1 + syn_df.fred) ** syn_df.YearFrac
    syn_df["SellDiscount"] = (1 + syn_df.blr) ** syn_df.YearFrac

    #Reorganizing the data into desirable form: one row per contract pair,
    #with the call columns and the put columns side by side.
    call = syn_df[syn_df.Type == "call"].drop(columns="Type")
    call = call.rename(columns={
        'Last': 'CallLast', 'Bid': "CallBid", 'Ask': "CallAsk",
        'Volume': "CallVolume", 'OpenInterest': 'CallOpenInterest',
        'IV': "CallIV", 'Delta': 'CallDelta', 'Gamma': 'CallGamma',
        'Theta': 'CallTheta', 'Vega': 'CallVega',
    })

    put = syn_df[syn_df.Type == "put"].drop(columns="Type")
    put = put.rename(columns={
        'Last': 'PutLast', 'Bid': "PutBid", 'Ask': "PutAsk",
        'Volume': "PutVolume", 'OpenInterest': 'PutOpenInterest',
        'IV': "PutIV", 'Delta': 'PutDelta', 'Gamma': 'PutGamma',
        'Theta': 'PutTheta', 'Vega': 'PutVega',
    })

    syn_df = call.merge(put, how="left", on=[
        "Expiration", "DataDate", "Strike", "UnderlyingSymbol",
        "UnderlyingPrice", "BuyDiscount", "SellDiscount", "YearFrac",
        "Tau", "blr", "fred",
    ])

    #Calculating average synth ask price
    syn_df["Buy"] = syn_df.Strike / syn_df.BuyDiscount                     #Strike discounted at the fred rate
    syn_df["Sell"] = syn_df.Strike / syn_df.SellDiscount                   #Strike discounted at the blr rate

    syn_df["SynthAsk"] = syn_df.CallAsk - syn_df.PutBid + syn_df.Buy
    syn_df["SynthBid"] = syn_df.CallBid - syn_df.PutAsk + syn_df.Sell

    syn_df["SynthPrice"] = (syn_df.SynthAsk + syn_df.SynthBid) / 2
    # Daily average over all strikes/expirations. numeric_only=True skips the
    # text columns (e.g. UnderlyingSymbol), which newer pandas versions refuse
    # to average instead of silently dropping.
    syn_df = syn_df.groupby("DataDate").mean(numeric_only=True)

    #Joining the Syn data with market returns on MktRtn's DataDate. And bringing in the adjustment factor for splits.
    syn_df = marketdata.merge(syn_df, on='DataDate', how="left").rename(columns={'vwretd_ln': 'Mkt_Lnrtn'})
    syn_df = syn_df.merge(cfacpr, on="DataDate", how="right")

    #Calculating Synthetic Stock's Log returns (price scaled by the ratio of
    #the previous and current adjustment factor)
    syn_df["SynthLogRet"] = ln((syn_df.SynthPrice * (syn_df.CFACPR.shift(1) / syn_df.CFACPR)) / syn_df.SynthPrice.shift(1))

    #Cleaning the data by removing unwanted columns and dropping NA values in Synth Log Return column.
    syn_df = syn_df[['DataDate', "SynthLogRet", 'Mkt_Lnrtn', "SynthPrice", "UnderlyingPrice"]]\
        .set_index("DataDate").dropna(subset=["SynthLogRet"])

    #Keeping count
    syn_before_regression += len(syn_df)


In [None]:
# Manual review: open the CRSP file of each ticker collected in c_error, one
# at a time, and wait for the user before opening the next one.
for tic in c_error:
    crsp_file = f"{source}\\crsp\\{tic}_ind.csv"
    os.startfile(crsp_file)
    input("Press enter to continue...")

In [None]:
# Prepare the common-stock returns for the regression: keep only the return
# and date columns, attach the market return and index the result by date.
# `.copy()` makes the column selection an independent frame, so the date
# conversion below does not assign into a slice (SettingWithCopyWarning).
com_df = com_df[['CompanyLogRet', 'DataDate']].copy()                       #Getting rid of extra data
com_df["DataDate"] = pd.to_datetime(com_df["DataDate"])                     #Converting dates from str to datetime (once is enough)
com_df = (
    com_df.merge(marketdata, on='DataDate', how="left")                     #Joining common stock returns with market returns
    .rename(columns={'vwretd_ln': 'Mkt_Lnrtn'})
    .set_index("DataDate")                                                  #Making DataDate as an index column
    .dropna()                                                               #Dropping rows with any missing value
)