# **Option Event Study**
Code Written by: `Aman Agrawal` <br>
Email: aagrawal2@babson.edu

## Imports

In [None]:
import pandas as pd
from numpy import log as ln
from numpy import nan
from statsmodels.formula.api import ols
import os
import glob
from numpy import sqrt
from subprocess import Popen
pd.options.mode.chained_assignment = None
source = os.path.abspath(os.getcwd())

In [None]:
def import_csv(path,low_memory_=True):
    """Load a CSV file and normalise its date column.

    Parameters
    ----------
    path : path of the CSV file, passed straight to ``pandas.read_csv``.
    low_memory_ : forwarded to ``pandas.read_csv`` as ``low_memory``.

    Returns
    -------
    pandas.DataFrame with any ``Unnamed: 0`` column removed, a ``date``
    column (if present) renamed to ``DataDate``, and ``DataDate`` converted
    with ``pandas.to_datetime``.
    """
    frame = pd.read_csv(path, low_memory=low_memory_)

    # Both steps are no-ops when the column is absent: ``errors="ignore"``
    # for the drop, and ``rename`` skips missing labels by default.
    frame = (
        frame
        .drop(columns="Unnamed: 0", errors="ignore")
        .rename(columns={"date": "DataDate"})
    )

    frame.DataDate = pd.to_datetime(frame.DataDate)
    return frame

In [91]:
def dupe_check(df,path_crsp,path_ivol):
    """Collapse rows that share the same ``DataDate``.

    Parameters
    ----------
    df : DataFrame with at least ``DataDate`` and ``DIVAMT`` columns.
    path_crsp : CSV opened for manual correction and then re-imported when
        duplicates are too frequent.
    path_ivol : second file opened alongside ``path_crsp`` for that manual
        correction.

    Returns
    -------
    DataFrame
        * ``df`` itself when no date is duplicated;
        * the re-imported contents of ``path_crsp`` (the whole file, exactly
          as ``import_csv`` returns it) when duplicated rows exceed 3% of
          ``df``;
        * otherwise one row per ``DataDate``: the last non-null value of each
          column, with ``DIVAMT`` replaced by the per-date sum.
    """
    dupes = df[df.DataDate.duplicated(keep=False)]
    if len(dupes) == 0:
        return df

    if len(dupes) > len(df)*0.03:
        # Too many duplicates to fix automatically: open both files for the
        # user and wait.  ``os.startfile`` is only available on Windows.
        os.startfile(path_crsp)
        os.startfile(path_ivol)
        input("Press enter to continue...")
        return import_csv(path_crsp)

    by_date = df.groupby('DataDate')
    corrected_df = by_date.last()
    # Sum only DIVAMT.  Summing the whole frame first (and then picking
    # DIVAMT) also aggregates every string/date column, which is wasted work
    # and can raise on non-numeric columns in recent pandas versions.
    corrected_df["DIVAMT"] = by_date["DIVAMT"].sum()
    return corrected_df.reset_index()

In [None]:
def return_check(df,path):
    """Sort by date and deal with the letter code ``"C"`` in ``RET``.

    The frame is sorted by ``DataDate`` and re-indexed from 0.  A ``"C"`` in
    the first row is replaced by NaN.  If any ``"C"`` is still present after
    that, the file at ``path`` is opened for manual editing, the function
    waits for the user to press enter, and the re-imported file is returned
    instead of the sorted frame.
    """
    ordered = df.sort_values(by="DataDate").reset_index(drop=True)

    if ordered['RET'].iloc[0] == "C":
        ordered.loc[0,'RET'] = nan

    if "C" not in ordered.RET.tolist():
        return ordered

    # Remaining codes need a human: open the file, wait, then reload it.
    os.startfile(path)
    input("Press enter to continue...")
    return import_csv(path)

In [None]:
# Full CRSP extract; low_memory_=False is forwarded to pandas.read_csv(low_memory=...).
crsp = import_csv(path="CRSP.csv", low_memory_=False)

In [None]:
# Market log-return data, with the date column renamed to match the other
# tables, restricted to the calendar days 2012-12-30 .. 2017-12-31.
marketdata = pd.read_excel("MktData_MLM.xlsx").rename(columns={"date":"DataDate"})
marketdata = marketdata.loc[
    marketdata["DataDate"].isin(pd.date_range(start = "2012-12-30",end="2017-12-31"))
]

In [None]:
# Bank loan rate series read from Excel: skip the 5 header rows, drop the
# unused columns and keep the date/rate pair as DataDate / blr.
blr = (
    pd.read_excel("BB BLR Index.xlsx", skiprows = 5)
    .drop(columns = ["Date","PX_LAST.1","PX_LAST","Unnamed: 2"])
    .rename(columns={"Date.1":"DataDate","rate":"blr"})
)

# FRED DGS1 series read from Excel (10 header rows skipped).
fred = (
    pd.read_excel("DGS1 1Yr Constant Mat Treasury.xlsx",skiprows=10)
    .rename(columns={"observation_date":"DataDate","DGS1":"fred"})
)
# Scale by 1/100 -- presumably percent -> decimal so it is comparable with
# blr; TODO confirm the units of both series.
fred["fred"] = fred["fred"]/100
# Carry the previous available value forward over gaps.  DataFrame.ffill()
# replaces the deprecated fillna(method="ffill") spelling.
fred = fred.ffill()

## Counters ##

In [None]:
# Running tallies for the synthetic ("syn") and company ("com") series at
# each stage; all start at zero.
syn_before_cleaning = com_before_cleaning = 0
syn_after_cleaning = com_after_cleaning = 0
syn_before_regression = com_before_regression = 0
syn_after_regression = com_after_regression = 0

# Accumulators: three independent, initially empty lists.
c_error, error, syn_data = [], [], []

# Data-cleaning switches.
volume_filter, oi_filter = False, False
zero_price_filter, filter_9999 = True, True

In [None]:
def regression(df, dtype, min_obs=251):
    """Run one market-model OLS per calendar year and collect the residuals.

    Parameters
    ----------
    df : DataFrame indexed by a DatetimeIndex, holding the columns
        ``f"{dtype}LogRet"`` and ``Mkt_Lnrtn``.
    dtype : prefix of the dependent variable, e.g. ``"Synth"`` or
        ``"Company"``.
    min_obs : a year with fewer rows than this is treated as incomplete and
        skipped (default 251).

    Returns
    -------
    DataFrame or str
        The yearly frames stacked together, each with the added columns
        ``ExpectedReturn`` (fitted value), ``ResRtn`` (actual - fitted),
        ``Tstat`` (``ResRtn`` divided by the residual standard error),
        ``Sig`` (``|Tstat| > 1.96``) and ``StdErr``.  When no year
        qualifies, the string ``"Lack of data"`` is returned instead.
    """
    data_type = dtype + "LogRet"                                                #Adding a suffix to signify Log Returns
    formula = data_type + " ~  Mkt_Lnrtn"                                       #Creating the formula for Regression

    yearly_frames = []                                                          #Collected per year, concatenated once at the end
    for y in df.index.year.unique().tolist():
        if y == 2012:                                                           #2012 is never regressed
            continue

        # Explicit copy so the new columns are written to an independent
        # frame rather than to a slice of ``df``.
        df_year = df[df.index.year == y].copy()
        if len(df_year) < min_obs:                                              #Removing incomplete years
            continue

        fitted = ols(formula, data = df_year).fit()
        resid_se = sqrt(fitted.scale)                                           #Residual standard error of this year's fit
        explained_rtn = fitted.predict(exog = df_year)

        df_year["ExpectedReturn"] = explained_rtn
        df_year["ResRtn"] = df_year[data_type] - explained_rtn
        df_year["Tstat"] = df_year["ResRtn"]/resid_se
        df_year["Sig"] = df_year["Tstat"].abs() > 1.96
        # NOTE(review): this column used to be assigned ``fitted.bse``, a
        # Series indexed by parameter name; aligned against the date index
        # that produced NaN in every row.  It now stores the scalar used as
        # the Tstat denominator -- confirm that this is the intended figure
        # (rather than the slope's standard error).
        df_year["StdErr"] = resid_se
        yearly_frames.append(df_year)

    if not yearly_frames:
        return("Lack of data")
    return(pd.concat(yearly_frames))

In [None]:
def synthetic_stock(syn_df,cfacpr):
    """Build a daily synthetic-stock price from option quotes and regress it.

    Each call is paired with the put of the same expiration, strike and
    date; a synthetic ask/bid is formed from call - put + discounted strike,
    their midpoint is averaged over all pairs of a day, converted to a
    split-adjusted log return and passed to ``regression``.

    Parameters
    ----------
    syn_df : option quotes with the columns ``DataDate``, ``Expiration``,
        ``Type`` ("call"/"put"), ``Strike``, ``UnderlyingSymbol``,
        ``UnderlyingPrice``, ``Bid``, ``Ask`` (plus ``Volume`` /
        ``OpenInterest`` when the matching filter is switched on).  The
        caller's frame is not modified.
    cfacpr : frame with ``DataDate`` and ``CFACPR`` used to adjust the
        day-over-day price ratio.

    Reads the module-level tables ``fred``, ``blr`` and ``marketdata`` and
    the switches ``volume_filter``, ``oi_filter``, ``zero_price_filter`` and
    ``filter_9999``.

    Returns
    -------
    Whatever ``regression(..., dtype="Synth")`` returns: the regression
    frame, or the string ``"Lack of data"``.
    """
    if volume_filter:
        syn_df = syn_df[syn_df.Volume != 0]                                     #Filtering for volume if volume_filter is True

    if oi_filter:
        syn_df = syn_df[syn_df.OpenInterest != 0]                               #Filtering for open interest if oi_filter is True

    # assign() returns a new frame, so the caller's data is left untouched.
    syn_df = syn_df.assign(
        DataDate=pd.to_datetime(syn_df.DataDate),
        Expiration=pd.to_datetime(syn_df.Expiration),
    )

    syn_df = syn_df.merge(fred, on = 'DataDate', how = "left")                  #Joining fred data with df
    syn_df = syn_df.merge(blr, on = 'DataDate', how = "left")                   #Joining blr data with df

    syn_df["Tau"] = (syn_df.Expiration - syn_df.DataDate).dt.days               #Days to expiration
    syn_df["YearFrac"] = syn_df.Tau/365                                         #Converting Tau into years
    syn_df["BuyDiscount"] = (1 + syn_df.fred) ** syn_df.YearFrac                #Discount factor from the fred rate
    syn_df["SellDiscount"] = (1 + syn_df.blr) ** syn_df.YearFrac                #Discount factor from the blr rate

    # Split into calls and puts and prefix the per-contract columns so both
    # legs can sit side by side after the merge.
    quote_fields = ['Last', 'Bid', 'Ask', 'Volume', 'OpenInterest', 'IV',
                    'Delta', 'Gamma', 'Theta', 'Vega']
    call = syn_df[syn_df.Type == "call"].drop(columns = "Type")\
        .rename(columns={name: "Call" + name for name in quote_fields})
    put = syn_df[syn_df.Type == "put"].drop(columns = "Type")\
        .rename(columns={name: "Put" + name for name in quote_fields})

    syn_df = call.merge(put,how = "left", on=["Expiration","DataDate",
        "Strike", "UnderlyingSymbol","UnderlyingPrice","BuyDiscount",
        "SellDiscount","YearFrac","Tau","blr","fred"])                          #Merging the call and put df to make them parallel

    quote_cols = ["CallBid", "PutBid", "CallAsk", "PutAsk"]
    if zero_price_filter:                                                       #Dropping pairs with any zero quote
        syn_df = syn_df[(syn_df[quote_cols] != 0).all(axis=1)]
    if filter_9999:                                                             #Dropping pairs with any quote equal to 9999
        syn_df = syn_df[(syn_df[quote_cols] != 9999).all(axis=1)]
    syn_df = syn_df.copy()                                                      #Independent frame before adding columns

    syn_df["Buy"] = syn_df.Strike/syn_df.BuyDiscount                            #Strike discounted with BuyDiscount
    syn_df["Sell"] = syn_df.Strike/syn_df.SellDiscount                          #Strike discounted with SellDiscount
    syn_df["SynthAsk"] = syn_df.CallAsk - syn_df.PutBid + syn_df.Buy            #Synthetic stock's ask price
    syn_df["SynthBid"] = syn_df.CallBid - syn_df.PutAsk + syn_df.Sell           #Synthetic stock's bid price
    syn_df["SynthPrice"] = (syn_df.SynthAsk + syn_df.SynthBid)/2                #Midpoint for one strike/expiry on one day

    # Average only SynthPrice: it is the single per-day figure used below,
    # and averaging the whole frame fails on string/date columns such as
    # UnderlyingSymbol and Expiration in recent pandas versions.
    daily = syn_df.groupby("DataDate", as_index=False)["SynthPrice"].mean()

    merged = marketdata.merge(daily,on='DataDate',how = "left")\
        .rename(columns= {'vwretd_ln':'Mkt_Lnrtn'})                             #Joining Synthetic stock prices with market returns
    merged = merged.merge(cfacpr,on='DataDate',how='left')

    # log( P_t * (CFACPR_{t-1} / CFACPR_t) / P_{t-1} )
    adj_ratio = merged.CFACPR.shift(1)/merged.CFACPR
    merged["SynthLogRet"] = ln((merged.SynthPrice*adj_ratio)/merged.SynthPrice.shift(1))

    merged = merged[['DataDate',"SynthLogRet",'Mkt_Lnrtn',"SynthPrice"]]\
        .set_index("DataDate").dropna(subset=["SynthLogRet"])                   #Dropping the NA values based upon Synth Log returns

    return regression(merged,dtype = "Synth")                                   #Running yearly regressions

## Per-company event study

In [95]:
# Split the combined CRSP table into one CSV per company (PERMCO), named
# after the first ticker that appears once rows lacking PRC or TICKER are
# removed.
crsp_dir = os.path.join(source, "crsp_")
os.makedirs(crsp_dir, exist_ok=True)                                            # to_csv does not create a missing folder

# One groupby pass instead of a full boolean scan of crsp per PERMCO;
# sort=False keeps the groups in order of first appearance.
for _, df in crsp.dropna(subset=["PERMCO"]).groupby("PERMCO", sort=False):
    df = df.sort_values("DataDate").dropna(subset=["PRC", "TICKER"]).reset_index(drop=True)
    tic = list(df.TICKER.unique())
    if len(tic) == 0:                                                           # nothing left after cleaning
        continue
    df.to_csv(os.path.join(crsp_dir, f"{tic[0]}_crsp.csv"))

In [None]:
# Main pipeline: for every per-company CRSP file and every ticker in it that
# also has an option file, regress the company's and the synthetic stock's
# log returns on the market, compare the significance of the residuals day
# by day, and export the comparison as <ticker>_summary_.csv.
for path_crsp in glob.glob(os.path.join(source, "crsp_", "*_crsp.csv")):
    raw_df = import_csv(path_crsp)
    if len(raw_df) < 250:                                                       # too little history to be useful
        continue

    for tic in raw_df.TICKER.unique().tolist():
        print(tic)
        path_ivol = os.path.join(source, "ivol", f"{tic}_ind.csv")

        if not os.path.exists(path_ivol):                                       # no option data for this ticker
            continue

        df = raw_df[raw_df.TICKER == tic].reset_index(drop = True)
        df = dupe_check(df,path_crsp,path_ivol)
        df = return_check(df,path_crsp)

        df = marketdata.merge(df,on = "DataDate", how = "left")\
            .rename(columns= {'vwretd_ln':'Mkt_Lnrtn'}).sort_index()

        try:
            df["CompanyLogRet"] = ln(df.RET.astype(float) + 1)
        except (ValueError, TypeError):
            # RET still holds something that is not a number: open the file
            # for inspection and move on to the next ticker.
            os.startfile(path_crsp)
            continue

        cfacpr_ = df[['CFACPR','DataDate']]

        com_df = df[['CompanyLogRet','Mkt_Lnrtn','DataDate']].dropna().set_index("DataDate")
        com_reg = regression(com_df,"Company")
        if isinstance(com_reg, str):
            # regression() returns the string "Lack of data" when no year
            # qualifies; indexing that string with a column list would raise.
            continue

        ivol = import_csv(path_ivol)
        ivol_ = synthetic_stock(ivol,cfacpr_)
        if isinstance(ivol_, str):                                              # same sentinel for the synthetic series
            continue

        #Starting with summarization
        syn_df_reg = ivol_[["ResRtn","Tstat","Sig"]].rename(columns=
            {"ResRtn":"SynResRtn","Tstat":"SynTstat","Sig":"SynSig"})           #Renaming columns for joining the dfs
        com_df_reg = com_reg[["ResRtn","Tstat","Sig"]].rename(columns=
            {"ResRtn":"ComResRtn","Tstat":"ComTstat","Sig":"ComSig"})           #Renaming columns for joining the dfs

        summary = syn_df_reg.merge(com_df_reg,
            on="DataDate",how = "right")                                        #Every company date is kept; synthetic columns may be missing

        summary["Equal"] = summary.SynSig == summary.ComSig                     #Do both series agree on significance?
        summary["Direction"] = (summary.SynTstat/summary.ComTstat) >= 0         #Do both Tstats have the same sign?

        # Redflag is True when the significance flags disagree, or when both
        # are significant but point in opposite directions; False otherwise.
        both_sig = summary.SynSig.eq(True) & summary.ComSig.eq(True)
        summary["Redflag"] = ~summary.Equal | (both_sig & ~summary.Direction)

        print(tic,": Done",len(summary.dropna()))

        summary_dir = os.path.join(source, "summary_")                          #Folder for export of summary
        os.makedirs(summary_dir, exist_ok=True)                                 #Creating the folder if it doesn't exist
        summary.dropna(subset = ["SynResRtn"])\
            .to_csv(os.path.join(summary_dir, f"{tic}_summary_.csv"))           #Exporting a csv file for the summary

AAAP
AAC
AAL
AAN
AAOI
AAON
AAPL
AAP
AAT
AAU
AAVL
ADVM
AAV
AAWW
AA
ARNC
ABAX
ABBV
ABB
ABCB
ABCO
ABC
ABDC
ABFS
ARCB
ABG
ABMD
ABM
ABR
ABTL
AUTO
ABT
ABV
ABEV
ABX
ABY
AY
AB
ACAD
ACAS
ACAT
ACCL
ACCO
ACC
ACET
ACE
CB
ACFN
ACGL
ACHC
ACHN
ACH
ACIA
ACIW
ACI
ARCH
ACLS
ACMP
WPZ
ACM
ACN
ACOR
ACO
ACPW
PIOI
ACRE
ACRS
ACRX
ACSF
ACTG
ACUR
ACW
ACXM
ADAP
ADBE
ADC
ADEP
ADES
ADHD
ARCT
ADI
ADK
RHE
ADMP
ADMS
ADM
ADNC
ADNT
ADPT
ADP
ADRO
ADSK
ADS
ADTN
ADT
ADUS
ADVS
ADXS
AEC
AEE
AEGN
AEGR
AEG
AEIS
AEL


KeyboardInterrupt: 