# 1. Imports

In [None]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the prepared dataset (path is relative to the notebook's working
# directory) and preview its first rows.
df = pd.read_csv(filepath_or_buffer="data_final.csv")
df.head(n=5)

# 2. Features correlation matrix

In [None]:
# Pairwise Pearson correlations between the columns of df.
# NOTE(review): pandas >= 2.0 no longer drops non-numeric columns silently in
# DataFrame.corr - confirm that every column of df is numeric.
corr_mat = df.corr(method="pearson")

In [None]:
# Draw the correlation matrix as a heatmap (diverging palette, no cell labels).
# The figure size is set through the global rc settings, so it also applies to
# figures created later in the session.
plt.rc("figure", figsize=(10, 8))
corr_mat = df.corr(method="pearson")
sns.heatmap(data=corr_mat, cmap='coolwarm', annot=False, linewidths=0.5)

# 3. Standard linear models

## 3.1. Features and model names

In [None]:
# Regressor sets of the five standard linear specifications.
# The k-th entry of `features` is presumably the specification labelled
# names_linear[k] (both lists have five entries, in the same order).
features = [
    ["vwretd"],                             # MM
    ["vwretd", "RET_peers"],                # MMP
    ["vwretd", "RET_neigh"],                # MMN
    ["vwretd", "SMB", "HML"],               # MMFF3
    ["vwretd", "SMB", "HML", "RMW", "CMA"], # MMFF5
]

# Short labels used as column headers for the abnormal returns of each model.
names_linear = ["MM", "MMP", "MMN", "MMFF3", "MMFF5"]

## 3.2. Model training

In [None]:
# Estimate every standard linear specification stock by stock and collect the
# abnormal returns (ARs) in `output`: one row per row of df, one column per
# entry of `names_linear`.
# NOTE: this cell is an exercise template - each `# ...` placeholder that
# follows an `=`, `in` or `[` must be replaced by code before the cell can run.
PERMNO_list = pd.unique(df['PERMNO'])

# np.empty does not initialise its content, so every entry has to be written
# by the loop below.
output = np.empty((len(df), len(names_linear))) # for storage of ARs

from sklearn.linear_model import LinearRegression
LinearModel = LinearRegression(fit_intercept=True)

for model in # iterate over features sets
    
    ARs_linear = list() # for storage of current model's AR over all stocks
    
    for i in # iterate over all stocks
        
        cur_PERMNO = # current stock's PERMNO
        cur_PERMNO_data = # select current stock's data
            
        feat = # current features set
    
        X, y  = # current features, # current labels. y must be a column vector => reshape X and y accordingly
        X_train, y_train = # train features, # train labels

        estimModel = # fit regression
        stock_ARs = # compute abnormal returns for current stock on all dates (estimation window + event date)

        # stock_ARs is appended element by element, so it must be a flat iterable
        ARs_linear.extend(stock_ARs)

    output[# store ARs for current model in output array

# The name ARs_linear is reused here: inside the loop it is a list, from this
# point on it is meant to be a DataFrame.
ARs_linear = # create pandas dataframe from output matrix. Column headers correspond to model names 
# NOTE(review): rebinding `df` means a second run of this cell starts from the
# already augmented frame - consider a new name to keep the cell re-runnable.
df = # concatenate ARs_linear with dataframe df

## 3.3. Abnormal returns

In [None]:
# Abnormal returns of the linear models restricted to the estimation period.
# Exercise placeholder: the selection expression still has to be written.
AR_estim = # ARs on estimation period for all linear models
# Preview of the first rows.
AR_estim.head()

In [None]:
# Box plot of the estimation-period abnormal returns: one box per column of
# AR_estim, with the mean marked in addition to the median.
plt.rcParams["figure.figsize"] = (9, 5)
plt.boxplot(AR_estim.values, showmeans=True)
# The boxes are labelled through xticks rather than boxplot's `labels=`
# keyword, which Matplotlib 3.9 deprecated (renamed to `tick_labels`); xticks
# works on old and new versions. Boxes sit at positions 1..n_columns.
plt.xticks(list(range(1, AR_estim.shape[1] + 1)), list(AR_estim.columns))
plt.show()

In [None]:
# Abnormal returns on the event date, one column per model.
# Exercise placeholder: the selection expression still has to be written.
AR_event = # ARs on event date for all models

In [None]:
# Preview the first rows of the event-date abnormal returns.
AR_event.head(n=5)

In [None]:
# Box plot of the event-date abnormal returns: one box per column of AR_event,
# with the mean marked in addition to the median.
plt.rcParams["figure.figsize"] = (9, 5)
plt.boxplot(AR_event.values, showmeans=True)
# The boxes are labelled through xticks rather than boxplot's `labels=`
# keyword, which Matplotlib 3.9 deprecated (renamed to `tick_labels`); xticks
# works on old and new versions. Boxes sit at positions 1..n_columns.
plt.xticks(list(range(1, AR_event.shape[1] + 1)), list(AR_event.columns))
plt.show()

# 4. Alternative models

## 4.1. Model definitions

In [None]:
def get_models(random_state=None):
    """Build the alternative (non-OLS) regressors and their display names.

    The estimator classes are looked up when the function is called, so the
    scikit-learn imports must have been executed before the first call.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to the decision tree and the random forest, the two
        estimators whose fit is randomised. The default ``None`` keeps the
        previous unseeded behaviour; pass an integer to make the abnormal
        returns of these two models reproducible across runs.

    Returns
    -------
    models : list
        Unfitted estimators, in the order Ridge, Lasso, ElasticNet, decision
        tree, random forest.
    names : list of str
        Labels aligned with ``models`` position by position.
    """
    # Both tree-based models share the same complexity limits. In scikit-learn
    # a float `min_samples_split` is read as a fraction of the training
    # samples, i.e. a node needs at least 40% of them to be split further.
    tree_kwargs = dict(max_depth=5, min_samples_split=0.4, random_state=random_state)

    labelled_models = [
        ('Ridge', RidgeCV()),
        ('Lasso', LassoCV()),
        ('ElasticNet', ElasticNetCV()),
        ('DT', DecisionTreeRegressor(**tree_kwargs)),
        ('RandForest', RandomForestRegressor(**tree_kwargs)),
    ]

    names = [label for label, _ in labelled_models]
    models = [estimator for _, estimator in labelled_models]
    return models, names

## 4.2. Model training

In [None]:
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor


# Fit every alternative model stock by stock and collect the abnormal returns
# (ARs) in `output`: one row per row of df, one column per model.
# NOTE: this cell is an exercise template - each `# ...` placeholder that
# follows an `=` or `[` must be replaced by code before the cell can run.
models, names = get_models()

# np.empty does not initialise its content, so every entry has to be written
# by the loop below.
output = np.empty((len(df), len(models))) # for storage of ARs

# This rebinds `features`, which earlier held the list of feature sets of the
# linear models.
features = # list of features 
   
for model in range(len(models)):
    
    ARs_model = list()

    for stock in range(len(PERMNO_list)):

        cur_PERMNO = PERMNO_list[stock]
        # rows of df that belong to the current stock
        cur_PERMNO_data = df[df['PERMNO']==cur_PERMNO]

        X, y  = # features, # labels
        X_train, y_train = # train features, # train labels 

        estimModel = # fit model
        ARs = # compute stock's ARs (estimation window + event date)

        # list concatenation: assumes ARs is a plain Python list - TODO confirm
        ARs_model = ARs_model + ARs

    output[# store ARs for current model in output array

ARs_altModels = # create pandas dataframe from output matrix. Column headers correspond to model names 
df_all = # concatenate ARs_altModels with dataframe df

In [None]:
# Preview the first rows of the frame holding the ARs of all models.
df_all.head(n=5)

In [None]:
# Labels of all models: the standard linear ones first, then the alternatives.
model_names_all = [*names_linear, *names]

## 4.3. Abnormal returns (standard + alternative models)

In [None]:
# Estimation-window abnormal returns, one column per model.
# Exercise placeholder: the selection expression still has to be written.
ARs_estim = # estimation window ARs for all models (linear + alternative models)
# Box plot of the estimation-window abnormal returns: one box per column of
# ARs_estim, with the mean marked in addition to the median.
plt.rcParams["figure.figsize"] = (9, 5)
plt.boxplot(ARs_estim.values, showmeans=True)
# The boxes are labelled through xticks rather than boxplot's `labels=`
# keyword, which Matplotlib 3.9 deprecated (renamed to `tick_labels`); xticks
# works on old and new versions. Boxes sit at positions 1..n_columns, so a
# label count that differs from the column count still raises.
plt.xticks(list(range(1, ARs_estim.shape[1] + 1)), list(model_names_all))
plt.show()

In [None]:
# Event-date abnormal returns, one column per model.
# Exercise placeholder: the selection expression still has to be written.
AR0s = # event date ARs for all models (linear + alternative models)

In [None]:
# Box plot of the event-date abnormal returns: one box per column of AR0s,
# with the mean marked in addition to the median.
plt.rcParams["figure.figsize"] = (9, 5)
plt.boxplot(AR0s.values, showmeans=True)
# The boxes are labelled through xticks rather than boxplot's `labels=`
# keyword, which Matplotlib 3.9 deprecated (renamed to `tick_labels`); xticks
# works on old and new versions. Boxes sit at positions 1..n_columns, so a
# label count that differs from the column count still raises.
plt.xticks(list(range(1, AR0s.shape[1] + 1)), list(model_names_all))
plt.show()

In [None]:
# Per-stock root-mean-square error of each model over the estimation window.
# Exercise placeholders: both expressions still have to be written.
# Note that ARs_estim is rebound here; it was already assigned in an earlier
# cell for the box plot.
ARs_estim = # ARs on estimation window for all models. Select column PERMNO + columns of the ARs for all models (linear + alternative)
RMSEs_estim = # compute estimation window RMSE by PERMNO
# Preview of the first rows.
RMSEs_estim.head()

In [None]:
# Event-window abnormal returns of every model, together with the PERMNO column.
# Exercise placeholder: the selection expression still has to be written.
ARs_test = # ARs on event window for all models. Select columns PERMNO + those of the ARs of all models (linear + alternative)
# Preview of the first rows.
ARs_test.head()

## 4.4. Model evaluation

- For each model, an x% abnormal return is simulated by adding a shock of x% to the actual return on the event date (date 0).
- Shock values are: -0.1, -0.05, -0.02, -0.01, 0, 0.01, 0.02, 0.05, 0.1
- Shock value 0 aims at testing the specification of the various models (Type 1 error)
- Other shock values aim at testing the power of the various models (Type 2 error)

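The null hypothesis $AR^{(m)}_{i,0}=0$ is rejected for stock $i$ and model $(m)$ at significance level $\alpha$ (two-sided test) if $\left|\frac{AR^{(m)}_{i,0}}{RMSE_i^{(m)}}\right|>z_{1-\alpha/2}$, where $z_{1-\alpha/2}$ is the corresponding quantile of the standard normal distribution (about 1.96 for $\alpha=5\%$).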

In [None]:
# Shock values added to the event-date return, and the critical value of the test.
# Exercise placeholder: the list of shocks still has to be written.
shocks = # list of shock values
from scipy.stats import norm
# Despite its name, `alpha` holds the critical value, not the significance
# level: norm.ppf(0.975) is the 97.5% quantile of the standard normal
# distribution (about 1.96), i.e. the two-sided cut-off at the 5% level.
alpha = norm.ppf(0.975) # 5% level

In [None]:
# Container for the rejection frequencies, filled row by row afterwards.
# Exercise placeholder: the allocation still has to be written.
# NOTE(review): "thresholds" in the description below presumably refers to the
# shock values stored in `shocks` - confirm.
rejection_rates = # empty matrix: number of rows = number of thresholds / number of columns = number of models

In [None]:
for th in range(len(thresholds)):
    z_score = # compute z score
    T_spec = # count number of rejections
    freq_reject = # compute rejection frequency
    reject_rates[th,] = freq_reject

In [None]:
res = pd.DataFrame(reject_rates, columns=model_names_all)
res.index = thresholds
res

## 4.5. Confidence interval for rejection rate

Wald confidence interval for a binomial proportion ($p$):
$$
p \pm z \times \sqrt{\frac{p(1-p)}{n}}
$$
where $z$ is the standard normal critical value associated with the chosen confidence level (about 1.96 for a 95% confidence interval) and $n$ is the number of trials.

In [None]:
import math

# Imported here as well so that the cell does not depend on an earlier cell
# having been run.
from scipy.stats import norm

# Worked example of the Wald interval: 256 trials and the smallest number of
# rejections that exceeds 5% of them.
total_trials = 256
number_rejections = int(5/100*total_trials)+1

# Sample proportion
p = number_rejections / total_trials

confidence_level = 0.95

# Two-sided critical value of the standard normal for the confidence level
Z = norm.ppf((1 + confidence_level) / 2)

# Standard error of the sample proportion
standard_error = math.sqrt((p * (1 - p)) / total_trials)

# Margin of error
margin_of_error = Z * standard_error

# Confidence interval bounds, expressed in percent and rounded to one decimal
lower_limit = round((p - margin_of_error)*100,1)
upper_limit = round((p + margin_of_error)*100,1)

# The proportion is stored in `p`; the previous reference to the undefined
# name `p_hat` raised a NameError before anything was printed.
print(f"Sample Proportion: {p}")
print(f"Confidence Interval: [{lower_limit}, {upper_limit}]")