In [240]:
import pandas as pd
import numpy as np
from finance_byu import fama_macbeth
import warnings
# Blanket filter: from this point on every warning is suppressed, including
# deprecation / future-behaviour notices emitted by pandas and numpy.
warnings.filterwarnings("ignore")

In [241]:
# Read the four ratio panels; the first CSV column is used as the row index.
ab_pos, ab_neg, pos, neg = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'Final Variables/abnormal_positive_ratio.csv',
        'Final Variables/abnormal_negative_ratio.csv',
        'Final Variables/positive_ratio.csv',
        'Final Variables/negative_ratio.csv',
    )
)

# Row labels used for the summary table built further down.
variables = ['NR', 'ABNR', 'PR', 'ABPR']

# Upper bound for the "lag" sample.
# NOTE(review): the index is not parsed as dates here, so if the labels are
# strings such as '2022-11-30' the comparison is lexicographic and a November
# 2022 label sorts AFTER '2022-11' (i.e. it is excluded) - confirm intended.
lag_cutoff = '2022-11'

# "lag" = rows up to the cutoff; "lead" = the same number of rows starting one
# row later, so row k of lead is the row that follows row k of lag.
ab_pos_lag = ab_pos.loc[ab_pos.index <= lag_cutoff]
ab_pos_lead = ab_pos.iloc[1:len(ab_pos_lag) + 1]

ab_neg_lag = ab_neg.loc[ab_neg.index <= lag_cutoff]
ab_neg_lead = ab_neg.iloc[1:len(ab_neg_lag) + 1]

pos_lag = pos.loc[pos.index <= lag_cutoff]
pos_lead = pos.iloc[1:len(pos_lag) + 1]

neg_lag = neg.loc[neg.index <= lag_cutoff]
neg_lead = neg.iloc[1:len(neg_lag) + 1]

# Per-column averages over the lag sample.
ab_pos_means, ab_neg_means, pos_means, neg_means = (
    lagged.mean() for lagged in (ab_pos_lag, ab_neg_lag, pos_lag, neg_lag)
)

def summary_stats(df):
    """Tabulate descriptive statistics of ``df``.

    ``df`` may be a Series or a DataFrame; each statistic is obtained from the
    corresponding pandas method on ``df``.

    Returns a DataFrame with one row per statistic, in this order: Mean, SD,
    Skew, Kurt, Min, the 1/5/10/25/50/75/90/95/99 percent quantiles (the 50%
    quantile is labelled 'Median'), and Max.
    """
    quantile_rows = [
        ('1%', 0.01), ('5%', 0.05), ('10%', 0.1), ('25%', 0.25), ('Median', 0.5),
        ('75%', 0.75), ('90%', 0.9), ('95%', 0.95), ('99%', 0.99),
    ]

    # (row label, value) pairs, kept in the order the rows appear in the table.
    labelled = [
        ('Mean', df.mean()),
        ('SD', df.std()),
        ('Skew', df.skew()),
        ('Kurt', df.kurt()),
        ('Min', df.min()),
    ]
    labelled.extend((label, df.quantile(q)) for label, q in quantile_rows)
    labelled.append(('Max', df.max()))

    column_stats = pd.DataFrame([value for _, value in labelled])
    column_stats.index = [label for label, _ in labelled]

    return column_stats

# Table 1: descriptive statistics of the per-column means, one row per entry
# of `variables` (the list order below must match `variables`).
# NOTE: this rebinds the name `summary_stats` from the helper function to the
# resulting DataFrame; the helper is only callable again after its `def` is
# re-executed.
summary_stats = pd.concat(
    [
        summary_stats(neg_means),
        summary_stats(ab_neg_means),
        summary_stats(pos_means),
        summary_stats(ab_pos_means),
    ],
    keys=variables,
    axis=1,
).transpose()
# Drop the inner level of the (variable, original column) row index.
summary_stats.index = summary_stats.index.droplevel(1)
summary_stats.to_csv('summary statistic/table1.csv')

# One single-column frame per variable. `to_frame(name)` labels the column
# directly and does not depend on the Series being unnamed.
ab_pos_means_df = ab_pos_means.to_frame('ABPR')
ab_neg_means_df = ab_neg_means.to_frame('ABNR')
pos_means_df = pos_means.to_frame('PR')
neg_means_df = neg_means.to_frame('NR')

# Same averages computed over the lead sample.
lead_ab_pos_means_df = ab_pos_lead.mean().to_frame('Lead_ABPR')
lead_ab_neg_means_df = ab_neg_lead.mean().to_frame('Lead_ABNR')
# Label fixed from 'Lead_R' so it matches the Lead_NR / Lead_ABNR / Lead_ABPR scheme.
lead_pos_means_df = pos_lead.mean().to_frame('Lead_PR')
lead_neg_means_df = neg_lead.mean().to_frame('Lead_NR')

# Table 2: pairwise correlations between the lag and lead averages.
data_df = pd.concat(
    [
        neg_means_df, ab_neg_means_df, pos_means_df, ab_pos_means_df,
        lead_neg_means_df, lead_ab_neg_means_df, lead_pos_means_df, lead_ab_pos_means_df,
    ],
    axis=1,
)
correlation_matrix = data_df.corr()
correlation_matrix.to_csv('summary statistic/table2.csv')

In [242]:
def fama(data, shift_factor, control=None):
    """Run ``fama_macbeth.fama_macbeth_master`` once per column of ``data``.

    For every column, a frame is built that pairs the column with the same
    column shifted by ``shift_factor`` rows (named ``'future'``), trimmed with
    ``[:shift_factor]``, and given a datetime ``'index'`` column taken from the
    row labels. That frame is passed to ``fama_macbeth_master`` with
    ``t='index'``, ``yvar='future'``, ``xvar=[column]`` and ``intercept=True``.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per ticker; row labels must be convertible by
        ``pd.to_datetime``.
    shift_factor : int
        Row shift applied to build ``'future'``. With a negative value,
        ``'future'`` holds the observation ``abs(shift_factor)`` rows ahead and
        the slice removes the trailing rows that have no such observation.
        The slice is only meaningful for negative values: ``0`` yields empty
        frames and a positive value keeps only the first ``shift_factor`` rows.
    control : optional
        Accepted for interface compatibility only. The previous implementation
        built exactly the same frames whether or not controls were supplied,
        so this argument has never affected the regressions.
        NOTE(review): control variables still need to be wired into ``xvar``
        if they are meant to be used.

    Returns
    -------
    tuple
        ``(alpha, beta, results)`` - three lists with one entry per column of
        ``data``: ``results[i]`` is the object returned by
        ``fama_macbeth_master``, ``alpha[i]`` is ``results[i]['intercept']``
        and ``beta[i]`` is ``results[i][column]``.
    """
    # NOTE(review): each frame holds a single column's series, so when the row
    # labels are unique every date contributes one observation - confirm this
    # is the intended input for fama_macbeth_master.
    future = data.shift(shift_factor)

    alpha, beta, results = [], [], []
    for ticker in data.columns:
        panel = pd.concat(
            [data[ticker], future[ticker].rename('future')], axis=1
        )[:shift_factor]
        # Name the index explicitly so reset_index() always yields an 'index'
        # column, even when the input index carries its own name.
        panel = panel.rename_axis('index').reset_index()
        panel['index'] = pd.to_datetime(panel['index'])

        fit = fama_macbeth.fama_macbeth_master(
            panel, t='index', yvar='future', xvar=[ticker], intercept=True
        )
        results.append(fit)
        alpha.append(fit['intercept'])
        beta.append(fit[ticker])

    return alpha, beta, results

In [243]:
# Run fama() on the abnormal negative and abnormal positive ratio panels for
# shifts of -1, -2 and -3 rows; the numeric suffix of each name is the horizon.
(
    (neg1_alpha, neg1_beta, neg1_results),
    (neg2_alpha, neg2_beta, neg2_results),
    (neg3_alpha, neg3_beta, neg3_results),
) = [fama(ab_neg, shift) for shift in (-1, -2, -3)]

(
    (pos1_alpha, pos1_beta, pos1_results),
    (pos2_alpha, pos2_beta, pos2_results),
    (pos3_alpha, pos3_beta, pos3_results),
) = [fama(ab_pos, shift) for shift in (-1, -2, -3)]

In [244]:
# Collect the per-ticker slope and intercept lists under one key per
# (sign, horizon) specification.
d_beta = {'neg1' : neg1_beta, 'neg2' : neg2_beta, 'neg3' : neg3_beta,
          'pos1' : pos1_beta, 'pos2' : pos2_beta, 'pos3' : pos3_beta}
d_alpha = {'neg1' : neg1_alpha, 'neg2' : neg2_alpha, 'neg3' : neg3_alpha,
          'pos1' : pos1_alpha, 'pos2' : pos2_alpha, 'pos3' : pos3_alpha}

# Column-wise average, one value per specification.
# Use DataFrame.mean() rather than np.mean(frame): np.mean forwards axis=None,
# which pandas >= 2.0 treats as "reduce over both axes" and returns a single
# scalar, breaking the later .rename(...) calls on these results.
betas = pd.DataFrame(data = d_beta).mean()
alphas = pd.DataFrame(data = d_alpha).mean()

In [246]:
# Side-by-side table of the averaged intercepts and slopes, one row per
# specification key.
intercept_column = alphas.rename('Intercept')
beta_column = betas.rename('Beta')
results_no_control = pd.concat([intercept_column, beta_column], axis=1)

In [264]:
# Display the averaged intercepts and slopes of the regressions run without controls.
results_no_control

Unnamed: 0,Intercept,Beta
neg1,0.046513,0.147757
neg2,0.048533,0.109448
neg3,0.046977,0.1399
pos1,0.061355,0.148461
pos2,0.062203,0.137457
pos3,0.060225,0.161317


In [262]:
# Candidate control variables; the first CSV column is used as the row index.
size = pd.read_csv('Final Variables/monthly_size.csv', index_col=0)

# bm is the element-wise reciprocal of the values stored in monthly_pb.csv
# (presumably price-to-book, giving book-to-market - confirm).
pb = pd.read_csv('Final Variables/monthly_pb.csv', index_col=0)
bm = 1 / pb

ret_6m = pd.read_csv('Final Variables/monthly_cum_sum.csv', index_col=0)
gpa = pd.read_csv('Final Variables/annual_gross_profit.csv', index_col=0)
#tot_assets = pd.read_csv('Final Variables/monthly_tot_asset.csv', index_col=0)

In [263]:
# Display the reciprocal-of-P/B frame computed in the previous cell.
bm

Unnamed: 0,VRSN,CMI,WDC,CSX,KMB,BMY,REGN,VFC,VMC,MNST,...,GE,CAG,KR,HUM,ROK,AME,VTR,MKC,STZ,BKNG
2014-01-31,,0.296792,0.406742,0.306613,0.017272,0.152206,0.060688,0.176363,0.481728,0.083375,...,0.504265,0.394287,0.293585,0.448910,0.176964,0.255054,0.405609,0.187783,0.321067,0.144678
2014-02-28,,0.296792,0.406742,0.306613,0.017272,0.152206,0.060688,0.176363,0.481728,0.083375,...,0.504265,0.394287,0.293585,0.448910,0.176964,0.255054,0.405609,0.187783,0.321067,0.144678
2014-03-31,,0.296792,0.406742,0.306613,0.017272,0.152206,0.060688,0.176363,0.481728,0.083375,...,0.504265,0.394287,0.293585,0.448910,0.176964,0.255054,0.405609,0.187783,0.321067,0.144678
2014-04-30,,0.296792,0.406742,0.306613,0.017272,0.152206,0.060688,0.176363,0.481728,0.083375,...,0.504265,0.394287,0.293585,0.448910,0.176964,0.255054,0.405609,0.187783,0.321067,0.144678
2014-05-31,,0.296792,0.406742,0.306613,0.017272,0.152206,0.060688,0.176363,0.481728,0.083375,...,0.504265,0.394287,0.293585,0.448910,0.176964,0.255054,0.405609,0.187783,0.321067,0.144678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-31,,0.262154,0.893524,0.197060,0.011939,0.208049,0.286613,0.160804,0.297721,0.132475,...,0.398438,0.552197,0.299500,0.239194,0.109989,0.232604,0.563855,0.205031,0.286528,0.036459
2022-09-30,,0.262154,0.893524,0.197060,0.011939,0.208049,0.286613,0.160804,0.297721,0.132475,...,0.398438,0.552197,0.299500,0.239194,0.109989,0.232604,0.563855,0.205031,0.286528,0.036459
2022-10-31,,0.262154,0.893524,0.197060,0.011939,0.208049,0.286613,0.160804,0.297721,0.132475,...,0.398438,0.552197,0.299500,0.239194,0.109989,0.232604,0.563855,0.205031,0.286528,0.036459
2022-11-30,,0.262154,0.893524,0.197060,0.011939,0.208049,0.286613,0.160804,0.297721,0.132475,...,0.398438,0.552197,0.299500,0.239194,0.109989,0.232604,0.563855,0.205031,0.286528,0.036459
