In [77]:
import pandas as pd
import numpy as np
from finance_byu import fama_macbeth
import warnings
warnings.filterwarnings("ignore")

In [78]:
# Ratio panels; the first CSV column becomes the row index of each frame.
ab_pos = pd.read_csv('Final Variables/abnormal_positive_ratio.csv', index_col=0)
ab_neg = pd.read_csv('Final Variables/abnormal_negative_ratio.csv', index_col=0)
pos = pd.read_csv('Final Variables/positive_ratio.csv', index_col=0)
neg = pd.read_csv('Final Variables/negative_ratio.csv', index_col=0)

# Row labels used for the summary table built from these panels.
variables = ['NR', 'ABNR', 'PR', 'ABPR']


def _lag_lead_split(panel):
    """Return ``(lag, lead)`` row subsets of ``panel``.

    ``lag`` keeps the rows whose index label compares ``<= '2022-11'``;
    ``lead`` is the equally long positional window starting one row later.

    NOTE(review): no ``parse_dates`` is passed to ``read_csv``, so the labels
    are presumably strings and the comparison is lexicographic (a label such
    as '2022-11-01' sorts after '2022-11' and is excluded) — confirm that this
    cutoff is the intended one.
    """
    lag_part = panel[panel.index <= '2022-11']
    lead_part = panel.iloc[1:(len(lag_part) + 1)]
    return lag_part, lead_part


ab_pos_lag, ab_pos_lead = _lag_lead_split(ab_pos)
ab_neg_lag, ab_neg_lead = _lag_lead_split(ab_neg)
pos_lag, pos_lead = _lag_lead_split(pos)
neg_lag, neg_lead = _lag_lead_split(neg)

# Column-wise means of each lagged panel.
ab_pos_means = ab_pos_lag.mean()
ab_neg_means = ab_neg_lag.mean()
pos_means = pos_lag.mean()
neg_means = neg_lag.mean()

def summary_stats(df):
    """Tabulate descriptive statistics of ``df``.

    The statistics are obtained from the like-named pandas methods
    (``mean``, ``std``, ``skew``, ``kurt``, ``min``, ``quantile``, ``max``)
    and stacked as rows in a fixed order.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Data to summarise; any object exposing the methods above works.

    Returns
    -------
    pandas.DataFrame
        Rows labelled 'Mean', 'SD', 'Skew', 'Kurt', 'Min', '1%', '5%', '10%',
        '25%', 'Median', '75%', '90%', '95%', '99%', 'Max'.
    """
    quantile_levels = [
        (0.01, '1%'), (0.05, '5%'), (0.1, '10%'), (0.25, '25%'), (0.5, 'Median'),
        (0.75, '75%'), (0.9, '90%'), (0.95, '95%'), (0.99, '99%'),
    ]

    # (label, value) pairs, evaluated in the order the rows appear.
    labelled = [
        ('Mean', df.mean()),
        ('SD', df.std()),
        ('Skew', df.skew()),
        ('Kurt', df.kurt()),
        ('Min', df.min()),
    ]
    labelled.extend((label, df.quantile(level)) for level, label in quantile_levels)
    labelled.append(('Max', df.max()))

    column_stats = pd.DataFrame([value for _, value in labelled])
    column_stats.index = [label for label, _ in labelled]

    return column_stats

# Table 1: one row per variable (labels from `variables`), one column per
# statistic. The result is stored under its own name instead of rebinding
# `summary_stats`, so the function of that name stays callable afterwards.
summary_table = pd.concat(
    [
        summary_stats(neg_means),
        summary_stats(ab_neg_means),
        summary_stats(pos_means),
        summary_stats(ab_pos_means),
    ],
    keys=variables,
    axis=1,
).transpose()
# After the transpose the row index has two levels (variable key, original
# column label); keep only the variable key.
summary_table.index = summary_table.index.droplevel(1)
summary_table.to_csv('summary statistic/table1.csv')

# One-column frames holding the column-wise means of the lagged panels.
ab_pos_means_df = pd.DataFrame(ab_pos_means, columns=['ABPR'])
ab_neg_means_df = pd.DataFrame(ab_neg_means, columns=['ABNR'])
pos_means_df = pd.DataFrame(pos_means, columns=['PR'])
neg_means_df = pd.DataFrame(neg_means, columns=['NR'])

# Same for the lead panels. The positive-ratio column is labelled 'Lead_PR'
# (it was 'Lead_R') so it matches the PR / Lead_NR / Lead_ABPR naming used by
# every other column of the table.
lead_ab_pos_means_df = pd.DataFrame(ab_pos_lead.mean(), columns=['Lead_ABPR'])
lead_ab_neg_means_df = pd.DataFrame(ab_neg_lead.mean(), columns=['Lead_ABNR'])
lead_pos_means_df = pd.DataFrame(pos_lead.mean(), columns=['Lead_PR'])
lead_neg_means_df = pd.DataFrame(neg_lead.mean(), columns=['Lead_NR'])

# Table 2: pairwise correlations between the eight mean columns.
data_df = pd.concat(
    [
        neg_means_df,
        ab_neg_means_df,
        pos_means_df,
        ab_pos_means_df,
        lead_neg_means_df,
        lead_ab_neg_means_df,
        lead_pos_means_df,
        lead_ab_pos_means_df,
    ],
    axis=1,
)
correlation_matrix = data_df.corr()
correlation_matrix.to_csv('summary statistic/table2.csv')

In [86]:
def fama(data, shift_factor):
    """Regress each column's shifted values on its current values.

    For every column of ``data`` a three-column frame is built — ``index``
    (datetime-parsed row labels), the column itself, and ``future`` (the same
    column after ``shift(shift_factor)``) — and handed to
    ``fama_macbeth.fama_macbeth_master`` with the column as the only regressor
    and an intercept.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per ticker; row labels must be parseable by
        ``pd.to_datetime``. The frame is not modified.
    shift_factor : int
        Passed to ``DataFrame.shift``. Rows that the shift leaves without a
        ``future`` value are dropped: the last ``|shift_factor|`` rows for a
        negative shift, the first ``shift_factor`` rows for a positive one.

    Returns
    -------
    tuple of (list, list, list)
        ``alpha`` — the ``'intercept'`` entry of each result;
        ``beta`` — each result's entry named after its ticker;
        ``results`` — the raw ``fama_macbeth_master`` return values.
        All three are in column order of ``data``.

    Notes
    -----
    NOTE(review): each frame passed to ``fama_macbeth_master`` holds a single
    ticker, so (assuming unique dates) every ``t`` group contains one row.
    Confirm that per-ticker estimation, rather than pooling all tickers per
    date, is what is intended.
    """
    # Convert the row labels once, on a private copy. Assigning to
    # `data[ticker].index` (as was done per column) either has no effect on
    # `data` or leaves that column with a different index than the shifted
    # frame, depending on the pandas version.
    panel = data.copy()
    panel.index = pd.to_datetime(panel.index)
    # reset_index() below must yield a column literally called 'index',
    # whatever the incoming index was named.
    panel.index.name = 'index'

    future = panel.shift(shift_factor)

    # Positional window of rows that still have a shifted counterpart.
    if shift_factor < 0:
        valid_rows = slice(None, shift_factor)
    elif shift_factor > 0:
        valid_rows = slice(shift_factor, None)
    else:
        valid_rows = slice(None)

    alpha = []
    beta = []
    results = []

    for ticker in panel.columns:
        paired = pd.concat([panel[ticker], future[ticker].rename('future')], axis=1)
        frame = paired.iloc[valid_rows].reset_index()

        result = fama_macbeth.fama_macbeth_master(
            frame, t='index', yvar='future', xvar=[ticker], intercept=True
        )

        results.append(result)
        alpha.append(result['intercept'])
        beta.append(result[ticker])

    return alpha, beta, results

In [87]:
# Run `fama` for shift factors -1, -2 and -3, first on the abnormal negative
# ratios and then on the abnormal positive ratios.
(
    (neg1_alpha, neg1_beta, neg1_results),
    (neg2_alpha, neg2_beta, neg2_results),
    (neg3_alpha, neg3_beta, neg3_results),
) = [fama(ab_neg, -horizon) for horizon in (1, 2, 3)]

(
    (pos1_alpha, pos1_beta, pos1_results),
    (pos2_alpha, pos2_beta, pos2_results),
    (pos3_alpha, pos3_beta, pos3_results),
) = [fama(ab_pos, -horizon) for horizon in (1, 2, 3)]

InvalidIndexError: []

In [None]:
# Slope and intercept collections keyed by sign and horizon.
d_beta = {'neg1' : neg1_beta, 'neg2' : neg2_beta, 'neg3' : neg3_beta, 
          'pos1' : pos1_beta, 'pos2' : pos2_beta, 'pos3' : pos3_beta}
d_alpha = {'neg1' : neg1_alpha, 'neg2' : neg2_alpha, 'neg3' : neg3_alpha, 
          'pos1' : pos1_alpha, 'pos2' : pos2_alpha, 'pos3' : pos3_alpha}

# Column-wise means, one value per key. DataFrame.mean() is called directly
# because np.mean(frame) forwards axis=None, which pandas >= 2.0 treats as
# "reduce over both axes" and returns a single scalar — the later
# `.rename(...)` / concat step needs a Series.
betas = pd.DataFrame(data = d_beta).mean()
alphas = pd.DataFrame(data = d_alpha).mean()

In [81]:
# Side-by-side table of the averaged intercepts and slopes.
intercept_column = alphas.rename('Intercept')
beta_column = betas.rename('Beta')
results_no_control = pd.concat([intercept_column, beta_column], axis = 1)

In [16]:
# Display the Intercept/Beta table assembled from `alphas` and `betas`.
results_no_control

Unnamed: 0,Intercept,Beta
neg1,0.046513,0.147757
neg2,0.048533,0.109448
neg3,0.046977,0.1399
pos1,0.061355,0.148461
pos2,0.062203,0.137457
pos3,0.060225,0.161317


In [82]:
# Panels that are later collected into the `control` list; the first CSV
# column becomes the row index of each frame.
size = pd.read_csv('Final Variables/monthly_size.csv', index_col=0)
ret_6m = pd.read_csv('Final Variables/monthly_cum_sum.csv', index_col=0)
gpa = pd.read_csv('Final Variables/annual_gross_profit.csv', index_col=0)
# `bm` is the element-wise reciprocal of monthly_pb.csv (presumably
# price-to-book turned into book-to-market — confirm).
bm = 1 / pd.read_csv('Final Variables/monthly_pb.csv', index_col=0)

# Further panels, currently disabled:
#ret_co_m = pd.read_csv('Final Variables/ret_co_m.csv', index_col=0)
#ret_oc_m = pd.read_csv('Final Variables/ret_oc_m.csv', index_col=0)
#asset_growth = pd.read_csv('Final Variables/monthly_asset_growth.csv', index_col=0)
#turnover = pd.read_csv('Final Variables/monthly_turnover.csv', index_col=0)
#illiq = pd.read_csv('Final Variables/illiquidity.csv', index_col=0)

In [85]:
# Control panels to pass alongside the abnormal negative ratios.
control = [size, bm, ret_6m, gpa]

# NOTE(review): the `fama` definition visible in this notebook accepts only
# (data, shift_factor); this three-argument call presumably relied on an
# earlier version of the function that is no longer shown — confirm before
# re-running. The call also rebinds neg1_alpha / neg1_beta / neg1_results.
neg1_alpha, neg1_beta, neg1_results = fama(ab_neg, -1, control)

                REGN  future          size        bm    ret_6m       gpa
2014-01-01  0.024816     NaN           NaN       NaN  0.044492  0.072103
2014-02-01  0.039341     NaN           NaN       NaN  0.036307  0.062831
2014-03-01  0.043265     NaN           NaN       NaN  0.007856  0.147125
2014-04-01  0.015897     NaN           NaN       NaN  0.010918  0.103408
2014-05-01  0.041762     NaN           NaN       NaN  0.000464  0.108880
...              ...     ...           ...       ...       ...       ...
2022-07-31       NaN     NaN  7.907530e+10  0.286613       NaN       NaN
2022-08-31       NaN     NaN  7.907530e+10  0.286613       NaN       NaN
2022-09-30       NaN     NaN  7.907530e+10  0.286613       NaN       NaN
2022-10-31       NaN     NaN  7.907530e+10  0.286613       NaN       NaN
2022-11-30       NaN     NaN  7.907530e+10  0.286613       NaN       NaN

[323 rows x 6 columns]


ValueError: batch_size must be 'auto' or a positive integer, got: 0

In [69]:
# Number of missing values in each column of `bm`.
bm.isnull().sum()

VRSN    108
CMI       0
WDC       0
CSX       0
KMB       0
       ... 
AME       0
VTR       0
MKC       0
STZ       0
BKNG      0
Length: 351, dtype: int64

In [73]:
# Replace ab_neg's row labels with their datetime-parsed form; this mutates
# the frame that earlier cells also use.
ab_neg.index = pd.to_datetime(ab_neg.index)

In [74]:
# Display ab_neg to inspect it after the index conversion.
ab_neg

Unnamed: 0,REGN,BKNG,LKQ,LUV,EQIX,LH,MPWR,RVTY,AME,BBY,...,HOLX,GPN,BWA,NWS,HST,PODD,J,PTC,CRL,GOOGL
2014-01-01,0.024816,0.047957,0.040018,0.000000,0.088861,0.040363,0.069235,0.024718,0.034110,0.056561,...,0.074412,0.024970,0.022316,0.052350,0.058410,0.054401,0.022229,0.046007,0.053429,0.008346
2014-02-01,0.039341,0.044251,0.087099,0.077380,0.101447,0.126351,0.091827,0.025474,0.076786,0.052095,...,0.053906,0.128348,0.089561,0.049982,0.093614,0.122101,0.023645,0.063563,0.089965,0.044970
2014-03-01,0.043265,0.039885,0.021287,0.037474,0.078333,0.041594,0.013639,0.033296,0.043783,0.038350,...,0.011588,0.024180,0.058636,0.061251,0.059271,0.078943,0.074413,0.020585,0.064619,0.007731
2014-04-01,0.015897,0.028909,0.009570,0.000000,0.037079,0.041130,0.068410,0.043089,0.057649,0.028298,...,0.047934,0.047055,0.075001,0.065975,0.061289,0.039008,0.043381,0.032037,0.012211,0.007740
2014-05-01,0.041762,0.045691,0.072460,0.053274,0.115180,0.093865,0.091145,0.041062,0.078605,0.037147,...,0.096418,0.055426,0.047252,0.013384,0.087488,0.048540,0.049931,0.039246,0.083371,0.028932
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-01,0.064297,0.098425,0.064640,0.063994,0.036963,0.044644,0.052911,0.065446,0.050458,0.041030,...,0.030313,0.062198,0.061172,0.037252,0.037409,0.044799,0.066536,0.042420,0.069890,0.076899
2022-09-01,0.072688,0.066947,0.053403,0.046726,0.059591,0.075612,0.057950,0.072813,0.033725,0.070808,...,0.055334,0.099899,0.093865,0.075369,0.099570,0.074732,0.078164,0.058075,0.052787,0.053584
2022-10-01,0.092739,0.070348,0.081378,0.085739,0.062992,0.063954,0.057950,0.036406,0.044967,0.044938,...,0.045779,0.062437,0.099912,0.043824,0.067342,0.076999,0.032365,0.073091,0.051842,0.043623
2022-11-01,0.095384,0.034040,0.059072,0.048994,0.069795,0.077879,0.034203,0.036406,0.033158,0.057117,...,0.069802,0.066216,0.051468,0.083305,0.050850,0.012455,0.044665,0.061854,0.053731,0.045891
