In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Let wide frames display up to 100 columns before pandas truncates them.
pd.options.display.max_columns = 100

In [27]:
def load_chamber_returns(csv_path, house_flag):
    """Read one returns CSV, lower-case its column names and tag its rows.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    house_flag : int
        Value written to the ``house`` column for every row of the file.

    Returns
    -------
    pandas.DataFrame
        A new frame; nothing is modified in place, so the cell can be
        re-run safely.
    """
    return (
        pd.read_csv(csv_path)
        .rename(columns=str.lower)
        .assign(house=house_flag)
    )


# Indicator column: 1 for rows read from house_returns.csv, 0 for senate_returns.csv.
house = load_chamber_returns('house_returns.csv', 1)
senate = load_chamber_returns('senate_returns.csv', 0)

# Explanatory columns kept for modelling.
features = ['type',
            'party',
            'age',
            'amount_discrete',
            'market_cap_discrete',
            'important_committee',
            'house']

# Return columns kept for modelling: raw, index and abnormal variants for the
# 1d / 5d / 1m horizons, plus the matching "prev" columns.
# NOTE(review): the exact definition of each column lives in the CSVs — confirm there.
returns = ['return_1d',
           'return_index_1d',
           'return_abnormal_1d',
           'return_5d',
           'return_index_5d',
           'return_abnormal_5d',
           'return_1m',
           'return_index_1m',
           'return_abnormal_1m',
           'return_prev1d',
           'return_index_prev1d',
           'return_abnormal_prev1d',
           'return_prev5d',
           'return_index_prev5d',
           'return_abnormal_prev5d',
           'return_prev1m',
           'return_index_prev1m',
           'return_abnormal_prev1m']

In [48]:
# Stack the House and Senate rows, keeping only the modelling columns.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. Like append, it keeps each frame's original row index.
model_columns = features + returns
data = pd.concat([house[model_columns], senate[model_columns]])
# print(len(data))
# print(set(data['party']))
# print(sum(data['party'] == 'Democratic'))
# print(sum(data['party'] == 'Republican'))

# Keep only the two major parties; rows with any other party value
# (or a missing one) are dropped.
data = data[data['party'].isin(['Democratic', 'Republican'])]
# print(len(data))

6288
{'Republican', 'Independent', 'Democratic', 'Libertarian'}
3845
2436
6281


In [50]:
# OLS fit of return_abnormal_1m on return_abnormal_prev1m and the listed covariates.
# Adjacent string literals are joined by Python into a single formula string.
formula = (
    'return_abnormal_1m ~ '
    'return_abnormal_prev1m + type + party + age + '
    'market_cap_discrete + important_committee + house'
)
model = smf.ols(formula=formula, data=data).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:     return_abnormal_1m   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                  0.015
Method:                 Least Squares   F-statistic:                     14.73
Date:                Tue, 10 Aug 2021   Prob (F-statistic):           3.55e-19
Time:                        14:59:48   Log-Likelihood:                 1251.6
No. Observations:                6281   AIC:                            -2487.
Df Residuals:                    6273   BIC:                            -2433.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept                  0

In [52]:
# OLS fit of return_abnormal_1d on return_abnormal_prev1d and the listed covariates.
# Adjacent string literals are joined by Python into a single formula string.
formula = (
    'return_abnormal_1d ~ '
    'return_abnormal_prev1d + type + party + '
    'important_committee + house'
)
model = smf.ols(formula=formula, data=data).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:     return_abnormal_1d   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     3.672
Date:                Tue, 10 Aug 2021   Prob (F-statistic):            0.00255
Time:                        15:02:35   Log-Likelihood:                 11282.
No. Observations:                6281   AIC:                        -2.255e+04
Df Residuals:                    6275   BIC:                        -2.251e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept                  0