In [6]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

In [11]:
merged_data = pd.read_csv('merged_county_district_data.csv')

#drop remaining NaN's to run regression
merged_data.dropna(inplace = True)

# convert bools in each incumbency categorization to 1's and 0's
merged_data['one_incumb'] = merged_data['one_incumb'].astype(int)
merged_data['zero_incumbs'] = merged_data['zero_incumbs'].astype(int)
merged_data['multiple_incumbs'] = merged_data['multiple_incumbs'].astype(int)

ind_var = merged_data['treatment']



In [12]:
# lin reg 1: dependent = one_incumb
dep_var_reg1 = merged_data['one_incumb']
regression1 = sm.OLS(dep_var_reg1, ind_var).fit()
print(regression1.summary())

                                 OLS Regression Results                                
Dep. Variable:             one_incumb   R-squared (uncentered):                   0.454
Model:                            OLS   Adj. R-squared (uncentered):              0.454
Method:                 Least Squares   F-statistic:                          5.041e+04
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        15:45:33   Log-Likelihood:                         -46655.
No. Observations:               60729   AIC:                                  9.331e+04
Df Residuals:                   60728   BIC:                                  9.332e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [13]:
# lin reg 2: dependent = zero_incumbs
dep_var_reg2 = merged_data['zero_incumbs']
regression2 = sm.OLS(dep_var_reg2, ind_var).fit()
print(regression2.summary())

                                 OLS Regression Results                                
Dep. Variable:           zero_incumbs   R-squared (uncentered):                   0.437
Model:                            OLS   Adj. R-squared (uncentered):              0.437
Method:                 Least Squares   F-statistic:                          4.708e+04
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        15:45:37   Log-Likelihood:                         -47054.
No. Observations:               60729   AIC:                                  9.411e+04
Df Residuals:                   60728   BIC:                                  9.412e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [14]:
# lin reg 3: dependent = multiple_incumbs
dep_var_reg3 = merged_data['multiple_incumbs']
regression3 = sm.OLS(dep_var_reg3, ind_var).fit()
print(regression3.summary())

                                 OLS Regression Results                                
Dep. Variable:       multiple_incumbs   R-squared (uncentered):                   0.010
Model:                            OLS   Adj. R-squared (uncentered):              0.010
Method:                 Least Squares   F-statistic:                              631.7
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                   1.12e-138
Time:                        15:45:39   Log-Likelihood:                          47487.
No. Observations:               60729   AIC:                                 -9.497e+04
Df Residuals:                   60728   BIC:                                 -9.496e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [54]:
# adding year fixed effects to ind_var_fixed


# create dummies for year fixed effects
year_dummies = pd.get_dummies(merged_data['year'], drop_first = True)

# ensures year_dummies are in fact integers
year_dummies = year_dummies.astype(int)

# combines treatment and year into one data frame 
ind_var_yr_fixed = pd.concat([merged_data[['treatment']], year_dummies], axis=1)


In [57]:
# adding year fixed effects for lin reg 1: one_incumb and treatment

#dependent variable
dep_var_yr_fixed1 = merged_data['one_incumb']

regression_yr_fixed1 = sm.OLS(dep_var_yr_fixed1, ind_var_yr_fixed).fit()
print(regression_yr_fixed1.summary())

                                 OLS Regression Results                                
Dep. Variable:             one_incumb   R-squared (uncentered):                   0.477
Model:                            OLS   Adj. R-squared (uncentered):              0.477
Method:                 Least Squares   F-statistic:                              5027.
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        16:36:20   Log-Likelihood:                         -45347.
No. Observations:               60729   AIC:                                  9.072e+04
Df Residuals:                   60718   BIC:                                  9.081e+04
Df Model:                          11                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [58]:
# adding year fixed effects to lin reg 2: zero_incumbs and treatment

#dependent variable
dep_var_yr_fixed2 = merged_data['zero_incumbs']

regression_yr_fixed2 = sm.OLS(dep_var_yr_fixed2, ind_var_yr_fixed).fit()
print(regression_yr_fixed2.summary())

                                 OLS Regression Results                                
Dep. Variable:           zero_incumbs   R-squared (uncentered):                   0.470
Model:                            OLS   Adj. R-squared (uncentered):              0.470
Method:                 Least Squares   F-statistic:                              4904.
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        16:36:20   Log-Likelihood:                         -45177.
No. Observations:               60729   AIC:                                  9.038e+04
Df Residuals:                   60718   BIC:                                  9.047e+04
Df Model:                          11                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [59]:
# adding year fixed effects to lin reg 3: multiple_incumbs and treatment

#dependent variable
dep_var_yr_fixed3 = merged_data['multiple_incumbs']

regression_yr_fixed3 = sm.OLS(dep_var_yr_fixed3, ind_var_yr_fixed).fit()
print(regression_yr_fixed3.summary())

                                 OLS Regression Results                                
Dep. Variable:       multiple_incumbs   R-squared (uncentered):                   0.018
Model:                            OLS   Adj. R-squared (uncentered):              0.018
Method:                 Least Squares   F-statistic:                              99.56
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                   7.47e-226
Time:                        16:36:21   Log-Likelihood:                          47716.
No. Observations:               60729   AIC:                                 -9.541e+04
Df Residuals:                   60718   BIC:                                 -9.531e+04
Df Model:                          11                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [63]:
# adding state fixed effects to ind_var_fixed


# create dummies for year fixed effects
state_dummies = pd.get_dummies(merged_data['state'], drop_first = True)

# ensures year_dummies are in fact integers
state_dummies = state_dummies.astype(int)

# combines treatment and year into one data frame 
ind_var_state_fixed = pd.concat([merged_data[['treatment']], state_dummies], axis=1)

In [64]:
# adding state fixed effects for lin reg 1: one_incumb and treatment

#dependent variable
dep_var_state_fixed1 = merged_data['one_incumb']

regression_state_fixed1 = sm.OLS(dep_var_state_fixed1, ind_var_state_fixed).fit()
print(regression_state_fixed1.summary())

                                 OLS Regression Results                                
Dep. Variable:             one_incumb   R-squared (uncentered):                   0.492
Model:                            OLS   Adj. R-squared (uncentered):              0.492
Method:                 Least Squares   F-statistic:                              1253.
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        16:37:33   Log-Likelihood:                         -44414.
No. Observations:               60729   AIC:                                  8.892e+04
Df Residuals:                   60682   BIC:                                  8.935e+04
Df Model:                          47                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [69]:
# adding year fixed effects to lin reg 2: zero_incumbs and treatment

#dependent variable
dep_var_state_fixed2 = merged_data['zero_incumbs']

regression_state_fixed2 = sm.OLS(dep_var_state_fixed2, ind_var_state_fixed).fit()
print(regression_fixed2.summary())

                                 OLS Regression Results                                
Dep. Variable:           zero_incumbs   R-squared (uncentered):                   0.485
Model:                            OLS   Adj. R-squared (uncentered):              0.485
Method:                 Least Squares   F-statistic:                              1218.
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                        0.00
Time:                        16:42:07   Log-Likelihood:                         -44304.
No. Observations:               60729   AIC:                                  8.870e+04
Df Residuals:                   60682   BIC:                                  8.913e+04
Df Model:                          47                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [68]:
# adding state fixed effects for lin reg 3: multiple_incumbs and treatment

#dependent variable
dep_var_state_fixed3 = merged_data['multiple_incumbs']

regression_state_fixed3 = sm.OLS(dep_var_state_fixed3, ind_var_state_fixed).fit()
print(regression_state_fixed3.summary())

                                 OLS Regression Results                                
Dep. Variable:       multiple_incumbs   R-squared (uncentered):                   0.021
Model:                            OLS   Adj. R-squared (uncentered):              0.021
Method:                 Least Squares   F-statistic:                              28.19
Date:                Mon, 20 Jan 2025   Prob (F-statistic):                   8.10e-244
Time:                        16:42:00   Log-Likelihood:                          47829.
No. Observations:               60729   AIC:                                 -9.556e+04
Df Residuals:                   60682   BIC:                                 -9.514e+04
Df Model:                          47                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------