In [1]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [164]:
dem_df = pd.read_csv('../data/dem_candidates_cleaned.csv', encoding="ISO-8859-1").dropna(subset=['primary_pctg'])
rep_df = pd.read_csv('../data/rep_candidates_cleaned.csv', encoding="ISO-8859-1").dropna(subset=['primary_pctg'])
# dem_df['has_twitter'] = dem_df['twitter_handle'].notna()
# rep_df['has_twitter'] = rep_df['twitter_handle'].isna()
rep_df.columns

Index(['candidate', 'twitter_handle', 'twitter_handle2', 'state', 'district',
       'office_type', 'race_type', 'race_primary_election_date',
       'primary_status', 'primary_runoff_status', 'general_status',
       'primary_pctg', 'won_primary', 'rep_party_support', 'trump_endorsed',
       'bannon_endorsed', 'great_america_endorsed', 'nra_endorsed',
       'right_to_life_endorsed', 'susan_b_anthony_endorsed',
       'club_for_growth_endorsed', 'koch_support', 'house_freedom_support',
       'tea_party_endorsed', 'main_street_endorsed', 'chamber_endorsed',
       'no_labels_support', 'has_twitter', 'num_endorsements'],
      dtype='object')

In [165]:
rep_senators = rep_df[rep_df['office_type'] == 'Senator']
rep_house = rep_df[rep_df['office_type'] == 'Representative']
rep_govs = rep_df[rep_df['office_type'] == 'Governor']
dem_senators = dem_df[dem_df['office_type'] == 'Senator']
dem_house = dem_df[dem_df['office_type'] == 'Representative']
dem_govs = dem_df[dem_df['office_type'] == 'Governor']

In [166]:
print(len(rep_df[rep_df['has_twitter'] == False]) / len(rep_df))
print(len(rep_df[rep_df['has_twitter'] == True]) / len(rep_df))

0.7553956834532374
0.2446043165467626


## Encode data

In [167]:
drop_cols = ['primary_status', 'general_status', 'primary_runoff_status', 'twitter_handle', 'twitter_handle2']

# encode data
encode_dem_cols = ['won_primary', 'candidate','has_twitter', 'state',  'district'
                   , 'office_type', 'race_type', 'race', 'veteran', 'race_primary_election_date',
    'lgbtq', 'elected_official', 'self_funder', 'stem', 'obama_alum', 'dem_party_support', 'emily_endorsed',
    'gun_sense_candidate', 'biden_endorsed', 'warren_endorsed', 'sanders_endorsed', 'our_revolution_endorsed',
    'justice_dems_endorsed', 'pccc_endorsed', 'indivisible_endorsed', 'wfp_endorsed', 'votevets_endorsed', 'no_labels_support']

encode_rep_cols = ['won_primary','candidate', 'has_twitter', 'state',  'district'
                   , 'office_type', 'race_type', 'race_primary_election_date',
    'rep_party_support', 'trump_endorsed', 'bannon_endorsed', 'great_america_endorsed', 'nra_endorsed',
    'right_to_life_endorsed', 'susan_b_anthony_endorsed', 'club_for_growth_endorsed', 'koch_support', 'house_freedom_support',
    'tea_party_endorsed', 'main_street_endorsed','chamber_endorsed', 'no_labels_support']

model_dem_cols = encode_dem_cols.copy()
model_dem_cols.append('num_endorsements')
model_rep_cols = encode_dem_cols.copy()
model_rep_cols.append('num_endorsements')

label_encoder = LabelEncoder()
state_map = {}
def label_encode_data(df, encode_cols):
    for col in encode_cols:
        print(col)
        df[col] = label_encoder.fit_transform(df[col].values.astype(str))
        le_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
        if col == 'state':
            state_map = le_mapping
            print(state_map)
        if col not in ['candidate', 'state', 'district', 'race_primary_election_date']:
            print(le_mapping)

    return df

dem_df_enc = label_encode_data(dem_df, encode_dem_cols)
rep_df_enc = label_encode_data(rep_df, encode_rep_cols)

# dem_df_enc.to_csv('../data/encoded_dem_data.csv', index=False)
# rep_df_enc.to_csv('../data/encoded_rep_data.csv', index=False)


won_primary
{'No': 0, 'Yes': 1, 'nan': 2}
candidate
has_twitter
{'False': 0, 'True': 1}
state
{'AL': 0, 'AR': 1, 'AZ': 2, 'CA': 3, 'CO': 4, 'GA': 5, 'IA': 6, 'ID': 7, 'IL': 8, 'IN': 9, 'KS': 10, 'KY': 11, 'MD': 12, 'ME': 13, 'MI': 14, 'MO': 15, 'MS': 16, 'MT': 17, 'NC': 18, 'ND': 19, 'NE': 20, 'NJ': 21, 'NM': 22, 'NV': 23, 'NY': 24, 'OH': 25, 'OK': 26, 'OR': 27, 'PA': 28, 'SC': 29, 'SD': 30, 'TN': 31, 'TX': 32, 'UT': 33, 'VA': 34, 'WV': 35}
district
office_type
{'Governor': 0, 'Representative': 1, 'Senator': 2}
race_type
{'Regular': 0, 'Special': 1}
race
{'Nonwhite': 0, 'White': 1, 'nan': 2}
veteran
{'No': 0, 'Yes': 1, 'nan': 2}
race_primary_election_date
lgbtq
{'No': 0, 'Yes': 1, 'nan': 2}
elected_official
{'No': 0, 'Yes': 1, 'nan': 2}
self_funder
{'No': 0, 'Yes': 1}
stem
{'No': 0, 'Yes': 1, 'nan': 2}
obama_alum
{'No': 0, 'Yes': 1, 'nan': 2}
dem_party_support
{'No': 0, 'Yes': 1, 'nan': 2}
emily_endorsed
{'No': 0, 'Yes': 1, 'nan': 2}
gun_sense_candidate
{'No': 0, 'Yes': 1, 'nan': 2}
bi

In [168]:
dem_df_enc['has_twitter']

0      0
1      1
2      1
3      1
4      1
5      1
6      1
7      1
8      1
9      1
10     1
11     1
12     1
13     1
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     1
23     0
24     1
25     1
26     1
27     1
28     1
29     1
      ..
737    1
738    1
739    1
740    1
741    1
742    1
743    1
744    1
745    1
746    1
747    1
748    1
749    1
750    1
751    1
752    1
753    1
754    1
755    1
756    1
757    1
758    1
759    1
760    0
761    1
762    1
763    1
764    1
765    1
766    0
Name: has_twitter, Length: 767, dtype: int64

In [169]:
dem_df_enc.columns

Index(['candidate', 'twitter_handle', 'twitter_handle2', 'state', 'district',
       'office_type', 'race_type', 'race_primary_election_date',
       'primary_status', 'primary_runoff_status', 'general_status',
       'partisan_lean', 'primary_pctg', 'won_primary', 'race', 'veteran',
       'lgbtq', 'elected_official', 'self_funder', 'stem', 'obama_alum',
       'dem_party_support', 'emily_endorsed', 'gun_sense_candidate',
       'biden_endorsed', 'warren_endorsed', 'sanders_endorsed',
       'our_revolution_endorsed', 'justice_dems_endorsed', 'pccc_endorsed',
       'indivisible_endorsed', 'wfp_endorsed', 'votevets_endorsed',
       'no_labels_support', 'has_twitter', 'num_endorsements'],
      dtype='object')

## Democratic - mixedlm

In [117]:
dem_x_cols = " + ".join(encode_dem_cols[2:])
dem_x_cols += ' + num_endorsements'
data_cols = encode_dem_cols.copy() # has won_primary and candidate
data_cols.extend(['num_endorsements'])
dem_model_data = dem_df_enc[data_cols]
print(len(dem_model_data.columns))
print(len(dem_x_cols.split('+')))
print(len(dem_model_data))
print(len(dem_model_data['candidate']))


model = smf.mixedlm("won_primary ~ " + dem_dep_cols, dem_df, groups=dem_df["candidate"]).fit()
print(model.summary())

29
27
767
767


In [118]:
model = smf.mixedlm("won_primary ~ " + dem_x_cols, dem_model_data, groups=dem_model_data["candidate"]).fit()
results_summary = model.summary()
dem_mixedlm_won_primary = results_summary.tables[1]
print(results_summary)

                Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     won_primary
No. Observations:     767         Method:                 REML       
No. Groups:           757         Scale:                  0.1814     
Min. group size:      1           Likelihood:             -586.1168  
Max. group size:      2           Converged:              Yes        
Mean group size:      1.0                                            
---------------------------------------------------------------------
                           Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------------
Intercept                  -1.250    0.276 -4.525 0.000 -1.792 -0.709
has_twitter                 0.130    0.057  2.299 0.022  0.019  0.242
state                      -0.001    0.005 -0.149 0.881 -0.010  0.009
district                    0.000    0.001  0.394 0.694 -0.002  0.003
office_type                 0.059   

In [151]:
dem_x_cols2 = " + ".join(encode_dem_cols[2:])
dem_x_cols2 += ' + num_endorsements'
data_cols2 = encode_dem_cols[1:].copy() # remove won_primary, has candidate
data_cols2.extend(['num_endorsements', 'primary_pctg'])
dem_model_data2 = dem_df_enc[data_cols2]

print(len(dem_x_cols2.split('+')))
print(len(dem_model_data2.columns))
print(len(dem_model_data2))
print(len(dem_model_data2['candidate']))
print(len(dem_model_data2['primary_pctg']))

27
29
767
767
767


In [152]:
model = smf.mixedlm("primary_pctg ~ " + dem_x_cols2, dem_model_data2, groups=dem_model_data2["candidate"]).fit()
results_summary = model.summary()
dem_mixedlm_primary_pctg = results_summary.tables[1]
print(model.summary())

                 Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     primary_pctg
No. Observations:       767         Method:                 REML        
No. Groups:             757         Scale:                  239.9164    
Min. group size:        1           Likelihood:             -3273.0814  
Max. group size:        2           Converged:              Yes         
Mean group size:        1.0                                             
------------------------------------------------------------------------
                            Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
------------------------------------------------------------------------
Intercept                  -52.247   10.503 -4.974 0.000 -72.833 -31.661
has_twitter                 13.107    2.153  6.088 0.000   8.887  17.326
state                       -0.127    0.179 -0.709 0.478  -0.478   0.224
district                     0.014    0.041  0.330 0.741  -0.068   0.

In [155]:
dem_df_enc[dem_df_enc['office_type'] == 1][c]

Unnamed: 0,won_primary,candidate,has_twitter,state,district,race_type,race,veteran,race_primary_election_date,lgbtq,...,warren_endorsed,sanders_endorsed,our_revolution_endorsed,justice_dems_endorsed,pccc_endorsed,indivisible_endorsed,wfp_endorsed,votevets_endorsed,no_labels_support,num_endorsements
6,0,458,1,0,21,0,0,0,8,0,...,2,2,2,2,2,2,2,2,2,0
7,1,609,1,0,21,0,0,1,8,0,...,2,2,2,2,2,2,2,2,2,0
8,0,55,1,0,22,0,0,1,8,0,...,2,2,2,2,2,2,2,2,2,0
9,1,687,1,0,22,0,1,0,8,0,...,2,2,2,2,2,2,2,2,2,0
10,0,12,1,0,23,0,0,0,8,0,...,2,2,2,2,2,2,2,2,2,1
11,1,472,1,0,23,0,1,0,8,0,...,2,2,2,2,2,2,2,2,2,1
12,1,440,1,0,24,0,1,0,8,0,...,2,2,2,2,2,2,2,2,2,1
13,0,600,1,0,24,0,1,1,8,0,...,2,2,2,2,2,2,2,2,2,0
16,1,121,1,1,26,0,1,0,4,0,...,2,2,2,2,2,2,2,2,2,2
17,0,255,1,1,26,0,1,1,4,0,...,2,2,2,2,2,2,2,2,2,1


In [170]:
#  {'Governor': 0, 'Representative': 1, 'Senator': 2}
exclude_cols = set(['office_type', 'primary_pctg'])
c = [e for e in model_dem_cols if e not in exclude_cols]
print(c)
model_cols = " + ".join(c)
print(model_cols)
dem_senators_model_data = dem_df_enc[dem_df_enc['office_type'] == 1][c]
model = smf.mixedlm("won_primary ~ " + model_cols, dem_senators_model_data, groups=dem_senators_model_data["candidate"]).fit()
results_summary = model.summary()
dem_mixedlm_won_primary = results_summary.tables[1]
print(results_summary)

['won_primary', 'candidate', 'has_twitter', 'state', 'district', 'race_type', 'race', 'veteran', 'race_primary_election_date', 'lgbtq', 'elected_official', 'self_funder', 'stem', 'obama_alum', 'dem_party_support', 'emily_endorsed', 'gun_sense_candidate', 'biden_endorsed', 'warren_endorsed', 'sanders_endorsed', 'our_revolution_endorsed', 'justice_dems_endorsed', 'pccc_endorsed', 'indivisible_endorsed', 'wfp_endorsed', 'votevets_endorsed', 'no_labels_support', 'num_endorsements']
won_primary + candidate + has_twitter + state + district + race_type + race + veteran + race_primary_election_date + lgbtq + elected_official + self_funder + stem + obama_alum + dem_party_support + emily_endorsed + gun_sense_candidate + biden_endorsed + warren_endorsed + sanders_endorsed + our_revolution_endorsed + justice_dems_endorsed + pccc_endorsed + indivisible_endorsed + wfp_endorsed + votevets_endorsed + no_labels_support + num_endorsements


KeyboardInterrupt: 

## Republican - mixedlm

In [171]:
rep_x_cols = " + ".join(encode_rep_cols[2:])
rep_x_cols += "+ num_endorsements"
data_cols = encode_rep_cols.copy()
data_cols.append('num_endorsements')
rep_model_data = rep_df_enc[data_cols]

In [172]:
model = smf.mixedlm("won_primary ~ " + rep_x_cols, rep_model_data, groups=rep_model_data["candidate"]).fit()
results_summary = model.summary()
rep_mixedlm_won_primary = results_summary.tables[1]
print(model.summary())

                Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     won_primary
No. Observations:     695         Method:                 REML       
No. Groups:           670         Scale:                  0.0000     
Min. group size:      1           Likelihood:             -213.4017  
Max. group size:      8           Converged:              Yes        
Mean group size:      1.0                                            
---------------------------------------------------------------------
                           Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------------
Intercept                  -0.514    0.138 -3.723 0.000 -0.785 -0.244
has_twitter                -0.178    0.035 -5.121 0.000 -0.246 -0.110
state                      -0.000    0.000 -0.125 0.901 -0.000  0.000
district                    0.000    0.000  0.125 0.901 -0.000  0.000
office_type                -0.000   

In [18]:
rep_x_cols = " + ".join(encode_rep_cols[2:])
rep_x_cols += "+ num_endorsements"
data_cols = encode_rep_cols[1:].copy()
data_cols.extend(['num_endorsements', 'primary_pctg'])
rep_model_data = rep_df_enc[data_cols]

In [175]:
model = smf.mixedlm("primary_pctg ~ " + rep_x_cols, rep_model_data, groups=rep_model_data["candidate"]).fit()
results_summary = model.summary()
rep_mixedlm_primary_pctg = results_summary.tables[1]
print(model.summary())

                 Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     primary_pctg
No. Observations:       695         Method:                 REML        
No. Groups:             670         Scale:                  26.3395     
Min. group size:        1           Likelihood:             -2966.6826  
Max. group size:        8           Converged:              Yes         
Mean group size:        1.0                                             
------------------------------------------------------------------------
                            Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
------------------------------------------------------------------------
Intercept                  -34.500    8.431 -4.092 0.000 -51.024 -17.976
has_twitter                -11.385    1.750 -6.506 0.000 -14.815  -7.955
state                       -0.067    0.100 -0.677 0.498  -0.263   0.128
district                     0.068    0.054  1.262 0.207  -0.037   0.

# Understanding Democratic Factors

- Examine factors with low p-values
- Of those factors, which ones actually have substantial data available?

In [177]:
print(dem_mixedlm_won_primary.columns)
# dem_mixedlm_won_primary.drop(dem_mixedlm_won_primary.tail(1).index,inplace=True) # drops last, useless column
dem_mixedlm_won_primary['P>|z|'] = dem_mixedlm_won_primary['P>|z|'].astype('float')
low_p = dem_mixedlm_won_primary[dem_mixedlm_won_primary['P>|z|'] < 0.05]
low_p.sort_values('Coef.', ascending=False)

Index(['Coef.', 'Std.Err.', 'z', 'P>|z|', '[0.025', '0.975]'], dtype='object')


Unnamed: 0,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
has_twitter,0.13,0.057,2.299,0.022,0.019,0.242
num_endorsements,0.125,0.018,6.872,0.0,0.089,0.161
biden_endorsed,0.11,0.052,2.093,0.036,0.007,0.212
race,0.09,0.029,3.086,0.002,0.033,0.147
votevets_endorsed,0.082,0.032,2.527,0.012,0.018,0.145
gun_sense_candidate,0.065,0.025,2.634,0.008,0.017,0.113
emily_endorsed,0.064,0.027,2.333,0.02,0.01,0.117
indivisible_endorsed,0.059,0.03,1.982,0.047,0.001,0.117
race_primary_election_date,0.024,0.008,3.068,0.002,0.009,0.039
Intercept,-1.25,0.276,-4.525,0.0,-1.792,-0.709


In [178]:
binary_col = ['has_twitter', 'veteran', 'lgbtq','elected_official', 'self_funder', 'stem',
       'obama_alum', 'emily_endorsed', 'race',
       'gun_sense_candidate', 'biden_endorsed', 'warren_endorsed',
       'sanders_endorsed', 'our_revolution_endorsed', 'justice_dems_endorsed',
       'pccc_endorsed', 'indivisible_endorsed', 'wfp_endorsed',
       'votevets_endorsed', 'no_labels_support']

results = []

for col in binary_col:
    num_yes = len(dem_df_enc[dem_df_enc[col] == 1])
    num_no = len(dem_df_enc[dem_df_enc[col] == 0])
    num_total = len(dem_df_enc)
    results.append([col
                    , round(num_yes / num_total, 3)
                    , round(num_no / num_total, 3)
                    , round((num_total - (num_yes + num_no))/num_total, 3)])

empty_ratio = pd.DataFrame(results, columns=['col_name', 'num_yes', 'num_no', 'num_none'])
empty_ratio.sort_values('num_none')

Unnamed: 0,col_name,num_yes,num_no,num_none
0,has_twitter,0.881,0.119,0.0
4,self_funder,0.053,0.947,0.0
6,obama_alum,0.043,0.956,0.001
1,veteran,0.154,0.832,0.014
2,lgbtq,0.048,0.937,0.014
3,elected_official,0.166,0.82,0.014
5,stem,0.175,0.811,0.014
8,race,0.553,0.25,0.197
9,gun_sense_candidate,0.237,0.228,0.535
13,our_revolution_endorsed,0.106,0.302,0.592


In [95]:
# low_p_cols = low_p.index.tolist() # ommitted "race_primary" col - all of it is filled out

# yes_ratio = []
# no_ratio = []
# none_ratio = []
# for c in low_p_cols:
#     print(c)
#     print(empty_ratio[empty_ratio['col_name'] == c].num_yes.values[0])
#     yes_ratio.append(empty_ratio[empty_ratio['col_name'] == c].num_yes.values[0])
#     no_ratio.append(empty_ratio[empty_ratio['col_name'] == c].num_no.values[0])
#     none_ratio.append(empty_ratio[empty_ratio['col_name'] == c].num_none.values[0])

In [96]:
# low_p['num_none'] = none_ratio
# low_p['num_yes'] = yes_ratio
# low_p['num_no'] = no_ratio
# low_p.sort_values('num_none')

In [97]:
# total_dems = len(dem_df)
# # print(total_dems)
# print('previously elected official & won: ', round(len(dem_df[(dem_df['elected_official'] == 1) & (dem_df['won_primary'] == 1)])  / total_dems, 2))
# print('previously elected official & lost: ', round(len(dem_df[(dem_df['elected_official'] == 1) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('not previously elected official & lost: ', round(len(dem_df[(dem_df['elected_official'] == 0) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('not prexviously elected official & won: ', round(len(dem_df[(dem_df['elected_official'] == 0) & (dem_df['won_primary'] == 1)]) / total_dems, 2))
# print('-----')
# print('stem & won: ', round(len(dem_df[(dem_df['stem'] == 1) & (dem_df['won_primary'] == 1)])  / total_dems, 2))
# print('stem & lost: ', round(len(dem_df[(dem_df['stem'] == 1) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('not stem & lost: ', round(len(dem_df[(dem_df['stem'] == 0) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('not stem & won: ', round(len(dem_df[(dem_df['stem'] == 0) & (dem_df['won_primary'] == 1)]) / total_dems, 2))
# print('-----')
# print('white & won: ', round(len(dem_df[(dem_df['race'] == 1) & (dem_df['won_primary'] == 1) ])  / total_dems, 2))
# print('white & lost: ', round(len(dem_df[(dem_df['race'] == 1) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('non-white & lost: ', round(len(dem_df[(dem_df['race'] == 0) & (dem_df['won_primary'] == 0)]) / total_dems, 2))
# print('non-white & won: ', round(len(dem_df[(dem_df['race'] == 0) & (dem_df['won_primary'] == 1)]) / total_dems, 2))

# print('-----')
# print('white & won: ', round(len(dem_df[(dem_df['race'] == 1) 
#                                         & (dem_df['won_primary'] == 1) 
#                                         & (dem_df['emily_endorsed'] == 1) ])  / total_dems, 2))
# print('white & lost: ', round(len(dem_df[(dem_df['race'] == 1) & (dem_df['won_primary'] == 0)
#                                         & (dem_df['emily_endorsed'] == 1)]) / total_dems, 2))
# print('non-white & lost: ', round(len(dem_df[(dem_df['race'] == 0) & (dem_df['won_primary'] == 0)
#                                             & (dem_df['emily_endorsed'] == 1)]) / total_dems, 2))
# print('non-white & won: ', round(len(dem_df[(dem_df['race'] == 0) & (dem_df['won_primary'] == 1)
#                                            & (dem_df['emily_endorsed'] == 1)]) / total_dems, 2))







In [98]:
# # emily endorsed?
# print("total emily endorsed: ", len(dem_df[(dem_df['emily_endorsed'] == 1)]))
# print("emily endorsed & won: ", len(dem_df[(dem_df['emily_endorsed'] == 1) & (dem_df['won_primary'] == 1)]))
# print('---')
# print("emily endorsed & white: ", len(dem_df[(dem_df['emily_endorsed'] == 1) & (dem_df['race'] == 0)]))
# print("emily endorsed & non-white: ", len(dem_df[(dem_df['emily_endorsed'] == 1) & (dem_df['race'] == 1)]))
# print('\n==========')
# print("total NOT emily endorsed: ", len(dem_df[(dem_df['emily_endorsed'] == 0)]))
# print("NOT emily endorsed & won: ", len(dem_df[(dem_df['emily_endorsed'] == 0) & (dem_df['won_primary'] == 1)]))

# print('\nstates where emily endorsed:')
# states = []
# print(state_map)
# for s in dem_df[(dem_df['emily_endorsed'] == 1)].state.unique():
#     states.append(state_map[s])
# print(states)
# # print(dem_df[(dem_df['emily_endorsed'] == 1)].state.unique())

### conclusions
- existence of twitter_handle is strongest signal
    - is this because it's the most filled out?
- elected official
    - non-previously elected officials (696) performed better than previous elected (150), though group size different
    - not a huge coef. makes sense
- stem
    - is stem bad..?
    - stem is bad :(
- emily endorsd
    - high win percentage if you're emily endorsed, high loss % if you were not
    - but only 30% of candidates were endorsed/not-endorsed

# Republican Factors

In [180]:
print(rep_mixedlm_won_primary.columns)
# rep_mixedlm_won_primary.drop(rep_mixedlm_won_primary.tail(1).index,inplace=True) # drops last, useless column
rep_mixedlm_won_primary['P>|z|'] = rep_mixedlm_won_primary['P>|z|'].astype('float')
low_p = rep_mixedlm_won_primary[rep_mixedlm_won_primary['P>|z|'] < 0.07]
low_p.sort_values('Coef.', ascending=False)

Index(['Coef.', 'Std.Err.', 'z', 'P>|z|', '[0.025', '0.975]'], dtype='object')


Unnamed: 0,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
num_endorsements,0.159,0.016,10.126,0.0,0.129,0.19
great_america_endorsed,0.08,0.008,10.02,0.0,0.064,0.096
rep_party_support,0.07,0.024,2.989,0.003,0.024,0.117
tea_party_endorsed,0.066,0.025,2.636,0.008,0.017,0.116
house_freedom_support,0.059,0.03,1.972,0.049,0.0,0.118
nra_endorsed,0.058,0.028,2.12,0.034,0.004,0.112
main_street_endorsed,0.052,0.026,2.011,0.044,0.001,0.103
Intercept,-0.514,0.138,-3.723,0.0,-0.785,-0.244
has_twitter,-0.178,0.035,-5.121,0.0,-0.246,-0.11


In [125]:
binary_col = [ 'has_twitter',
       'rep_party_support', 'trump_endorsed', 'bannon_endorsed',
       'great_america_endorsed', 'nra_endorsed', 'right_to_life_endorsed',
       'susan_b_anthony_endorsed', 'club_for_growth_endorsed', 'koch_support',
       'house_freedom_support', 'tea_party_endorsed', 'main_street_endorsed',
       'chamber_endorsed', 'no_labels_support']

results = []

for col in binary_col:
    num_yes = len(rep_df[rep_df[col] == 1])
    num_no = len(rep_df[rep_df[col] == 0])
    num_total = len(rep_df)
    results.append([col
                    , round(num_yes / num_total, 3)
                    , round(num_no / num_total, 3)
                    , round((num_total - (num_yes + num_no))/num_total,3)])

r = pd.DataFrame(results, columns=['col_name', 'num_yes', 'num_no', 'num_none'])
r.sort_values('num_none')

Unnamed: 0,col_name,num_yes,num_no,num_none
0,has_twitter,0.722,0.278,0.0
6,right_to_life_endorsed,0.102,0.324,0.574
7,susan_b_anthony_endorsed,0.035,0.155,0.81
1,rep_party_support,0.081,0.108,0.812
8,club_for_growth_endorsed,0.03,0.151,0.819
9,koch_support,0.03,0.127,0.843
11,tea_party_endorsed,0.023,0.121,0.856
12,main_street_endorsed,0.026,0.118,0.856
10,house_freedom_support,0.022,0.118,0.86
2,trump_endorsed,0.024,0.078,0.898


In [40]:
# dem_mixedlm_won_primary.to_csv('../mixed_lm_results/dem_ml_won_primary.csv')
# dem_mixedlm_primary_pctg.to_csv('../mixed_lm_results/dem_mixedlm_primary_pctg.csv')
# rep_mixedlm_won_primary.to_csv('../mixed_lm_results/rep_mixedlm_won_primary.csv')
# rep_mixedlm_primary_pctg.to_csv('../mixed_lm_results/rep_mixedlm_primary_pctg.csv')