In [1]:
# imports and loading clean data
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt

from IPython.display import display, Markdown

from modules.lv_utils import load_households, load_voters
from modules.ms1_utils import clean_df, show_vote_rate_and_summary, two_sample_perm_test_diff_frac_votes
from modules.ms1_utils import get_two_sample_ns, plot_hist_vote_rate_vs_field

# Read the three cleaned inputs: the household and voter tables go through
# the project loaders, the election metadata is read directly with pandas.
households = load_households(
    'data_clean/20180725_fullset_households_district3.csv'
)
voters = load_voters(
    'data_clean/20180725_fullset_voters_district3.csv'
)
elections = pd.read_csv(
    filepath_or_buffer='data_clean/20180621_election_data.csv'
)

# Shared column labels, grouped as sample 0 / sample 1 / comparison.
# NOTE(review): presumably the output columns of the two-sample
# permutation-test summaries built later in the notebook -- confirm against
# modules.ms1_utils.
outcols = [
    'votes_s0', 'elec_n0', 'rate_r0',
    'votes_s1', 'elec_n1', 'rate_r1',
    'emp_diff', 'perm_p',
]

In [18]:
# Column inventory for all three tables. The voters index is printed in two
# slices (first 50 labels, then the remainder); the elections index is left
# as the cell's final expression so it renders as the cell output.
print(
    voters.columns[:50],
    voters.columns[50:],
    households.columns,
    sep='\n',
)
elections.columns

Index(['Vid', 'Abbr', 'Precinct', 'PrecinctSub', 'Party', 'PartyMain',
       'RegDate', 'PAV', 'RegDateOriginal', 'E6_110816', 'E5_060716',
       'E4_110414', 'E3_060314', 'E2_110612', 'E1_060512', 'District',
       'VScore', 'VScorePos', 'VScorePct', 'BirthYear',
       'OldestInHouseBirthYear', 'IsOldestInHouse', 'havePhone',
       'BirthPlaceState', 'BirthPlaceStateRegion', 'BirthPlaceCountry',
       'BirthPlaceCountryRegion', 'Gender', 'sameMailAddress', 'MailCountry',
       'isApt', 'Zip', 'StreetType', 'EmailProvider', 'E5_060716BT',
       'E1_060512BT', 'Hid', 'cHid', 'E34_nVotesPos', 'E34_nVotes',
       'E34_nVotesPct', 'E56_nVotesPos', 'E56_nVotes', 'E56_nVotesPct',
       'E78_nVotesPos', 'E78_nVotes', 'E78_nVotesPct', 'E12_nVotesPos',
       'E12_nVotes', 'E12_nVotesPct'],
      dtype='object')
Index(['E14_nVotesPos', 'E14_nVotes', 'E14_nVotesPct', 'E16_nVotesPos',
       'E16_nVotes', 'E16_nVotesPct', 'Eap_nVotesPos', 'Eap_nVotes',
       'Eap_nVotesPct', 'Eag_nVote

Index(['elections', 'dates', 'cycle', 'etype', 'president', 'us_senate_maj',
       'us_repre_maj', 'ca_governor', 'ca_lt_govnor', 'ca_senate_maj',
       'ca_assembly_maj'],
      dtype='object')

In [3]:
# Preview the first five household rows.
households.head(n=5)

Unnamed: 0,Hid,StreetType,Zip,Precinct,PrecinctSub,District,CityArea,isApt,cHid,nVoters,...,E5_nVotes,E5_nVotesPct,E6_nVotesPos,E6_nVotes,E6_nVotesPct,PartiesInHH,mostAffls,mixedAffls,allAff,uniformAff
0,0,DR,94536,832400,0,3,Centerville,False,0,2,...,0,0.0,1,0,0.0,DEM NPP,DEM,False,False,False
1,1,PL/TER,94538,832910,0,3,Centerville,False,1,3,...,1,0.333333,3,3,1.0,REP REP NPP,REP,False,False,False
2,2,CMN,94538,832620,2,3,Centerville,False,2,3,...,3,1.0,3,3,1.0,DEM DEM DEM,DEM,False,True,True
3,3,PL/TER,94538,832910,0,3,Centerville,False,3,2,...,2,1.0,2,2,1.0,DEM DEM,DEM,False,True,True
4,4,PL/TER,94536,835410,0,3,Downtown / BART,False,4,4,...,3,0.75,4,3,0.75,DEM DEM DEM DEM,DEM,False,True,True


In [8]:
# Same five-row preview, restricted to the first ten columns so nothing is
# elided from the rendered table.
households.head(5).iloc[:, :10]

Unnamed: 0,Hid,StreetType,Zip,Precinct,PrecinctSub,District,CityArea,isApt,cHid,nVoters
0,0,DR,94536,832400,0,3,Centerville,False,0,2
1,1,PL/TER,94538,832910,0,3,Centerville,False,1,3
2,2,CMN,94538,832620,2,3,Centerville,False,2,3
3,3,PL/TER,94538,832910,0,3,Centerville,False,3,2
4,4,PL/TER,94536,835410,0,3,Downtown / BART,False,4,4


In [4]:
# Preview the first five voter rows.
voters.head(n=5)

Unnamed: 0,Vid,Abbr,Precinct,PrecinctSub,Party,PartyMain,RegDate,PAV,RegDateOriginal,E6_110816,...,E6_nVotesPctInHH,nVScorePosInHH,nVScoreInHH,nVScorePctInHH,CityArea,PartiesInHH,mostAfflsInHH,mixedAfflsInHH,allAffInHH,uniformAffInHH
0,0,82,832400,0,DEM,DEM,1992-10-05,Y,1992-10-05,A,...,1.0,33.0189,33.0189,1.0,Centerville,DEM NPP NPP,DEM,False,False,False
1,1,82,832910,0,DEM,DEM,2012-11-06,Y,2012-10-26,A,...,0.8,33.5069,20.8044,0.620899,Centerville,DEM NPP NPP NPP NPP DEM,DEM,False,False,False
2,2,12,832900,0,DEM,DEM,2012-11-06,Y,1980-07-25,A,...,1.0,32.0157,25.6097,0.799911,Centerville,DEM DEM DEM,DEM,False,True,True
3,3,99,832710,0,NPP,NPP,2016-09-15,N,2016-09-15,N,...,0.0,22.0126,0.0,0.0,Centerville,NPP NPP,NPP,False,False,True
4,4,111,832210,1,DEM,DEM,2018-02-13,Y,2018-02-13,,...,-1.0,0.0,0.0,-1.0,Centerville,DEM,DEM,False,True,True


In [5]:
# Preview the first five rows of the election metadata table.
elections.head(n=5)

Unnamed: 0,elections,dates,cycle,etype,president,us_senate_maj,us_repre_maj,ca_governor,ca_lt_govnor,ca_senate_maj,ca_assembly_maj
0,E8_110618,2018-11-06,Cong,General,REP,4,42,DEM,DEM,-13,-28
1,E7_060518,2018-06-05,Cong,Primary,REP,4,42,DEM,DEM,-13,-28
2,E6_110816,2016-11-08,Pres,General,DEM,10,60,DEM,DEM,-13,-24
3,E5_060716,2016-06-07,Pres,Primary,DEM,10,58,DEM,DEM,-13,-24
4,E4_110414,2014-11-04,Cong,General,DEM,-8,34,DEM,DEM,-13,-31
