## This page documents the collection of voting data for Orange County from Statewidedatabase.org
- Raw data was downloaded as .csv files from the website.
- Data for 4 years were collected:  2012, 2014, 2016, 2018

In [1]:
# imports
import pandas as pd
import pickle as pkl
 

Dataset:  2 CSV files per year from https://statewidedatabase.org/d10/g14.html
1. Orange County (county 059) - SOV column - by srprec (voting results)
1. Orange County (county 059) - VOTE column - by srprec (voter reg & demographics)
1. For general elections in the years:  2012, 2014, 2016, 2018

To be merged on the ['srprec'] column.

In [5]:
# 2018 general-election vote results (SOV file) for county 059, keyed by srprec
sov = pd.read_csv(filepath_or_buffer='./vote_data/c059_g18_sov_data_by_g18_srprec.csv')

In [6]:
# 2018 general-election voter registration & demographics file for county 059, keyed by srprec
voters = pd.read_csv(filepath_or_buffer='./vote_data/c059_g18_voters_by_g18_srprec.csv')

In [7]:
# (rows, columns) of the vote-results frame
sov.shape

(1547, 85)

In [8]:
# (rows, columns) of the registration frame; compare the row count with sov above
voters.shape

(1334, 140)

### Combine datasets, aligned by ['srprec']

In [None]:
# Look at the precinct ids as loaded, before any cleaning
sov.loc[:, 'srprec']

In [None]:
# Parse the precinct ids as numbers; entries that cannot be parsed become NaN
sov['srprec'] = sov['srprec'].pipe(pd.to_numeric, errors='coerce')

In [None]:
# Inspect the column after the numeric conversion
sov['srprec']

In [None]:
# Count the srprec values that are missing after the numeric conversion;
# these mark the non-conforming rows that need to be dropped
sov['srprec'].isna().sum()

In [None]:
# NOTE(review): dropna(inplace=True) is called on the column selected by
# sov['srprec'], not on the `sov` frame itself, so it does not appear to remove
# any row from `sov` (the column displayed further down still has 1547 entries
# and ends in <NA>). Dropping the rows would need something like
# sov.dropna(subset=['srprec']) -- confirm before changing, because a later
# cell removes the non-conforming row separately.
sov['srprec'].dropna(inplace=True)

In [None]:
# Re-display the column to check the effect of the dropna call
sov['srprec']

In [None]:
# Cast srprec to pandas' nullable integer dtype ('Int64') so it lines up with the
# integer srprec column of the voters df; unlike plain int64, Int64 can still
# hold a missing value (<NA>)
sov['srprec'] = sov['srprec'].astype('Int64')

In [13]:
# Confirm the dtype change and look for any remaining <NA> entries
sov['srprec']

0       10316
1       10317
2       10319
3       10320
4       10325
        ...  
1542    75122
1543    75701
1544    75702
1545     7701
1546     <NA>
Name: srprec, Length: 1547, dtype: Int64

In [9]:
# Get rid of the non-conforming rows: every row whose precinct id could not be
# parsed (srprec is <NA>).
# The rows are selected by their missing srprec value rather than by a
# hard-coded position, so this cell needs no per-year editing, cannot remove a
# valid row by mistake, and is safe to re-run (it is a no-op once the rows are gone).
sov = sov.dropna(subset=['srprec'])

In [15]:
# Precinct ids of the registration frame, for comparison with sov['srprec']
voters['srprec']

0       10316
1       10317
2       10319
3       10320
4       10325
        ...  
1329    75117
1330    75122
1331    75701
1332    75702
1333     7701
Name: srprec, Length: 1334, dtype: int64

In [16]:
# Number of missing precinct ids in the registration frame
voters['srprec'].isna().sum()

0

### Examine the vote (sov) and registration (voters) dataframes to align ['srprec'] and the number of rows.

In [17]:
# First half of the comparison: the srprec column of the vote-results frame.
# It is stacked with the voters srprec column a few cells below to find
# precinct ids that occur in only one of the two frames.
sovprec_compare = sov['srprec']
sovprec_compare

0       10316
1       10317
2       10319
3       10320
4       10325
        ...  
1541    75117
1542    75122
1543    75701
1544    75702
1545     7701
Name: srprec, Length: 1546, dtype: Int64

In [18]:
# Second half of the comparison: the srprec column of the registration frame
voteprec_compare = voters['srprec']
voteprec_compare

0       10316
1       10317
2       10319
3       10320
4       10325
        ...  
1329    75117
1330    75122
1331    75701
1332    75702
1333     7701
Name: srprec, Length: 1334, dtype: int64

In [19]:
# Stack the two precinct-id columns on top of each other; reset_index() moves the
# original row labels into an 'index' column and renumbers the rows from 0
srprec_compare = pd.concat(objs=[sovprec_compare, voteprec_compare]).reset_index(drop=False)

In [20]:
# Row count should equal len(sov) + len(voters); the two columns are 'index' and 'srprec'
srprec_compare.shape

(2880, 2)

In [21]:
# Peek at the first rows of the stacked precinct ids
srprec_compare.head()

Unnamed: 0,index,srprec
0,0,10316
1,1,10317
2,2,10319
3,3,10320
4,4,10325


In [22]:
# How often each precinct id occurs in the stacked column. Assuming ids are
# unique within each frame, a count of 2 means the id is in both frames and a
# count of 1 means it is in only one of them.
srprec_compare['srprec'].value_counts()

32270    2
2117     2
31005    2
2337     2
68073    2
        ..
65051    1
65065    1
42103    1
65061    1
70135    1
Name: srprec, Length: 1546, dtype: Int64

#### merge the vote df (sov) with registration df (voters), keeping unique rows

In [23]:
# Outer-join the vote results (left) with the registration data (right) on the
# precinct id. The join key is given once ('srprec'); listing the same column
# twice in `on` is redundant. indicator=True adds a `_merge` column recording
# whether each row was found in both frames or in only one of them.
result = pd.merge(left=sov, right=voters, how='outer', on='srprec', indicator=True)

In [24]:
# Keep printed frames compact: at most 10 rows and 10 columns
pd.options.display.max_rows = 10
pd.options.display.max_columns = 10

result

Unnamed: 0,county,srprec,addist,cddist,sddist,...,oreg6g,oreg7g,oreg8g,oreg9g,_merge
0,30,10316,65,47,29,...,1.0,0.0,0.0,1.0,both
1,30,10317,65,47,29,...,2.0,2.0,4.0,1.0,both
2,30,10319,65,47,29,...,3.0,1.0,2.0,6.0,both
3,30,10320,65,47,29,...,4.0,1.0,3.0,9.0,both
4,30,10325,65,47,29,...,1.0,0.0,1.0,8.0,both
...,...,...,...,...,...,...,...,...,...,...,...
1541,30,75117,68,45,37,...,0.0,0.0,0.0,2.0,both
1542,30,75122,68,45,37,...,0.0,0.0,1.0,0.0,both
1543,30,75701,68,45,37,...,2.0,0.0,0.0,1.0,both
1544,30,75702,68,45,37,...,1.0,0.0,0.0,4.0,both


In [25]:
# Tally the rows by merge origin: 'both', 'left_only' (in sov only) or
# 'right_only' (in voters only)
result['_merge'].value_counts()
# identifies 'left_only' rows from sov df that are unique

both          1334
left_only      212
right_only       0
Name: _merge, dtype: int64

In [26]:
# Pull out the rows that exist only in the vote-results frame to judge whether
# they carry any useful information
vote_only = result.loc[result['_merge'] == 'left_only', :]
pd.options.display.max_columns = None  # no column limit: show every column
vote_only


Unnamed: 0,county,srprec,addist,cddist,sddist,bedist,TOTREG,DEMREG,REPREG,AIPREG,GRNREG,LIBREG,NLPREG,REFREG,DCLREG,MSCREG,TOTVOTE,DEMVOTE,REPVOTE,AIPVOTE,GRNVOTE,LIBVOTE,NLPVOTE,REFVOTE,DCLVOTE,MSCVOTE,PRCVOTE,ABSVOTE,ASSDEM01,ASSDEM02,ASSIND01,ASSLIB01,ASSREP01,ATGDEM01,ATGREP01,BOEDEM01,BOEREP01,CNGDEM01,CNGDEM02,CNGGRN01,CNGIND01,CNGREP01,CNGREP02,CONDEM01,CONREP01,GOVDEM01,GOVREP01,INSDEM01,INSIND01,LTGDEM01,LTGDEM02,PR_10_N,PR_10_Y,PR_11_N,PR_11_Y,PR_12_N,PR_12_Y,PR_1_N,PR_1_Y,PR_2_N,PR_2_Y,PR_3_N,PR_3_Y,PR_4_N,PR_4_Y,PR_5_N,PR_5_Y,PR_6_N,PR_6_Y,PR_7_N,PR_7_Y,PR_8_N,PR_8_Y,SENDEM01,SENDEM02,SENIND01,SENREP01,SOSDEM01,SOSREP01,SPINOP01,SPINOP02,TRSDEM01,TRSREP01,USSDEM01,USSDEM02,election,type,totreg_r,dem,rep,aip,paf,msc,lib,nlp,grn,ref,dcl,male,female,hispdem,hisprep,hispdcl,hispoth,jewdem,jewrep,jewdcl,jewoth,kordem,korrep,kordcl,koroth,jpndem,jpnrep,jpndcl,jpnoth,chidem,chirep,chidcl,chioth,inddem,indrep,inddcl,indoth,vietdem,vietrep,vietdcl,vietoth,fildem,filrep,fildcl,filoth,demmunk,demm1824,demm2534,demm3544,demm4554,demm5564,demm65pl,demfunk,demf1824,demf2534,demf3544,demf4554,demf5564,demf65pl,repmunk,repm1824,repm2534,repm3544,repm4554,repm5564,repm65pl,repfunk,repf1824,repf2534,repf3544,repf4554,repf5564,repf65pl,dclmunk,dclm1824,dclm2534,dclm3544,dclm4554,dclm5564,dclm65pl,dclfunk,dclf1824,dclf2534,dclf3544,dclf4554,dclf5564,dclf65pl,othmunk,othm1824,othm2534,othm3544,othm4554,othm5564,othm65pl,othfunk,othf1824,othf2534,othf3544,othf4554,othf5564,othf65pl,dreg1g,dreg2g,dreg3g,dreg4g,dreg5g,dreg6g,dreg7g,dreg8g,dreg9g,rreg1g,rreg2g,rreg3g,rreg4g,rreg5g,rreg6g,rreg7g,rreg8g,rreg9g,ireg1g,ireg2g,ireg3g,ireg4g,ireg5g,ireg6g,ireg7g,ireg8g,ireg9g,oreg1g,oreg2g,oreg3g,oreg4g,oreg5g,oreg6g,oreg7g,oreg8g,oreg9g,_merge
10,30,10342,65,47,29,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
11,30,10343,65,47,29,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
57,30,13100,65,39,29,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
79,30,13380,65,39,29,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
91,30,13467,65,39,29,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1484,30,71378,68,45,37,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1517,30,72080,68,45,37,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1518,30,72085,68,45,37,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1519,30,72087,68,45,37,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only


In [27]:
# Scan the summary statistics (mean, min, max, ...) of the sov-only rows to
# gauge their values.

# Lift the row limit so the whole transposed summary is visible. The option
# name is spelled out in full ('display.max_rows'): an abbreviated name such as
# 'display.max_row' only works through pandas' pattern matching and stops
# working if it ever matches more than one option.
pd.set_option('display.max_rows', None)

vote_only.describe().T
# note:  returns ~200 rows of summary information

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
county,212.0,30.0,0.0,30.0,30.0,30.0,30.0,30.0
srprec,212.0,48921.29717,19823.386787,2096.0,42101.75,53097.0,65052.25,72110.0
addist,212.0,69.45283,4.895364,55.0,68.0,68.0,73.0,74.0
cddist,212.0,45.669811,2.595007,39.0,45.0,45.0,48.0,49.0
sddist,212.0,35.566038,2.616096,29.0,36.0,37.0,37.0,37.0
bedist,212.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0
TOTREG,212.0,0.023585,0.247088,0.0,0.0,0.0,0.0,3.0
DEMREG,212.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
REPREG,212.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AIPREG,212.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Put a row limit back on displayed output (exact option name 'display.max_rows'
# rather than the abbreviated 'display.max_row', which relies on pattern matching)
pd.set_option('display.max_rows', 15)

# Distribution of total votes among the sov-only rows
vote_only['TOTVOTE'].value_counts()
# only a few mis-entered data points of a handful of votes.  OK to drop the unique rows.

0    200
1      9
2      3
Name: TOTVOTE, dtype: int64

In [34]:
# Show the sov-only rows that recorded at least one vote
vote_only.query('TOTVOTE > 0')

Unnamed: 0,county,srprec,addist,cddist,sddist,bedist,TOTREG,DEMREG,REPREG,AIPREG,GRNREG,LIBREG,NLPREG,REFREG,DCLREG,MSCREG,TOTVOTE,DEMVOTE,REPVOTE,AIPVOTE,GRNVOTE,LIBVOTE,NLPVOTE,REFVOTE,DCLVOTE,MSCVOTE,PRCVOTE,ABSVOTE,ASSDEM01,ASSDEM02,ASSIND01,ASSLIB01,ASSREP01,ATGDEM01,ATGREP01,BOEDEM01,BOEREP01,CNGDEM01,CNGDEM02,CNGGRN01,CNGIND01,CNGREP01,CNGREP02,CONDEM01,CONREP01,GOVDEM01,GOVREP01,INSDEM01,INSIND01,LTGDEM01,LTGDEM02,PR_10_N,PR_10_Y,PR_11_N,PR_11_Y,PR_12_N,PR_12_Y,PR_1_N,PR_1_Y,PR_2_N,PR_2_Y,PR_3_N,PR_3_Y,PR_4_N,PR_4_Y,PR_5_N,PR_5_Y,PR_6_N,PR_6_Y,PR_7_N,PR_7_Y,PR_8_N,PR_8_Y,SENDEM01,SENDEM02,SENIND01,SENREP01,SOSDEM01,SOSREP01,SPINOP01,SPINOP02,TRSDEM01,TRSREP01,USSDEM01,USSDEM02,election,type,totreg_r,dem,rep,aip,paf,msc,lib,nlp,grn,ref,dcl,male,female,hispdem,hisprep,hispdcl,hispoth,jewdem,jewrep,jewdcl,jewoth,kordem,korrep,kordcl,koroth,jpndem,jpnrep,jpndcl,jpnoth,chidem,chirep,chidcl,chioth,inddem,indrep,inddcl,indoth,vietdem,vietrep,vietdcl,vietoth,fildem,filrep,fildcl,filoth,demmunk,demm1824,demm2534,demm3544,demm4554,demm5564,demm65pl,demfunk,demf1824,demf2534,demf3544,demf4554,demf5564,demf65pl,repmunk,repm1824,repm2534,repm3544,repm4554,repm5564,repm65pl,repfunk,repf1824,repf2534,repf3544,repf4554,repf5564,repf65pl,dclmunk,dclm1824,dclm2534,dclm3544,dclm4554,dclm5564,dclm65pl,dclfunk,dclf1824,dclf2534,dclf3544,dclf4554,dclf5564,dclf65pl,othmunk,othm1824,othm2534,othm3544,othm4554,othm5564,othm65pl,othfunk,othf1824,othf2534,othf3544,othf4554,othf5564,othf65pl,dreg1g,dreg2g,dreg3g,dreg4g,dreg5g,dreg6g,dreg7g,dreg8g,dreg9g,rreg1g,rreg2g,rreg3g,rreg4g,rreg5g,rreg6g,rreg7g,rreg8g,rreg9g,ireg1g,ireg2g,ireg3g,ireg4g,ireg5g,ireg6g,ireg7g,ireg8g,ireg9g,oreg1g,oreg2g,oreg3g,oreg4g,oreg5g,oreg6g,oreg7g,oreg8g,oreg9g,_merge
220,30,2111,68,45,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
261,30,2303,65,46,29,4,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,2,0,2,0,0,0,0,0,2,0,2,0,2,0,1,1,0,2,1,1,0,2,1,1,0,2,0,2,0,2,2,0,0,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0,0,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
605,30,39232,72,48,34,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
657,30,43120,74,48,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
663,30,43133,74,48,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
667,30,44114,74,48,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
844,30,52023,74,48,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
989,30,5391,55,39,29,4,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,2,0,2,1,1,0,2,0,2,1,1,1,1,0,2,1,1,0,2,0,2,0,0,0,0,1,1,2,0,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1177,30,59201,74,45,37,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1396,30,69125,73,49,36,4,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,2,0,0,0,0,2,0,0,2,0,2,0,2,0,0,2,0,0,2,2,0,2,0,2,0,2,0,2,0,0,2,0,2,0,2,2,0,0,0,0,2,0,2,2,0,0,2,0,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only


In [35]:
# Size of the merged frame before the sov-only rows are removed
result.shape


(1546, 225)

In [36]:
# Drop the 'left_only' rows, i.e. keep every merged row that is not sov-only
keep_mask = result['_merge'] != 'left_only'
result_both = result.loc[keep_mask].copy()

In [37]:
# Row count should now equal the number of 'both' rows reported by the merge indicator
result_both.shape

(1334, 225)

#### save result-both df to preserve 

In [469]:
# Save step, intentionally left commented out. To write the merged frame to disk,
# un-comment the single line whose year (g18 / g16 / g14 / g12) matches the CSV
# files loaded at the top of the notebook, run the cell, then comment it out again.
# result_both.to_pickle('./vote_data/merged_OC-c059_g18_by_srprec.pkl')
# result_both.to_pickle('./vote_data/merged_OC-c059_g16_by_srprec.pkl')
# result_both.to_pickle('./vote_data/merged_OC-c059_g14_by_srprec.pkl')
# result_both.to_pickle('./vote_data/merged_OC-c059_g12_by_srprec.pkl')

In [10]:
# Reload a previously saved merged frame (2018 here); switch to one of the
# commented alternatives below to work with another election year
data = pd.read_pickle(filepath_or_buffer='./vote_data/merged_OC-c059_g18_by_srprec.pkl')
# data = pd.read_pickle('./vote_data/merged_OC-c059_g16_by_srprec.pkl')
# data = pd.read_pickle('./vote_data/merged_OC-c059_g14_by_srprec.pkl')
# data = pd.read_pickle('./vote_data/merged_OC-c059_g12_by_srprec.pkl')
data.shape

(1334, 225)

In [11]:
# First rows of the reloaded merged frame
data.head()

Unnamed: 0,county,srprec,addist,cddist,sddist,bedist,TOTREG,DEMREG,REPREG,AIPREG,...,oreg1g,oreg2g,oreg3g,oreg4g,oreg5g,oreg6g,oreg7g,oreg8g,oreg9g,_merge
0,30,10316,65,47,29,4,1735,0,0,0,...,16.0,8.0,2.0,2.0,3.0,1.0,0.0,0.0,1.0,both
1,30,10317,65,47,29,4,2079,0,0,0,...,19.0,13.0,3.0,2.0,6.0,2.0,2.0,4.0,1.0,both
2,30,10319,65,47,29,4,1448,0,0,0,...,12.0,7.0,3.0,2.0,2.0,3.0,1.0,2.0,6.0,both
3,30,10320,65,47,29,4,2407,0,0,0,...,11.0,11.0,5.0,1.0,5.0,4.0,1.0,3.0,9.0,both
4,30,10325,65,47,29,4,1465,0,0,0,...,14.0,7.0,5.0,1.0,2.0,1.0,0.0,1.0,8.0,both


In [12]:
# Number of srprec rows per congressional district (cddist)
data['cddist'].value_counts()

45    347
48    334
39    226
46    198
47    133
49     87
38      9
Name: cddist, dtype: int64

In [473]:
# Return to a compact display: at most 10 rows and 10 columns per printed frame
pd.options.display.max_rows = 10
pd.options.display.max_columns = 10


#### NOTE:  While Orange County overlaps seven congressional districts (`cddist`), only **four** are majority Orange County:  39, 45, 46, 48