# Clean Data Feature Creation
Having cleaned the data, there are some derived fields and data categories that I'd like to calculate before analysing the data for initial correlations and voter characterizations.

In [1]:
# imports
import pandas as pd
from collections import Counter

from modules.lv_utils import load_households
from modules.lv_utils import load_voters

In [2]:
# load the data
# load_households / load_voters are project helpers from modules.lv_utils;
# presumably they restore the dtypes set during cleaning — TODO confirm in lv_utils
households = load_households('data_clean/20180627_households_district3.csv')
voters = load_voters('data_clean/20180627_votersWithRate_district3.csv')
elections = pd.read_csv('data_clean/20180621_election_data.csv')

# load look_ups
# the lookup tables are indexed by their id column (Hid for households, Vid for voters)
hid_lookup = pd.read_csv('data_clean/20180621_households_lookup_NO_GIT.csv', index_col='Hid')
vid_lookup = pd.read_csv('data_clean/20180616_voters_lookup_NO_GIT.csv', index_col='Vid')

In [3]:
# short aliases for the three frames; plain assignment makes these references,
# not copies, so in-place changes made through an alias also affect the long name
v = voters
h = households
e = elections
# list the columns available on each frame
print(v.columns)
print(h.columns)
e.columns

Index(['Vid', 'Abbr', 'Precinct', 'PrecinctSub', 'Party', 'PartyMain',
       'RegDate', 'PAV', 'RegDateOriginal', 'E6_110816', 'E5_060716',
       'E4_110414', 'E3_060314', 'E2_110612', 'E1_060512', 'District',
       'VoterScore', 'VoterScorePossible', 'VoterScorePctOfPoss', 'BirthYear',
       'OldestInHouseBirthYear', 'IsOldestInHouse', 'havePhone',
       'BirthPlaceState', 'BirthPlaceCountry', 'Gender', 'sameMailAddress',
       'MailCountry', 'isApt', 'Zip', 'StreetType', 'EmailProvider',
       'E5_060716BT', 'E1_060512BT', 'Hid', 'cHid', 'E34_nVotesPos',
       'E34_nVotes', 'E34_nVotesPct', 'E56_nVotesPos', 'E56_nVotes',
       'E56_nVotesPct', 'E78_nVotesPos', 'E78_nVotes', 'E78_nVotesPct'],
      dtype='object')
Index(['Hid', 'StreetType', 'Zip', 'Precinct', 'PrecinctSub', 'District',
       'CityArea', 'isApt', 'cHid'],
      dtype='object')


Index(['elections', 'dates', 'cycle', 'etype', 'president', 'us_senate_maj',
       'us_repre_maj', 'ca_governor', 'ca_lt_govnor', 'ca_senate_maj',
       'ca_assembly_maj'],
      dtype='object')

In [4]:
# dtype and non-null summary of the voter and household frames
v.info()
h.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13307 entries, 0 to 13306
Data columns (total 45 columns):
Vid                       13307 non-null int64
Abbr                      13307 non-null int64
Precinct                  13307 non-null int64
PrecinctSub               13307 non-null int64
Party                     13307 non-null category
PartyMain                 13307 non-null object
RegDate                   13307 non-null datetime64[ns]
PAV                       13307 non-null category
RegDateOriginal           13307 non-null datetime64[ns]
E6_110816                 13307 non-null category
E5_060716                 13307 non-null category
E4_110414                 13307 non-null category
E3_060314                 13307 non-null category
E2_110612                 13307 non-null category
E1_060512                 13307 non-null category
District                  13307 non-null int64
VoterScore                13307 non-null float64
VoterScorePossible        13307 non-null float6

## Calculating additional features of interest
Household features: 
- Number of voters in HH,
- Number of PAV,
- Number with party affiliation
- Number of DEM party affiliation in HH
- Number of REP party affiliation in HH
- Number of NPP party affiliations in HH
- Party with most affiliations in HH
- Mixed affiliations True/False (True when voters in the HH are affiliated with more than one party)
- All voters affiliated
- Uniform affiliations (all same party or all NPP)
- Vote rate (total number of votes in HH/total number of potential votes in HH). This is calculated three times, once for each election cycle we are modeling: `'E34_nVotesPosInHH'`, `'E34_nVotesInHH'`, `'E34_nVotesPctInHH'`, `'E56_nVotesPosInHH'`, `'E56_nVotesInHH'`, `'E56_nVotesPctInHH'` and `'E78_nVotesPosInHH'`, `'E78_nVotesInHH'`, `'E78_nVotesPctInHH'`

Voter features:
- Oldest in household - added during initial cleaning, 
- Vote rate (total number of votes/total number of potential votes) - added during vote rate calculations


### Calculating total number of voters in the household

In [5]:
# every voter row receives the number of voter ids sharing its combined household id
voters_per_household = v.groupby(['cHid'])['Vid'].transform('count')
v['nVotersInHH'] = voters_per_household
# spot check the first few households
(v.loc[:, ['cHid', 'Hid', 'Party', 'Vid', 'nVotersInHH']]
  .sort_values('cHid')
  .head(15))

Unnamed: 0,cHid,Hid,Party,Vid,nVotersInHH
12729,0,0,NPP,12729,2
12185,0,0,DEM,12185,2
5716,1,1,REP,5716,3
9232,1,1,NPP,9232,3
2744,1,1,REP,2744,3
7945,2,2,DEM,7945,3
9347,2,2,DEM,9347,3
12263,2,2,DEM,12263,3
9120,3,3,DEM,9120,2
4813,3,3,DEM,4813,2


### Calculating total number of Permanent Absentee Voters in HouseHold

In [6]:
# Flag permanent absentee voters explicitly instead of reusing the category codes:
# `.cat.codes` depends on which categories exist and on their order, and is -1 for
# missing values, so summing the codes only counts 'Y' voters when the categories
# happen to be exactly ['N', 'Y'].
v['PAVCode'] = (v.PAV == 'Y').astype('int64')
# number of permanent absentee voters in each combined household
v['nPAVInHH'] = v.groupby(['cHid'])['PAVCode'].transform('sum')
# spot check a few households
v.loc[v.cHid.isin([0, 4,5, 8, 34, 94, 95, 98]),[
    'cHid', 'Hid', 'PAV', 'Vid','PAVCode','nPAVInHH']].sort_values(['cHid','PAV']).head(8)

Unnamed: 0,cHid,Hid,PAV,Vid,PAVCode,nPAVInHH
12185,0,0,Y,12185,1,2
12729,0,0,Y,12729,1,2
6970,4,4,Y,6970,1,4
7749,4,4,Y,7749,1,4
9580,4,4,Y,9580,1,4
9957,4,4,Y,9957,1,4
4907,5,5,N,4907,0,0
5903,5,5,N,5903,0,0


### Calculating number of voters in household with a party affiliation

In [7]:
# a voter counts as affiliated unless the party code is UNK or NPP
unaffiliated = v.Party.isin(['UNK', 'NPP'])
v['HasParty'] = (~unaffiliated).astype('int64')
# number of affiliated voters in each combined household
v['nAffInHH'] = v.groupby(['cHid'])['HasParty'].transform('sum')
# spot check a handful of households
check_ids = [4922, 4, 5, 8, 34, 6794]
check_cols = ['cHid', 'Hid', 'Party', 'Vid', 'HasParty', 'nAffInHH']
v.loc[v.cHid.isin(check_ids), check_cols].sort_values(['cHid', 'Party'])

Unnamed: 0,cHid,Hid,Party,Vid,HasParty,nAffInHH
6970,4,4,DEM,6970,1,4
7749,4,4,DEM,7749,1,4
9580,4,4,DEM,9580,1,4
9957,4,4,DEM,9957,1,4
4907,5,5,AI,4907,1,2
5903,5,5,DEM,5903,1,2
10964,5,5,NPP,10964,0,2
832,8,8,DEM,832,1,1
8377,34,34,DEM,8377,1,2
12567,34,34,DEM,12567,1,2


https://stackoverflow.com/questions/22219004/grouping-rows-in-list-in-pandas-groupby
https://stackoverflow.com/questions/27439023/pandas-groupby-agg-function-does-not-reduce/37955931#37955931
and
https://stackoverflow.com/questions/19530568/can-pandas-groupby-aggregate-into-a-list-rather-than-sum-mean-etc

These references cover how to collect the party affiliations of each household into a list.

### Calculating number of DEMs, REPs and NPPs in a household

In [8]:
# 0/1 indicator per voter for each of the three party codes of interest
for party in ('DEM', 'REP', 'NPP'):
    v['is' + party] = (v.Party == party).astype('int64')
# household totals of each indicator, broadcast back onto every voter row
for party in ('DEM', 'REP', 'NPP'):
    v['n{}InHH'.format(party)] = v.groupby(['cHid'])['is' + party].transform('sum')
# spot check a handful of households
check_ids = [4922, 1, 4, 5, 8, 34, 39, 6794]
check_cols = ['cHid', 'Hid', 'Party', 'Vid', 'isDEM', 'nDEMInHH', 'isREP', 'nREPInHH',
              'isNPP', 'nNPPInHH']
v.loc[v.cHid.isin(check_ids), check_cols].sort_values(['cHid', 'Party'])

Unnamed: 0,cHid,Hid,Party,Vid,isDEM,nDEMInHH,isREP,nREPInHH,isNPP,nNPPInHH
9232,1,1,NPP,9232,0,0,0,2,1,1
2744,1,1,REP,2744,0,0,1,2,0,1
5716,1,1,REP,5716,0,0,1,2,0,1
6970,4,4,DEM,6970,1,4,0,0,0,0
7749,4,4,DEM,7749,1,4,0,0,0,0
9580,4,4,DEM,9580,1,4,0,0,0,0
9957,4,4,DEM,9957,1,4,0,0,0,0
4907,5,5,AI,4907,0,1,0,0,0,1
5903,5,5,DEM,5903,1,1,0,0,0,1
10964,5,5,NPP,10964,0,1,0,0,1,1


### Calculating HH vote rates and HH vote scores

In [9]:
# Shorten the voter-score column names. The rename is done in place, so it is
# also visible through the `voters` name that `v` aliases.
v.rename(columns = {'VoterScore':'VScore',
                    'VoterScorePossible':'VScorePos',
                    'VoterScorePctOfPoss':'VScorePct'}, inplace = True)
# preview the per-voter score and vote-rate columns that feed the household totals
v[['Vid', 'Hid', 'cHid',
   'VScore','VScorePos','VScorePct',
   'E34_nVotesPos', 'E34_nVotes', 'E34_nVotesPct',
   'E56_nVotesPos', 'E56_nVotes', 'E56_nVotesPct',
   'E78_nVotesPos', 'E78_nVotes', 'E78_nVotesPct']].sort_values('cHid').head()

Unnamed: 0,Vid,Hid,cHid,VScore,VScorePos,VScorePct,E34_nVotesPos,E34_nVotes,E34_nVotesPct,E56_nVotesPos,E56_nVotes,E56_nVotesPct,E78_nVotesPos,E78_nVotes,E78_nVotesPct
12729,12729,0,0,0.0,-1.0,-1.0,0,0,-1.0,0,0,-1.0,0,0,-1.0
12185,12185,0,0,0.0,11.0063,0.0,2,0,0.0,4,0,0.0,6,0,0.0
5716,5716,1,1,6.8053,11.0063,0.62,2,2,1.0,4,3,0.75,6,4,0.666667
9232,9232,1,1,2.5001,11.0063,0.23,2,0,0.0,4,0,0.0,6,1,0.166667
2744,2744,1,1,9.8055,11.0063,0.89,2,2,1.0,4,3,0.75,6,5,0.833333


In [10]:
def calc_HH_vote_rate(df, pre, body, group_col='cHid'):
    """Add household-level totals and a household rate to ``df`` in place.

    The source columns are ``pre + body`` (actual value per row) and
    ``pre + body + 'Pos'`` (possible value per row). Three columns are written:

    - ``pre + body + 'PosInHH'``: sum of the possible values within the group
    - ``pre + body + 'InHH'``: sum of the actual values within the group
    - ``pre + body + 'PctInHH'``: actual / possible, or -1 when the ratio is
      undefined (possible total of 0)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the source columns and ``group_col``; modified in place.
    pre : str
        Column-name prefix, e.g. ``'E34'`` (may be empty).
    body : str
        Column-name body, e.g. ``'_nVotes'``.
    group_col : str, default 'cHid'
        Column identifying the household each row belongs to.

    Returns
    -------
    None
    """
    stem = pre + body
    by_household = df.groupby([group_col])
    possible = by_household[stem + 'Pos'].transform('sum')
    actual = by_household[stem].transform('sum')

    df[stem + 'PosInHH'] = possible
    df[stem + 'InHH'] = actual
    # 0/0 yields NaN and x/0 yields +/-inf; both mean there were no possible
    # votes, so both are mapped to the -1 sentinel
    rate = (actual / possible).replace([float('inf'), float('-inf')], float('nan'))
    df[stem + 'PctInHH'] = rate.fillna(-1)

In [11]:
# Working copies of the voter score columns with every non-positive value
# (including the -1 placeholders) set to 0, so that household sums are accurate.
for working_col, source_col in (('_nVScoreWPos', 'VScorePos'), ('_nVScoreW', 'VScore')):
    v[working_col] = v[source_col].where(v[source_col] > 0, 0)

In [12]:
# household vote totals and rates for each election cycle
for cycle in ('E34', 'E56', 'E78'):
    calc_HH_vote_rate(v, cycle, '_nVotes')
# same calculation on the working voter-score columns (empty prefix)
calc_HH_vote_rate(v, '', '_nVScoreW')
# give the household score columns their final names
score_names = {'_nVScoreWPosInHH':'nVScorePosInHH',
               '_nVScoreWInHH':'nVScoreInHH',
               '_nVScoreWPctInHH':'nVScorePctInHH'}
v.rename(columns = score_names, inplace = True)

In [13]:
# columns used by the (commented-out) checks below; the household score columns
# are listed under the names they carry after the rename of the '_nVScoreW*InHH'
# columns, otherwise selecting them raises a KeyError
cols = ['Vid', 'Hid', 'cHid',
        'VScorePos','VScore','VScorePct',
        'nVScorePosInHH','nVScoreInHH', 'nVScorePctInHH',
        'E34_nVotesPos', 'E34_nVotes', 'E34_nVotesPct', 
        'E34_nVotesPosInHH', 'E34_nVotesInHH','E34_nVotesPctInHH', 
        'E56_nVotesPos','E56_nVotes', 'E56_nVotesPct', 
        'E56_nVotesPosInHH', 'E56_nVotesInHH','E56_nVotesPctInHH',
        'E78_nVotesPos', 'E78_nVotes','E78_nVotesPct',
        'E78_nVotesPosInHH', 'E78_nVotesInHH','E78_nVotesPctInHH',]
# check code
#v[cols].sort_values('cHid').head(14)

In [14]:
# check code including 3 merged HHs
#v.loc[v.cHid.isin([0,1,2,3,489,2953,478]),cols].sort_values('cHid')

In [15]:
# the working score columns were only needed for the household sums
v = v.drop(columns=['_nVScoreWPos', '_nVScoreW'])

### Adding calculated fields to HH data

In [16]:
print(h.shape)
cols = ['cHid','nVotersInHH','nAffInHH','nPAVInHH',
        'nDEMInHH','nREPInHH','nNPPInHH',
        'nVScorePosInHH', 'nVScoreInHH', 'nVScorePctInHH',
        'E34_nVotesPosInHH', 'E34_nVotesInHH','E34_nVotesPctInHH',
        'E56_nVotesPosInHH', 'E56_nVotesInHH','E56_nVotesPctInHH',
        'E78_nVotesPosInHH', 'E78_nVotesInHH','E78_nVotesPctInHH',]
# name on the voter frame -> name on the household frame
hh_names = {'nVotersInHH':'nVoters',
            'nAffInHH': 'nAff',
            'nPAVInHH':'nPAVs',
            'nDEMInHH':'nDEMs',
            'nREPInHH':'nREPs',
            'nNPPInHH':'nNPPs',
            'nVScorePosInHH':'nVScorePos',
            'nVScoreInHH':'nVScore',
            'nVScorePctInHH':'nVScorePct',
            'E34_nVotesPosInHH':'E34_nVotesPos',
            'E34_nVotesInHH':'E34_nVotesTmp',
            'E34_nVotesPctInHH':'E34_nVotesPct',
            'E56_nVotesPosInHH':'E56_nVotesPos',
            'E56_nVotesInHH':'E56_nVotes',
            'E56_nVotesPctInHH':'E56_nVotesPct',
            'E78_nVotesPosInHH':'E78_nVotesPos',
            'E78_nVotesInHH':'E78_nVotes',
            'E78_nVotesPctInHH':'E78_nVotesPct',
           }
hh_names['E34_nVotesInHH'] = 'E34_nVotes'
# These columns were all produced by groupby('cHid').transform, so dropping
# duplicate rows collapses the voter rows to one row per household. Unlike
# grouping on every column, this does not silently discard rows holding a NaN.
f_h = (v[cols]
       .drop_duplicates()
       .sort_values('cHid')
       .rename(columns=hh_names)
       .reset_index(drop=True))
# a repeated cHid here would multiply household rows in the merge below
assert f_h['cHid'].is_unique, 'household features differ within a cHid'
h = pd.merge(h, f_h, on='cHid', how='left')
print(h.shape)

(6930, 9)
(6930, 27)


### Calculating HH level party affiliation fields

In [17]:
# collect the party codes of each combined household, then flatten every list
# into a single space-separated string
party_lists = v.groupby(['cHid'])['Party'].apply(list)
partiesInHH = party_lists.map(' '.join)

In [18]:
# two-column frame (cHid, PartiesInHH) ready to be merged into the household data
partiesInHHdf = partiesInHH.reset_index(name='PartiesInHH')
partiesInHHdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6927 entries, 0 to 6926
Data columns (total 2 columns):
cHid           6927 non-null int64
PartiesInHH    6927 non-null object
dtypes: int64(1), object(1)
memory usage: 108.3+ KB


In [19]:
# attach each household's party string, keeping every household row
h = h.merge(partiesInHHdf, how='left', on='cHid')

In [20]:
# check code
# the row count should be unchanged by the left merges; the ids selected below
# include cHids that span more than one Hid row, which should show the same party
# list on each row
print(len(h))
print(h.columns)
h.loc[h.cHid.isin([0,1,2,3,489,2953,478]),[
    'cHid','Hid','isApt', 'PartiesInHH']].sort_values(['cHid','Hid'])

6930
Index(['Hid', 'StreetType', 'Zip', 'Precinct', 'PrecinctSub', 'District',
       'CityArea', 'isApt', 'cHid', 'nVoters', 'nAff', 'nPAVs', 'nDEMs',
       'nREPs', 'nNPPs', 'nVScorePos', 'nVScore', 'nVScorePct',
       'E34_nVotesPos', 'E34_nVotes', 'E34_nVotesPct', 'E56_nVotesPos',
       'E56_nVotes', 'E56_nVotesPct', 'E78_nVotesPos', 'E78_nVotes',
       'E78_nVotesPct', 'PartiesInHH'],
      dtype='object')


Unnamed: 0,cHid,Hid,isApt,PartiesInHH
0,0,0,False,DEM NPP
1,1,1,False,REP REP NPP
2,2,2,False,DEM DEM DEM
3,3,3,False,DEM DEM
478,478,478,False,DEM IND
6399,478,6399,False,DEM IND
489,489,489,False,NPP REP NPP NPP
4849,489,4849,False,NPP REP NPP NPP
2953,2953,2953,False,NPP NPP NPP DEM NPP NPP
6208,2953,6208,False,NPP NPP NPP DEM NPP NPP


### Using the party affiliation data to calculate affiliation fields

In [21]:
def party_stats(row):
    """Add household party-affiliation summary fields to one household row.

    Reads ``row.nVoters``, ``row.nAff`` and ``row.PartiesInHH`` (a
    space-separated string holding one party code per voter) and sets:

    - ``mostAffls``: the party held by the most voters, ignoring 'NPP' and
      'UNK'; 'TIE' when two or more parties share the highest count; 'NPP'
      when no voter in the household has a party.
    - ``mixedAffls``: True when more than one distinct party (ignoring 'NPP'
      and 'UNK') is present.
    - ``allAff``: True when ``nVoters == nAff``.
    - ``uniformAff``: True when every voter has the same code, 'NPP'/'UNK'
      included.

    Returns the same ``row`` with the four fields added, so the function can
    be used with ``DataFrame.apply(..., axis=1)``.
    """
    codes = row.PartiesInHH.split()
    all_counts = Counter(codes)
    party_counts = Counter(code for code in codes if code not in ('NPP', 'UNK'))

    # most_common() ranks by descending count; a Counter on its own iterates in
    # first-seen order, so its first key is not necessarily the largest party
    ranked = party_counts.most_common()
    if not ranked:
        mostAff = 'NPP'
    elif len(ranked) > 1 and ranked[0][1] == ranked[1][1]:
        mostAff = 'TIE'
    else:
        mostAff = ranked[0][0]

    row['mostAffls'] = mostAff
    row['mixedAffls'] = len(party_counts) > 1
    row['allAff'] = row.nVoters == row.nAff
    row['uniformAff'] = len(all_counts) == 1
    return row


In [22]:
# run party_stats on every household row to add the affiliation summary fields
h = h.apply(party_stats, axis='columns')

In [23]:
# preview the household frame with the new affiliation fields
h.sort_values('cHid').head()

Unnamed: 0,Hid,StreetType,Zip,Precinct,PrecinctSub,District,CityArea,isApt,cHid,nVoters,...,E56_nVotes,E56_nVotesPct,E78_nVotesPos,E78_nVotes,E78_nVotesPct,PartiesInHH,mostAffls,mixedAffls,allAff,uniformAff
0,0,DR,94536,832400,0,3,Centerville,False,0,2,...,0,0.0,6,0,0.0,DEM NPP,DEM,False,False,False
1,1,PL/TER,94538,832910,0,3,Centerville,False,1,3,...,6,0.5,18,10,0.555556,REP REP NPP,REP,False,False,False
2,2,CMN,94538,832620,2,3,Centerville,False,2,3,...,9,0.75,18,15,0.833333,DEM DEM DEM,DEM,False,True,True
3,3,PL/TER,94538,832910,0,3,Centerville,False,3,2,...,8,1.0,12,12,1.0,DEM DEM,DEM,False,True,True
4,4,PL/TER,94536,835410,0,3,Downtown / BART,False,4,4,...,5,0.416667,20,11,0.55,DEM DEM DEM DEM,DEM,False,True,True


### Adding calculated fields to Voter data

In [24]:
# list the columns currently on the voter and household frames
print(v.columns)
h.columns

Index(['Vid', 'Abbr', 'Precinct', 'PrecinctSub', 'Party', 'PartyMain',
       'RegDate', 'PAV', 'RegDateOriginal', 'E6_110816', 'E5_060716',
       'E4_110414', 'E3_060314', 'E2_110612', 'E1_060512', 'District',
       'VScore', 'VScorePos', 'VScorePct', 'BirthYear',
       'OldestInHouseBirthYear', 'IsOldestInHouse', 'havePhone',
       'BirthPlaceState', 'BirthPlaceCountry', 'Gender', 'sameMailAddress',
       'MailCountry', 'isApt', 'Zip', 'StreetType', 'EmailProvider',
       'E5_060716BT', 'E1_060512BT', 'Hid', 'cHid', 'E34_nVotesPos',
       'E34_nVotes', 'E34_nVotesPct', 'E56_nVotesPos', 'E56_nVotes',
       'E56_nVotesPct', 'E78_nVotesPos', 'E78_nVotes', 'E78_nVotesPct',
       'nVotersInHH', 'PAVCode', 'nPAVInHH', 'HasParty', 'nAffInHH', 'isDEM',
       'isREP', 'isNPP', 'nDEMInHH', 'nREPInHH', 'nNPPInHH',
       'E34_nVotesPosInHH', 'E34_nVotesInHH', 'E34_nVotesPctInHH',
       'E56_nVotesPosInHH', 'E56_nVotesInHH', 'E56_nVotesPctInHH',
       'E78_nVotesPosInHH', 'E78_nVotes

Index(['Hid', 'StreetType', 'Zip', 'Precinct', 'PrecinctSub', 'District',
       'CityArea', 'isApt', 'cHid', 'nVoters', 'nAff', 'nPAVs', 'nDEMs',
       'nREPs', 'nNPPs', 'nVScorePos', 'nVScore', 'nVScorePct',
       'E34_nVotesPos', 'E34_nVotes', 'E34_nVotesPct', 'E56_nVotesPos',
       'E56_nVotes', 'E56_nVotesPct', 'E78_nVotesPos', 'E78_nVotes',
       'E78_nVotesPct', 'PartiesInHH', 'mostAffls', 'mixedAffls', 'allAff',
       'uniformAff'],
      dtype='object')

In [25]:
print(h.shape)
cols = ['cHid','CityArea','PartiesInHH','mostAffls','mixedAffls','allAff', 'uniformAff']
# h can hold several rows for one cHid; keep a single row per distinct combination
# and give the affiliation fields their voter-frame names
f_v = h[cols].drop_duplicates().rename(columns={'mostAffls':'mostAfflsInHH',
                                                'mixedAffls':'mixedAfflsInHH',
                                                'allAff':'allAffInHH',
                                                'uniformAff':'uniformAffInHH'})
print(f_v.shape)
# if rows sharing a cHid disagreed on any of these fields (e.g. CityArea), the
# merge below would silently duplicate voter rows, so fail loudly instead
assert f_v['cHid'].is_unique, 'conflicting household values for the same cHid'
v = pd.merge(v, f_v, on='cHid', how='left')
print(v.shape)

(6930, 32)
(6927, 7)
(13307, 74)


In [26]:
# spot check the household fields merged onto the voter rows
v[['cHid','CityArea','PartiesInHH','mostAfflsInHH','mixedAfflsInHH',
   'allAffInHH','uniformAffInHH']].sort_values('cHid').head(10)

Unnamed: 0,cHid,CityArea,PartiesInHH,mostAfflsInHH,mixedAfflsInHH,allAffInHH,uniformAffInHH
12729,0,Centerville,DEM NPP,DEM,False,False,False
12185,0,Centerville,DEM NPP,DEM,False,False,False
5716,1,Centerville,REP REP NPP,REP,False,False,False
9232,1,Centerville,REP REP NPP,REP,False,False,False
2744,1,Centerville,REP REP NPP,REP,False,False,False
7945,2,Centerville,DEM DEM DEM,DEM,False,True,True
9347,2,Centerville,DEM DEM DEM,DEM,False,True,True
12263,2,Centerville,DEM DEM DEM,DEM,False,True,True
9120,3,Centerville,DEM DEM,DEM,False,True,True
4813,3,Centerville,DEM DEM,DEM,False,True,True


### Saving out the enhanced data

In [27]:
# final dtype and non-null summary of the voter frame before saving
v.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13307 entries, 0 to 13306
Data columns (total 74 columns):
Vid                       13307 non-null int64
Abbr                      13307 non-null int64
Precinct                  13307 non-null int64
PrecinctSub               13307 non-null int64
Party                     13307 non-null category
PartyMain                 13307 non-null object
RegDate                   13307 non-null datetime64[ns]
PAV                       13307 non-null category
RegDateOriginal           13307 non-null datetime64[ns]
E6_110816                 13307 non-null category
E5_060716                 13307 non-null category
E4_110414                 13307 non-null category
E3_060314                 13307 non-null category
E2_110612                 13307 non-null category
E1_060512                 13307 non-null category
District                  13307 non-null int64
VScore                    13307 non-null float64
VScorePos                 13307 non-null float6

In [28]:
# final dtype and non-null summary of the household frame before saving
h.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6930 entries, 0 to 6929
Data columns (total 32 columns):
Hid              6930 non-null int64
StreetType       6930 non-null object
Zip              6930 non-null int64
Precinct         6930 non-null int64
PrecinctSub      6930 non-null int64
District         6930 non-null int64
CityArea         6930 non-null object
isApt            6930 non-null bool
cHid             6930 non-null int64
nVoters          6930 non-null int64
nAff             6930 non-null int64
nPAVs            6930 non-null int64
nDEMs            6930 non-null int64
nREPs            6930 non-null int64
nNPPs            6930 non-null int64
nVScorePos       6930 non-null float64
nVScore          6930 non-null float64
nVScorePct       6930 non-null float64
E34_nVotesPos    6930 non-null int64
E34_nVotes       6930 non-null int64
E34_nVotesPct    6930 non-null float64
E56_nVotesPos    6930 non-null int64
E56_nVotes       6930 non-null int64
E56_nVotesPct    6930 non-null fl

In [29]:
# files are stamped with today's date (YYYYMMDD)
date = pd.Timestamp("today").strftime("%Y%m%d")
# Index each frame by its id before saving. The guards make the cell safe to
# re-run: once the id column has been moved into the index, calling set_index
# on it again would raise a KeyError.
if v.index.name != 'Vid':
    v = v.set_index('Vid')
v.to_csv('data_clean/{}_fullset_voters_district3.csv'.format(date))
if h.index.name != 'Hid':
    h = h.set_index('Hid')
h.to_csv('data_clean/{}_fullset_households_district3.csv'.format(date))