# Votes: Create county lists of voters

Create a list of all the voters for each county in NJ, using voter id as the key.

In [1]:
import pandas as pd
import csv
import zipfile
import os
import glob
import csv 
import math
import njvotes
# Raise the pandas display limit to 999 columns so wide frames are not elided when shown
pd.set_option("display.max_columns", 999)


# Get data headers: column type information from the project-local njvotes module.
# It is passed as `dtype=` to pd.read_csv when the county files are loaded below.
# NOTE(review): presumably a column-name -> dtype mapping; confirm in njvotes.
nametypes = njvotes.get_voteheadertypes()

In [4]:
# Create one voter list per county plus a statewide master list.
#   parties   : 'all' keeps every voter; any other value keeps only the rows
#               whose 'party code' column equals it.
#   outformat : '' leaves the columns as read from disk; 'NGPVAN' or
#               'nationbuilder' reformats through the matching njvotes helper.
parties = 'all'
outformat = ''

# One output folder for both the per-county and the statewide files, so the
# two cannot drift apart on a case-sensitive filesystem.
outdir = '../2017 Voter data'
os.makedirs(outdir, exist_ok=True)

# Per-county frames are collected here and concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0 and was quadratic in a loop).
countyframes = []

# sorted(): glob returns files in filesystem order, which is not guaranteed
votefilenames = sorted(glob.glob('../2017 Voting data/*/ElectionHistory_cleaned.csv'))
for votefilename in votefilenames:

    # Get voting data from file; the county name is the folder holding the file
    county = os.path.basename(os.path.dirname(votefilename))
    print('{}: getting voter data'.format(county))
    df = pd.read_csv(votefilename, dtype=nametypes)
    df = df.fillna('')
    df['county'] = county.title()

    # Only keep one record per voter (we don't care about how they voted at this point)
    df = df.drop_duplicates(subset='voter id', keep='last')
    print('{}: found {} people'.format(county, len(df)))

    # Do any asked-for filtering
    if parties != 'all':
        df = df[df['party code'] == parties]

    # Format and add to stack
    if outformat == 'NGPVAN':
        df = njvotes.format_for_ngpvan(df)
    elif outformat == 'nationbuilder':
        df = njvotes.format_for_nationbuilder(df)

    countyframes.append(df)

    # Output data for individual counties
    outstem = os.path.join(outdir, 'NJvoters_{}{}parties{}'.format(county, parties, outformat))
    df.to_csv(outstem + '.csv', index=False)
    if outformat == 'NGPVAN':
        # NOTE(review): write_ngpvan is not defined or imported in the cells
        # visible here, so this branch raises NameError unless it is defined
        # elsewhere (possibly it lives in njvotes) -- confirm before using
        # outformat = 'NGPVAN'.
        write_ngpvan(df, outstem)

# Statewide list: all counties stacked, original row indices kept (as append did).
# An empty frame is written when no county files were found.
allvoters = pd.concat(countyframes) if countyframes else pd.DataFrame([])
outfile = os.path.join(outdir, 'NJvoters_{}{}parties{}.csv'.format('', parties, outformat))
allvoters.to_csv(outfile, index=False)

ATLANTIC: getting voter data
ATLANTIC: found 177515 people
BERGEN: getting voter data
BERGEN: found 590662 people
BURLINGTON: getting voter data
BURLINGTON: found 305670 people
CAMDEN: getting voter data
CAMDEN: found 349823 people
CAPE MAY: getting voter data
CAPE MAY: found 69328 people
CUMBERLAND: getting voter data
CUMBERLAND: found 92132 people
ESSEX: getting voter data
ESSEX: found 538696 people
GLOUCESTER: getting voter data
GLOUCESTER: found 209024 people
HUDSON: getting voter data
HUDSON: found 349418 people
HUNTERDON: getting voter data
HUNTERDON: found 94662 people
MERCER: getting voter data
MERCER: found 237400 people
MIDDLESEX: getting voter data
MIDDLESEX: found 520526 people
MONMOUTH: getting voter data
MONMOUTH: found 450494 people
MORRIS: getting voter data
MORRIS: found 346374 people
OCEAN: getting voter data
OCEAN: found 402277 people
PASSAIC: getting voter data
PASSAIC: found 303723 people
SALEM: getting voter data
SALEM: found 45097 people
SOMERSET: getting voter d

# Double-check on voter numbers above

In [7]:
# Per-county voter counts, entered by hand as a cross-check of the
# "found N people" figures; built column-wise and shown largest county first.
votesizes = pd.DataFrame({
    'county': [
        'ATLANTIC', 'BERGEN', 'BURLINGTON', 'CAMDEN', 'CAPE MAY',
        'CUMBERLAND', 'ESSEX', 'GLOUCESTER', 'HUDSON', 'HUNTERDON',
        'MERCER', 'MIDDLESEX', 'MONMOUTH', 'MORRIS', 'OCEAN',
        'PASSAIC', 'SALEM', 'SOMERSET', 'SUSSEX', 'UNION',
        'WARREN',
    ],
    'voters': [
        177515, 590662, 305670, 349823, 69328,
        92132, 538696, 209024, 349418, 94662,
        237400, 520526, 450494, 346374, 402277,
        303723, 45097, 225442, 104472, 331293,
        77820,
    ],
})
votesizes.sort_values(by='voters', ascending=False)

Unnamed: 0,county,voters
1,BERGEN,590662
6,ESSEX,538696
11,MIDDLESEX,520526
12,MONMOUTH,450494
14,OCEAN,402277
3,CAMDEN,349823
8,HUDSON,349418
13,MORRIS,346374
19,UNION,331293
2,BURLINGTON,305670
