# Democracy Works - Silas Lee

### Imports, parsing test

In [203]:
import pandas as pd # data processing, Excel file I/O (e.g. pd.read_csv)

import usaddress
# Smoke test: parse one polling-style address string with usaddress.
# The bare last expression makes the notebook display the (token, label) pairs.
addr = "974 Great Plain Avenue Needham MA 02492 USA CON-069 CT069"
usaddress.parse(addr)


[('974', 'AddressNumber'),
 ('Great', 'StreetName'),
 ('Plain', 'StreetName'),
 ('Avenue', 'StreetNamePostType'),
 ('Needham', 'PlaceName'),
 ('MA', 'StateName'),
 ('02492', 'ZipCode'),
 ('USA', 'CountryName'),
 ('CON-069', 'OccupancyIdentifier'),
 ('CT069', 'OccupancyIdentifier')]

### Read Addresses and create ID column

In [204]:
# Load the voter addresses; the file is read as an Excel workbook.
addresses = pd.read_excel('addresses.csv.xlsx')

# Build the join key: state abbreviation followed by the last three characters
# of the precinct ID (e.g. 'MA' + '025-090'[-3:] -> 'MA090').
address_precinct_suffix = addresses['Precinct ID'].astype(str).str[-3:]
addresses['ID'] = addresses['State'] + address_precinct_suffix

display(addresses.head(10))

Unnamed: 0,Street,Apt,City,State,Zip,Precinct ID,ID
0,7 BEACON BLVD,,PEABODY,MA,01960-6303,025-090,MA090
1,1847 TOWER DR,,STOUGHTON,WI,53589-3539,055-015,WI015
2,4628 GREEN VALLEY RD,,FAIRFIELD,CA,94534-1368,006-009,CA009
3,14 HALET ST,,PORTLAND,ME,04102-1734,023-089,ME089
4,1606 BLACKBURN HEIGHTS DR,,SEWICKLEY,PA,15143-8626,042-018,PA018
5,405 LAUREL PL,,MACON,GA,31220-8761,013-041,GA041
6,1011 VALENCIA AVE,,CORAL GABLES,FL,33134-5536,012-051,FL051
7,43 BOYDEN ST,,BROCKTON,MA,02302-2303,025-111,MA111
8,1051 E STEARNS AVE,APT 36,LA HABRA,CA,90631-4857,006-021,CA021
9,10 RUSTIC DR,,N BRUNSWICK,NJ,08902-4706,034-010,NJ010


### Read Polling Addresses and change column names for merge

In [205]:
# Load the polling-place list; the file is read as an Excel workbook.
polling = pd.read_excel('precinct_polling_list.csv.xlsx')

# Give the polling columns their own names so the later merge does not
# produce duplicate column names.
polling.columns = [
    'Poll_street',
    'Poll_city',
    'Poll_state/zip',
    'Poll_country',
    'Precinct',
]

display(polling.head(10))

Unnamed: 0,Poll_street,Poll_city,Poll_state/zip,Poll_country,Precinct
0,288 York Street,New Haven,CT 06511,USA,CON-069
1,301-399 South Boulevard Drive,Bainbridge,GA 39819,USA,GEO-062
2,150-151 Tremont Street,Boston,MA 02111,USA,MAS-111
3,2395 Ingleside Avenue,Macon,GA 31204,USA,GEO-041
4,1007 Merchant Street,Ambridge,PA 15003,USA,MAS-018
5,859 Washington Avenue,Miami Beach,FL 33139,USA,FLO-051
6,974 Great Plain Avenue Needham MA 02492,USA,MAS-006,,
7,139 Lynnfield Street,Peabody,MA 01960,USA,MAS-090
8,180 Nassau Street,Princeton,NJ 08542,USA,NEWJ-010
9,563 Carlsbad Village Drive,Carlsbad,CA 92008,USA,CAL-032


### Create Polling Address column and fill missing values

In [206]:
# Replace missing cells with '' so every address component is a string;
# the later parsing step needs a plain string.
polling = polling.fillna('')

# Cast every component to str before concatenating, so a non-string cell in
# any of the five columns cannot raise a TypeError. Components are separated
# by single spaces; empty components simply leave extra spaces behind.
polling['Polling Address'] = (
    polling['Poll_street'].astype(str)
    + ' ' + polling['Poll_city'].astype(str)
    + ' ' + polling['Poll_state/zip'].astype(str)
    + ' ' + polling['Poll_country'].astype(str)
    + ' ' + polling['Precinct'].astype(str)
)
display(polling.head(10))

Unnamed: 0,Poll_street,Poll_city,Poll_state/zip,Poll_country,Precinct,Polling Address
0,288 York Street,New Haven,CT 06511,USA,CON-069,288 York Street New Haven CT 06511 USA CON-069
1,301-399 South Boulevard Drive,Bainbridge,GA 39819,USA,GEO-062,301-399 South Boulevard Drive Bainbridge GA 39...
2,150-151 Tremont Street,Boston,MA 02111,USA,MAS-111,150-151 Tremont Street Boston MA 02111 USA MAS...
3,2395 Ingleside Avenue,Macon,GA 31204,USA,GEO-041,2395 Ingleside Avenue Macon GA 31204 USA GEO-041
4,1007 Merchant Street,Ambridge,PA 15003,USA,MAS-018,1007 Merchant Street Ambridge PA 15003 USA MAS...
5,859 Washington Avenue,Miami Beach,FL 33139,USA,FLO-051,859 Washington Avenue Miami Beach FL 33139 USA...
6,974 Great Plain Avenue Needham MA 02492,USA,MAS-006,,,974 Great Plain Avenue Needham MA 02492 USA MA...
7,139 Lynnfield Street,Peabody,MA 01960,USA,MAS-090,139 Lynnfield Street Peabody MA 01960 USA MAS-090
8,180 Nassau Street,Princeton,NJ 08542,USA,NEWJ-010,180 Nassau Street Princeton NJ 08542 USA NEWJ-010
9,563 Carlsbad Village Drive,Carlsbad,CA 92008,USA,CAL-032,563 Carlsbad Village Drive Carlsbad CA 92008 U...


### Redistribute stray address labels in table

In [207]:
# Map each usaddress label to the polling column its tokens belong in.
# Labels not listed here are ignored.
LABEL_TO_COLUMN = {
    'AddressNumber': 'Poll_street',
    'StreetNamePreDirectional': 'Poll_street',
    'StreetName': 'Poll_street',
    'StreetNamePostType': 'Poll_street',
    'PlaceName': 'Poll_city',
    'StateName': 'Poll_state/zip',
    'ZipCode': 'Poll_state/zip',
    'CountryName': 'Poll_country',
    'OccupancyIdentifier': 'Precinct',
}
REPAIRED_COLUMNS = ['Poll_street', 'Poll_city', 'Poll_state/zip', 'Poll_country', 'Precinct']

# Rows with an empty 'Precinct' have their fields shifted into the wrong
# columns; rebuild those rows by parsing the combined 'Polling Address'.
misaligned = polling['Precinct'] == ''
for index, full_address in polling.loc[misaligned, 'Polling Address'].items():
    tokens_by_column = {column: [] for column in REPAIRED_COLUMNS}
    for part, part_type in usaddress.parse(full_address):
        column = LABEL_TO_COLUMN.get(part_type)
        if column is not None:
            tokens_by_column[column].append(part)

    # Write through .loc on the frame itself: the row objects yielded by
    # DataFrame.iterrows() may be copies, so assigning to them is not
    # guaranteed to update the DataFrame.
    for column, tokens in tokens_by_column.items():
        polling.loc[index, column] = ' '.join(tokens).strip()

display(polling)

Unnamed: 0,Poll_street,Poll_city,Poll_state/zip,Poll_country,Precinct,Polling Address
0,288 York Street,New Haven,CT 06511,USA,CON-069,288 York Street New Haven CT 06511 USA CON-069
1,301-399 South Boulevard Drive,Bainbridge,GA 39819,USA,GEO-062,301-399 South Boulevard Drive Bainbridge GA 39...
2,150-151 Tremont Street,Boston,MA 02111,USA,MAS-111,150-151 Tremont Street Boston MA 02111 USA MAS...
3,2395 Ingleside Avenue,Macon,GA 31204,USA,GEO-041,2395 Ingleside Avenue Macon GA 31204 USA GEO-041
4,1007 Merchant Street,Ambridge,PA 15003,USA,MAS-018,1007 Merchant Street Ambridge PA 15003 USA MAS...
5,859 Washington Avenue,Miami Beach,FL 33139,USA,FLO-051,859 Washington Avenue Miami Beach FL 33139 USA...
6,974 Great Plain Avenue,Needham,MA 02492,USA,MAS-006,974 Great Plain Avenue Needham MA 02492 USA MA...
7,139 Lynnfield Street,Peabody,MA 01960,USA,MAS-090,139 Lynnfield Street Peabody MA 01960 USA MAS-090
8,180 Nassau Street,Princeton,NJ 08542,USA,NEWJ-010,180 Nassau Street Princeton NJ 08542 USA NEWJ-010
9,563 Carlsbad Village Drive,Carlsbad,CA 92008,USA,CAL-032,563 Carlsbad Village Drive Carlsbad CA 92008 U...


### Now that Polling addresses are cleaned, drop the full address column and create polling ID to match Addresses

In [208]:
# The combined address string was only needed for parsing; remove it.
polling = polling.drop(columns=['Polling Address'])

# Build the join key the same way as for the addresses table: the first two
# characters of 'Poll_state/zip' followed by the last three characters of
# the precinct code (e.g. 'CT 06511'[:2] + 'CON-069'[-3:] -> 'CT069').
polling_state_prefix = polling['Poll_state/zip'].astype(str).str[:2]
polling_precinct_suffix = polling['Precinct'].astype(str).str[-3:]
polling['ID'] = polling_state_prefix + polling_precinct_suffix

display(polling.head())

Unnamed: 0,Poll_street,Poll_city,Poll_state/zip,Poll_country,Precinct,ID
0,288 York Street,New Haven,CT 06511,USA,CON-069,CT069
1,301-399 South Boulevard Drive,Bainbridge,GA 39819,USA,GEO-062,GA062
2,150-151 Tremont Street,Boston,MA 02111,USA,MAS-111,MA111
3,2395 Ingleside Avenue,Macon,GA 31204,USA,GEO-041,GA041
4,1007 Merchant Street,Ambridge,PA 15003,USA,MAS-018,PA018


### Merge the two tables, save as CSV file

In [215]:
# Left join on 'ID': every address row is kept, and polling columns are
# attached wherever a polling row has the same ID.
merged = addresses.merge(polling, how='left', on='ID')
display(merged)

# Save the result; index_label=False leaves the index column without a
# header field in the written file.
merged.to_csv('silasleemerged.csv', index_label=False)

Unnamed: 0,Street,Apt,City,State,Zip,Precinct ID,ID,Poll_street,Poll_city,Poll_state/zip,Poll_country,Precinct
0,7 BEACON BLVD,,PEABODY,MA,01960-6303,025-090,MA090,139 Lynnfield Street,Peabody,MA 01960,USA,MAS-090
1,1847 TOWER DR,,STOUGHTON,WI,53589-3539,055-015,WI015,114 State Street,Madison,WI 53703,USA,WIS-015
2,4628 GREEN VALLEY RD,,FAIRFIELD,CA,94534-1368,006-009,CA009,3085 Jefferson Street,Napa,CA 94559,USA,CAL-009
3,14 HALET ST,,PORTLAND,ME,04102-1734,023-089,ME089,574 Congress Street,Portland,ME 04101,USA,MAI-089
4,1606 BLACKBURN HEIGHTS DR,,SEWICKLEY,PA,15143-8626,042-018,PA018,1007 Merchant Street,Ambridge,PA 15003,USA,MAS-018
5,405 LAUREL PL,,MACON,GA,31220-8761,013-041,GA041,2395 Ingleside Avenue,Macon,GA 31204,USA,GEO-041
6,1011 VALENCIA AVE,,CORAL GABLES,FL,33134-5536,012-051,FL051,859 Washington Avenue,Miami Beach,FL 33139,USA,FLO-051
7,43 BOYDEN ST,,BROCKTON,MA,02302-2303,025-111,MA111,150-151 Tremont Street,Boston,MA 02111,USA,MAS-111
8,1051 E STEARNS AVE,APT 36,LA HABRA,CA,90631-4857,006-021,CA021,108 West 2nd Street,Los Angeles,CA 90012,USA,CAL-021
9,10 RUSTIC DR,,N BRUNSWICK,NJ,08902-4706,034-010,NJ010,180 Nassau Street,Princeton,NJ 08542,USA,NEWJ-010
