## Geocoding
### Author: Caleb Easterly

In [76]:
import os
import pandas as pd
import re
import csv

# Work from the project folder so the relative data paths used below resolve.
project_dir = "C:\\Users\\caleb\\OneDrive - University of North Carolina at Chapel Hill\\Documents\\Projects\\Cancer care crowdfunding"
os.chdir(project_dir)

# Load the analytic file, using its "URL" column as the row index.
analytic_path = "GoFundMeUU/data/analytic_file.csv"
analytic_file = pd.read_csv(
    analytic_path,
    index_col="URL",
    encoding='utf-8',
    lineterminator='\n',
    low_memory=False,
)

Get deduplicated list of locations.

In [77]:
# Build the de-duplicated location list without in-place edits: calling
# `inplace=True` methods on a Series pulled out of `analytic_file` can modify
# (or warn about modifying) the column the DataFrame still holds, and makes
# this cell unsafe to re-run.
# The index is reset *before* de-duplicating, so each kept location is labelled
# with the row position of its first occurrence (labels are unique but not
# contiguous).
locations = (
    analytic_file["Location"]
    .reset_index(drop=True)
    .drop_duplicates()
)

Set up Google Maps API

In [78]:
import googlemaps

# The API key is kept in an untracked file; only its first line is read,
# with surrounding whitespace removed.
with open("key.txt") as key_file:
    api_key = key_file.readline().strip()

# Client used for all geocoding requests below.
gmaps = googlemaps.Client(key=api_key)

Geocode to get county, state, country, and zip code if available.

In [82]:
# Maps each address-component type returned by the geocoder to the output
# column that stores its `long_name`.
_COMPONENT_TYPE_TO_COLUMN = {
    'administrative_area_level_1': 'State',
    'administrative_area_level_2': 'County',
    'postal_code': 'PostalCode',
    'country': 'Country',
}


def _extract_address_fields(address_components):
    """Pull county, state, country and postal code out of one geocoding result.

    Parameters
    ----------
    address_components : list of dict
        The 'address_components' list of a single result; each entry is read
        for its 'types' list and its 'long_name' string.

    Returns
    -------
    dict
        Keys 'State', 'County', 'PostalCode' and 'Country'. A field that is
        absent from the result is returned as '' so that every row is written
        with a defined value. If several components carry the same type, the
        last one wins.
    """
    fields = dict.fromkeys(_COMPONENT_TYPE_TO_COLUMN.values(), '')
    for comp in address_components:
        for comp_type, column in _COMPONENT_TYPE_TO_COLUMN.items():
            if comp_type in comp['types']:
                fields[column] = comp['long_name']
    return fields


# dataset to hold results; rows take their labels from `locations`
geocoded = pd.DataFrame(columns=['OrigLocation', 'County', 'State', 'Country', 'PostalCode', 'Error'])
geocoded["OrigLocation"] = locations

for ind in geocoded.index:
    orig_loc = geocoded.loc[ind, "OrigLocation"]

    # A missing (NaN) or blank location cannot be geocoded; flag it instead of
    # sending a meaningless request to the API.
    if not isinstance(orig_loc, str) or not orig_loc.strip():
        geocoded.loc[ind, 'Error'] = 1
        continue

    # api call
    apireturn = gmaps.geocode(orig_loc)
    if len(apireturn) == 0:
        # the API answered but found no match for this location
        geocoded.loc[ind, 'Error'] = 1
        continue

    # Only the first result is used. Fields start from blank defaults inside
    # the helper, so a result lacking e.g. a postal code can neither raise a
    # NameError nor inherit a value from a previous row.
    fields = _extract_address_fields(apireturn[0]['address_components'])
    for column, value in fields.items():
        geocoded.loc[ind, column] = value

Write out geocoded data to file.

In [80]:
# Write every field quoted, with Unix newlines and without the row labels.
# `lineterminator` is the current spelling of this option (pandas >= 1.5);
# the older `line_terminator` keyword was removed in pandas 2.0 and raises
# a TypeError there.
geocoded.to_csv(
    "GoFundMeUU/data/geocodes.csv",
    lineterminator='\n',
    index=False,
    encoding='utf-8',
    quoting=csv.QUOTE_ALL,
)