In [1]:
import numpy as np
import pandas as pd
import googlemaps
from datetime import datetime
import os
import pickle
import re

In [2]:
# Read the Google Maps API key from the environment (never hardcode it in the
# notebook) and build the client used by every geocoding call below.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

In [3]:
# Load the pickled city and state tables.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open('../data/df_city_subs.pkl', 'rb') as city_file:
    df_city_subs = pickle.load(city_file)

with open('../data/df_state_subs.pkl', 'rb') as state_file:
    df_state_subs = pickle.load(state_file)

In [4]:
# Geocode every state name through the Google Maps client, keeping the raw
# response for each state in the same order as df_state_subs['state_name'].
geocode_result_states = [
    gmaps.geocode(state_name) for state_name in df_state_subs['state_name']
]

In [10]:
# Cache the raw state geocoding responses on disk. The context manager
# guarantees the file is flushed and closed as soon as the dump finishes
# (an inline open() leaves the handle dangling until garbage collection).
with open('../data/geocode_result_states.pkl', 'wb') as fp:
    pickle.dump(geocode_result_states, fp)

In [6]:
# Join each city row to its state row via the shared 'state_fip' key
# (pd.merge default: inner join).
df_city_states = pd.merge(
    left=df_city_subs,
    right=df_state_subs,
    on='state_fip',
)

In [7]:
# Build the "<city>, <state>" query string that is sent to the geocoder.
city_part = df_city_states['city_short']
state_part = df_city_states['state_name']
df_city_states['city_state'] = city_part + ', ' + state_part

In [8]:
# Geocode every "<city>, <state>" string, keeping the raw response for each
# query in the same order as df_city_states['city_state'].
geocode_result_cities = [
    gmaps.geocode(city_query) for city_query in df_city_states['city_state']
]

In [9]:
# Cache the raw city geocoding responses on disk. The context manager
# guarantees the file is flushed and closed as soon as the dump finishes
# (an inline open() leaves the handle dangling until garbage collection).
with open('../data/geocode_result_cities.pkl', 'wb') as fp:
    pickle.dump(geocode_result_cities, fp)

In [11]:
# Fix city naming and collect one [city_name, state_abbr, lat, lng] record per
# geocoding result, ready to be turned into a dataframe.

# Geocoder long_name -> replacement name. Presumably the replacements are the
# spellings used in `city_short`, which the later merge joins on -- TODO confirm.
CITY_NAME_FIXES = {
    'Washington': 'Washington DC',
    'New York': 'New York City',
    'Saint Paul': 'St. Paul',
    'Saint Charles': 'St. Charles',
}

geo_latlng_list = []
# Formatted addresses that did not look like "..., ST, USA"; kept so they can
# be inspected instead of being lost silently.
geo_unparsed_addresses = []

# Loop names deliberately avoid `list`, which would shadow the built-in for
# every later cell in the kernel.
for city_results in geocode_result_cities:
    for result in city_results:
        city_name_raw = result['address_components'][0]['long_name']
        # Names without an explicit fix are used as returned by the geocoder.
        city_name = CITY_NAME_FIXES.get(city_name_raw, city_name_raw)

        formatted_address = result['formatted_address']
        state_match = re.search(r'.*, ([A-Z]{2}), USA', formatted_address)
        if state_match is None:
            # Without this guard, `.group(1)` on a failed match raises
            # AttributeError and aborts the whole cell.
            geo_unparsed_addresses.append(formatted_address)
            continue

        location = result['geometry']['location']
        geo_latlng_list.append(
            [city_name, state_match.group(1), location['lat'], location['lng']]
        )

In [12]:
# Tabulate the parsed geocoding records; column order matches the order in
# which each record's fields were appended.
geo_columns = ['city_name', 'state_abbr', 'city_lat', 'city_lng']
df_geo = pd.DataFrame(geo_latlng_list, columns=geo_columns)

In [13]:
# Attach coordinates to each city row. df_geo holds one row per geocoding
# result, so a (city_name, state_abbr) key can appear more than once; keep only
# the first occurrence so the left merge cannot fan one city out into several
# rows.
df_geo_unique = df_geo.drop_duplicates(subset=['city_name', 'state_abbr'])
df_geo_cities = pd.merge(
    df_city_states,
    df_geo_unique,
    how='left',  # keep every city even when no coordinates were found
    left_on=['city_short', 'state_abbr'],
    right_on=['city_name', 'state_abbr'],
)

In [15]:
# Keep only the city identifier and its coordinates for the exported table.
output_columns = ['state_city_id', 'city_lat', 'city_lng']
df_geo_cities = df_geo_cities[output_columns]

In [17]:
# Persist the city -> coordinates table. The context manager guarantees the
# file is flushed and closed as soon as the dump finishes (an inline open()
# leaves the handle dangling until garbage collection).
with open('../data/df_geo_cities.pkl', 'wb') as fp:
    pickle.dump(df_geo_cities, fp)