In [1]:
import geocoder
import pandas as pd
import os
from time import sleep
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

In [None]:
# Read the source spreadsheet into a DataFrame, using the 'Name' column as the index.
df = pd.read_csv(
    '../Documents/example.csv',
    encoding='latin1',
    index_col=['Name'],
)

# Extract the address components as plain Python lists (one entry per row).
# Only the city column is cast to str here; street and state keep their raw
# values, so blank cells in those columns remain NaN.
temp_streets = df['Address'].tolist()            # street address column
temp_cities = df['City'].astype(str).tolist()    # city column, forced to str
temp_states = df['State'].tolist()               # state column

# Accumulator for the concatenated address strings built in the next step.
temp_list = []

# Number of rows in the frame; shown as the cell output.
range_length = len(df.index)
range_length

In [None]:
# Build one geocodable address string per row.
#
# Rows whose street value is missing, or is a PO Box / mailbox ("PO BOX" or
# "MLBX" in the street text), do not describe a physical street location, so
# they fall back to "City, State" only.
#
# temp_list is (re)created here so that re-running this cell starts from an
# empty list instead of appending a second copy of every address, which would
# make the column assignment below fail with a length mismatch.
temp_list = []

for street, city, state in zip(temp_streets, temp_cities, temp_states):
    # City was cast to str on load; State was not, so a blank state is NaN
    # (a float) and must not be concatenated directly.
    locality = city if pd.isnull(state) else city + ", " + str(state)

    street_text = "" if pd.isnull(street) else str(street)
    is_mailbox = "PO BOX" in street_text or "MLBX" in street_text

    if street_text and not is_mailbox:
        temp_list.append(street_text + " " + locality)
    else:
        temp_list.append(locality)

df['Concatenated_Addresses'] = temp_list # new field in data frame that holds combined addresses

# First geocoding pass: OpenStreetMap's Nominatim service (via geopy).
#
# The three lists below are kept strictly parallel to temp_list (one entry per
# row, in row order):
#   no_address -- the address string if Nominatim could not resolve it, else 'X'
#   latitudes  -- the latitude if resolved, else 'X'
#   longitudes -- the longitude if resolved, else 'X'
# Addresses left in no_address are retried with Google's API in the next step.
no_address = []
latitudes = []
longitudes = []

# Create the geocoder once instead of once per address. geopy 2.x rejects the
# default user agent for Nominatim, so an explicit application name is passed;
# replace it with a name that identifies your project.
geolocator = Nominatim(user_agent="address_geocoding_notebook")

for address in temp_list:
    try:
        location = geolocator.geocode(address, timeout=10)
    except GeocoderTimedOut as e:
        # geopy exceptions have no `.msg` attribute; format the exception itself.
        print("Error: geocode failed on input %s with message %s"%(address, e))
        # Treat a timeout like "not found" so every row still gets an entry in
        # all three lists and the address is retried by the fallback geocoder.
        location = None

    if location is None:
        no_address.append(address)
        latitudes.append('X')
        longitudes.append('X')
    else:
        no_address.append('X')
        latitudes.append(location.latitude)
        longitudes.append(location.longitude)

    # The public Nominatim server allows at most one request per second.
    sleep(1)

print('Step 1 complete')

In [None]:
# Second geocoding pass: retry, through Google's location API, every address
# the first pass could not resolve.
#
# All three lists stay parallel to no_address (one entry per row):
#   needs_geocode_2 -- the address to retry, or 'X' if the row is already geocoded
#   google_geocoded -- the geocoder result object, or 'X' if no request was made
#   coordinates     -- [lat, lng] when Google found the address, [] when the
#                      lookup was attempted but produced nothing, or 'X' when
#                      no request was made for that row
# A failed lookup is stored as [] (never 'X' or None) because the step that
# merges the two passes tests len(coordinates[i]) == 0 to detect a miss.
needs_geocode_2 = list(no_address)
google_geocoded = []
coordinates = []

# Keep the timeout at 5 or more, otherwise you'll exceed search rates and the
# process will quit.
for address in needs_geocode_2:
    if address == 'X':
        google_geocoded.append('X')
        coordinates.append('X')
        continue

    place = geocoder.google(address, timeout=10)
    google_geocoded.append(place)

    # Stay best-effort when reading the result, but do not swallow
    # KeyboardInterrupt / SystemExit the way a bare `except:` would.
    try:
        latlng = place.latlng
    except Exception:
        latlng = None

    coordinates.append(latlng if latlng else [])

print(coordinates)

In [None]:
# Merge the two geocoding passes into one latitude and one longitude per row.
#
# For each row: use the first-pass (Nominatim) value when it exists; otherwise
# use the Google [lat, lng] pair when one was returned; otherwise record 'X'
# to mark an address that neither service could resolve.
new_latitudes = [] # combined latitudes from both geocoding passes
new_longitudes = [] # combined longitudes from both geocoding passes

# Every per-row list must line up with the DataFrame before it is merged and
# written back; fail with a clear message rather than an IndexError mid-loop.
if not (len(latitudes) == len(longitudes) == len(coordinates) == len(df)):
    raise ValueError(
        "Geocoding results are out of step with the data frame: "
        "%d latitudes, %d longitudes, %d coordinates for %d rows"
        % (len(latitudes), len(longitudes), len(coordinates), len(df))
    )

for lat, lng, coords in zip(latitudes, longitudes, coordinates):
    if lat != 'X':
        new_latitudes.append(lat)
        new_longitudes.append(lng)
    elif isinstance(coords, (list, tuple)) and len(coords) >= 2:
        new_latitudes.append(coords[0])
        new_longitudes.append(coords[1])
    else:
        # Covers every "not found" shape: an empty list, None, or the 'X'
        # placeholder string (indexing 'X'[1] would raise IndexError).
        new_latitudes.append('X')
        new_longitudes.append('X')

print(new_latitudes)
print('\n' + '\n')
print(new_longitudes)

df['Latitude'] = new_latitudes # add new Latitude field to dataframe
df['Longitude'] = new_longitudes # add new Longitude field to dataframe

# NOTE(review): this overwrites the input CSV in place, so the added columns
# become part of the input on the next run -- confirm that is intended.
df.to_csv('../Documents/example.csv')