ENV: **google_env**

# Geocoding with Google Maps

In [1]:
import googlemaps
from geopy.geocoders import GoogleV3

import warnings
warnings.filterwarnings('ignore')

In [2]:
file = open('plus_geocoding_key.key', 'r')
key2 = file.read()

In [3]:
gmaps = googlemaps.Client(key=key2)
g = GoogleV3(key2)

In [4]:
# Test the geocoder
geocode_result = gmaps.geocode('Santa Clara California')
geocode_result

[{'address_components': [{'long_name': 'Santa Clara',
    'short_name': 'Santa Clara',
    'types': ['locality', 'political']},
   {'long_name': 'Santa Clara County',
    'short_name': 'Santa Clara County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'California',
    'short_name': 'CA',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']}],
  'formatted_address': 'Santa Clara, CA, USA',
  'geometry': {'bounds': {'northeast': {'lat': 37.418939, 'lng': -121.9297351},
    'southwest': {'lat': 37.3228419, 'lng': -122.00537}},
   'location': {'lat': 37.3541079, 'lng': -121.9552356},
   'location_type': 'APPROXIMATE',
   'viewport': {'northeast': {'lat': 37.418939, 'lng': -121.9297351},
    'southwest': {'lat': 37.3228419, 'lng': -122.00537}}},
  'place_id': 'ChIJk8EIXIG3j4ARwL_Ao3ykdeQ',
  'types': ['locality', 'political']}]

In [5]:
import pandas as pd
import requests, logging, time 

logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)

# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

API_KEY = key2

In [6]:
#Defining files to be geocoded
unique_files = ['LOC_FAC_GPE.csv']

# Defining the address columns within corresponding files 
address_column_names = ['LOC_FAC_GPE']

In [7]:
# Defining path for each file
ressources = 'Geocoding/unq_locations/' # folder name 
unique_files = [ressources  + elem + '.csv' for elem in address_column_names]
unique_files

['Geocoding/unq_locations/LOC_FAC_GPE.csv']

In [8]:
# Specifying the results column
results = 'Geocoding/google_coords'
output_filenames =[results +'/google_' + elem + '.csv' for elem in address_column_names]
output_filenames

['Geocoding/google_coords/google_LOC_FAC_GPE.csv']

In [9]:
#------------------	FUNCTION DEFINITIONS ------------------------

def get_google_results(LOC, api_key=key2, return_full_response=True):
    """
    Get geocode results from Google Maps Geocoding API.
    
    Note, that in the case of multiple google geocode results, this function returns details of the FIRST result.
    
    @param address: String address as accurate as possible.
    @param api_key: String API key from google. 
                    
    @param return_full_response: Boolean to indicate if you'd like to return the full response from google. This
                    is useful if you'd like additional location details for storage or parsing later.
    """
    # Set up the Geocoding url, defining the bounding box and the input address format
    geocode_url = "https://maps.googleapis.com/maps/api/geocode/json?address={}".format(LOC)
    if api_key is not None:
        geocode_url = geocode_url + "&key={}".format(api_key)
        
    # Ping google for the reuslts:
    results = requests.get(geocode_url)
    # Results will be in JSON format - convert to dict using requests functionality
    results = results.json()
    
    # if there's no results or an error, return empty results.
    if len(results['results']) == 0:
        output = {
            "formatted_address" : None,
            "latitude": None,
            "longitude": None,
        }
    else:    
        answer = results['results'][0]
        output = {
            "formatted_address" : answer.get('formatted_address'),
            "lat": answer.get('geometry').get('location').get('lat'),
            "long": answer.get('geometry').get('location').get('lng'),
            "NE_lat": answer.get('geometry').get('viewport').get('northeast').get('lat'),
            "NE_long": answer.get('geometry').get('viewport').get('northeast').get('lng'),
            "SW_lat": answer.get('geometry').get('viewport').get('southwest').get('lat'),
            "SW_long": answer.get('geometry').get('viewport').get('southwest').get('lng'),
            "accuracy": answer.get('geometry').get('location_type'),
        }
        
    # Append some other details:    
    output['input_string'] = LOC
    output['number_of_results'] = len(results['results'])
    output['status'] = results.get('status')
    if return_full_response is True:
        output['response'] = results
    
    return output



In [10]:
for unique_file, output_filename, address_column_name in zip(unique_files, output_filenames, address_column_names):
    data = pd.read_csv(unique_file)
    data = data.dropna()
    #df1.tail() 
    input_filename = data
    RETURN_FULL_RESULTS = False
    
    if address_column_name not in data.columns:
        raise ValueError("Missing Address column in input data")

    # Form a list of addresses for geocoding:
    # Make a big list of all of the addresses to be processed.
    addresses = data[address_column_name].tolist()
   
    # Create a list to hold results
    results = []

    # Go through each address in turn
    for address in addresses:
        # While the address geocoding is not finished:
        geocoded = False
        while geocoded is not True:
            # Geocode the address with google
            try:
                geocode_result = get_google_results(address, API_KEY, return_full_response=RETURN_FULL_RESULTS)
            except Exception as e:
                logger.exception(e)
                logger.error("Major error with {}".format(address))
                logger.error("Skipping!")
                geocoded = True

            # If we're over the API limit, backoff for a while and try again later.
            if geocode_result['status'] == 'OVER_QUERY_LIMIT':
                logger.info("Hit Query Limit! Backing off for a bit.")
                time.sleep(BACKOFF_TIME * 30) # sleep for 30 minutes
                geocoded = False
            else:
                # If we're ok with API use, save the results
                # Note that the results might be empty / non-ok - log this
                if geocode_result['status'] != 'OK':
                    logger.warning("Error geocoding {}: {}".format(address, geocode_result['status']))
                logger.debug("Geocoded: {}: {}".format(address, geocode_result['status']))
                results.append(geocode_result)           
                geocoded = True

        # Print status every 100 addresses
        if len(results) % 100 == 0:
            logger.info("Completed {} of {} address".format(len(results), len(addresses)))

        # Every 50 addresses, save progress to file(in case of a failure so you have something!)
        if len(results) % 50 == 0:
            pd.DataFrame(results).to_csv("{}_bak".format(output_filename))

    # All done
    logger.info("Finished geocoding all addresses")

    # Write the full results to csv using the pandas library.
    pd.DataFrame(results).to_csv(output_filename, encoding='utf8')

Starting new HTTPS connection (1): maps.googleapis.com:443
https://maps.googleapis.com:443 "GET /maps/api/geocode/json?address=11th,%20Shaw%20Dog%20Park,%20Rhode%20Island&key=AIzaSyDaGxRFDEt40Jb3InMZVzrUOQs6bqiUSh8 HTTP/1.1" 200 None
Geocoded: 11th, Shaw Dog Park, Rhode Island: OK
Starting new HTTPS connection (1): maps.googleapis.com:443
https://maps.googleapis.com:443 "GET /maps/api/geocode/json?address=3rd%20Floor,%20DSCC%20Building20,%20Columbus,%20OH&key=AIzaSyDaGxRFDEt40Jb3InMZVzrUOQs6bqiUSh8 HTTP/1.1" 200 None
Geocoded: 3rd Floor, DSCC Building20, Columbus, OH: OK
Starting new HTTPS connection (1): maps.googleapis.com:443
https://maps.googleapis.com:443 "GET /maps/api/geocode/json?address=Central%20Park,%20Midtown%20South,%20Madison%20Square%20Garden,%20NYC,%20New%20York&key=AIzaSyDaGxRFDEt40Jb3InMZVzrUOQs6bqiUSh8 HTTP/1.1" 200 None
Geocoded: Central Park, Midtown South, Madison Square Garden, NYC, New York: OK
Starting new HTTPS connection (1): maps.googleapis.com:443
https://m