Author: Geoff Boeing
<br />
Web: http://geoffboeing.com
<br />
Date: 2014-08-31
<br />
Description: This notebook reverse-geocodes a data set of latitude-longitude coordinates to city and country names using the Google Maps Geocoding API. More info: http://geoffboeing.com/2014/08/visualizing-summer-travels/

In [2]:
# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

# import necessary modules
import json
import logging
import os

import pandas as pd
import requests

In [3]:
# configure logging for our tool
log = logging.getLogger('reverseGeocoder')
log.setLevel(logging.INFO)

# getLogger() hands back the same logger object every time this cell runs, so
# only attach the file handler once -- otherwise each re-run of the cell adds
# another handler and every message is written to the log file repeatedly
if not log.handlers:
    # FileHandler does not create missing directories, so make sure logs/ exists
    if not os.path.isdir('logs'):
        os.makedirs('logs')
    lfh = logging.FileHandler('logs/reverseGeocoder.log')
    lfh.setFormatter(logging.Formatter('%(levelname)s %(asctime)s %(message)s'))
    log.addHandler(lfh)

In [4]:
# read the raw gps coordinates from disk
df = pd.read_csv('data/summer-travel-gps-no-city-country.csv')

# add the result columns, each starting out as an empty string in every row
for empty_column in ['geocode_data', 'city', 'country']:
    df[empty_column] = ''

df.head()

Unnamed: 0,lat,lon,date,geocode_data,city,country
0,51.481292,-0.451011,05/14/2014 09:07,,,
1,51.474005,-0.450999,05/14/2014 09:22,,,
2,51.478199,-0.446081,05/14/2014 10:51,,,
3,51.478199,-0.446081,05/14/2014 11:24,,,
4,51.474146,-0.451562,05/14/2014 11:38,,,


In [5]:
# function that handles the geocoding requests
def reverseGeocode(latlng):
    """
    Reverse geocode one coordinate pair with the Google Maps Geocoding API.

    Parameters
    ----------
    latlng : str
        Latitude and longitude joined by a comma, e.g. '51.481292,-0.451011'.

    Returns
    -------
    dict
        The first entry of the response's 'results' list, or an empty dict
        when the response carries no results.

    Notes
    -----
    The API key is read from the GOOGLE_MAPS_API_KEY environment variable when
    it is set; otherwise the placeholder string below is sent. Network errors
    and unparseable responses are not caught here and propagate to the caller.
    """
    result = {}
    url = 'https://maps.googleapis.com/maps/api/geocode/json'
    apikey = os.environ.get('GOOGLE_MAPS_API_KEY', 'YOUR-API-KEY-HERE')

    # log only the coordinates: the full request URL embeds the API key, which
    # should not end up in a log file
    log.info('reverse geocoding latlng=%s', latlng)

    # let requests build and escape the query string; the timeout keeps one
    # stalled connection from hanging the whole geocoding run
    response = requests.get(url, params={'latlng': latlng, 'key': apikey}, timeout=30)
    data = json.loads(response.text)

    # tolerate a payload without a 'results' key instead of raising KeyError
    matches = data.get('results') or []
    if len(matches) > 0:
        result = matches[0]
    else:
        # record what the API reported so that a denied or over-quota request
        # can be told apart from a location that simply has no match
        log.warning('no results for latlng=%s (status=%s, error_message=%s)',
                    latlng, data.get('status'), data.get('error_message'))

    return result

In [None]:
# comment out the following line of code to geocode the entire dataframe
# (.copy() makes the subset an independent dataframe, so the writes below
# land in it rather than in a temporary slice of the original)
df = df.head().copy()

# the column was created holding empty strings; make sure it is a generic
# object column so each cell can hold the dict returned by the geocoder
df['geocode_data'] = df['geocode_data'].astype(object)

for i in df.index:
    # for each row in the dataframe, geocode the lat-long data
    latlng = '{0},{1}'.format(df.at[i, 'lat'], df.at[i, 'lon'])

    # .at writes a single cell directly into the dataframe; chained indexing
    # such as df['geocode_data'][i] = ... can write to a temporary copy
    df.at[i, 'geocode_data'] = reverseGeocode(latlng)

    # once every 100 loops print a counter
    # (the call form of print is valid under both python 2 and python 3)
    if i % 100 == 0:
        print(i)

df.head()

In [7]:
# identify municipality and country data in the blob that google sent back

# address component types that are accepted as the "city"; components are
# scanned in the order google returned them and the first one carrying any of
# these types is used
CITY_COMPONENT_TYPES = {
    'locality',
    'postal_town',
    'administrative_area_level_2',
    'administrative_area_level_1',
}

for i, row in df.iterrows():
    geocode_data = row['geocode_data']

    # rows that were never geocoded (or came back empty) have no components
    if 'address_components' not in geocode_data:
        continue
    components = geocode_data['address_components']

    # first try to identify the country
    # (.at writes the cell directly; chained indexing like df['country'][i] = ...
    # can end up writing to a temporary copy instead of the dataframe)
    for component in components:
        if 'country' in component['types']:
            df.at[i, 'country'] = component['long_name']

    # now try to identify the municipality
    for component in components:
        if CITY_COMPONENT_TYPES.intersection(component['types']):
            df.at[i, 'city'] = component['long_name']
            break

df.head()

Unnamed: 0,lat,lon,date,geocode_data,city,country
0,51.481292,-0.451011,05/14/2014 09:07,{u'geometry': {u'location': {u'lat': 51.481069...,Heathrow,United Kingdom
1,51.474005,-0.450999,05/14/2014 09:22,{u'geometry': {u'location_type': u'APPROXIMATE...,Hounslow,United Kingdom
2,51.478199,-0.446081,05/14/2014 10:51,{u'geometry': {u'location_type': u'APPROXIMATE...,Hounslow,United Kingdom
3,51.478199,-0.446081,05/14/2014 11:24,{u'geometry': {u'location_type': u'APPROXIMATE...,Hounslow,United Kingdom
4,51.474146,-0.451562,05/14/2014 11:38,{u'geometry': {u'location_type': u'APPROXIMATE...,Hounslow,United Kingdom


In [8]:
# google's geocoder fails on anything in kosovo, so do those manually now:
# every row whose country / city is still an empty string gets the fallback value
for column, fallback in [('country', 'Kosovo'), ('city', 'Prizren')]:
    df.loc[df[column] == '', column] = fallback

In [9]:
# save our reverse-geocoded data set (the line below is commented out; uncomment it to write the CSV file)
#df.to_csv('data/summer-travel-gps-full.csv', encoding='utf-8', index=False)