# Reverse geocode latitude-longitude to city + country, worldwide

This notebook reverse geocodes a data set of latitude-longitude points to city and country names, using Google's geocoding web service.

More info: http://geoffboeing.com/2014/08/visualizing-summer-travels/

For an advanced version that uses local caching, see [this notebook](https://github.com/gboeing/data-visualization/blob/master/location-history/google-location-history-reverse-geocode.ipynb) and [this write-up](http://geoffboeing.com/2016/06/mapping-everywhere-ever-been/) of that project.

In [1]:
# import necessary modules
import pandas as pd, requests, logging, time

# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

In [2]:
# configure logging for our tool
log = logging.getLogger('reverse_geocoder')
log.setLevel(logging.INFO)

# logging.getLogger returns the same logger object for the same name, so re-running
# this cell would stack a second file handler on it and write every message twice.
# detach and close any handlers left from a previous run before attaching a new one.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)
    stale_handler.close()

# mode='w' truncates the log file each time the handler is created
lfh = logging.FileHandler('logs/reverse_geocoder.log', mode='w', encoding='utf-8')
lfh.setFormatter(logging.Formatter('%(levelname)s %(asctime)s %(message)s'))
log.addHandler(lfh)
log.info('process started')

In [3]:
# read the gps coordinates and append empty string placeholder columns
# that later cells fill in with the geocoding results
df = pd.read_csv('data/summer-travel-gps-no-city-country.csv', encoding='utf-8').assign(
    geocode_data='',
    city='',
    country='',
)

df.head()

Unnamed: 0,lat,lon,date,geocode_data,city,country
0,51.481292,-0.451011,05/14/2014 09:07,,,
1,51.474005,-0.450999,05/14/2014 09:22,,,
2,51.478199,-0.446081,05/14/2014 10:51,,,
3,51.478199,-0.446081,05/14/2014 11:24,,,
4,51.474146,-0.451562,05/14/2014 11:38,,,


In [4]:
# function that handles the geocoding requests
def reverse_geocode(latlng, timeout=10):
    """
    Reverse geocode one 'lat,lng' string with Google's geocoding web service.

    Parameters
    ----------
    latlng : str
        latitude and longitude joined by a comma, e.g. '51.474005,-0.4509991'
    timeout : float
        seconds to wait on the server before requests raises a timeout error;
        without it a single stalled connection would block the whole run

    Returns
    -------
    dict or None
        the first element of the response's 'results' list, or None when the
        response contains no results
    """
    # brief pause before every request (presumably to stay under the API's rate limit)
    time.sleep(0.1)
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={0}'
    request = url.format(latlng)
    log.info(request)
    response = requests.get(request, timeout=timeout)
    data = response.json()
    if 'results' in data and len(data['results']) > 0:
        return data['results'][0]

    # record why nothing came back so empty rows can be diagnosed from the log
    status = data.get('status') if isinstance(data, dict) else None
    log.warning('no results for %s (status: %s)', latlng, status)
    return None

In [5]:
# create concatenated lat+lng column
df['latlng'] = df.apply(lambda row: '{},{}'.format(row['lat'], row['lon']), axis=1)

# the data set repeats coordinates (consecutive rows can share the same lat/lon),
# so remember each answer and send only one request per distinct latlng string
geocode_cache = {}

def reverse_geocode_once(latlng):
    """Return reverse_geocode(latlng), calling the web service only the first time a value is seen."""
    if latlng not in geocode_cache:
        geocode_cache[latlng] = reverse_geocode(latlng)
    return geocode_cache[latlng]

# reverse geocode each value
df['geocode_data'] = df['latlng'].map(reverse_geocode_once)
df.head()

Unnamed: 0,lat,lon,date,geocode_data,city,country,latlng
0,51.481292,-0.451011,05/14/2014 09:07,"{'address_components': [{'long_name': '30', 't...",,,"51.481291600000006,-0.4510112"
1,51.474005,-0.450999,05/14/2014 09:22,{'address_components': [{'long_name': 'Croydon...,,,"51.474005,-0.4509991"
2,51.478199,-0.446081,05/14/2014 10:51,"{'address_components': [{'long_name': '7912', ...",,,"51.478199100000005,-0.44608100000000006"
3,51.478199,-0.446081,05/14/2014 11:24,"{'address_components': [{'long_name': '7912', ...",,,"51.478199100000005,-0.44608100000000006"
4,51.474146,-0.451562,05/14/2014 11:38,{'address_components': [{'long_name': 'Croydon...,,,"51.4741456,-0.4515622"


In [6]:
# identify municipality and country data in the json that google sent back
def parse_city(geocode_data):
    """
    Pull a city-like place name out of one geocoding result.

    Walks the result's 'address_components' in order and returns the 'long_name'
    of the first component whose 'types' include locality, postal_town,
    administrative_area_level_2 or administrative_area_level_1.
    Returns None if the result is None, has no 'address_components', or has no
    component of those types.
    """
    city_like_types = (
        'locality',
        'postal_town',
        'administrative_area_level_2',
        'administrative_area_level_1',
    )

    # nothing to parse
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    for part in geocode_data['address_components']:
        part_types = part['types']
        if any(kind in part_types for kind in city_like_types):
            return part['long_name']
    return None

def parse_country(geocode_data):
    """
    Pull the country name out of one geocoding result.

    Returns the 'long_name' of the first entry in 'address_components' whose
    'types' include 'country', or None if the result is None, has no
    'address_components', or has no country component.
    """
    # nothing to parse
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    country_names = (
        part['long_name']
        for part in geocode_data['address_components']
        if 'country' in part['types']
    )
    return next(country_names, None)

In [7]:
# fill the city and country columns from each row's raw geocoder result
for column, parser in (('city', parse_city), ('country', parse_country)):
    df[column] = df['geocode_data'].map(parser)
print(len(df))
df.head()

1759


Unnamed: 0,lat,lon,date,geocode_data,city,country,latlng
0,51.481292,-0.451011,05/14/2014 09:07,"{'address_components': [{'long_name': '30', 't...",Heathrow,United Kingdom,"51.481291600000006,-0.4510112"
1,51.474005,-0.450999,05/14/2014 09:22,{'address_components': [{'long_name': 'Croydon...,Longford,United Kingdom,"51.474005,-0.4509991"
2,51.478199,-0.446081,05/14/2014 10:51,"{'address_components': [{'long_name': '7912', ...",Longford,United Kingdom,"51.478199100000005,-0.44608100000000006"
3,51.478199,-0.446081,05/14/2014 11:24,"{'address_components': [{'long_name': '7912', ...",Longford,United Kingdom,"51.478199100000005,-0.44608100000000006"
4,51.474146,-0.451562,05/14/2014 11:38,{'address_components': [{'long_name': 'Croydon...,Longford,United Kingdom,"51.4741456,-0.4515622"


In [8]:
# google's geocoder fails on anything in kosovo, so do those manually now
# parse_city/parse_country return None when nothing is found, so the unfilled rows
# hold nulls rather than the '' placeholders; match both or the fix never applies
# NOTE(review): this labels every unresolved row as Prizren, Kosovo - confirm that
# no rows failed for some other reason before relying on it
country_missing = df['country'].isnull() | (df['country'] == '')
city_missing = df['city'].isnull() | (df['city'] == '')
df.loc[country_missing, 'country'] = 'Kosovo'
df.loc[city_missing, 'city'] = 'Prizren'

In [9]:
# write the reverse-geocoded data set to disk, leaving out the dataframe index
output_path = 'data/summer-travel-gps-full.csv'
df.to_csv(output_path, index=False, encoding='utf-8')