# Geocode previous travels to lat-long

See [this blog post](http://geoffboeing.com/2016/06/mapping-google-location-history-python/) for my full write-up of this project.

This notebook reads a spreadsheet with place, city, state, country, and date columns, geocodes each row to a lat-long pair, and caches every geocoding result locally so that repeated runs do not re-query the geocoding API.

In [1]:
import pandas as pd, time, requests, json, os.path

In [2]:
# seconds to sleep before each un-cached geocoding request
pause = 0.1
# csv of previously visited places that gets loaded and geocoded below
previous_travels_file = 'data/previous-travels-original.csv'

In [3]:
# configure local caching: reload the address -> 'lat,lng' lookups saved by a previous run, if any
geocode_cache_filename = 'data/geocode_cache.js'
if os.path.isfile(geocode_cache_filename):
    # use a context manager so the file handle is closed right away, and read as utf-8
    # to match the encoding the cache file is written with when it is saved
    with open(geocode_cache_filename, encoding='utf-8') as cache_file:
        geocode_cache = json.load(cache_file)
else:
    # first run (or cache deleted): start with an empty cache
    geocode_cache = {}

In [4]:
# load the list of places i visited before smartphone/gps tracking; these rows have no coordinates yet
df = pd.read_csv(previous_travels_file, encoding='utf-8')
print('{:,} rows in dataset'.format(df.shape[0]))
df.head()

318 rows in dataset


Unnamed: 0,place,city,state,country,date
0,,Bach,,Austria,2009-06
1,Hundertwasserhaus,Vienna,,Austria,2009-06
2,Schonbrunn Palace,Vienna,,Austria,2009-06
3,St. Stephen's Cathedral,Vienna,,Austria,2009-06
4,,Siem Reap,,Cambodia,2011-02


In [5]:
# combine place + state/country into an address column for geocoding
def make_address(row):
    """
    Build a comma-separated address string from one row of the travels table.

    Joins the row's 'place', 'city', 'state', and 'country' values, in that
    order, skipping any value that is missing (NaN or None).

    Parameters
    ----------
    row : mapping (e.g. pandas.Series or dict)
        Must provide the keys 'place', 'city', 'state', and 'country'.

    Returns
    -------
    str
        For example 'Hundertwasserhaus, Vienna, Austria'. An empty string
        is returned when every field is missing.
    """
    parts = [row[field] for field in ('place', 'city', 'state', 'country')]
    # filter on the values themselves instead of stripping the text 'nan, ' from the
    # formatted string: that approach mangled real names ending in "nan"
    # ('Hunan, China' became 'HuChina') and left a trailing 'nan' when country was missing
    return ', '.join(str(part) for part in parts if pd.notnull(part))

# axis=1 applies make_address to each row, so every record gets its own address string
df['address'] = df.apply(make_address, axis=1)
df.head()

Unnamed: 0,place,city,state,country,date,address
0,,Bach,,Austria,2009-06,"Bach, Austria"
1,Hundertwasserhaus,Vienna,,Austria,2009-06,"Hundertwasserhaus, Vienna, Austria"
2,Schonbrunn Palace,Vienna,,Austria,2009-06,"Schonbrunn Palace, Vienna, Austria"
3,St. Stephen's Cathedral,Vienna,,Austria,2009-06,"St. Stephen's Cathedral, Vienna, Austria"
4,,Siem Reap,,Cambodia,2011-02,"Siem Reap, Cambodia"


In [6]:
def geocode(address):
    """
    Geocode an address string to a 'lat,lng' string, consulting the local cache first.

    A cache hit is returned immediately without any network traffic. On a miss,
    the function sleeps for `pause` seconds, queries the Google geocoding
    endpoint, and stores the first result's coordinates in `geocode_cache`
    under the address before returning them.

    Parameters
    ----------
    address : str
        Free-form address to look up; also used as the cache key.

    Returns
    -------
    str or None
        '<latitude>,<longitude>' taken from the first result, or None when the
        response contains no results (nothing is cached in that case).
    """
    global geocode_cache

    # serve repeat lookups from the cache so each address is requested at most once
    if address in geocode_cache:
        return geocode_cache[address]

    # throttle un-cached requests
    time.sleep(pause)

    # NOTE(review): the current Google Geocoding API presumably requires HTTPS and an
    # API key - confirm against the API docs before relying on this endpoint
    url = 'http://maps.googleapis.com/maps/api/geocode/json'

    # hand the query to requests as params so it gets percent-encoded; formatting the raw
    # address into the URL let characters such as '&' or '#' cut the query short
    response = requests.get(url, params={'sensor': 'false', 'address': address})
    data = response.json()

    # tolerate responses that carry no 'results' key at all (treated the same as no match)
    results = data.get('results') or []
    if len(results) > 0:
        location = results[0]['geometry']['location']
        latlng = '{},{}'.format(location['lat'], location['lng'])
        geocode_cache[address] = latlng
        return latlng

    # no match: make the "could not geocode" outcome explicit
    return None

In [7]:
# geocode each place to lat-long; geocode returns a 'lat,lng' string, or nothing (None) when the api finds no match
df['latlng'] = df['address'].map(geocode)
df.head()

Unnamed: 0,place,city,state,country,date,address,latlng
0,,Bach,,Austria,2009-06,"Bach, Austria","47.2632631,10.3977463"
1,Hundertwasserhaus,Vienna,,Austria,2009-06,"Hundertwasserhaus, Vienna, Austria","48.20722199999999,16.394167"
2,Schonbrunn Palace,Vienna,,Austria,2009-06,"Schonbrunn Palace, Vienna, Austria","48.18486480000001,16.3122398"
3,St. Stephen's Cathedral,Vienna,,Austria,2009-06,"St. Stephen's Cathedral, Vienna, Austria","48.2084943,16.373156"
4,,Siem Reap,,Cambodia,2011-02,"Siem Reap, Cambodia","13.3670968,103.8448134"


In [8]:
# rows left without a latlng because geocoding returned nothing for their address
df[df['latlng'].isnull()]

Unnamed: 0,place,city,state,country,date,address,latlng


In [9]:
# break the 'lat,lng' string into its two halves, one column each; rows without a string latlng get None
for column_name, position in (('lat', 0), ('lon', 1)):
    df[column_name] = df['latlng'].map(
        lambda value, index=position: value.split(',')[index] if isinstance(value, str) else None
    )
df.head()

Unnamed: 0,place,city,state,country,date,address,latlng,lat,lon
0,,Bach,,Austria,2009-06,"Bach, Austria","47.2632631,10.3977463",47.2632631,10.3977463
1,Hundertwasserhaus,Vienna,,Austria,2009-06,"Hundertwasserhaus, Vienna, Austria","48.20722199999999,16.394167",48.20722199999999,16.394167
2,Schonbrunn Palace,Vienna,,Austria,2009-06,"Schonbrunn Palace, Vienna, Austria","48.18486480000001,16.3122398",48.18486480000001,16.3122398
3,St. Stephen's Cathedral,Vienna,,Austria,2009-06,"St. Stephen's Cathedral, Vienna, Austria","48.2084943,16.373156",48.2084943,16.373156
4,,Siem Reap,,Cambodia,2011-02,"Siem Reap, Cambodia","13.3670968,103.8448134",13.3670968,103.8448134


In [10]:
# the address and combined latlng strings were only intermediate steps, so discard them
df = df.drop(['address', 'latlng'], axis='columns')
df.head()

Unnamed: 0,place,city,state,country,date,lat,lon
0,,Bach,,Austria,2009-06,47.2632631,10.3977463
1,Hundertwasserhaus,Vienna,,Austria,2009-06,48.20722199999999,16.394167
2,Schonbrunn Palace,Vienna,,Austria,2009-06,48.18486480000001,16.3122398
3,St. Stephen's Cathedral,Vienna,,Austria,2009-06,48.2084943,16.373156
4,,Siem Reap,,Cambodia,2011-02,13.3670968,103.8448134


In [11]:
# persist the results: geocoded destinations go to csv, and the lookup cache goes back to disk
df.to_csv('data/previous-travels-geocoded.csv', encoding='utf-8', index=False)
with open(geocode_cache_filename, mode='w', encoding='utf-8') as cache_file:
    json.dump(geocode_cache, cache_file)