Get the timezone for each venue's lat-long from the Google Maps timezone API: https://developers.google.com/maps/documentation/timezone/intro

Then convert each check-in's datetime to the timezone of the venue at which it was checked in.

In [1]:
import time, requests, pytz, pandas as pd
from keys import google_timezone_api_key
from dateutil import parser as date_parser

In [2]:
# pause interval, in seconds, slept before each API request so we do not hammer their server
pause = 0.25

In [3]:
# read the geocoded check-ins, turn each datetime string into a datetime object,
# and join every venue's latitude and longitude into one "lat,lon" string column
def _format_latlng(row):
    """Return the row's venue latitude and longitude joined as 'lat,lon'."""
    return '{},{}'.format(row['venue_lat'], row['venue_lon'])

df = pd.read_csv('data/untappd_details_geocoded.csv', encoding='utf-8')
df['date_pacific_tz'] = df['date_pacific_tz'].map(date_parser.parse)
df['venue_latlng'] = df.apply(_format_latlng, axis=1)
df.head()

Unnamed: 0,date_pacific_tz,beer_name,beer_style,brewery_name,brewery_place,brewery_type,rating,beer_avg_rating,brewery_avg_rating,beer_abv,...,venue_lat,venue_lon,venue_total_checkins,checkin_id,beer_url,brewery_url,venue_url,brewery_lat,brewery_lon,venue_latlng
0,2016-06-11 23:14:09-07:00,Liberty Ale,IPA - American,Anchor Brewing Company,"San Francisco, CA United States",Micro Brewery,4.0,3.49,3.5,5.9,...,37.8665,-122.282,18.0,322871654,https://untappd.com/b/anchor-brewing-company-l...,https://untappd.com/w/anchor-brewing-company/3891,https://untappd.com/v/chateau-szibbo/2699353,37.774929,-122.419415,"37.8665,-122.282"
1,2016-06-11 22:46:58-07:00,Anchor IPA,IPA - American,Anchor Brewing Company,"San Francisco, CA United States",Micro Brewery,3.75,3.48,3.5,6.5,...,37.8665,-122.282,18.0,322868109,https://untappd.com/b/anchor-brewing-company-a...,https://untappd.com/w/anchor-brewing-company/3891,https://untappd.com/v/chateau-szibbo/2699353,37.774929,-122.419415,"37.8665,-122.282"
2,2016-06-11 21:06:04-07:00,Apocalypse IPA,IPA - American,10 Barrel Brewing Co.,"Bend, OR United States",Micro Brewery,4.0,3.78,3.67,6.8,...,37.8665,-122.282,18.0,322842097,https://untappd.com/b/10-barrel-brewing-co-apo...,https://untappd.com/w/10-barrel-brewing-co/3436,https://untappd.com/v/chateau-szibbo/2699353,44.058173,-121.31531,"37.8665,-122.282"
3,2016-06-11 21:03:12-07:00,Angel City IPA,IPA - American,Angel City Brewery,"Los Angeles, CA United States",Micro Brewery,3.5,3.56,3.47,6.1,...,37.8665,-122.282,18.0,322841026,https://untappd.com/b/angel-city-brewery-angel...,https://untappd.com/w/angel-city-brewery/4406,https://untappd.com/v/chateau-szibbo/2699353,34.052234,-118.243685,"37.8665,-122.282"
4,2016-06-11 17:21:37-07:00,Apocalypse IPA,IPA - American,10 Barrel Brewing Co.,"Bend, OR United States",Micro Brewery,4.0,3.78,3.67,6.8,...,37.8665,-122.282,18.0,322684843,https://untappd.com/b/10-barrel-brewing-co-apo...,https://untappd.com/w/10-barrel-brewing-co/3436,https://untappd.com/v/chateau-szibbo/2699353,44.058173,-121.31531,"37.8665,-122.282"


In [4]:
# how many total venue lat-longs are there, and how many unique lat-longs are there?
# print(...) with a single argument behaves the same on python 2 and python 3,
# whereas the print statement is a syntax error on python 3
print(len(df['venue_latlng']))

venue_latlngs_unique = pd.Series(df['venue_latlng'].unique())
print(len(venue_latlngs_unique))

1435
427


In [5]:
# sort the unique lat-long strings; sort_values keeps each value's original index label
venue_latlngs_unique = venue_latlngs_unique.sort_values()

In [6]:
# send each unique lat-long to the google timezone api to retrieve the local time zone id at that location
def get_timezone_google(latlng, timestamp=0, timeout=10):
    """
    Look up the time zone id for a location with the Google Maps timezone API.

    latlng: location as a 'lat,lon' string
    timestamp: value sent as the API's timestamp parameter (seconds since the epoch)
    timeout: seconds to wait for the server before giving up on this request

    Returns the response's 'timeZoneId' value, or None if the request fails, the
    response body is not valid JSON, or the response has no 'timeZoneId'.
    """
    # throttle every call so we do not hammer their server
    time.sleep(pause)
    url = u'https://maps.googleapis.com/maps/api/timezone/json'
    params = {'location': latlng, 'timestamp': timestamp, 'key': google_timezone_api_key}
    try:
        # let requests build and escape the query string; the timeout keeps a single
        # stalled connection from hanging the whole run
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # network failure or non-JSON body: leave this location unresolved rather than
        # aborting the lookups for every other location
        return None
    try:
        return data['timeZoneId']
    except (KeyError, TypeError):
        # no timeZoneId in the payload (e.g. the API reported an error status)
        return None
    
# one API request per unique lat-long; the result keeps the index labels of venue_latlngs_unique
timezones = venue_latlngs_unique.map(get_timezone_google)

In [7]:
# pair each unique lat-long with the timezone retrieved for it:
# the dict's key is the lat-long string and its value is the timezone
latlng_timezone = dict(zip(venue_latlngs_unique, timezones))

In [8]:
# for each row in the df, look up the lat-long in the dict to get the local timezone
def get_timezone_from_dict(venue_latlng):
    """
    Return the timezone stored in latlng_timezone for this lat-long string,
    or None when the dict has no entry for it.
    """
    try:
        return latlng_timezone[venue_latlng]
    except (KeyError, TypeError):
        # KeyError: this lat-long is not in the dict; TypeError: the value is unhashable.
        # anything else (e.g. KeyboardInterrupt) is no longer silently swallowed
        return None

# map every row's lat-long to its timezone, then discard the helper lat-long column
venue_timezone = df['venue_latlng'].map(get_timezone_from_dict)
df = df.assign(venue_timezone=venue_timezone).drop('venue_latlng', axis=1)

In [9]:
# fill each missing timezone from the row below it; the rows appear to run newest-first,
# so that is the next earlier check-in, which is more likely to be accurate than
# arbitrarily using a default timezone
# (rows at the very end with nothing below them stay missing)
# .bfill() is the direct equivalent of the deprecated fillna(method='bfill')
df['venue_timezone'] = df['venue_timezone'].bfill()

In [10]:
# convert each row's datetime to the local timezone of the venue i checked into
def localize_date_time(row):
    """
    Convert a row's check-in datetime to the timezone of its venue.

    row: mapping or Series with 'date_pacific_tz' (expected to be a timezone-aware
         datetime) and 'venue_timezone' (a timezone name, or a missing value)

    Returns the datetime converted to the venue's timezone, or None when the
    timezone is missing or the conversion fails.
    """
    date_time = row['date_pacific_tz']
    local_timezone = row['venue_timezone']
    # rows whose timezone was never retrieved or backfilled have nothing to convert to
    if pd.isnull(local_timezone):
        return None
    try:
        return date_time.astimezone(pytz.timezone(local_timezone))
    except Exception:
        # best effort: an unknown timezone name or an unconvertible datetime yields None,
        # but unlike a bare except this lets KeyboardInterrupt/SystemExit stop the run
        return None
    
# apply row by row (axis=1) so each check-in is converted with its own venue's timezone
df['date_local_tz'] = df.apply(localize_date_time, axis=1)

In [11]:
# preview the first 10 check-ins: each venue, its timezone, and the pacific vs. local datetimes
preview_columns = ['venue_name', 'venue_place', 'venue_timezone', 'date_pacific_tz', 'date_local_tz']
df[preview_columns].head(10)

Unnamed: 0,venue_name,venue_place,venue_timezone,date_pacific_tz,date_local_tz
0,Chateau Szibbo,"Berkeley, CA",America/Los_Angeles,2016-06-11 23:14:09-07:00,2016-06-11 23:14:09-07:00
1,Chateau Szibbo,"Berkeley, CA",America/Los_Angeles,2016-06-11 22:46:58-07:00,2016-06-11 22:46:58-07:00
2,Chateau Szibbo,"Berkeley, CA",America/Los_Angeles,2016-06-11 21:06:04-07:00,2016-06-11 21:06:04-07:00
3,Chateau Szibbo,"Berkeley, CA",America/Los_Angeles,2016-06-11 21:03:12-07:00,2016-06-11 21:03:12-07:00
4,Chateau Szibbo,"Berkeley, CA",America/Los_Angeles,2016-06-11 17:21:37-07:00,2016-06-11 17:21:37-07:00
5,Boeing Manor,"Berkeley, CA",America/Los_Angeles,2016-06-09 20:43:09-07:00,2016-06-09 20:43:09-07:00
6,Golden Monkey,"389 Lonsdale St. Melbourne, VIC",Australia/Hobart,2016-06-08 01:45:36-07:00,2016-06-08 18:45:36+10:00
7,Naked For Satan,"285 Brunswick St. Melbourne, VIC",Australia/Hobart,2016-06-07 22:39:59-07:00,2016-06-08 15:39:59+10:00
8,Town Hall Hotel,"166 Johnston St. Fitzroy, VIC",Australia/Hobart,2016-06-07 21:25:57-07:00,2016-06-08 14:25:57+10:00
9,Bowl Bowl,"88 Smith St Fitzroy, VIC",Australia/Hobart,2016-06-07 20:37:02-07:00,2016-06-08 13:37:02+10:00


In [12]:
# write the check-ins, now including venue timezone and local datetime, to csv without the index
output_path = 'data/untappd_details_geocoded_timezone.csv'
df.to_csv(output_path, index=False, encoding='utf-8')