Get the timezone for each venue's lat-long from the Google Maps timezone API: https://developers.google.com/maps/documentation/timezone/intro

Then convert each check-in's datetime to the timezone of the venue at which it was checked in.

In [1]:
import time, requests, pytz, pandas as pd
from keys import google_timezone_api_key
from dateutil import parser as date_parser

In [2]:
# define pause interval (in seconds) to not hammer their server;
# get_timezone_google() sleeps for this long before every API request
pause = 0.25

In [3]:
# read the geocoded check-ins, turn each datetime string into a datetime object,
# and build a single 'lat,lng' string column from the venue coordinates
def join_lat_lng(row):
    """Format one row's venue latitude and longitude as a 'lat,lng' string."""
    return '{},{}'.format(row['venue_lat'], row['venue_lon'])

df = pd.read_csv('data/untappd_details_geocoded.csv', encoding='utf-8')
df['date_pacific_tz'] = df['date_pacific_tz'].map(date_parser.parse)
df['venue_latlng'] = df.apply(join_lat_lng, axis=1)
df.head()

Unnamed: 0,date_pacific_tz,beer_name,beer_style,brewery_name,brewery_place,brewery_type,rating,beer_avg_rating,brewery_avg_rating,beer_abv,...,venue_lat,venue_lon,venue_total_checkins,checkin_id,beer_url,brewery_url,venue_url,brewery_lat,brewery_lon,venue_latlng
0,2016-06-09 20:43:09-07:00,Angel City IPA,IPA - American,Angel City Brewery,"Los Angeles, CA United States",Micro Brewery,3.5,3.56,3.47,6.1,...,37.8605,-122.271,,321628572,https://untappd.com/b/angel-city-brewery-angel...,https://untappd.com/w/angel-city-brewery/4406,https://untappd.com/v/boeing-manor/509841,34.052234,-118.243685,"37.8605,-122.271"
1,2016-06-08 01:45:36-07:00,Cascade Premium Light,Lager - Pale,Cascade Brewery Co. (Australia),"South Hobart, Tas. Australia",Macro Brewery,2.0,2.32,2.93,2.6,...,-37.8129,144.961,,321044810,https://untappd.com/b/cascade-brewery-co-austr...,https://untappd.com/w/cascade-brewery-co-austr...,https://untappd.com/v/golden-monkey/1201269,-42.894523,147.309491,"-37.8129,144.961"
2,2016-06-07 22:39:59-07:00,Naked Lager,Lager - Helles,Naked For Satan,Australia,Nano Brewery,3.75,3.47,3.24,4.2,...,-37.7988,144.978,,321038174,https://untappd.com/b/naked-for-satan-naked-la...,https://untappd.com/w/naked-for-satan/83569,https://untappd.com/v/naked-for-satan/86227,-25.274398,133.775136,"-37.7988,144.978"
3,2016-06-07 21:25:57-07:00,Golden Ale,Golden Ale,Venom Brewing,Australia,Micro Brewery,3.25,3.58,3.61,4.8,...,-37.7986,144.981,,321031246,https://untappd.com/b/venom-brewing-golden-ale...,https://untappd.com/w/venom-brewing/219040,https://untappd.com/v/town-hall-hotel/52668,-25.274398,133.775136,"-37.7986,144.981"
4,2016-06-07 20:37:02-07:00,Fat Yak,Pale Ale - American,Matilda Bay Brewing Company,"Port Melbourne, Vic. Australia",Micro Brewery,4.0,3.39,3.23,4.7,...,-37.8056,144.983,,321022902,https://untappd.com/b/matilda-bay-brewing-comp...,https://untappd.com/w/matilda-bay-brewing-comp...,https://untappd.com/v/bowl-bowl/4803853,-37.836926,144.94455,"-37.8056,144.983"


In [4]:
# how many total venue lat-longs are there, and how many unique lat-longs are there?
# print(...) with a single parenthesized argument behaves the same on Python 2 and Python 3
print(len(df['venue_latlng']))

venue_latlngs_unique = pd.Series(df['venue_latlng'].unique())
print(len(venue_latlngs_unique))

1430
427


In [5]:
# order the unique 'lat,lng' strings ascending; the sorted result is assigned back to the same name
venue_latlngs_unique = venue_latlngs_unique.sort_values(ascending=True)

In [6]:
# send each unique lat-long to the google timezone api to retrieve the local time zone id at that location
def get_timezone_google(latlng, timestamp=0):
    """
    Look up the time zone id of a location with the Google Maps Time Zone API.

    Parameters
    ----------
    latlng : str
        the location as a 'lat,lng' string
    timestamp : int
        passed through unchanged as the API's `timestamp` query argument

    Returns
    -------
    the `timeZoneId` value from the JSON response, or None if the request
    failed, the response was not valid JSON, or it contained no `timeZoneId`
    """
    # a pair built from missing coordinates presumably shows up as 'nan,nan';
    # it can never resolve, so skip the request (and the pause) entirely
    if 'nan' in str(latlng):
        return None

    time.sleep(pause)
    url = u'https://maps.googleapis.com/maps/api/timezone/json'
    # let requests build and URL-encode the query string
    params = {'location': latlng, 'timestamp': timestamp, 'key': google_timezone_api_key}
    try:
        # the timeout keeps one stalled connection from hanging the whole run
        response = requests.get(url, params=params, timeout=10)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # network failure or a non-JSON body: treat as "no timezone found"
        return None

    try:
        return data['timeZoneId']
    except (KeyError, TypeError):
        # error responses carry no timeZoneId
        return None

timezones = venue_latlngs_unique.map(get_timezone_google)

In [7]:
# build the lookup table: lat-long string -> timezone id, pairing the two series by index label
latlng_timezone = {venue_latlngs_unique[label]: timezones[label] for label in timezones.index}

In [8]:
# for each row in the df, look up the lat-long in the dict to get the local timezone
def get_timezone_from_dict(venue_latlng):
    """Return the timezone id stored for `venue_latlng`, or None if that lat-long has no entry."""
    # dict.get covers the missing-key case without a bare except, so a genuine problem
    # (e.g. latlng_timezone not being defined yet) raises instead of silently yielding None
    return latlng_timezone.get(venue_latlng)

df['venue_timezone'] = df['venue_latlng'].map(get_timezone_from_dict)
df = df.drop('venue_latlng', axis=1)

In [16]:
# where a venue has no timezone, back-fill it from the following row (the next earlier
# observation), which is more likely to be right than falling back on a default timezone
df['venue_timezone'] = df['venue_timezone'].bfill()

In [17]:
# convert each row's datetime to the local timezone of the venue i checked into
def localize_date_time(row):
    """
    Convert a row's `date_pacific_tz` value to the timezone named in its `venue_timezone`.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        must provide the keys 'date_pacific_tz' and 'venue_timezone'

    Returns
    -------
    the datetime expressed in the venue's timezone, or None if either value is
    missing, the timezone id is unknown, or the datetime cannot be converted
    """
    date_time = row['date_pacific_tz']
    local_timezone = row['venue_timezone']

    # nothing to convert if the timezone (or the datetime itself) is missing
    if pd.isnull(local_timezone) or pd.isnull(date_time):
        return None

    try:
        return date_time.astimezone(pytz.timezone(local_timezone))
    except (pytz.UnknownTimeZoneError, ValueError, TypeError):
        # unrecognized timezone id, or a datetime that cannot be converted (e.g. a naive one)
        return None

df['date_local_tz'] = df.apply(localize_date_time, axis=1)

In [18]:
# preview the first 10 check-ins: the venue, its timezone, and the datetime before and after conversion
preview_columns = ['venue_name', 'venue_place', 'venue_timezone', 'date_pacific_tz', 'date_local_tz']
df[preview_columns].head(10)

Unnamed: 0,venue_name,venue_place,venue_timezone,date_pacific_tz,date_local_tz
0,Boeing Manor,"Berkeley, CA",America/Los_Angeles,2016-06-09 20:43:09-07:00,2016-06-09 20:43:09-07:00
1,Golden Monkey,"389 Lonsdale St. Melbourne, VIC",Australia/Hobart,2016-06-08 01:45:36-07:00,2016-06-08 18:45:36+10:00
2,Naked For Satan,"285 Brunswick St. Melbourne, VIC",Australia/Hobart,2016-06-07 22:39:59-07:00,2016-06-08 15:39:59+10:00
3,Town Hall Hotel,"166 Johnston St. Fitzroy, VIC",Australia/Hobart,2016-06-07 21:25:57-07:00,2016-06-08 14:25:57+10:00
4,Bowl Bowl,"88 Smith St Fitzroy, VIC",Australia/Hobart,2016-06-07 20:37:02-07:00,2016-06-08 13:37:02+10:00
5,Nieuw Amsterdam,"106-112 Hardware St Melbourne, VIC",Australia/Hobart,2016-06-07 02:49:24-07:00,2016-06-07 19:49:24+10:00
6,Great Ocean Road Resort,"105 Great Ocean Road Anglesea, VIC",Australia/Hobart,2016-06-05 02:41:25-07:00,2016-06-05 19:41:25+10:00
7,,,Australia/Sydney,2016-06-03 15:22:43-07:00,2016-06-04 08:22:43+10:00
8,Trinity Bar,"505 Crown St. Surry Hills, NSW",Australia/Sydney,2016-06-01 05:00:48-07:00,2016-06-01 22:00:48+10:00
9,Trinity Bar,"505 Crown St. Surry Hills, NSW",Australia/Sydney,2016-06-01 04:11:27-07:00,2016-06-01 21:11:27+10:00


In [20]:
# write the check-ins, including the venue timezone and local datetime columns, to a new csv
output_path = 'data/untappd_details_geocoded_timezone.csv'
df.to_csv(output_path, index=False, encoding='utf-8')