# Part 4: Get Untappd Check-In Venue Time Zones

Get the timezone for each venue's lat-long from the Google Maps timezone API: https://developers.google.com/maps/documentation/timezone/intro

Then convert each check-in's datetime from Pacific time to the local time zone of the venue where the check-in was made.

In [1]:
import time, requests, pytz, pandas as pd
from keys import google_timezone_api_key
from dateutil import parser as date_parser

In [2]:
# pause interval, in seconds, slept before each API request so as not to hammer their server
pause = 0.25

In [3]:
# read the geocoded check-ins, turn each datetime string into a datetime object,
# and join every row's venue lat and long into one 'lat,lon' string column
df = pd.read_csv('data/untappd_details_geocoded.csv', encoding='utf-8')
df['date_pacific_tz'] = df['date_pacific_tz'].map(date_parser.parse)
df['venue_latlng'] = ['{},{}'.format(lat, lon) for lat, lon in zip(df['venue_lat'], df['venue_lon'])]
df.head()

Unnamed: 0,date_pacific_tz,beer_name,beer_style,brewery_name,brewery_place,brewery_type,rating,beer_avg_rating,brewery_avg_rating,beer_abv,...,venue_lat,venue_lon,venue_total_checkins,checkin_id,beer_url,brewery_url,venue_url,brewery_lat,brewery_lon,venue_latlng
0,2016-07-04 18:59:34-07:00,McRed,Red Ale - American Amber / Red,Humboldt Regeneration,"McKinleyville, CA United States",Nano Brewery,3.75,3.25,3.77,5.4,...,,,,332182456,https://untappd.com/b/humboldt-regeneration-mc...,https://untappd.com/w/humboldt-regeneration/49462,,40.946515,-124.100621,"nan,nan"
1,2016-07-03 19:00:03-07:00,Earth Thirst,IPA - Imperial / Double,Eel River Brewing Co.,"Fortuna, CA United States",Micro Brewery,2.5,3.68,3.56,8.2,...,40.5793,-124.153,3649.0,331728849,https://untappd.com/b/eel-river-brewing-co-ear...,https://untappd.com/w/eel-river-brewing-co/481,https://untappd.com/v/eel-river-brewing-compan...,40.598187,-124.157276,"40.5793,-124.153"
2,2016-07-03 18:58:48-07:00,Organic Amber Ale,Red Ale - American Amber / Red,Eel River Brewing Co.,"Fortuna, CA United States",Micro Brewery,2.5,3.36,3.56,4.8,...,40.5793,-124.153,3649.0,331728103,https://untappd.com/b/eel-river-brewing-co-org...,https://untappd.com/w/eel-river-brewing-co/481,https://untappd.com/v/eel-river-brewing-compan...,40.598187,-124.157276,"40.5793,-124.153"
3,2016-07-03 18:04:39-07:00,Emerald Triangle IPA,IPA - American,Eel River Brewing Co.,"Fortuna, CA United States",Micro Brewery,4.0,3.56,3.56,6.7,...,40.5793,-124.153,3649.0,331692205,https://untappd.com/b/eel-river-brewing-co-eme...,https://untappd.com/w/eel-river-brewing-co/481,https://untappd.com/v/eel-river-brewing-compan...,40.598187,-124.157276,"40.5793,-124.153"
4,2016-07-03 18:03:50-07:00,Blonde Ale,Blonde Ale,Eelriver Brewing Company,United States,Macro Brewery,3.5,3.54,3.59,5.8,...,40.5793,-124.153,3649.0,331691575,https://untappd.com/b/eelriver-brewing-company...,https://untappd.com/w/eelriver-brewing-company...,https://untappd.com/v/eel-river-brewing-compan...,37.09024,-95.712891,"40.5793,-124.153"


In [4]:
# compare the number of venue lat-long values (one per check-in) with the number of distinct ones
all_latlngs = df['venue_latlng']
print(all_latlngs.size)

venue_latlngs_unique = pd.Series(all_latlngs.unique())
print(venue_latlngs_unique.size)

1480
439


In [5]:
# sort the unique lat-long values; they are strings, so the order is lexicographic, not numeric
venue_latlngs_unique = venue_latlngs_unique.sort_values()

In [6]:
# send each unique lat-long to the google timezone api to retrieve the local time zone id at that location
def get_timezone_google(latlng, timestamp=0):
    """
    Ask the Google Maps timezone API for the time zone id at a location.

    latlng: string of the form 'lat,lng'
    timestamp: seconds since the epoch, passed through to the API (default 0)

    Returns the 'timeZoneId' string from the API response, or None when the
    coordinates are missing or the response carries no time zone id. Network
    failures and non-JSON responses raise, so a broken run is not mistaken for
    a batch of venues without a timezone.
    """
    # venues without coordinates are formatted as 'nan,nan'; the API cannot resolve
    # them, so skip both the pause and the request
    if 'nan' in str(latlng).lower():
        return None

    time.sleep(pause)
    url = 'https://maps.googleapis.com/maps/api/timezone/json'
    # let requests build and URL-encode the query string
    params = {'location': latlng, 'timestamp': timestamp, 'key': google_timezone_api_key}
    # a timeout keeps one stalled connection from hanging the whole run
    response = requests.get(url, params=params, timeout=10)
    data = response.json()
    try:
        return data['timeZoneId']
    except (KeyError, TypeError):
        # error responses (bad request, over quota, ...) carry no timeZoneId
        return None
    
# look up the timezone once per unique lat-long rather than once per check-in
timezones = venue_latlngs_unique.map(get_timezone_google)

In [7]:
# map every unique lat-long string to the timezone retrieved for it, matching the two by index label
latlng_timezone = {venue_latlngs_unique[label]: timezone for label, timezone in timezones.items()}

In [8]:
# for each row in the df, look up the lat-long in the dict to get the local timezone
def get_timezone_from_dict(venue_latlng, lookup=None):
    """
    Look up the timezone for a venue's lat-long string.

    venue_latlng: the 'lat,lng' string to look up
    lookup: optional dict of lat-long string -> timezone; defaults to the
        notebook-level latlng_timezone dict

    Returns the timezone stored for venue_latlng, or None if there is none.
    """
    table = latlng_timezone if lookup is None else lookup
    try:
        return table.get(venue_latlng)
    except TypeError:
        # an unhashable key can never be in the dict
        return None

# attach each check-in's timezone, then discard the helper lat-long column
df = (
    df.assign(venue_timezone=df['venue_latlng'].map(get_timezone_from_dict))
      .drop(columns=['venue_latlng'])
)

In [9]:
# fill each missing timezone from the next row down. the rows appear to be ordered newest-first,
# so that is the next earlier check-in, which is more likely to be accurate than a default timezone.
# missing values at the very end of the frame have no later row to copy from and stay missing.
# (.bfill() replaces fillna(method='bfill'), which newer pandas versions deprecate)
df['venue_timezone'] = df['venue_timezone'].bfill()

In [10]:
# convert each row's datetime to the local timezone of the venue i checked into
def localize_date_time(row):
    """
    Convert a check-in's datetime to the time zone of its venue.

    row: a DataFrame row (or any mapping) providing 'date_pacific_tz' (a datetime)
        and 'venue_timezone' (a time zone id such as 'America/Los_Angeles')

    Returns the datetime expressed in the venue's time zone, or None when the
    timezone is missing or unknown, or the value cannot be converted.
    """
    date_time = row['date_pacific_tz']
    local_timezone = row['venue_timezone']

    # a missing timezone arrives as None or NaN (NaN is the only value not equal to itself)
    if local_timezone is None or local_timezone != local_timezone:
        return None

    try:
        return date_time.astimezone(pytz.timezone(local_timezone))
    except (pytz.UnknownTimeZoneError, AttributeError, TypeError, ValueError):
        # unknown zone id, or a value that is not a convertible datetime
        return None
    
# apply row by row (axis=1); rows whose datetime cannot be converted get None
df['date_local_tz'] = df.apply(localize_date_time, axis=1)

In [11]:
# preview the first 10 check-ins: venue, timezone, and the Pacific vs. local datetimes
preview_columns = ['venue_name', 'venue_place', 'venue_timezone', 'date_pacific_tz', 'date_local_tz']
df[preview_columns].head(10)

Unnamed: 0,venue_name,venue_place,venue_timezone,date_pacific_tz,date_local_tz
0,,,America/Los_Angeles,2016-07-04 18:59:34-07:00,2016-07-04 18:59:34-07:00
1,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 19:00:03-07:00,2016-07-03 19:00:03-07:00
2,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 18:58:48-07:00,2016-07-03 18:58:48-07:00
3,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 18:04:39-07:00,2016-07-03 18:04:39-07:00
4,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 18:03:50-07:00,2016-07-03 18:03:50-07:00
5,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 17:49:15-07:00,2016-07-03 17:49:15-07:00
6,Eel River Brewing Company,,America/Los_Angeles,2016-07-03 17:44:47-07:00,2016-07-03 17:44:47-07:00
7,Six Rivers Brewery,,America/Los_Angeles,2016-07-03 16:07:21-07:00,2016-07-03 16:07:21-07:00
8,Humboldt Regeneration Brewery & Farm,,America/Los_Angeles,2016-07-03 15:33:19-07:00,2016-07-03 15:33:19-07:00
9,Humboldt Regeneration Brewery & Farm,,America/Los_Angeles,2016-07-03 15:25:04-07:00,2016-07-03 15:25:04-07:00


In [12]:
# save the check-ins, now including venue timezones and local datetimes, to csv (index not written)
df.to_csv('data/untappd_details_geocoded_timezone.csv', index=False, encoding='utf-8')