In [34]:
from api_keys import untappd_URL, untappd_ID, untappd_SECRET   # private, local constants

import pickle
import requests
import time

In [42]:
# make a call to untappd's API to get User Feed data for a specific User ID
# This method is used in the main data getter, below (userFeedBatch)
def searchUserFeeds(userFeed_params, uid):
    """Fetch one page of a user's check-in feed from the untappd API.

    Parameters
    ----------
    userFeed_params : dict
        Query-string parameters sent with the GET request.
    uid : str or int
        User identifier appended to the '/user/checkins/' endpoint.

    Returns
    -------
    tuple
        (remaining_calls, response) when the request succeeds, where
        remaining_calls is the value of the 'X-Ratelimit-Remaining' header
        (None if the header is absent) and response is the requests.Response.
        (0, 0) when the server answers with an error status, so callers can
        test the second element for truthiness.

    Raises
    ------
    requests.exceptions.RequestException
        On a network failure, or if the server does not answer within the timeout.
    """
    # build the query string
    query = untappd_URL + '/user/checkins/' + str(uid)
    # A timeout keeps a stalled connection from hanging a multi-hour run forever.
    response = requests.get(query, params=userFeed_params, timeout=30)

    # A Response is truthy when its status code is below 400.
    if response:
        return response.headers.get('X-Ratelimit-Remaining'), response

    # Never echo API credentials into the (shareable) notebook output.
    safe_params = {
        key: ('<redacted>' if key in ('client_id', 'client_secret') else value)
        for key, value in userFeed_params.items()
    }
    print(f"That GET request for user {uid} with params={safe_params} "
          f"failed, with code: {response.status_code}")
    # Error bodies are not guaranteed to be JSON (e.g. an HTML gateway error page).
    try:
        print(response.json())
    except ValueError:
        print(response.text)
    return 0, 0

In [43]:
def userFeedBatch(uid, builderDict, olderthan=None, newerthan=None, numCalls=5):
    '''
    Make up to @numCalls queries to untappd's User Feed for user @uid,
    using either @olderthan or @newerthan to let untappd know where to start
    (going back in time from @olderthan, which is untappd's "max_id" param)
    or end (going from now to @newerthan, which is "min_id").

    Each page received is appended to builderDict[uid]['datalist'];
    builderDict[uid]['oldestID'] is advanced to the page's pagination max_id,
    and builderDict[uid]['newestID'] is set once, from the first check-in seen.

    The loop stops early when:
      * a request fails (prints 'NO RESPONSE' and returns immediately),
      * a page lacks an expected key (diagnostics are printed; repeating the
        same request would only burn rate-limited calls),
      * a page holds fewer than 'limit' check-ins (the feed is exhausted).

    Returns None; all results are written into @builderDict.
    '''
    # the max 'limit' accepted by untappd is 50
    params = {'limit':50, 'max_id': olderthan, 'min_id': newerthan, 'client_id':untappd_ID, 'client_secret':untappd_SECRET}

    # Defined up front so the summary line below also works when numCalls <= 0.
    calls_made = 0
    calls_left = None

    for _ in range(numCalls):
        calls_left, response = searchUserFeeds(params, uid)
        if not response:
            print('NO RESPONSE')
            return
        calls_made += 1
        resp = response.json()
        # start pagination and rate limit bookkeeping
        try:
            new_oldest = resp['response']['pagination']['max_id']
            params['max_id'] = new_oldest
            builderDict[uid]['oldestID'] = new_oldest
            builderDict[uid]['datalist'].append(resp)
            checkins = resp['response']['checkins']
            # this happens only first time; mark in this user's dict the youngest checkin seen
            # (an empty page has no first item, so leave newestID unset in that case)
            if builderDict[uid]['newestID'] is None and checkins['items']:
                builderDict[uid]['newestID'] = checkins['items'][0]['checkin_id']
            page_count = checkins['count']
        except KeyError as kerr:
            print(f'No {kerr} was included in the response.')
            print(f'There are {calls_left} calls left.')
            print(f'The max_id passed in was {olderthan}.')
            print(f'The userID was {uid}')
            # The page is malformed, so pagination state cannot be trusted:
            # stop instead of re-sending the same request or crashing below.
            break

        if page_count < params['limit']:  # ran out of items, so break
            print(f"Last call response had a count of {page_count}")
            break

    print(f'Completed {calls_made} calls. Hourly calls remaining: {calls_left}')
    
    
      

#### Pickling utilities for generators

In [26]:
def unpickle_gen(pickled_filename):
    """Load a pickled sequence from disk and expose it as a generator.

    The file is read in full before returning; the generator then yields the
    stored elements one at a time, in their original order.
    """
    with open(pickled_filename, 'rb') as pkl_file:
        stored_items = pickle.load(pkl_file)
    return (element for element in stored_items)

def pickle_gen(gen, to_pkl_filename):
    """Exhaust a generator and pickle its elements, in order, as a list.

    Note that @gen is fully consumed by this call.
    """
    materialized = list(gen)
    with open(to_pkl_filename, 'wb') as pkl_file:
        pickle.dump(materialized, pkl_file)

In [29]:
# Load the saved user ordering as a generator. Per the original note, users are ordered
# by highest total IPA check-ins on the first 607 IPAs.
# The main loop below pulls users from it with next(), so its position is lost
# (and restarts from the first user) whenever this cell is re-run.
ipa_user_gen = unpickle_gen('capstone_1/ipa_user_gen.pkl')

### Main loop for building User Feed dataset

In [28]:
# Load a previously saved, partially filled UserFeedDict from disk.
# In the cells shown here only its keys (the user IDs) are used afterwards.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('capstone_1/userFeeds/userFeedDicts_part_5.pkl', 'rb') as f:
    userFeedDicts_part_5 = pickle.load(f)

When the current dict reaches about 500 MB and the notebook starts to slow down, use the cells below to initialize a new, empty dict.

In [74]:
# Every dict has every User who checked in on the first 607 IPAs (roughly 38K people).
# .keys() returns a live view of userFeedDicts_part_5, not an independent copy.
all_38K_users = userFeedDicts_part_5.keys()

In [100]:
# Build a fresh storage dict with one empty record per user in all_38K_users.
#  This dict is handed to the main GET routine, which accumulates User Feeds in it.
userFeedDicts_part_7 = dict(
    (uID, {'newestID': None, 'oldestID': None, 'datalist': []})
    for uID in all_38K_users
)

In [101]:
# Update this as files get too large, to use (fill) in the main GET routine, below.
# This is an alias, not a copy: filling current_dict fills userFeedDicts_part_7 itself.
current_dict = userFeedDicts_part_7

In [96]:
#  ======= Can get about 500 users/day with 200 reviews per user = 100K checkins/day ============

#time.sleep(600)  # in case the call rate is too fast for the API, use this when restarting process
for batch in range(200):  # The argument here divided by about 23 is how many hours this cell will take
    # Passing a default to next() avoids an uncaught StopIteration once every
    # user in the generator has been served.
    uID = next(ipa_user_gen, None)
    if uID is None:
        print(f'ipa_user_gen is exhausted; stopped after {batch} batches.')
        break
    #                     (newerthan=current_dict[uID]['newestID'])   to fill in newer
    # Resume each user from the oldest check-in already stored for them.
    userFeedBatch(uID, current_dict, olderthan=current_dict[uID]['oldestID'], numCalls=4)
    print(f"Batch {batch} finished at {time.strftime('%H:%M')}")
    time.sleep(150)  # 4 calls per 150 secs = 96 calls per hour, with 100 being the API limit

Completed 4 calls. Hourly calls remaining: 32
Batch 0 finished at 17:10
Completed 4 calls. Hourly calls remaining: 32
Batch 1 finished at 17:12
Completed 4 calls. Hourly calls remaining: 29
Batch 2 finished at 17:15
Completed 4 calls. Hourly calls remaining: 29
Batch 3 finished at 17:18
Completed 4 calls. Hourly calls remaining: 29
Batch 4 finished at 17:20
Completed 4 calls. Hourly calls remaining: 29
Batch 5 finished at 17:23
Completed 4 calls. Hourly calls remaining: 29
Batch 6 finished at 17:25
Completed 4 calls. Hourly calls remaining: 29
Batch 7 finished at 17:28
Last call response had a count of 8
Completed 1 calls. Hourly calls remaining: 32
Batch 8 finished at 17:30
Completed 4 calls. Hourly calls remaining: 32
Batch 9 finished at 17:33
Completed 4 calls. Hourly calls remaining: 32
Batch 10 finished at 17:36
Completed 4 calls. Hourly calls remaining: 32
Batch 11 finished at 17:38
Completed 4 calls. Hourly calls remaining: 32
Batch 12 finished at 17:41
Completed 4 calls. Hourly

In [97]:
# pickle a UserFeedDict
# NOTE(review): no cell visible in this notebook defines userFeedDicts_part_6, and
# current_dict above points at userFeedDicts_part_7, so this cell appears to depend
# on kernel state from an earlier run. Confirm the part number (variable AND filename)
# before executing, to avoid a NameError or overwriting the wrong file.
with open('capstone_1/userFeeds/userFeedDicts_part_6.pkl', 'wb') as f:
    pickle.dump(userFeedDicts_part_6, f)

In [98]:
# Checking updated file sizes (IPython shell escape; lists the saved pickle files)
!ls -l capstone_1/userFeeds/

total 5386456
-rw-r--r--  1 ethanhaley  staff  542487840 Oct 24 13:16 userFeedDicts.pkl
-rw-r--r--  1 ethanhaley  staff  471741274 Oct 26 12:28 userFeedDicts_part_2.pkl
-rw-r--r--  1 ethanhaley  staff  501215060 Oct 28 09:46 userFeedDicts_part_3.pkl
-rw-r--r--  1 ethanhaley  staff  413816034 Oct 29 21:21 userFeedDicts_part_4.pkl
-rw-r--r--  1 ethanhaley  staff  430760177 Oct 31 10:35 userFeedDicts_part_5.pkl
-rw-r--r--  1 ethanhaley  staff  397829802 Nov  1 22:22 userFeedDicts_part_6.pkl


In [99]:
# Count the users in part 6 whose 'datalist' holds at least one stored feed page
users_6 = [uid for uid, feed in userFeedDicts_part_6.items() if feed['datalist']]
len(users_6)

659