### Filling in the large gaps for beers that have no global ratings or descriptions

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
df = pd.read_csv('bigframe.csv')

#### For each User/Beer combination with multiple check-ins, keep only the most recent one

In [3]:
# Collapse repeat check-ins to one row per (user, beer) pair.  keep='first' is
# the pandas default and retains the first matching row in the frame's order.
# NOTE(review): that row is the most recent check-in only if bigframe.csv is
# ordered newest-first — confirm.
df = df.drop_duplicates(subset=['user_id', 'beer_id'], keep='first')

df.shape  # rows remaining, down from ~1.42M before de-duplication

(1296064, 27)

In [4]:
zero_globs = df[df.rating_global == 0]
zero_globs.shape

(2519, 27)

In [5]:
null_globs = df[df.rating_global.isnull()]
len(null_globs)

203941

In [6]:
null_globs.beer_id.nunique()

128147

In [7]:
df.beer_id.nunique()  # so half the beers don't have globrates

264620

First, make sure that no beer with a missing global rating also appears with one elsewhere, i.e. that there is nothing to fill in from `globs` into `null_globs`:

In [9]:
globs = df[df.rating_global.notnull()]

In [10]:
# Distinct beer IDs on each side of the null / non-null rating_global split
null_ids = set(null_globs['beer_id'])
glob_ids = set(globs['beer_id'])
# IDs present on both sides; an empty result means nothing can be back-filled
overlaps = null_ids & glob_ids
len(overlaps)

0

### So half the beers have global ratings and descriptions, and they account for roughly 1M check-ins.  The other half have neither global ratings nor descriptions, but they are barely rated (about 200K check-ins).  There are diminishing returns on querying these 128K beers, but if we query for a few days, starting with the IPAs, we may get some useful data and a few hundred thousand more check-ins.

In [2]:
from api_keys import untappd_URL, untappd_ID, untappd_SECRET   # private, local constants

import pickle
import requests
import time

In [3]:
# Make a call to untappd's API to get Beer Info data for a specific Beer ID.
# This function is used by the main data getter, below (beerInfoBatch()).
def searchBeerInfo(beerInfo_params, bid, timeout=30):
    """Query untappd's `/beer/info/<bid>` endpoint.

    Parameters
    ----------
    beerInfo_params : dict
        Query-string parameters sent with the GET request.
    bid : int or str
        Beer ID appended to the endpoint path.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that a stalled
        connection cannot hang a long-running batch.

    Returns
    -------
    tuple
        `(remaining_calls, response)` on success, where `remaining_calls` is
        the value of the `X-Ratelimit-Remaining` header (None if the header
        is absent) and `response` is the `requests.Response` object.
        `(0, 0)` on any failure, after printing a diagnostic message.
    """
    # build the query string
    query = untappd_URL + '/beer/info/' + str(bid)

    try:
        response = requests.get(query, params=beerInfo_params, timeout=timeout)
    except requests.RequestException as err:
        # Only the exception type is printed: the exception text can embed the
        # full request URL, which carries the client credentials.
        print(f"That GET request for beer {bid} raised {type(err).__name__}")
        return 0, 0

    # parse the response
    if response:  # a Response is truthy when its status code is below 400
        return response.headers.get('X-Ratelimit-Remaining'), response

    # Report the failure without echoing the API credentials into the output.
    safe_params = {
        key: ('<redacted>' if key in ('client_id', 'client_secret') else value)
        for key, value in beerInfo_params.items()
    }
    print(f"That GET request for beer {bid} with params={safe_params} "
          f"failed, with code: {response.status_code}")
    try:
        print(response.json())
    except ValueError:
        # The error body is not JSON; show the start of the raw text instead.
        print(response.text[:500])
    return 0, 0

In [4]:
def beerInfoBatch(bid, builderDict, numCalls=1):
    '''
    This method makes @numCalls queries to untappd's
    Beer Info endpoint, for beer @bid.

    The decoded JSON payload of each successful call is stored in the
    @builderDict passed into this method, under the key @bid.  The whole
    payload is stored (not only its 'response' member), and repeated calls
    for the same @bid overwrite one another.

    Returns None.  Prints a summary line once the calls are done.  Stops
    early, after printing a message, when a request fails or when its body
    cannot be decoded as JSON.
    '''
    if numCalls < 1:
        # Guard: with no iterations the summary line below would reference
        # values that were never bound.
        print('Completed 0 calls.')
        return

    # the max 'limit' accepted by untappd is 50
    params = {'limit': 50, 'client_id': untappd_ID, 'client_secret': untappd_SECRET}

    completed = 0
    calls_left = None
    for _ in range(numCalls):
        calls_left, response = searchBeerInfo(params, bid)
        if not response:
            print('NO RESPONSE')
            return

        try:
            payload = response.json()
        except ValueError:
            # requests raises a ValueError subclass when the body is not JSON
            print(f'The response for beer {bid} was not valid JSON.')
            print(f'There are {calls_left} calls left.')
            print(f'The beerID was {bid}')
            break

        builderDict[bid] = payload
        completed += 1

    print(f'Completed {completed} calls. Hourly calls remaining: {calls_left}')
    
    
      

=================================================================================================================  

Code to initialize the dict that stores the data

In [18]:
# One-off Beer Info request for the first beer that lacks a global rating
bid = null_globs['beer_id'].iloc[0]
beerInfo_params = {
    'limit': 100,  # limit turned out to be 50
    'client_id': untappd_ID,
    'client_secret': untappd_SECRET,
}
method_endpoint = '/beer/info/' + str(bid)
query = untappd_URL + method_endpoint
response = requests.get(query, params=beerInfo_params)

In [19]:
resp = response.json()

resp.keys()

In [29]:
resp['response']['beer']['beer_name']

'Richmond Dark Lager'

In [30]:
# initiate a dict to hold beer info
beerInfoDicts = {bid: resp['response']}

I would rather start with the IPAs than with the Dark Lagers, if possible, so I will sort that out here.

In [32]:
# Restrict to beers whose style label begins with 'IPA'.  na=False treats a
# missing beer_style as a non-match, so the boolean mask never contains NaN
# (pandas refuses to index with a mask that has missing values).
null_glob_ipas = null_globs[null_globs.beer_style.str.startswith('IPA', na=False)]
null_glob_ipas.beer_id.nunique()

34883

OK, so that's a good target group

In [53]:
ipa_id_gen = (x for x in null_glob_ipas.beer_id.unique())

In [22]:
# Query one beer per iteration.  The range argument divided by about 100 is
# how many hours this cell will take.
# NOTE(review): beerInfoDicts_6 is not created anywhere in the visible cells;
# it must already exist in the kernel before this cell runs — confirm.
for batch in range(1200):
    # A default of None stops the loop cleanly once the IDs run out, instead
    # of letting StopIteration abort the cell.
    bID = next(ipa_id_gen, None)
    if bID is None:
        print(f'No beer IDs left after {batch} batches.')
        break
    beerInfoBatch(bID, beerInfoDicts_6, numCalls=1)
    print(f'Batch {batch} finished at {time.asctime()[11:16]}')
    time.sleep(36)  # 1 call per 36 secs = 100 calls per hour, with 100 being the API limit

Completed 1 calls. Hourly calls remaining: 11
Batch 0 finished at 08:16
Completed 1 calls. Hourly calls remaining: 11
Batch 1 finished at 08:17
Completed 1 calls. Hourly calls remaining: 11
Batch 2 finished at 08:18
Completed 1 calls. Hourly calls remaining: 11
Batch 3 finished at 08:18
Completed 1 calls. Hourly calls remaining: 11
Batch 4 finished at 08:19
Completed 1 calls. Hourly calls remaining: 11
Batch 5 finished at 08:19
Completed 1 calls. Hourly calls remaining: 11
Batch 6 finished at 08:20
Completed 1 calls. Hourly calls remaining: 11
Batch 7 finished at 08:21
Completed 1 calls. Hourly calls remaining: 11
Batch 8 finished at 08:21
Completed 1 calls. Hourly calls remaining: 11
Batch 9 finished at 08:22
Completed 1 calls. Hourly calls remaining: 11
Batch 10 finished at 08:23
Completed 1 calls. Hourly calls remaining: 11
Batch 11 finished at 08:23
Completed 1 calls. Hourly calls remaining: 11
Batch 12 finished at 08:24
Completed 1 calls. Hourly calls remaining: 11
Batch 13 finish

In [23]:
len(beerInfoDicts_6)

2195

In [24]:
# pickle the beerInfoDict
with open('beerInfoDict6.pkl', 'wb') as f:
    pickle.dump(beerInfoDicts_6, f)

In [25]:
ls -l ./beerInfoDict6.pkl

-rw-r--r--  1 ethanhaley  staff  359422923 Dec 29 21:00 ./beerInfoDict6.pkl


In [16]:
# To avoid lengthy repickling every day:
beerInfoDicts_7 = dict()

In [5]:
def unpickle_gen(pickled_filename):
    """Load a pickled sequence from disk and hand it back as a generator.

    The file is read in full and closed before this function returns; the
    returned generator yields the stored items in their stored order.

    Note: pickle.load can execute arbitrary code, so only use this on files
    written by a trusted source.
    """
    with open(pickled_filename, 'rb') as pkl_file:
        stored_items = pickle.load(pkl_file)
    return (entry for entry in stored_items)

def pickle_gen(gen, to_pkl_filename):
    """Write the items still left in a generator to a pickle file, in order.

    The generator is consumed in the process: its remaining items are
    collected into a list, and that list is what gets pickled.
    """
    with open(to_pkl_filename, 'wb') as pkl_file:
        remaining_items = list(gen)
        pickle.dump(remaining_items, pkl_file)

In [26]:
pickle_gen(ipa_id_gen, 'ipagen.pkl')

In [6]:
ipa_id_gen = unpickle_gen('ipagen.pkl')