In [1]:
import pandas as pd
import requests
import json
# Assign private constants from a separate file, for API calls to brewerydb and untappd
from api_keys import prodKEY, prodURL, untappd_ID, untappd_SECRET, untappd_URL

### Start by getting all beer styles listed on breweryDB

In [3]:
# Fetch every beer style BreweryDB knows about.  The key goes through `params` so that
# requests URL-encodes it, and a timeout keeps the cell from hanging on a dead connection.
styles_response = requests.get(prodURL + 'styles', params={'key': prodKEY}, timeout=30)
# Fail here with the HTTP status rather than later with a KeyError on 'data'.
styles_response.raise_for_status()
all_styles = styles_response.json()
# parse that JSON response into a readable list of (style id, style name) pairs
style_list = [(style['id'], style['name']) for style in all_styles['data']]

print(len(style_list))
style_list

181


[(1, 'Classic English-Style Pale Ale'),
 (2, 'English-Style India Pale Ale'),
 (3, 'Ordinary Bitter'),
 (4, 'Special Bitter or Best Bitter'),
 (5, 'Extra Special Bitter'),
 (6, 'English-Style Summer Ale'),
 (7, 'Scottish-Style Light Ale'),
 (8, 'Scottish-Style Heavy Ale'),
 (9, 'Scottish-Style Export Ale'),
 (10, 'English-Style Pale Mild Ale'),
 (11, 'English-Style Dark Mild Ale'),
 (12, 'English-Style Brown Ale'),
 (13, 'Old Ale'),
 (14, 'Strong Ale'),
 (15, 'Scotch Ale'),
 (16, 'British-Style Imperial Stout'),
 (17, 'British-Style Barley Wine Ale'),
 (18, 'Brown Porter'),
 (19, 'Robust Porter'),
 (20, 'Sweet or Cream Stout'),
 (21, 'Oatmeal Stout'),
 (22, 'Irish-Style Red Ale'),
 (23, 'Classic Irish-Style Dry Stout'),
 (24, 'Foreign (Export)-Style Stout'),
 (25, 'American-Style Pale Ale'),
 (26, 'Fresh "Wet" Hop Ale'),
 (27, 'Pale American-Belgo-Style Ale'),
 (28, 'Dark American-Belgo-Style Ale'),
 (29, 'American-Style Strong Pale Ale'),
 (30, 'American-Style India Pale Ale'),
 (31, 

In [4]:
# The six styles this project focuses on, chosen from the style list above.
# IDs 26 and 30 can be checked against the displayed part of that list; the other names
# are paired with their IDs by listing order -- verify against the full list.
ipas = [
    26,   # Fresh "Wet" Hop Ale
    30,   # American-Style India Pale Ale
    31,   # Imperial or Double India Pale Ale
    171,  # Juicy or Hazy Pale Ale
    172,  # Juicy or Hazy India Pale Ale
    173,  # Juicy or Hazy Imperial or Double India Pale Ale
]

### Using those 6 style IDs, amass the IPAs that breweryDB has listed

In [37]:
# The breweryDB API returns beers 50 per page
def getBeerPageByStyle(styleID, page):
    """Fetch one page of beers of a given style from the breweryDB `beers/` endpoint.

    Args:
        styleID: breweryDB style id (converted to str for the query).
        page: page number to request (converted to str for the query).

    Returns:
        A 2-tuple.  On success: (value of the `X-Ratelimit-Remaining` response header,
        or None if the header is absent; the decoded JSON payload).  On any failure
        (HTTP status >= 400, or no response at all): (0, 0), after printing a
        diagnostic that never contains the API key.
    """
    method = 'beers/'
    query = prodURL + method
    params = {'styleId': str(styleID), 'p': str(page), 'key': prodKEY,
              'withSocialAccounts': 'Y', 'withIngredients': 'Y', 'withBreweries': 'Y'}
    # Notebook output gets saved and shared, so keep the private key out of anything printed.
    loggable_params = {name: value for name, value in params.items() if name != 'key'}

    try:
        # The timeout keeps a stalled connection from hanging a long multi-page harvest.
        response = requests.get(query, params=params, timeout=30)
    except requests.exceptions.RequestException as err:
        # Only the exception type is printed: the message usually embeds the full URL,
        # query string (and therefore the key) included.
        print(f"That GET request with params={loggable_params} failed before any response: "
              f"{type(err).__name__}")
        return 0, 0

    # parse the response
    if response.ok:  # True for any status code below 400
        # 200 API calls per hour is the limit
        remaining_calls = response.headers.get('X-Ratelimit-Remaining')
        return remaining_calls, response.json()

    print(f"That GET request with params={loggable_params} failed, with code: {response.status_code}")
    try:
        print(response.json())
    except ValueError:
        # Error pages are not guaranteed to be JSON; fall back to the raw body.
        print(response.text)
    return 0, 0
    

In [23]:
# Sample response for style 30 (American IPA), page 1.
# `resp` is kept as a notebook-level variable; the cells below inspect its structure.
calls_left, resp = getBeerPageByStyle(30, 1)

# calls_left is read from the X-Ratelimit-Remaining response header.
# NOTE(review): getBeerPageByStyle's comment describes the limit as per hour, while this
# message says "daily" -- confirm which one applies.
print(f'You have {calls_left} daily calls left.')
resp

You have 196 hourly calls left.


{'currentPage': 1,
 'data': [{'abv': '7.5',
   'available': {'description': 'Limited availability.',
    'id': 2,
    'name': 'Limited'},
   'availableId': 2,
   'breweries': [{'brewersAssociation': {'brewersAssocationId': 'HLM67XR9E3',
      'isCertifiedCraftBrewer': 'Y'},
     'createDate': '2012-01-03 02:41:53',
     'description': 'Welcome to Dust Bowl Brewing Company located in Turlock, Ca. We are a small brewing company in the Central Valley of California. Our size allows us to have a very personal and passionate connection to each of our beers. We are continuously testing new flavors and styles to develop our selection of beers. Our goal is to help create a thriving beer culture in the Central Valley.\r\n\r\nOur company is a tribute to the strong-willed, gritty people of the Dust Bowl and Great Depression era. Inspired by the families who came west in search of a new start as well as those who stayed behind to weather the storm. This is great beer born of hard times. Storm’s com

In [24]:
# List (brewery name, beer name) for every beer on that first style 30 page.
# A beer can list several breweries; only the first one is shown, to keep the output tidy.
[
    (entry['breweries'][0]['name'], entry['name'])
    for entry in resp['data']
]

[('Dust Bowl Brewing Company', '"Galactic Wrath" IPA'),
 ('Working Man Brewing Company', '"Ignition" IPA'),
 ("Three Notch'd Brewing Company", '"Roux 40" Red IPA'),
 ('Little Machine Beer', '"Sniff" IPA'),
 ('Four Fathers Brewing, LLC', '#15'),
 ('Victory Brewing Company', '#429 Red IPA'),
 ('Evil Genius Beer Company', '#Adulting'),
 ('Ohana Brewing Co', '#Hashtag Hops IPA'),
 ('Thomas Hooker Brewing', '#NOFILTER IPA'),
 ('Mikkeller', '#Overtime IPA'),
 ('Three Floyds Brewing Company', '$600 Lizard Shoes'),
 ('Dry Ground Brewing Co.', "'37 Flood"),
 ('4 Noses Brewing Company', "'Bout Damn Time"),
 ('Lawrence Beer Company', "'Lectric"),
 ('Lawrence Beer Company', "'Lectric IPA"),
 ('Isley Brewing Company', "'Scott's Addition' India Pale Ale"),
 ('Stony Creek Brewery', '(401) India Pale Ale'),
 ('(512) Brewing Company', '(512) IPA'),
 ('(512) Brewing Company', '(512) Juicy IPA'),
 ('(512) Brewing Company', '(512) SMaSH Mosaic'),
 ('(512) Brewing Company', '(512) White IPA'),
 ('Stony Cre

In [25]:
# Inspect the top-level keys of the page response
resp.keys()

dict_keys(['currentPage', 'numberOfPages', 'totalResults', 'data', 'status'])

In [26]:
# Use response['numberOfPages'] to learn how many calls to make to each style
# (getBeerPageByStyle makes one API call per page).
pages = resp['numberOfPages']
pages

198

In [41]:
def growDF(df, styleID, start_page, num_pages=10):  # there's a rate limit like 10/sec on brewerydb
    """Return `df` extended with consecutive pages of beers for one style.

    Pages `start_page` .. `start_page + num_pages - 1` are requested one at a time
    through getBeerPageByStyle.  Pages whose request failed, or whose payload holds
    no 'data' list, are skipped.

    Args:
        df: DataFrame to extend; it is not modified.
        styleID: breweryDB style id passed through to getBeerPageByStyle.
        start_page: first page number to request.
        num_pages: how many consecutive pages to request.

    Returns:
        A DataFrame holding the rows of `df` followed by the rows of every page that
        was fetched.  Row labels are kept as they were in each page frame (they are
        not renumbered), matching what repeated DataFrame.append calls produced.
        When nothing was fetched, `df` itself is returned.
    """
    page_frames = []
    for page in range(start_page, start_page + num_pages):
        limit, beer_data = getBeerPageByStyle(styleID, page)
        print(f'You have {limit} calls left today.')
        # beer_data is 0 when the GET failed; a payload without beers has nothing to add.
        if beer_data and beer_data.get('data'):
            page_frames.append(pd.DataFrame(beer_data['data']))

    if not page_frames:
        return df
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every
    # call; collecting the pages and concatenating once gives the same rows in linear time.
    return pd.concat([df, *page_frames])

In [31]:
# Initialize a DF with page 1 of style 30
# (resp['data'] is the list of beer records; each record becomes one row).
style_30_df = pd.DataFrame(resp['data'])

# info() reports the non-null count per column, i.e. how sparsely each field is filled in.
style_30_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 28 columns):
abv                          45 non-null object
available                    21 non-null object
availableId                  21 non-null float64
breweries                    50 non-null object
createDate                   50 non-null object
description                  38 non-null object
foodPairings                 1 non-null object
glass                        12 non-null object
glasswareId                  12 non-null float64
ibu                          30 non-null object
id                           50 non-null object
ingredients                  1 non-null object
isOrganic                    50 non-null object
isRetired                    50 non-null object
labels                       22 non-null object
name                         50 non-null object
nameDisplay                  50 non-null object
originalGravity              8 non-null object
servingTemperature           8 non

Well, it stinks that only 1 of the first 50 IPAs has ingredients listed, but at least 38 of them have descriptions, which can perhaps be analyzed for keywords such as ingredient names.

In [44]:
# concatenate vertically pages 2-198
##  NOTE THAT THIS WILL USE ALL YOUR CALLS UP FOR A DAY, FOR STYLE 30 ##
# The frame is rebuilt from the page-1 response instead of feeding style_30_df back into
# itself, so re-running this cell cannot stack a second copy of pages 2+ onto it.
# Assumes `resp` and `pages` still hold the style 30, page 1 response fetched above.
style_30_df = growDF(pd.DataFrame(resp['data']), styleID=30, start_page=2, num_pages=pages - 1)




In [45]:
# Number of style 30 rows collected so far (page 1 plus whatever growDF added)
len(style_30_df)

In [None]:
# Save the style 30 frame to a pickle file in the working directory,
# presumably so the API calls need not be repeated in a later session.
style_30_df.to_pickle('style_30_df.pkl')

### So that's it for the largest style, #30, and that's it for API calls for the day.
### Here's the encapsulated routine for the other 5 styles:

In [48]:
# Style 30 has already been collected.  Rebind the list instead of calling ipas.remove(30):
# remove() raises ValueError once 30 is gone, so the cell could not be run twice.
ipas = [style_id for style_id in ipas if style_id != 30]

ipas

[26, 31, 171, 172, 173]

In [None]:
# For each remaining style: fetch page 1, learn the page count, fetch the rest, pickle the result.
for ipa in ipas:
    # initialize a pd DF from the first page of the style
    calls_left, resp = getBeerPageByStyle(ipa, page=1)
    print(f'You have {calls_left} daily calls left.')
    if not resp or not resp.get('data'):
        # getBeerPageByStyle returns (0, 0) on failure, and indexing 0 would raise a
        # TypeError that aborts the remaining styles too.  Skip this style instead.
        print(f'Skipping style {ipa}: its first page could not be fetched or held no beers.')
        continue
    df = pd.DataFrame(resp['data'])
    # see how many total pages to query
    pages = resp['numberOfPages']
    df = growDF(df, styleID=ipa, start_page=2, num_pages=pages-1)
    # pickle the resulting df; the path is built once so the message and the file always agree
    pickle_path = f'style_{ipa}_df.pkl'
    print(f'Pickling {len(df)} style {ipa} IPAs to "{pickle_path}"')
    df.to_pickle(pickle_path)
    