# Getting Beer Data

## Imports

In [5]:
import os
from time import sleep

import numpy as np
import pandas as pd
import requests

## Get Beers

In [2]:
# NOTE(security): the literal below is a credential committed to the notebook.
# Export BREWERYDB_API_KEY to override it; the fallback only keeps existing
# runs working and should be removed once the key has been rotated.
key = os.environ.get("BREWERYDB_API_KEY", "c96d2760fbd1bc0c6a8831b87ce86137")
# Placeholders: API key, then page number.
beer_url = "http://api.brewerydb.com/v2/beers/?key={}&availableId=1&p={}"

# Fetch page 1; its payload also carries the page count used by the next cell.
beers_response = requests.get(beer_url.format(key, "1"), timeout=30)
# Fail here on an HTTP error instead of later on a confusing KeyError.
beers_response.raise_for_status()
beers = beers_response.json()

In [5]:
# Page count as reported by the first response.
totalPages = beers["numberOfPages"]

# Accumulator for every beer record (a list, despite the name). Copy page 1's
# records so extending the accumulator never mutates `beers["data"]`.
beers_dict = list(beers["data"])

for i in range(2, totalPages + 1):
    sleep(1) # Be nice to their servers
    # Loop-local names keep `beers` bound to page 1, so re-running this cell
    # rebuilds the same list instead of starting from the last page fetched.
    page_response = requests.get(beer_url.format(key, i), timeout=30)
    page_response.raise_for_status()
    page = page_response.json()

    beers_dict.extend(page["data"])

In [6]:
# `beers_dict` is a list of per-beer records; each record becomes one row.
df = pd.DataFrame(beers_dict)
df.head()

Unnamed: 0,abv,available,availableId,beerVariation,beerVariationId,createDate,description,foodPairings,glass,glasswareId,...,servingTemperature,servingTemperatureDisplay,srm,srmId,status,statusDisplay,style,styleId,updateDate,year
0,6.33,"{'id': 1, 'name': 'Year Round', 'description':...",1,,,2015-04-20 21:43:59,"Our first beer has been aptly named ""633"" afte...",,,,...,,,,,verified,Verified,"{'id': 25, 'categoryId': 3, 'category': {'id':...",25.0,2015-04-20 21:43:59,
1,5.6,"{'id': 1, 'name': 'Year Round', 'description':...",1,,,2013-04-28 20:21:56,"Good, dense head retention is desirable. Light...",,,,...,,,"{'id': 5, 'name': '5', 'hex': 'FBB123'}",5.0,verified,Verified,"{'id': 45, 'categoryId': 4, 'category': {'id':...",45.0,2016-05-05 14:07:35,
2,6.6,"{'id': 1, 'name': 'Year Round', 'description':...",1,,,2013-07-27 14:02:13,This medium amber beer is infused with a blend...,,"{'id': 5, 'name': 'Pint', 'createDate': '2012-...",5.0,...,,,,,verified,Verified,"{'id': 30, 'categoryId': 3, 'category': {'id':...",30.0,2015-03-18 18:05:08,
3,,"{'id': 1, 'name': 'Year Round', 'description':...",1,,,2012-09-29 17:22:22,The stout style lends itself well to pairing w...,,"{'id': 5, 'name': 'Pint', 'createDate': '2012-...",5.0,...,,,,,verified,Verified,"{'id': 20, 'categoryId': 1, 'category': {'id':...",20.0,2015-04-24 13:24:02,
4,4.7,"{'id': 1, 'name': 'Year Round', 'description':...",1,,,2012-01-03 02:44:19,The true Summer Ale of Central Oregon! A Golde...,,"{'id': 5, 'name': 'Pint', 'createDate': '2012-...",5.0,...,,,,,verified,Verified,"{'id': 36, 'categoryId': 3, 'category': {'id':...",36.0,2015-05-22 02:27:37,


In [7]:
# (rows, columns) of the assembled beer table.
df.shape

(9670, 27)

In [8]:
# Column labels of the raw beer table, as a NumPy array.
df.columns.values

array(['abv', 'available', 'availableId', 'beerVariation',
       'beerVariationId', 'createDate', 'description', 'foodPairings',
       'glass', 'glasswareId', 'ibu', 'id', 'isOrganic', 'labels', 'name',
       'nameDisplay', 'originalGravity', 'servingTemperature',
       'servingTemperatureDisplay', 'srm', 'srmId', 'status',
       'statusDisplay', 'style', 'styleId', 'updateDate', 'year'], dtype=object)

In [9]:
# Persist the raw table. The row index is written as well (no index=False),
# so it becomes the first, unnamed column of the file.
df.to_csv("./beers_raw.csv", encoding="utf-8")

## Get Styles

In [10]:
# Placeholder: API key (defined in the beers section).
style_url = "http://api.brewerydb.com/v2/styles/?key={}"

# Single request; bound the wait and fail on an HTTP error rather than on a
# missing key in the payload later.
styles_response = requests.get(style_url.format(key), timeout=30)
styles_response.raise_for_status()
styles = styles_response.json()

In [11]:
# Keep only the three fields needed to label beers with their style.
styles_dict = [
    [entry["id"], entry["name"], entry["shortName"]]
    for entry in styles["data"]
]
# Rows are plain lists, so the column labels are supplied explicitly.
style_df = pd.DataFrame(styles_dict, columns=["id", "name", "shortName"])

style_df.head()

Unnamed: 0,id,name,shortName
0,1,Classic English-Style Pale Ale,English Pale
1,2,English-Style India Pale Ale,English IPA
2,3,Ordinary Bitter,Bitter
3,4,Special Bitter or Best Bitter,Special Bitter
4,5,Extra Special Bitter,ESB


In [12]:
# Persist the style lookup table; the row index is written too (no index=False).
style_df.to_csv("./styles_raw.csv", encoding="utf-8")

In [41]:
# Reload the styles from the CSV written above. The saved index comes back as
# an "Unnamed: 0" column, which is dropped after the merge.
style_df = pd.read_csv("styles_raw.csv")

## Clean Data

### Drop unneeded columns

In [55]:
df = df.drop(["Unnamed: 0", "updateDate", "originalGravity", "year", "available", "glasswareId", "name", "beerVariation", "beerVariationId", "description", "foodPairings", "glass", "servingTemperatureDisplay", "style", "labels", "srm", "statusDisplay"], axis=1)
df.head()

Unnamed: 0,abv,availableId,createDate,ibu,id,isOrganic,nameDisplay,servingTemperature,srmId,status,styleId
0,6.33,1,2015-04-20 21:43:59,25.0,ZsQEJt,N,"""633"" American Pale Ale",,,verified,25.0
1,5.6,1,2013-04-28 20:21:56,28.2,Hr5A0t,N,"""God Country"" Kolsch",,5.0,verified,45.0
2,6.6,1,2013-07-27 14:02:13,45.0,5UcMBc,N,"""Ignition"" IPA",,,verified,30.0
3,,1,2012-09-29 17:22:22,,54rSgo,N,"""Jemmy Dean"" Breakfast Stout",,,verified,20.0
4,4.7,1,2012-01-03 02:44:19,26.0,b7WWL6,N,"""Mike Saw a Sasquatch"" Session Ale",,,verified,36.0


### Change `styleId` type from float to integer.

In [54]:
df = pd.read_csv("beers_raw.csv")

In [56]:
# True if at least one beer has no styleId.
df["styleId"].isnull().values.any()

True

In [57]:
# Remove rows without a styleId: NaN cannot be cast to int, and such rows
# could not be matched to a style anyway.
df.dropna(subset=["styleId"], inplace=True)

In [58]:
# Number of rows still missing a styleId (expected: 0).
df["styleId"].isnull().sum()

0

In [59]:
# With the missing values gone, styleId can be stored as an integer.
df = df.astype({"styleId": int})

In [60]:
# True if at least one beer has no IBU value.
df["ibu"].isnull().values.any()

True

In [61]:
# Missing-IBU count, then the frame's (rows, columns), one per line.
print(df["ibu"].isnull().sum(), df.shape, sep="\n")

3974
(9624, 11)


In [62]:
# Mean of the known IBU values (missing entries are skipped), to one decimal.
ibu_mean = round(df["ibu"].mean(), 1)

In [63]:
# Impute missing IBU values with the rounded mean computed above.
df = df.fillna({"ibu": ibu_mean})

In [64]:
# Confirm the fill left no missing IBU values (expected: False).
df["ibu"].isnull().values.any()

False

In [65]:
# Preview the first rows only; displaying the bare frame would render the
# whole table.
df.head(10)

Unnamed: 0,abv,availableId,createDate,ibu,id,isOrganic,nameDisplay,servingTemperature,srmId,status,styleId
0,6.33,1,2015-04-20 21:43:59,25.0,ZsQEJt,N,"""633"" American Pale Ale",,,verified,25
1,5.60,1,2013-04-28 20:21:56,28.2,Hr5A0t,N,"""God Country"" Kolsch",,5.0,verified,45
2,6.60,1,2013-07-27 14:02:13,45.0,5UcMBc,N,"""Ignition"" IPA",,,verified,30
3,,1,2012-09-29 17:22:22,39.6,54rSgo,N,"""Jemmy Dean"" Breakfast Stout",,,verified,20
4,4.70,1,2012-01-03 02:44:19,26.0,b7WWL6,N,"""Mike Saw a Sasquatch"" Session Ale",,,verified,36
5,6.00,1,2012-01-03 02:42:36,39.6,HXmS9k,N,"""My"" Bock",,,verified,88
6,6.50,1,2017-05-04 14:15:42,36.0,J7ysvG,N,"""OG"" Original Gangster",,,verified,25
7,7.80,1,2013-07-27 14:02:12,80.0,TAtY2B,N,"""Sneaky Devil"" Double IPA",,,verified,31
8,5.20,1,2013-07-27 14:02:47,28.0,K83yUU,N,"""Whistleblower"" Belgian Wit",,,verified,65
9,10.00,1,2015-06-01 14:29:55,24.0,Z7tz1C,Y,#2 Strong Ale,,,verified,14


### Merge data frames

In [71]:
# Attach style details to each beer: beers' styleId matches styles' id.
# Inner join, so beers whose styleId has no matching style are discarded.
df = pd.merge(df, style_df, how="inner", left_on="styleId", right_on="id")

### Clean merged data frame

In [72]:
# Column labels after the merge; names present in both frames carry the
# _x (beers) / _y (styles) suffixes.
df.columns.values

array(['abv', 'availableId', 'createDate', 'ibu', 'id_x', 'isOrganic',
       'nameDisplay', 'servingTemperature', 'srmId', 'status', 'styleId',
       'styleName', 'Unnamed: 0', 'id_y', 'name', 'shortName'], dtype=object)

In [73]:
# Discard columns that are redundant once the style name is attached.
df = df.drop(
    labels=["availableId", "Unnamed: 0", "id_y", "shortName"],
    axis="columns",
)

In [75]:
# Restore the beer id's plain name and make the style's name self-describing.
df = df.rename(columns=dict(id_x="id", name="styleName"))

In [78]:
# Write the cleaned, merged table; the row index is written too (no index=False).
df.to_csv("beers_clean.csv")