In [1]:
import pandas as pd
import numpy as np

# Data from https://www.kaggle.com/nickhould/craft-cans
# (scraped from CraftCans.com in January 2017).
beers = pd.read_csv("beers.csv")
breweries = pd.read_csv("breweries.csv")

# `Unnamed: 0` is redundant in beers, so drop it. In breweries the same column
# is the key the merge below joins on, so rename it to `brewery_id`; `name`
# becomes `brewery_name` for clarity.
beers = beers.drop(columns='Unnamed: 0')
breweries = breweries.rename(columns={'Unnamed: 0': 'brewery_id', 'name': 'brewery_name'})

# Merge, then clean in one non-mutating chain:
#   * keep only rows whose IBU is a finite number (only `ibu` is filtered;
#     NaN in other columns is left alone),
#   * scale ABV by 100 to make it more readable,
#   * strip surrounding whitespace from the state code. `str.strip()` is used
#     instead of a fixed `slice(1, 3)` so a value without the leading space is
#     not silently truncated.
# `.assign` builds a new frame, which avoids writing into a filtered slice
# (the source of pandas' SettingWithCopyWarning).
data = (
    pd.merge(beers, breweries, on='brewery_id', how='inner')
    .loc[lambda merged: np.isfinite(merged['ibu'])]
    .assign(
        abv=lambda merged: merged['abv'] * 100,
        state=lambda merged: merged['state'].str.strip(),
    )
)

# Keep only the rows from California; copy so `ca` is an independent frame.
ca = data[data['state'] == 'CA'].copy()

In [2]:
import pixiedust

# Convert the pandas frame `ca` into a Spark DataFrame (`ca2`) and display it.
# NOTE(review): neither `SQLContext` nor `sc` is imported or defined in the
# visible cells -- presumably both are provided by the Spark kernel; confirm,
# otherwise this cell fails with NameError on a fresh kernel.
sqlContext = SQLContext(sc)
ca2 = sqlContext.createDataFrame(ca)

# Bug repro: once the table is rendered, search it for 'k' -- some faulty
# entries should show up in the results.
display(ca2)

abv,ibu,id,name,style,brewery_id,ounces,brewery_name,city,state
9.9,92.0,1036,Lower De Boom,American Barleywine,368,8.4,21st Amendment Brewery,San Francisco,CA
7.9,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,21st Amendment Brewery,San Francisco,CA
4.4,42.0,876,Bitter American,American Pale Ale (APA),368,12.0,21st Amendment Brewery,San Francisco,CA
4.9,17.0,802,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA
4.9,17.0,801,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA
4.9,17.0,800,21st Amendment Watermelon Wheat Beer (2006),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA
7.0,70.0,799,21st Amendment IPA (2006),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA
7.0,70.0,797,Brew Free! or Die IPA (2008),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA
7.0,70.0,796,Brew Free! or Die IPA (2009),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA
8.5,52.0,531,Special Edition: Allies Win The War!,English Strong Ale,368,12.0,21st Amendment Brewery,San Francisco,CA


In [5]:
# Control case: no bug when importing data directly into pixiedust.
# Here the frame comes from pixiedust.sampleData() with a CSV URL, instead of
# from a pandas frame passed through sqlContext.createDataFrame().
# Note that the output below has extra columns (`Unnamed: 0`, `city-state`,
# `latitude`, `longitude`, `norcal`), so it is not column-for-column identical
# to the pandas-derived table.

df = pixiedust.sampleData("https://raw.githubusercontent.com/benhuds/craftbeer-data/master/ca-data.csv")
display(df)

Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces,brewery_name,city,state,city-state,latitude,longitude,norcal
21,9.9,92.0,1036,Lower De Boom,American Barleywine,368,8.4,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
22,7.9,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
24,4.4,42.0,876,Bitter American,American Pale Ale (APA),368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
25,4.9,17.0,802,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
26,4.9,17.0,801,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
27,4.9,17.0,800,21st Amendment Watermelon Wheat Beer (2006),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
28,7.0,70.0,799,21st Amendment IPA (2006),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
29,7.0,70.0,797,Brew Free! or Die IPA (2008),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
30,7.0,70.0,796,Brew Free! or Die IPA (2009),American IPA,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
31,8.5,52.0,531,Special Edition: Allies Win The War!,English Strong Ale,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.7792808,-122.4192362,True
