In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (30, 20)
plt.rcParams['font.family'] = 'sans-serif'

%matplotlib inline

# data from https://www.kaggle.com/nickhould/craft-cans (scraped from CraftCans.com in January 2017)
beers = pd.read_csv("beers.csv")
breweries = pd.read_csv("breweries.csv")

In [2]:
beers.head()

Unnamed: 0.1,Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces
0,0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0
1,1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0
2,2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0
3,3,0.09,,2263,Sinister,American Double / Imperial IPA,177,12.0
4,4,0.075,,2262,Sex and Candy,American IPA,177,12.0


In [3]:
breweries.head()

Unnamed: 0.1,Unnamed: 0,name,city,state
0,0,NorthGate Brewing,Minneapolis,MN
1,1,Against the Grain Brewery,Louisville,KY
2,2,Jack's Abby Craft Lagers,Framingham,MA
3,3,Mike Hess Brewing Company,San Diego,CA
4,4,Fort Point Beer Company,San Francisco,CA


In [4]:
# remove redundant column, rename columns for clarity
beers = beers.drop('Unnamed: 0',axis=1)
breweries = breweries.rename(columns = {'Unnamed: 0': 'brewery_id', 'name': 'brewery_name'})

# merge dataframes, remove NaN values, make ABV more readable
data = pd.merge(beers,breweries,on='brewery_id',how='inner')
data = data[np.isfinite(data['ibu'])]
data['abv'] = data['abv']*100

In [5]:
data.head()

Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces,brewery_name,city,state
14,6.1,60.0,1979,Bitter Bitch,American Pale Ale (APA),177,12.0,18th Street Brewery,Gary,IN
21,9.9,92.0,1036,Lower De Boom,American Barleywine,368,8.4,21st Amendment Brewery,San Francisco,CA
22,7.9,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,21st Amendment Brewery,San Francisco,CA
24,4.4,42.0,876,Bitter American,American Pale Ale (APA),368,12.0,21st Amendment Brewery,San Francisco,CA
25,4.9,17.0,802,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA


In [6]:
data['state'].unique()

array([' IN', ' CA', ' FL', ' MO', ' WA', ' CO', ' LA', ' KY', ' OR',
       ' AK', ' NC', ' MI', ' TX', ' AL', ' MA', ' AZ', ' MN', ' ME',
       ' VA', ' IL', ' TN', ' MT', ' WY', ' NE', ' NY', ' NJ', ' NV',
       ' OK', ' WI', ' OH', ' GA', ' RI', ' IA', ' ID', ' DC', ' KS',
       ' ND', ' VT', ' MD', ' WV', ' CT', ' PA', ' HI', ' NM', ' MS',
       ' AR', ' SC', ' DE', ' UT', ' NH'], dtype=object)

In [7]:
# remove space in front of state names
data['state'] = data['state'].str.slice(1,3)

In [8]:
ca = data[data['state'] == 'CA']
ca.shape

(135, 10)

In [9]:
ca['brewery_name'].value_counts()

21st Amendment Brewery               17
Golden Road Brewing                  14
Anderson Valley Brewing Company      14
Modern Times Beer                     8
TailGate Beer                         7
Mike Hess Brewing Company             6
Sierra Nevada Brewing Company         6
Manzanita Brewing Company             5
Ruhstaller Beer Company               5
Black Market Brewing Company          4
Ballast Point Brewing Company         4
Fort Point Beer Company               4
Central Coast Brewing Company         4
Saint Archer Brewery                  4
Hess Brewing Company                  3
Mission Brewery                       3
Devil's Canyon Brewery                3
The Dudes' Brewing Company            3
Firestone Walker Brewing Company      3
Headlands Brewing Company             3
Mavericks Beer Company                3
Hangar 24 Craft Brewery               2
Figueroa Mountain Brewing Company     2
Butcher's Brewing                     1
Mother Earth Brew Company             1


In [10]:
ca['city'].value_counts()

San Diego              35
San Francisco          22
Boonville              14
Los Angeles            14
Chico                   6
Temecula                5
Santee                  5
Sacramento              5
San Luis Obispo         4
Torrance                3
Paso Robles             3
Half Moon Bay           3
Belmont                 3
Mill Valley             3
Redlands                2
Santa Cruz              2
Buellton                2
Claremont               1
Vista                   1
South San Francisco     1
Carlsbad                1
Name: city, dtype: int64

In [11]:
from geopy.geocoders import Nominatim
geolocator = Nominatim()

# make it easier for geopy to find coordinates
ca['city-state'] = ca['city']+", "+ca['state']
cities = ca['city-state'].unique()

# create dictionaries of latitudes and longitudes
lats = dict(zip(cities, pd.Series(cities).apply(geolocator.geocode).apply(lambda x: x.latitude)))
longs = dict(zip(cities, pd.Series(cities).apply(geolocator.geocode).apply(lambda x: x.longitude)))

ca['latitude'] = ca['city-state'].map(lats)
ca['longitude'] = ca['city-state'].map(longs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [12]:
ca.head()

Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces,brewery_name,city,state,city-state,latitude,longitude
21,9.9,92.0,1036,Lower De Boom,American Barleywine,368,8.4,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.779281,-122.419236
22,7.9,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.779281,-122.419236
24,4.4,42.0,876,Bitter American,American Pale Ale (APA),368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.779281,-122.419236
25,4.9,17.0,802,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.779281,-122.419236
26,4.9,17.0,801,Hell or High Watermelon Wheat (2009),Fruit / Vegetable Beer,368,12.0,21st Amendment Brewery,San Francisco,CA,"San Francisco, CA",37.779281,-122.419236


In [13]:
ca['norcal'] = ca['latitude'].apply(lambda x: x > 35.791111)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [14]:
ca['norcal'].value_counts()

False    76
True     59
Name: norcal, dtype: int64

In [15]:
ca[ca['norcal'] == True].value_counts()

AttributeError: 'DataFrame' object has no attribute 'value_counts'

In [16]:
ca[ca['norcal'] == True]['brewery_name'].value_counts()

21st Amendment Brewery             17
Anderson Valley Brewing Company    14
Sierra Nevada Brewing Company       6
Ruhstaller Beer Company             5
Fort Point Beer Company             4
Devil's Canyon Brewery              3
Mavericks Beer Company              3
Headlands Brewing Company           3
Speakasy Ales & Lagers              1
Seabright Brewery                   1
Armstrong Brewing Company           1
Santa Cruz Mountain Brewing         1
Name: brewery_name, dtype: int64

In [17]:
ca[ca['norcal'] == False]['brewery_name'].value_counts()

Golden Road Brewing                  14
Modern Times Beer                     8
TailGate Beer                         7
Mike Hess Brewing Company             6
Manzanita Brewing Company             5
Ballast Point Brewing Company         4
Saint Archer Brewery                  4
Central Coast Brewing Company         4
Black Market Brewing Company          4
Hess Brewing Company                  3
Firestone Walker Brewing Company      3
Mission Brewery                       3
The Dudes' Brewing Company            3
Hangar 24 Craft Brewery               2
Figueroa Mountain Brewing Company     2
Mother Earth Brew Company             1
Refuge Brewery                        1
Butcher's Brewing                     1
Claremont Craft Ales                  1
Name: brewery_name, dtype: int64

In [18]:
vt = data[data['state'] == 'VT']

In [19]:
vt['brewery_name'].value_counts()

Long Trail Brewing Company    5
Magic Hat Brewing Company     3
Fiddlehead Brewing Company    2
The Alchemist                 2
Otter Creek Brewing           2
Lost Nation Brewing           2
The Just Beer Project         1
Name: brewery_name, dtype: int64

In [20]:
vt['name'].value_counts()

Heady Topper                   2
#9                             2
Hodad Porter                   1
Overgrown American Pale Ale    1
Elder Betty                    1
Gose                           1
Fresh Slice White IPA          1
Vermont Pilsner                1
Long Trail Ale                 1
Second Fiddle                  1
Just IPA                       1
Blackbeary Wheat               1
Double Bag                     1
Long Trail Ale (1)             1
Long Trail IPA                 1
Name: name, dtype: int64

In [21]:
vt

Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces,brewery_name,city,state
789,8.2,80.0,1929,Second Fiddle,American Double / Imperial IPA,172,16.0,Fiddlehead Brewing Company,Shelburne,VT
790,5.5,30.0,1858,Hodad Porter,American Porter,172,16.0,Fiddlehead Brewing Company,Shelburne,VT
1218,5.9,42.0,1926,Long Trail IPA,English India Pale Ale (IPA),268,12.0,Long Trail Brewing Company,Bridgewater Corners,VT
1219,4.6,30.0,1924,Long Trail Ale,American Amber / Red Ale,268,12.0,Long Trail Brewing Company,Bridgewater Corners,VT
1220,7.2,33.0,1090,Double Bag,Altbier,268,16.0,Long Trail Brewing Company,Bridgewater Corners,VT
1221,4.0,8.0,574,Blackbeary Wheat,Fruit / Vegetable Beer,268,12.0,Long Trail Brewing Company,Bridgewater Corners,VT
1222,4.6,30.0,573,Long Trail Ale (1),Altbier,268,12.0,Long Trail Brewing Company,Bridgewater Corners,VT
1223,4.6,8.0,2584,Gose,Gose,41,16.0,Lost Nation Brewing,East Fairfield,VT
1224,4.8,20.0,2583,Vermont Pilsner,German Pilsener,41,16.0,Lost Nation Brewing,East Fairfield,VT
1252,5.1,20.0,1813,#9,Fruit / Vegetable Beer,303,16.0,Magic Hat Brewing Company,South Burlington,VT


In [22]:
ca.to_csv("ca-data.csv")