# Craft Beer Analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the two CSV inputs with pandas' default options. With these defaults
# beers.csv yields an 'Unnamed: 0' column, which is kept as ordinary data.
breweries, beer = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/breweries.csv', './data/beers.csv')
)

In [2]:
def some_info(df):
    """Print a quick plain-text overview of ``df``.

    Written to stdout, in order: a heading, the number of columns
    ("features") and rows ("observations"), the ``describe()`` summary,
    the count of missing values per column, and the ``head()`` preview.

    Parameters
    ----------
    df : object exposing ``shape``, ``describe()``, ``isna()`` and ``head()``
        In this notebook, a pandas DataFrame.

    Returns
    -------
    None
    """
    print("Some info about dataset\n")

    # Size of the frame: shape is (rows, columns).
    n_rows, n_cols = df.shape[0], df.shape[1]
    print(f"This dataset has {n_cols} features.")
    print(f"This dataset has {n_rows} observations.\n")

    # Summary statistics as produced by describe().
    summary = df.describe()
    print("Describe:\n", summary, "\n\n")

    # Number of missing entries in each column.
    missing_per_column = df.isna().sum()
    print("Missing values:\n", missing_per_column, "\n")

    # First rows of the data.
    preview = df.head()
    print(preview)
    
# Print the overview for each table: beers first, then breweries.
for frame in (beer, breweries):
    some_info(frame)

Some info about dataset

This dataset has 8 features.
This dataset has 2410 observations.

Describe:
         Unnamed: 0          abv          ibu           id   brewery_id  \
count  2410.000000  2348.000000  1405.000000  2410.000000  2410.000000   
mean   1204.500000     0.059773    42.713167  1431.113278   231.749793   
std     695.851397     0.013542    25.954066   752.459975   157.685604   
min       0.000000     0.001000     4.000000     1.000000     0.000000   
25%     602.250000     0.050000    21.000000   808.250000    93.000000   
50%    1204.500000     0.056000    35.000000  1453.500000   205.000000   
75%    1806.750000     0.067000    64.000000  2075.750000   366.000000   
max    2409.000000     0.128000   138.000000  2692.000000   557.000000   

            ounces  
count  2410.000000  
mean     13.592241  
std       2.352204  
min       8.400000  
25%      12.000000  
50%      12.000000  
75%      16.000000  
max      32.000000   


Missing values:
 Unnamed: 0       0
abv

In [3]:
# Row labels of the breweries whose state field is exactly ' MI'
# (the compared value includes a leading space).
index = breweries[breweries.state == ' MI'].index

# Key the beers by brewery_id and keep the rows whose brewery_id is one of
# those labels. A boolean isin() mask replaces `.loc[index]`: looking up a
# list of labels with .loc raises KeyError as soon as one label is absent,
# i.e. when a selected brewery has no row in `beer`. The same rows are
# selected; they now come back in the order they appear in `beer`.
# NOTE(review): this assumes the row labels of `breweries` are the ids stored
# in beer.brewery_id -- confirm against the CSV files.
beer_by_brewery = beer.set_index('brewery_id')
MI_beer = beer_by_brewery[beer_by_brewery.index.isin(index)]

In [4]:
# Number of distinct beer names per brewery_id, largest first.
# nunique(dropna=False) replaces the Python-level len(set(...)) apply;
# dropna=False keeps a missing name counted as a value, as set() did.
sort_count = (
    MI_beer
    .groupby('brewery_id')
    .name
    .nunique(dropna=False)
    .sort_values(ascending=False)
)

# Names of the breweries whose state field is exactly ' MI', keyed by their row label.
mi_brewery_names = breweries.loc[breweries.state == ' MI', 'name']

# Join names and counts on their index. Both inputs are Series called 'name',
# so the merge suffixes them: name_x is the brewery name, name_y the count.
top_3 = (
    pd.merge(mi_brewery_names, sort_count, left_index=True, right_index=True)
    .sort_values(by='name_y', ascending=False)
    .head(3)
)

In [5]:
%matplotlib notebook

plt.bar(top_3.name_x, top_3.name_y, color=['#ffbe5a', '#ffbe5a', '#ffbe5a'], width=0.8, edgecolor='black')

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.set_yticks([])
ax.set_ylabel('Number of beers', fontsize=15, c='#3f3f3f')
ax.axis([-0.5, 2.5, 0, 65])
ax.set_xticks([])

plt.text(-0.1, 54, "62", fontsize=20, alpha=0.9, c='#ffffff')
plt.text(0.95, 2.5, "9", fontsize=20, alpha=0.9, c='#ffffff')
plt.text(1.93, 1.5, "7", fontsize=20, alpha=0.9, c='#ffffff')
plt.text(-0.3, -5, "Brewery Vivant", fontsize=10, c='#3f3f3f')
plt.text(0.7, -5, "Griffin Claw BC", fontsize=10, c='#3f3f3f')
plt.text(1.7, -5, "Keweenaw BC", fontsize=10, c='#3f3f3f')
plt.title("Craft Beer offered by breweries in MI", c='#3f3f3f')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Craft Beer offered by breweries in MI')