# Craft Beer Data

In [1]:
# imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline

In [None]:
# getting the beer data from a csv file
df = pd.read_csv('beers.csv')

In [None]:
df.head() # check_yo_head

In [None]:
# checking on the tail of the data
df.tail()

In [None]:
# taking a look at the data info
df.info()

In [None]:
# taking a look at the shape of the data
df.shape

Initial Takeaways:

- So it looks like there are 8 columns 

- The unnamed column is a count of the data for both .csv files

- Some of the ibu has data and some don't

- I don't know what the id column is for

In [None]:
# prep

# df.drop(columns = 'Unnamed: 0', )

In [None]:
# reading the breweries csv file
df_1 = pd.read_csv('breweries.csv')

In [None]:
df_1.head() #check_yo_head

In [None]:
# using merge function by setting how='inner'
df = pd.merge(df, df_1, 
                   on='Unnamed: 0', 
                   how='inner')

In [None]:
df.head() #check_yo_head

In [None]:
# checking for nulls in the data
df.isna().sum()

In [None]:
# taking a look at the shape of the merged data
df.shape

In [None]:
# quick info on the merged data
df.info()

In [None]:
# renaming columns for readability
df = df.rename(columns = {"style": "beer_style", "name_y": "brewery", "Unnamed: 0": "number", "name_x": "beer"})

In [None]:
df.head() #check_yo_head

In [None]:
# messing around and dropping a .plot() on the data
df.plot()

In [None]:
# taking a look at the counts of each beer style
df.beer_style.value_counts()

In [None]:
# looking to see if the same brewery has multiple beers
df.brewery.value_counts()

In [None]:
# df.shape had 558 rows
# seeing how many different breweries there are
df.brewery.nunique()

In [None]:
# seeing how many unique beers there are
df.beer.nunique()

In [None]:
# looking at the amount of beer per state
df.state.value_counts()

In [None]:
# trying out a .plot() on the states
df.state.value_counts().plot()

In [None]:
state_df = df.state.value_counts().rename_axis('states').reset_index(name='breweries')
state_df.head(5)

In [None]:
# Visualization

fig, axes = plt.subplots(1,2, sharey=True, figsize=(10,5))
sns.set_theme(style="whitegrid")
fig.suptitle('Top 10 States and Bottom 10 States')

# Top 10 States
sns.barplot(ax=axes[0], x="states", y="breweries", data=state_df.head(10))
axes[0].set_title("Brewery Lovers")

# Bottom 10 States
sns.barplot(ax=axes[1], x="states", y="breweries", data=state_df.tail(10))
axes[1].set_title("Brewery Haters")

In [None]:
df.head()

In [None]:
top_beer = df.beer_style.value_counts().rename_axis('beer_type').reset_index(name='total')
top_beer.head()

In [None]:
sns.barplot(x='total', y='beer_type', data=top_beer.head(10))
plt.title('Top Flight Beers')


In [None]:
top10_beers = top_beer.head(10)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.axis('equal')
labels = top10_beers['beer_type']
totals = top10_beers['total']
ax.pie(totals, labels = labels, autopct='%1.2f%%')
plt.title('Percent of Top 10 Beers Consumed')
plt.show()

In [2]:
# making a function

def beer_df():
    '''
    This function takes in the beer data and brewery data 
    and merges into one dataframe 
    and renames the columns for readbility
    and saves the new dataframe as a csv
    '''
    # getting the beers data
    df = pd.read_csv('beers.csv')
    # getting the breweries data
    df_1 = pd.read_csv('breweries.csv')
    # merging the two csv files together
    df = pd.merge(df, df_1, 
                   on='Unnamed: 0', 
                   how='inner')
    # renaming columns for readability
    df = df.rename(columns = {"style": "beer_style", "name_y": "brewery", "Unnamed: 0": "number", "name_x": "beer"})
    
    df.to_csv('beer_data.csv')
    
    return df

In [3]:
# using my beer_df function
df = beer_df()
df.head() # check_yo_head

Unnamed: 0,number,abv,ibu,id,beer,beer_style,brewery_id,ounces,brewery,city,state
0,0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0,NorthGate Brewing,Minneapolis,MN
1,1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0,Against the Grain Brewery,Louisville,KY
2,2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0,Jack's Abby Craft Lagers,Framingham,MA
3,3,0.09,,2263,Sinister,American Double / Imperial IPA,177,12.0,Mike Hess Brewing Company,San Diego,CA
4,4,0.075,,2262,Sex and Candy,American IPA,177,12.0,Fort Point Beer Company,San Francisco,CA


In [6]:
# states function

def beer_states():
    '''
    This function will take the count of each state in the data and 
    make a new dataframe with the states and number of breweries
    '''
    state_df = df.state.value_counts().rename_axis('states').reset_index(name='breweries')
    
    return state_df

In [7]:
beer_states()

Unnamed: 0,states,breweries
0,CO,47
1,CA,39
2,MI,32
3,OR,29
4,TX,28
5,PA,25
6,MA,23
7,WA,23
8,IN,22
9,WI,20
