In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets

%matplotlib inline

In [2]:
# Load the cleaned beer review table that the ranking plots read from.
BEER_REVIEWS_CSV = 'beer_df_clean.csv'
beer_df_clean = pd.read_csv(BEER_REVIEWS_CSV)
# Bare name on the last line so the notebook renders the frame as cell output.
beer_df_clean

Unnamed: 0,beer_id,beer_name,beer_style,beer_abv,weighted_review,review_overall,review_aroma,review_appearance,review_palate,review_taste,review_profilename,brewery_name,brewery_type,city,state,longitude,latitude
0,64883,Cauldron DIPA,American Double / Imperial IPA,7.7,4.32,4.0,4.5,4.0,4.0,4.5,johnmichaelsen,Caldera Brewing Company,micro,Ashland,Oregon,-122.663374,42.183738
1,52159,Caldera Ginger Beer,Herbed / Spiced Beer,4.7,3.35,3.0,3.5,3.5,3.0,3.5,oline73,Caldera Brewing Company,micro,Ashland,Oregon,-122.663374,42.183738
2,52159,Caldera Ginger Beer,Herbed / Spiced Beer,4.7,3.75,3.5,3.5,3.5,4.0,4.0,Reidrover,Caldera Brewing Company,micro,Ashland,Oregon,-122.663374,42.183738
3,52159,Caldera Ginger Beer,Herbed / Spiced Beer,4.7,3.01,3.0,2.5,3.5,2.0,3.5,alpinebryant,Caldera Brewing Company,micro,Ashland,Oregon,-122.663374,42.183738
4,52159,Caldera Ginger Beer,Herbed / Spiced Beer,4.7,3.68,4.0,3.0,3.5,3.5,4.0,LordAdmNelson,Caldera Brewing Company,micro,Ashland,Oregon,-122.663374,42.183738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563512,58076,Amber Wave,American Amber / Red Ale,5.4,3.46,3.5,3.0,4.0,4.0,3.5,DoubleJ,Pacific Beach Ale House,brewpub,San Diego,California,-117.255265,32.794255
563513,58078,Shipwrecked Stout,American Stout,6.5,3.31,3.0,3.0,4.0,3.5,3.5,glid02,Pacific Beach Ale House,brewpub,San Diego,California,-117.255265,32.794255
563514,58078,Shipwrecked Stout,American Stout,6.5,3.75,3.5,3.5,3.5,4.0,4.0,DoubleJ,Pacific Beach Ale House,brewpub,San Diego,California,-117.255265,32.794255
563515,58385,Belgian Blonde Anniversary Ale,Belgian Pale Ale,6.5,3.63,4.0,3.5,4.0,3.5,3.5,glid02,Pacific Beach Ale House,brewpub,San Diego,California,-117.255265,32.794255


### Rankings

In [94]:
def f(category):
    """Plot groups of ``beer_df_clean`` ranked by mean ``weighted_review``.

    Written as the callback for ``widgets.interact``: it draws a seaborn
    point plot on a new matplotlib figure and returns nothing.

    Parameters
    ----------
    category : str
        Column of ``beer_df_clean`` to group by. Recognised values are
        'beer_name', 'beer_style', 'brewery_name', 'brewery_type', 'state'
        and 'city'. Any other value leaves the new figure empty.

    Notes
    -----
    Every category except 'state' is restricted to groups that have at least
    10 reviews and is cut to the 25 best-scoring groups. 'state' shows every
    state, unfiltered.
    """
    # category -> (plot title, y-axis label, apply min-review filter + top-N cut)
    plot_specs = {
        'beer_name': ('Top 25 Beers by Average Review Score', 'Beer Name', True),
        'beer_style': ('Top 25 Beer Styles by Average Review Score', 'Beer Style', True),
        'brewery_name': ('Top 25 Breweries by Average Review Score', 'Brewery Name', True),
        'brewery_type': ('Brewery Type by Average Review Score', 'Brewery Type', True),
        'state': ('States by Average Review Score', 'State', False),
        'city': ('Top 25 Cities by Average Review Score', 'City', True),
    }
    min_reviews = 10
    top_n = 25

    plt.figure(figsize=(12,15))
    sns.set_theme(style='darkgrid')

    spec = plot_specs.get(category)
    if spec is None:
        # Unknown category: nothing to draw, the figure stays blank.
        return
    title, ylabel, limit_groups = spec

    reviews = beer_df_clean
    if limit_groups:
        # Drop groups with fewer than `min_reviews` reviews so a handful of
        # ratings cannot put a group at the top of the ranking.
        review_counts = reviews[category].value_counts()
        frequent_groups = review_counts[review_counts >= min_reviews].index
        reviews = reviews[reviews[category].isin(frequent_groups)]

    # Select the score column *before* aggregating: a frame-wide .mean() also
    # tries to average the text columns, which raises TypeError on pandas >= 2.0.
    ranking = (
        reviews.groupby(category)['weighted_review']
        .mean()
        .sort_values(ascending=False)
    )
    if limit_groups:
        ranking = ranking.head(top_n)

    # `order` both sorts the y-axis and limits it to the ranked groups.
    sns.pointplot(data=reviews, x='weighted_review', y=category, order=ranking.index, capsize=0.2)
    plt.grid(True)
    plt.title(title, fontsize=18, weight='bold', pad=20)
    plt.ylabel(ylabel, fontsize=16, weight='bold', labelpad=10)
    plt.xlabel('Average Review Score', fontsize=16, weight='bold', labelpad=20)
    plt.yticks(fontsize=12.5)

# Dropdown label -> `beer_df_clean` column passed to `f`.
# The pairs are listed in the order the dropdown should show them.
categories = dict([
    ('Top Rated Beer', 'beer_name'),
    ('Top Rated Beer Styles', 'beer_style'),
    ('Top Rated Breweries', 'brewery_name'),
    ('Top Rated Brewery Types', 'brewery_type'),
    ('Top Rated States', 'state'),
    ('Top Rated Cities', 'city'),
])
widgets.interact(f, category=categories)

interactive(children=(Dropdown(description='category', options={'Top Rated Beer': 'beer_name', 'Top Rated Beer…

<function __main__.f(category)>

-------

### Geospatial Visualizations

In [100]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Per-state and per-city review summaries read by the map traces below
# (state_data.csv feeds the choropleth, city_data.csv the scatter map).
top_states, top_cities = (
    pd.read_csv(csv_path) for csv_path in ('state_data.csv', 'city_data.csv')
)

In [122]:
# Two stacked US maps: a state-level choropleth on top and a city-level
# scatter map underneath.
fig = make_subplots(
    rows=2, cols=1,
    specs=[[{'type':'choropleth'}], [{'type':'scattergeo'}]],
    subplot_titles=[
        'Average Beer Rating by State<br>(Hover for more info)',
        'Average Beer Rating by City<br>(Click on legend to toggle ratings)'],
    vertical_spacing=0.1)

# states
fig.add_trace(go.Choropleth(
    locations = top_states['state_code'],
    z = top_states['mean'],
    locationmode = 'USA-states',
    text = top_states['state']+'<br>'+'# reviews: '+ top_states['count'].astype(str),
    colorscale = 'Blues',
    colorbar = {'title':'Average<br>Review<br>Score', 'ticks':'outside'}
), row=1, col=1)

# cities: one trace per whole-point score band, so every band gets its own
# colour and its own toggleable legend entry.
limits = [1,2,3,4]   # lower edge of each band; a band spans one point
colors = ['lightcyan', 'rgb(59,59,59)', 'cornflowerblue', 'rgb(241,105,19)']

# trace for dummy data, used to populate a 1-2 marker in the legend.
# NOTE(review): relies on city_data.csv containing a placeholder row whose
# city is 'Dummy row' -- confirm against the file.
df_1_2 = top_cities.query("city == 'Dummy row'")
fig.add_trace(go.Scattergeo(
    name = '1 - 2',
    visible = 'legendonly',
    showlegend = True,
    lon = df_1_2['longitude'],
    lat = df_1_2['latitude'],
    marker = dict(
        color = 'lightgrey',
        line_color = 'rgb(40,40,40)',
        line_width = 0.5,
        size = 8)), row=2, col=1)

# city review data
city_scores = top_cities['mean']
for lower, color in zip(limits, colors):
    upper = lower + 1
    # Half-open bands [lower, upper) so a score sitting exactly on a boundary
    # is drawn once instead of in two adjacent bands; only the last band also
    # includes its upper edge so the maximum score is not dropped.
    in_band = city_scores >= lower
    if lower == limits[-1]:
        in_band &= city_scores <= upper
    else:
        in_band &= city_scores < upper
    df_sub = top_cities[in_band]
    fig.add_trace(go.Scattergeo(
        locationmode = 'USA-states',
        lon = df_sub['longitude'],
        lat = df_sub['latitude'],
        text = df_sub['city']+'<br>'+'Review score: '+df_sub['mean'].astype(str)+'<br>'+'# reviews: '+df_sub['count'].astype(str),
        name = f'{lower} - {upper}',
        marker = dict(
            color = color,
            opacity = 0.8,
            size = 8,
            line_color = 'rgb(40,40,40)',
            line_width = 0.5)), row=2, col=1)

# Apply the same USA map settings to every geo subplot (the choropleth's and
# the scatter map's), rather than only to the first one.
fig.update_geos(
    scope = 'usa',
    projection_type = 'albers usa',
    showlakes = True,
    lakecolor = 'rgb(255, 255, 255)')

# Render explicitly instead of relying on the return value of the last call.
fig.show()

# fig.update_layout(
#         title = dict(
#             text = 'Average Beer Rating by City<br>(Click on legend to toggle ratings)',
#             xanchor = 'center',
#             x = 0.5),
#         showlegend = True,
#         legend = dict(
#             title = 'Review Score',
#             x = 0.92,
#             y = 0.52),
#         geo = dict(
#             scope = 'usa',
#             landcolor = 'rgb(217,217,217)'))