In [202]:
import plotly.plotly
import plotly.graph_objs as go
from IPython.display import IFrame 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
plotly.offline.init_notebook_mode(connected=True)
import numpy as np
import pandas as pd
import datetime
from plotly import tools

In [73]:
# Retrieve datasets
reviews = pd.read_csv('data/beer_ratings.csv').drop('Unnamed: 0',axis = 1).set_index('review_id')
reviews.posted = pd.to_datetime(reviews.posted)
beers = pd.read_csv('data/beer_info.csv').drop('Unnamed: 0',axis=1).set_index('beer_id')
family_lookup = pd.read_csv('data/beer_family_lookup.csv').drop('idx',axis=1)
beers = pd.merge(beers,family_lookup,on='beer_style')

In [254]:
### Show boxplots of the percentage of beers rated in each family by the top 10 reviewers
def boxplot_family():
    top10 = reviews.groupby('username').agg('count').sort_values('posted',ascending=False).index[:10].values
    df = reviews[reviews.username.isin(top10)].join(beers[['family','BAscore']],on='beer_id',how='left')
    df = df.groupby(by=['family','username']).agg({'posted':'count','score':'mean'})
    families = df.reset_index().family.unique()    
    data = []
    fig = tools.make_subplots(rows=2, cols=1)
    i=0
    for family in families[:10]:
        num_beers = len(beers.reset_index()[beers.family == family].index.values)
        trace1 = go.Box(
            y=df.loc[family]['posted'] / num_beers,
            name=family,
            xaxis='x1',
            yaxis='y1'
            
        )
        trace2 = go.Box(
            y=df.loc[family]['score'],
            name=family,
            xaxis='x1',
            yaxis='y2'
        )
        data.append(trace1)
        data.append(trace2)
        i=i+1
            
    layout = go.Layout(
        xaxis = dict(            
            anchor='x1',
            title='Beer Family',
            linewidth = 2,
            mirror = True
        ),
        yaxis1 = dict(
            domain=[0, 0.5],
            anchor='y1',
            title = 'Number of Reviews'
        ),
        
        yaxis2 = dict(
            domain=[0.5, 1],
            anchor='y2',
            title = 'Ratings'
        ),
        showlegend = False
        
    )
    fig = go.Figure(data=data,layout=layout)
    
    return plotly.offline.iplot(fig)
boxplot_family()

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



In [194]:
df.loc['India Pale Ales']['posted'] / len(beers.reset_index()[beers.family == 'India Pale Ales'].index.values)

username
BEERchitect      0.291275
NeroFiddled      0.199316
StonedTrippin    0.369119
UCLABrewN84      0.265612
brentk56         0.349444
jlindros         0.279726
metter98         0.331908
superspak        0.326775
woodychandler    0.295124
zeff80           0.256630
Name: posted, dtype: float64

In [248]:
np.linspace(1,39,11)

array([ 1. ,  4.8,  8.6, 12.4, 16.2, 20. , 23.8, 27.6, 31.4, 35.2, 39. ])

In [200]:
styles

array(['Bocks', 'Brown Ales', 'Dark Ales', 'Dark Lagers', 'Hybrid Beers',
       'India Pale Ales', 'Pale Ales', 'Pilseners and Pale Lagers',
       'Porters', 'Specialty Beers', 'Stouts', 'Strong Ales',
       'Wheat Beers', 'Wild/Sour Beers'], dtype=object)

In [10]:
beers.head(2)

Unnamed: 0_level_0,BAscore,abv,beer_img,beer_name,beer_style,brewery,desc,num_reviews,ranking
beer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,4.35,7.0,https://cdn.beeradvocate.com/im/beers/141544.jpg,Yakima Fresh Hop,American IPA,Columbus Brewing Company,None provided.,29,1033
2,3.59,7.0,https://cdn.beeradvocate.com/im/beers/94770.jpg,Red Clay IPA,American IPA,Sunken City Brewing Company,None provided.,24,38065


In [1]:
## Plot counts per day
ts = reviews.reset_index().posted.value_counts().index
cts = reviews.reset_index().posted.value_counts().values
trace = go.Scatter(
    x = ts,
    y = cts,
    mode = 'markers'
)
review_counts_by_time = [trace]

layout = dict(title = 'Frequency of Reviews',
              yaxis = dict(
                  title = 'Count'),
              xaxis = dict(
                  title = 'Date'
              )
             )
fig = dict(data = review_counts_by_time, layout = layout)

NameError: name 'reviews' is not defined

In [59]:
reviews.head(3)

Unnamed: 0_level_0,beer_id,posted,ratings,score,username
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,2014-11-07,[''],4.4,Swettenham_Shire
2,1,2018-11-08,"[3.75, 3.25, 4.5, 4.25, 4.25]",4.08,KurtisCarman
3,1,2018-10-24,"[4.0, 4.75, 4.5, 4.5, 4.5]",4.53,ChilliHeights


In [6]:
df = pd.DataFrame({'nr':beers.num_reviews,'ba':beers.BAscore}).sort_values('nr')
x = df.nr.values
y = df.ba.values

# calculate polynomial
z = np.polyfit(x, y, 3)
f = np.poly1d(z)
f_text = ' + '.join(['{:0.3e}X^{}'.format(f[x],x) for x in range(len(f)+1)])


# calculate new x's and y's
x_fit = np.linspace(x[0], x[-1], 50)
y_fit = f(x_fit)

trace1 = go.Scatter(
                  x=x,
                  y=y,
                  mode='markers',
                  marker=go.scatter.Marker(color='rgb(255, 127, 14)'),
                  name='Data'
                  )
trace2 = go.Scatter(
                  x=x_fit,
                  y=y_fit,
                  mode='lines',
                  marker=go.scatter.Marker(color='rgb(31, 119, 180)'),
                  name='Fit',
                  text ='3d-PolyFit:{}'.format(f_text),
                  textposition = 'bottom center',
                  textfont=dict(
                          family='sans serif',
                          size=18,
                          color='#ff7f0e'
                          )
                  )

layout = go.Layout(
                title='Number of Reviews v. Beer Advocate Score',
                plot_bgcolor='rgb(229, 229, 229)',
                xaxis = dict(
                    zerolinecolor='rgb(255,255,255)', 
                    gridcolor='rgb(255,255,255)'
                ),
                yaxis= dict(
                    zerolinecolor='rgb(255,255,255)', 
                    gridcolor='rgb(255,255,255)'
                ),
                showlegend=False
                )


data=[trace1,trace2]
fig = go.Figure(data=data,layout=layout)
#plotly.offline.iplot(fig)

In [55]:
from IPython.display import display

In [56]:
def f(x):
    return x
w = widgets.IntSlider(f, (min=1.0, max =5.0))
display(w)



IntSlider(value=0)

In [57]:
w.value

28