In [None]:
#Quick cell to make jupyter notebook use the full screen width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Enable autoreloading from src
%load_ext autoreload
%autoreload 2

In [None]:
#Some plotting libraries
import matplotlib.pyplot as plt
# Select the matplotlib backend used for figures in this notebook ("notebook" backend).
%matplotlib notebook
from bokeh.plotting import show, save, output_notebook, output_file
from bokeh.resources import INLINE 
# Route bokeh output to the notebook's output cells, using the INLINE resources object
# (presumably embeds BokehJS in the notebook instead of loading it from a CDN — confirm).
output_notebook(resources=INLINE)

In [None]:
from src import workflow
from src.data import Dataset

### What if we wanted to group beers only by the users who liked them?

Are two beers similar simply because the same reviewers tried both of them? Perhaps not. Instead, let's filter to only the reviewers who enjoyed the beer.

Because this is a statement about reviewers rather than beers, we need to filter our initial data frame and re-run our process.

In [None]:
# Display the datasets the workflow reports as available
# (presumably the names that `Dataset.load` accepts — confirm).
workflow.available_datasets()

In [None]:
# Load the 'beer_review_all' dataset and keep a direct handle on its `.data`
# attribute (presumably the reviews data frame referred to above — confirm).
reviews_ds = Dataset.load('beer_review_all')
reviews = reviews_ds.data

Let's do some initial exploratory data analysis (EDA).

In [None]:
# Distribution of overall review scores: count the reviews at each distinct
# `review_overall` value and plot the counts in ascending score order.
ax = reviews["review_overall"].value_counts().sort_index().plot.bar()
# Label the figure so it can be read on its own when the notebook is skimmed.
# The trailing semicolon suppresses the textual repr of the `set` return value.
ax.set(
    title="Number of reviews per overall score",
    xlabel="review_overall",
    ylabel="Number of reviews",
);

Wow, people like the beer that they try, or at least the beer that they review. Given this plot, let's take a positive review to be one with an overall score of 4.5 or higher.

Again, we'll use groupby to aggregate in the desired fashion. We'll add it directly to `transformers.py` this time.

In [None]:
# Display the transformers the workflow reports as available
# (presumably includes `groupby_beer_to_reviewers` once it is in `transformers.py` — confirm).
workflow.available_transformers()

In [None]:
from src.data.transformers import groupby_beer_to_reviewers

In [None]:
# Print the transformer's signature and docstring before using it.
help(groupby_beer_to_reviewers)

In [None]:
# Apply the transformer directly to the reviews dataset. The threshold is passed
# explicitly so this ad-hoc run uses the same 4.5 cut-off for a "positive"
# review as the workflow recipe defined later in this notebook, instead of
# silently relying on the function's default.
beer_ds = groupby_beer_to_reviewers(reviews_ds, positive_threshold=4.5)

In [None]:
# Pull the underlying data out of the dataset returned by the transformer.
beer = beer_ds.data

In [None]:
# Preview the first three rows, transposed so each original column is shown as a row.
beer.head(3).T

Now let's add it to the workflow.

In [None]:
# Recipe for the workflow: a list of (transformer name, keyword arguments) pairs.
# Here a single step, with 4.5 as the cut-off for a positive review.
transformations = [("groupby_beer_to_reviewers", dict(positive_threshold=4.5))]

In [None]:
# Register the recipe with the workflow: read 'beer_review_all', apply the
# listed transformations, and name the result 'beer_by_reviewers'.
workflow.add_transformer(
    input_dataset="beer_review_all",
    transformations=transformations,
    output_dataset="beer_by_reviewers",
)

In [None]:
# Ask the workflow to build its data (presumably this generates the
# 'beer_by_reviewers' dataset registered above — confirm).
workflow.make_data()

In [None]:
# Load the dataset under the name given to the workflow's output.
# Note: this rebinds `beer_ds` and `beer`, replacing the objects produced by the
# direct transformer call earlier in the notebook.
beer_ds = Dataset.load("beer_by_reviewers")
beer = beer_ds.data

In [None]:
# Preview the first three rows of the reloaded data, transposed so each original
# column is shown as a row.
beer.head(3).T

Time for the analysis. See [09-Analysis-Beer-by-Reviewer-Positive.ipynb](09-Analysis-Beer-by-Reviewer-Positive.ipynb)