In [None]:
import numpy as np
import pandas as pd

# Cosine Similarity

In [None]:
# Tasting notes to match reviews against; one vector dimension per entry,
# in this order.
attributes = [
    'citrus',
    'tropical',
    'hops',
]

In [None]:
from collections import defaultdict
# represent a review as a vector of a given set of attributes
def word2vec(words, attributes):
  """Count how often each attribute occurs in ``words``.

  Args:
    words: iterable of tokens (e.g. the words of one review).
    attributes: sequence of tokens to count; its order defines the order
      of the columns in the result.

  Returns:
    NumPy array of shape ``(1, len(attributes))`` whose i-th column is the
    number of times ``attributes[i]`` appears in ``words``.
  """
  tally = defaultdict(int)
  for token in words:
    # Tokens that are not tracked attributes are ignored entirely.
    if token not in attributes:
      continue
    tally[token] += 1
  counts = [tally[name] for name in attributes]
  # Single-row 2-D layout, as expected by pairwise similarity functions.
  return np.array(counts).reshape(1, -1)

In [None]:
# Load the review dataset, keeping only the beer name and the review text.
review_columns = ['name', 'review']
reviews = pd.read_csv('beer_reviews.csv')[review_columns]

In [None]:
import re
# clean reviews: lower-case each review and split it into word tokens.
# The pattern is a raw string so that \w reaches the regex engine unchanged;
# in a normal string literal "\w" is an invalid escape sequence, which newer
# Python versions warn about.
reviews['split_review'] = reviews.review.map(lambda x: re.findall(r'\w+', x.lower()))

In [None]:
# Reduce dimensionality for cosine similarity: describe each tokenised review
# only by how often the key attributes occur in it.
reviews['attr_vector'] = reviews['split_review'].map(
    lambda tokens: word2vec(tokens, attributes)
)
reviews

Unnamed: 0,name,review,split_review,attr_vector
0,Kentucky Brunch Brand Stout,"So ok, on the real, I gave up a ton for a coup...","[so, ok, on, the, real, i, gave, up, a, ton, f...","[[0, 0, 0]]"
1,Kentucky Brunch Brand Stout,"Poured into a snifter glass, a dark chocolaty ...","[poured, into, a, snifter, glass, a, dark, cho...","[[0, 0, 0]]"
2,Kentucky Brunch Brand Stout,If there's any beer that deserves the rating o...,"[if, there, s, any, beer, that, deserves, the,...","[[0, 0, 0]]"
3,Kentucky Brunch Brand Stout,This brew has been on my bucket list for a whi...,"[this, brew, has, been, on, my, bucket, list, ...","[[0, 0, 0]]"
4,Kentucky Brunch Brand Stout,I didnt think i was going to give it a perfect...,"[i, didnt, think, i, was, going, to, give, it,...","[[0, 0, 0]]"
...,...,...,...,...
5548,Stickee Monkee,"Look: Dark, not too thick.","[look, dark, not, too, thick]","[[0, 0, 0]]"
5549,Stickee Monkee,Great quad!,"[great, quad]","[[0, 0, 0]]"
5550,Stickee Monkee,"look dark amber, little head, short lived cinn...","[look, dark, amber, little, head, short, lived...","[[0, 0, 0]]"
5551,Stickee Monkee,I've had the delight of both the 2014 and 2015...,"[i, ve, had, the, delight, of, both, the, 2014...","[[0, 0, 0]]"


In [None]:
# Target profile: the attribute list scored against itself, i.e. the vector
# of a review that mentions every attribute.
attr_vector = word2vec(words=attributes, attributes=attributes)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Score every review against the target profile in one vectorised call
# instead of one scikit-learn call (with its input validation) per row.
# Each stored vector is a single row, so stacking them gives a matrix with
# one row per review and one column per attribute.
review_matrix = np.vstack(reviews['attr_vector'].tolist())
# attr_vector is a single row, so the similarity matrix has exactly one
# column; take it as a flat array aligned with the rows of `reviews`.
reviews['cos_similarity'] = cosine_similarity(review_matrix, attr_vector)[:, 0]
reviews

Unnamed: 0,name,review,split_review,attr_vector,cos_similarity
0,Kentucky Brunch Brand Stout,"So ok, on the real, I gave up a ton for a coup...","[so, ok, on, the, real, i, gave, up, a, ton, f...","[[0, 0, 0]]",0.0
1,Kentucky Brunch Brand Stout,"Poured into a snifter glass, a dark chocolaty ...","[poured, into, a, snifter, glass, a, dark, cho...","[[0, 0, 0]]",0.0
2,Kentucky Brunch Brand Stout,If there's any beer that deserves the rating o...,"[if, there, s, any, beer, that, deserves, the,...","[[0, 0, 0]]",0.0
3,Kentucky Brunch Brand Stout,This brew has been on my bucket list for a whi...,"[this, brew, has, been, on, my, bucket, list, ...","[[0, 0, 0]]",0.0
4,Kentucky Brunch Brand Stout,I didnt think i was going to give it a perfect...,"[i, didnt, think, i, was, going, to, give, it,...","[[0, 0, 0]]",0.0
...,...,...,...,...,...
5548,Stickee Monkee,"Look: Dark, not too thick.","[look, dark, not, too, thick]","[[0, 0, 0]]",0.0
5549,Stickee Monkee,Great quad!,"[great, quad]","[[0, 0, 0]]",0.0
5550,Stickee Monkee,"look dark amber, little head, short lived cinn...","[look, dark, amber, little, head, short, lived...","[[0, 0, 0]]",0.0
5551,Stickee Monkee,I've had the delight of both the 2014 and 2015...,"[i, ve, had, the, delight, of, both, the, 2014...","[[0, 0, 0]]",0.0


In [None]:
# Rank reviews from the best to the worst attribute match.
reviews = reviews.sort_values(by='cos_similarity', ascending=False)
# Take an explicit copy: columns are added to `output` later on, and writing
# them into a slice of `reviews` makes pandas emit SettingWithCopyWarning.
output = reviews[['name', 'review', 'cos_similarity']].copy()
# Persist the similarity ranking (the DataFrame index is written as well).
output.to_csv('cos_similarity_recommendations.csv')

# Sentiment Analysis

In [None]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
vader = SentimentIntensityAnalyzer()
# add context: add or override lexicon entries for tasting terms so they are
# scored with the valences below
newWords = {'bitter': 0.75, 'hoppy': 1, 'opaque': 1, 'sour': 1.5, 'boozy': -2, 'alcohol': -2}
vader.lexicon.update(newWords)

# Compound polarity score of every review. assign() returns a new DataFrame,
# so the column is not written into a slice of another frame (which would
# trigger pandas' SettingWithCopyWarning).
output = output.assign(
    sentiment=output['review'].map(lambda text: vader.polarity_scores(text)['compound'])
)
output.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output['sentiment'] = output.review.map(lambda x: vader.polarity_scores(x)['compound'])


Unnamed: 0,name,review,cos_similarity,sentiment
1753,Haze,Received a few cans in a local trade. Poured i...,1.0,0.9554
3357,Double Dry Hopped Double Mosaic Daydream,Beautiful mango juice color. Enticing mosaic a...,1.0,0.9705
2279,4th Anniversary,"Brewed with pilsner malt, oats and wheat; quad...",1.0,0.2083
3119,Cutting Tiles - Mosaic,Wow. JUST WOW. Pours a cloudy yellow with hazy...,1.0,0.9784
721,Very Green,Dank hops & orange rind. Tropical notes are do...,1.0,0.8658


In [None]:
# Evenly weighted recommendation score: attribute similarity times sentiment.
output['recommendation_score'] = output['cos_similarity'] * output['sentiment']
score_columns = ['name', 'cos_similarity', 'sentiment', 'recommendation_score']
# Average the per-review scores for each beer and rank beers best-first.
output_rec = (
    output[score_columns]
    .groupby(by=['name'])
    .mean()
    .sort_values(by='recommendation_score', ascending=False)
)

In [None]:
# Preview the first five rows of the ranked per-beer scores.
output_rec.head(n=5)

Unnamed: 0_level_0,cos_similarity,sentiment,recommendation_score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Society & Solitude #5,0.3084,0.50895,0.218555
Society & Solitude #6,0.224336,0.535467,0.209314
Sosus,0.222058,0.650077,0.202903
Triple Sunshine,0.244893,0.654057,0.194617
Cutting Tiles - Mosaic,0.221496,0.753738,0.190344


# Our Recommendation
Based on your preferences for a citrus-y, tropical, hoppy beer, we have three brews for you to try out. All three should be somewhere in the ballpark given your exact tasting notes and are as follows:
1. "Society & Solitude #5"
2. "Society & Solitude #6"
3. "Sosus"

Keep in mind that our current recommendation model matches exact keywords only, so it may yield very different results if you describe similar characteristics with different words (for example, "hoppy" instead of "hops").