### Spacy Similarity

After installing spaCy, run the following command in your terminal to download the English model that this notebook loads:
<br />
`python -m spacy download en_core_web_lg`

In [1]:
import pandas as pd
import numpy as np
import spacy

# Load the English pipeline that the `spacy download en_core_web_lg` step above fetches.
# Every later cell that parses text goes through this single `nlp` object.
nlp = spacy.load("en_core_web_lg")
# Stop-word set taken from the loaded pipeline's language defaults.
STOPWORDS = nlp.Defaults.stop_words

In [2]:
# Read the review table; later cells use its `beer`, `comment` and `overall` columns.
df = pd.read_csv('output.csv')
# Preview the first ten rows.
df.head(10)

Unnamed: 0,beer,look,smell,taste,feel,overall,comment
0,Kentucky Brunch Brand Stout,4.75,4.75,4.75,4.75,5.0,"2016 Silver Wax. Aroma has whiskey, maple, tof..."
1,Kentucky Brunch Brand Stout,4.5,4.75,4.75,4.75,4.75,The beer pours Pitch Black with a frothy tan h...
2,Kentucky Brunch Brand Stout,4.75,4.75,4.5,5.0,4.75,Probably the smoothest beer I have ever had. S...
3,Kentucky Brunch Brand Stout,5.0,5.0,5.0,5.0,5.0,"Dark black, very thick, a little bit of tan he..."
4,Kentucky Brunch Brand Stout,4.5,5.0,5.0,5.0,5.0,Poured black as ink with thin ruby edges at 58...
5,Kentucky Brunch Brand Stout,5.0,5.0,4.5,5.0,4.75,she’s got heat....but man is she somethin nice...
6,Kentucky Brunch Brand Stout,5.0,5.0,5.0,5.0,5.0,Amazing brew. The maple aroma pours out of thi...
7,Kentucky Brunch Brand Stout,5.0,5.0,3.75,4.5,4.25,Finally got to try the white whale. Pours like...
8,Kentucky Brunch Brand Stout,5.0,5.0,4.75,5.0,4.75,Had this on tap at the 12-15-18 release in Dec...
9,Kentucky Brunch Brand Stout,4.75,5.0,4.75,5.0,5.0,Had this on tap At the KBBS release 12/15/18 p...


In [41]:
# Descriptors that each review is compared against.
attributes = ['crisp', 'balanced', 'complex']
# The descriptors joined into one space-separated string, the form that is parsed with `nlp`.
attstr = ' '.join(attributes)

In [42]:
def process_text(text):
    """Normalise a review into a space-separated string of lemmas.

    The text is lower-cased and parsed with the notebook's spaCy pipeline.
    Stop words, punctuation and pronouns are dropped; the lemma of every
    remaining token is kept, in order.

    Parameters
    ----------
    text : str
        Raw review text. Any non-string value (for example the NaN that
        pandas produces for an empty CSV cell) is treated as an empty review.

    Returns
    -------
    str
        The surviving lemmas joined by single spaces; '' when nothing survives
        or when `text` is not a string.
    """
    # Guard: `text.lower()` would raise AttributeError on a float NaN.
    if not isinstance(text, str):
        return ''

    doc = nlp(text.lower())
    kept_lemmas = [
        token.lemma_
        for token in doc
        if token.text not in STOPWORDS
        and not token.is_punct
        # spaCy 2.x lemmatises pronouns to the sentinel '-PRON-'. spaCy 3 no
        # longer emits that sentinel, so the part-of-speech tag is checked too;
        # otherwise the pronoun filter silently does nothing on spaCy 3.
        and token.lemma_ != '-PRON-'
        and token.pos_ != 'PRON'
    ]
    return ' '.join(kept_lemmas)

In [43]:
# Parsed attribute documents, keyed by the attribute string they were built from.
# Re-running this cell resets the cache (do so after loading a different model).
_BASE_DOC_CACHE = {}


def calculate_similarity(text):
    """Score how similar a review is to the target attribute string.

    The review is cleaned with `process_text`, parsed with the spaCy pipeline
    and compared against the parsed `attstr` using `Doc.similarity`.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    float
        Similarity between the attribute document and the cleaned review;
        0.0 when the cleaned review contains no tokens.
    """
    # The attribute string is the same for every review, so parse it once per
    # distinct value instead of once per call.
    base = _BASE_DOC_CACHE.get(attstr)
    if base is None:
        base = _BASE_DOC_CACHE[attstr] = nlp(attstr)

    compare = nlp(process_text(text))
    # Nothing left after filtering: there is no content to compare against.
    if len(compare) == 0:
        return 0.0
    return base.similarity(compare)

In [107]:
# Score the first 6000 reviews: one record per review holding the beer name,
# the review text, the reviewer's overall rating and the similarity score.
sims = [
    {
        'product_name': row.beer,
        'product_review': row.comment,
        'user_rating': row.overall,
        'similarity_score': calculate_similarity(row.comment),
    }
    for _, row in df.head(6000).iterrows()
]

In [108]:
# Tabulate the scored reviews, most similar first, then show the top 300 rows.
sims_df = pd.DataFrame(sims).sort_values(by='similarity_score', ascending=False)
sims_df.iloc[:300]

Unnamed: 0,product_name,product_review,similarity_score,user_rating
671,Oude Geuze Vintage,This is a pinnacle of gooziness. Extremely com...,0.785470,5.00
5003,Haze,"A citrus bomb, but not overly citrusy. There i...",0.769093,4.50
2209,Sip Of Sunshine,y excellent DIPA. Somewhat pale orange color w...,0.762809,4.50
1434,Darkstar November,My favorite beer. So complex and nicely balanc...,0.762789,5.00
5963,Flora,"Flora is clean, crisp, light, and tart, with s...",0.762249,4.75
672,Nectarine Premiere,"L: Radiant shining hues of apricots, tangerine...",0.758755,4.75
763,Beatification,"Bottle. A: Golden color, totally clear, with ...",0.753764,4.50
4137,Peche 'n Brett,This is exceptional and yet so simple. Fresh p...,0.753475,4.50
3531,Nelson,"Deep yellow color, moderate clarity. Bright wh...",0.753122,4.25
543,Dorothy (Wine Barrel Aged),"Yeasty, fairly dry, and a reasonable barrel pr...",0.752609,4.25


In [109]:
# Persist the ranked table without the index column.
sims_df.to_csv('similarity_score.csv', index=False)

In [22]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# One shared analyser instance, reused by every call below.
analyser = SentimentIntensityAnalyzer()


def sentiment_analyzer_scores(sentence):
    """Return the 'compound' entry of the analyser's polarity scores for `sentence`."""
    return analyser.polarity_scores(sentence)['compound']

In [23]:
# Reload the saved similarity table and attach a compound sentiment score to
# every review.
df_sims_sentiment = pd.read_csv('similarity_score.csv')
# Fix: the scores were previously mapped from `df_sentiment`, a name that is
# not defined anywhere in the visible notebook (NameError on a fresh kernel).
# The reviews live in the frame loaded on the line above.
df_sims_sentiment['Sentiment Score'] = df_sims_sentiment['product_review'].map(sentiment_analyzer_scores)

In [24]:
# Rank reviews from highest to lowest sentiment score, preview the top rows,
# then save the combined table without the index column.
df_sims_sentiment = df_sims_sentiment.sort_values(by='Sentiment Score', ascending=False)
print(df_sims_sentiment.head())
df_sims_sentiment.to_csv('similarity_score&sentiment_score.csv', index=False)

                                           product_name  \
2176                                      Beatification   
5400  BBADL (Bourbon Barrel Aged Dark Lord Imperial ...   
40                                       Peche 'n Brett   
389                                            Sure Bet   
1040                                 Oude Geuze Vintage   

                                         product_review  similarity_score  \
2176  Batch pH1  Pours a bright, peachy, warm golden...          0.669302   
5400  I promised not to review anything from Dark Lo...          0.586601   
40    E: Beeswax coated pry-off crown cap. Purchased...          0.736755   
389   Currently #175 in the Top 250 beers list and #...          0.706466   
1040  Presentation: Managed to snaffle two bottles o...          0.688740   

      user_rating  Sentiment Score  
2176         5.00           0.9995  
5400         5.00           0.9994  
40           4.75           0.9994  
389          4.00           0.9993

In [28]:
# Keep only the first 300 rows of the saved similarity table. The cell that
# writes similarity_score.csv sorts by similarity_score in descending order
# first, so these are the 300 highest-scoring reviews.
# `.copy()` makes this an independent frame: a later cell adds a column to it,
# and doing that on a bare slice triggers pandas' SettingWithCopyWarning.
df_sentiment_300 = pd.read_csv('similarity_score.csv').iloc[:300].copy()
print(df_sentiment_300.shape)
df_sentiment_300.head()

(300, 4)


Unnamed: 0,product_name,product_review,similarity_score,user_rating
0,Oude Geuze Vintage,This is a pinnacle of gooziness. Extremely com...,0.78547,5.0
1,Haze,"A citrus bomb, but not overly citrusy. There i...",0.769093,4.5
2,Sip Of Sunshine,y excellent DIPA. Somewhat pale orange color w...,0.762809,4.5
3,Darkstar November,My favorite beer. So complex and nicely balanc...,0.762789,5.0
4,Flora,"Flora is clean, crisp, light, and tart, with s...",0.762249,4.75


In [30]:
# Score each of the selected reviews, then order the table from highest to
# lowest sentiment score and preview it.
review_sentiment = df_sentiment_300['product_review'].map(sentiment_analyzer_scores)
df_sentiment_300['Sentiment Score'] = review_sentiment
df_sentiment_300 = df_sentiment_300.sort_values(by='Sentiment Score', ascending=False)
df_sentiment_300.head()

Unnamed: 0,product_name,product_review,similarity_score,user_rating,Sentiment Score
40,Peche 'n Brett,E: Beeswax coated pry-off crown cap. Purchased...,0.736755,4.75,0.9994
160,Society & Solitude #4,"Enjoyed on tap at Armsby Abbey in Worcester, M...",0.718309,5.0,0.999
99,Sang Noir,750ml brown glass bottle with appealing label ...,0.724068,4.5,0.999
120,Green,My dad showed up today. He's on a modified car...,0.721891,4.75,0.999
83,Lou Pepe - Framboise,2008 vintage purchased in Brussels from Bier T...,0.726821,4.5,0.9984


In [31]:
# Export the sentiment-ranked selection without the index column.
df_sentiment_300.to_csv('taskD.csv', index=False)