In [43]:
# pandas handles the CSV tables; spaCy supplies the document similarity used below.
import pandas as pd 
import spacy
# Load the spaCy pipeline once; the similarity cell reuses `nlp` for every text.
# NOTE(review): assumes the 'en_core_web_md' model package is installed in this
# environment — confirm before running on a fresh machine.
nlp = spacy.load('en_core_web_md')

In [None]:
# Input File Name
# CSV of reviews. Later cells read these columns from it: 'Product Review',
# 'Product Name', 'Cosine Similarity TFIDF' and 'Sentiment Scores'.
inputfile = 'CDE.bagofwords.csv'

# Output File Name
# NOTE(review): identical to `inputfile`, so each `data.to_csv(outputfile, ...)`
# further down overwrites the input file in place — confirm this is intended.
outputfile = 'CDE.bagofwords.csv'

# Name of the column holding the review text that is compared to the attributes
messagecolumn = 'Product Review'

# Import Data
data = pd.read_csv(inputfile)

# User Attributes
# Attribute words; they are joined into a single text and compared against
# every review in the similarity cell.
userattributes = ['thick', 'rich', 'bodied']

In [47]:
# Joining the Attributes
# The attribute words become one short text; every review is scored against it.
text1 = ' '.join(userattributes)
doc1 = nlp(text1)

# Cleaning the Reviews
# pandas reads an empty review cell as NaN (a float), which spaCy cannot parse.
# Treat missing reviews as empty text and coerce any other non-string cell to
# its string form so a single bad row does not abort the whole run.
review_texts = data[messagecolumn].fillna('').astype(str)

# Calculating Similarity
# nlp.pipe streams the texts through the pipeline and yields one Doc per text,
# in order, which avoids the per-call overhead of nlp(text) inside a loop.
# An empty review has no vector; spaCy is expected to score it 0.0 (and warn).
spacy_scores = [doc1.similarity(doc2) for doc2 in nlp.pipe(review_texts)]

# Saving Results to DataFrame
data['Spacy Similarity'] = spacy_scores

# Outputting CSV
data.to_csv(outputfile, index = False)

# Inputting Recommendations
# Keep the first three product names listed in the recommendations file.
recommendations = pd.read_csv("E.beer_recommendations.csv")
recommendation = recommendations['Product Name'][0:3].values.tolist()

In [80]:
# Percent of Each Beer That Contains the Attributes
# For every recommended beer, compute the share of its reviews whose score is
# non-zero, once for the TF-IDF cosine similarity and once for the spaCy one.
# NOTE(review): the recorded output shows 100% vector similarity for every
# beer, so "!= 0" may be too weak a test for the spaCy score — consider
# comparing against a threshold instead.
percent_TFIDF = []
percent_spacy = []

for name in recommendation:
    # Filter this beer's reviews once and reuse the subset for each statistic.
    beer_reviews = data[data['Product Name'] == name]
    total_count = len(beer_reviews)

    if total_count == 0:
        # The beer has no reviews in `data`: the percentage is undefined, so
        # record NaN instead of raising ZeroDivisionError.
        percent_TFIDF.append(float('nan'))
        percent_spacy.append(float('nan'))
        continue

    non_zero_TFIDF = int((beer_reviews['Cosine Similarity TFIDF'] != 0).sum())
    non_zero_spacy = int((beer_reviews['Spacy Similarity'] != 0).sum())
    percent_TFIDF.append((non_zero_TFIDF / total_count) * 100)
    percent_spacy.append((non_zero_spacy / total_count) * 100)

In [82]:
# Comparing Results
# One row per recommended beer, with the percentage from each scoring method
# placed side by side.
comparison = pd.DataFrame({
    'Product Name': recommendation,
    'Bag of Words Similarity': percent_TFIDF,
    'Vector Similarity': percent_spacy,
})
comparison

Unnamed: 0,Product Name,Bag of Words Similarity,Vector Similarity
0,Superstition Grand Cru Berry - F.O. Barrel Age...,27.906977,100.0
1,B. Nektar Ken Schramm Signature Series - The H...,28.0,100.0
2,Toppling Goliath SR-71 Blackbird (2015 Bottlin...,42.696629,100.0


In [84]:
# Finding New Evaluation Scores Using Spacy
# Calculate Evaluation Score Function
def evaluation(cosine_sim, sentiment, sim_weight=0.8, sentiment_weight=0.2):
    """Blend a similarity score and a sentiment score into one evaluation score.

    The sentiment is rescaled with ``(sentiment + 1) / 2`` (presumably it lies
    in [-1, 1], which this maps onto [0, 1] — confirm against the sentiment
    source) and then combined with the similarity as a weighted sum.

    Parameters
    ----------
    cosine_sim : number
        Similarity score of the review.
    sentiment : number or numeric string
        Sentiment score of the review; converted with ``float()``.
    sim_weight : float, optional
        Weight applied to ``cosine_sim``. Defaults to 0.8.
    sentiment_weight : float, optional
        Weight applied to the rescaled sentiment. Defaults to 0.2.

    Returns
    -------
    float
        ``cosine_sim * sim_weight + rescaled_sentiment * sentiment_weight``,
        or 0.0 when either input is missing (``None``) or not numeric.
        A NaN input is not treated as an error and yields NaN.
    """
    try:
        norm_sentiment = (float(sentiment) + 1) / 2
        return (cosine_sim * sim_weight) + (norm_sentiment * sentiment_weight)
    except (TypeError, ValueError):
        # Only conversion/arithmetic failures on bad input fall back to 0;
        # any other error is a real bug and is allowed to surface.
        return 0.0
# Import Data
cosine_sim = data['Spacy Similarity']
sentiment = data['Sentiment Scores']

# Calculating Evaluation Score
# Pair the two columns positionally so the result does not depend on the
# DataFrame's index labels being exactly 0..n-1.
evaluation_list = [evaluation(sim, sent) for sim, sent in zip(cosine_sim, sentiment)]

# Save Evaluation Score 
data['Spacy Evaluation Score'] = evaluation_list

# Aggregate Evaluation Scores per Beer
beers = data['Product Name'].drop_duplicates()
# Select the score column before averaging. The first positional argument of
# GroupBy.mean is `numeric_only`, not a column name, so passing the column
# name there only worked because a non-empty string is truthy.
beer_score = data.groupby('Product Name')[['Spacy Evaluation Score']].mean()
# Highest mean score first, so the top rows are the new recommendations.
beer_score = beer_score.sort_values('Spacy Evaluation Score', ascending = False)

# Output to CSV
data.to_csv(outputfile, index = False)

beer_score.to_csv('F.new_recommendations.csv')

In [85]:
# New Recommendations
# `beer_score` is sorted by mean score in descending order, so its first three
# index entries are the three best-scoring beers.
pd.DataFrame(beer_score.head(3).index)

Unnamed: 0,Product Name
0,Anchorage A Deal With The Devil - Double Oaked...
1,Sahtipaja MeadMe Batch #2 - Bourbon Vanilla🇸🇪M...
2,Superstition Grand Cru Berry - F.O. Barrel Age...
