In [1]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords


In [2]:
import re
import math

In [3]:
from collections import Counter

In [4]:
import operator

In [5]:
def stop_clean(text):
    text = text.split()
    text_keys = [word for word in city if word not in stopwords.words('english')]
    merge_text = " ".join(text_keys)
    return merge_text

In [6]:
df = pd.read_csv('df_1000.csv')

In [69]:
df_full = pd.read_csv('df_2000.csv')

In [7]:
df['review_clean']

0        the prince sure is an arrogant man people in m...
1        jaxon you sly dog you winks i sure as hell wil...
2        audrey i didnt like her she was a total bimbo ...
3        i must say jagger has some swag when it comes ...
4        xaviers words of romance are quite naughty i k...
                               ...                        
27060         good story worth the read  really enjoyed it
27061    the book is the best a great love story and ca...
27062    there has been a lot of buzz about this story ...
27063    it is not often that the word joyful can be us...
27064    i have read many of marquita valentines books ...
Name: review_clean, Length: 27065, dtype: object

In [8]:
stop = set(stopwords.words("english")) #setting stopwords

In [9]:
# taking only words which are not stopwords
df['word_without_stop'] = df['review_clean'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))

In [71]:
# taking only words which are not stopwords
df_full['word_without_stop'] = df_full['review_clean'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))


In [10]:
def cosine_similarity_of(text1, text2):
        #get words first
        first = re.compile(r"[\w']+").findall(text1)
        second = re.compile(r"[\w']+").findall(text2)

        #get dictionary with each word and count.
        vector1 = Counter(first)
        vector2 = Counter(second)

        #convert vectors to set to find common words as intersection
        common = set(vector1.keys()).intersection(set(vector2.keys()))

        dot_product = 0.0

        for i in common:
            #get amount of each common word for both vectors and multiply them then add them together.
            dot_product += vector1[i] * vector2[i]

        squared_sum_vector1 = 0.0
        squared_sum_vector2 = 0.0

        #get squared sum values of word counts from each vector.
        for i in vector1.keys():
            squared_sum_vector1 += vector1[i]**2

        for i in vector2.keys():
            squared_sum_vector2 += vector2[i]**2

        #calculate magnitude with squared sums.
        magnitude = math.sqrt(squared_sum_vector1) * math.sqrt(squared_sum_vector2)

        if not magnitude:
            return 0.0
        else:
            return float(dot_product) / magnitude

In [11]:
text1 = df['word_without_stop'][1]

In [12]:
text2 = df['word_without_stop'][2]

In [13]:
cosine_similarity_of(text1, text2)

0.13888386569176292

In [50]:
def get_recommendations(df,keywords):

        score_dict = {}

        for index, row in df.iterrows():
            score_dict[index] = cosine_similarity_of(row['word_without_stop'], keywords)

        #sort cities by score and index.
        sorted_scores = sorted(score_dict.items(), key=operator.itemgetter(1), reverse=True)

        counter = 0

        #create an empty results data frame.
        resultDF = pd.DataFrame(columns=('reviewerID', 'asin', 'title','word_without_stop', 'score'))

        #get highest scored 5 cities.
        for i in sorted_scores:
            #print index and score of the city.
            #print(i[0], i[1])
            resultDF = resultDF.append({'reviewerID': df.iloc[i[0]]['reviewerID'], 
                                        'asin': df.iloc[i[0]]['asin'], 'title': df.iloc[i[0]]['title'],
                                        'word_without_stop': df.iloc[i[0]]['word_without_stop'], 
                                        'score': i[1]}, ignore_index=True)
            counter += 1

            if counter>9:
                break

#         #convert DF to json.
#         json_result = json.dumps(resultDF.to_dict('records'))
        return resultDF

In [102]:
keywords = "best book ever"

In [103]:
top_5= get_recommendations(df,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,score
0,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,0.544331
1,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,0.522233
2,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,0.518476
3,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,0.5
4,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,0.5
5,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,0.489898
6,A1L9WQBSQ5SEFE,B0066HBLUO,CAPTURING THE COWBOY&#39;S HEART - Kindle edition,loved book enjoyable read im going let go book...,0.473879
7,AZYERRDY2VW61,B00629ZTOU,Texas Secrets: The Gallaghers of Morning Star ...,love book super book,0.471405
8,A15EMPO8TCH2RB,B01D4RY4B6,You Don&#39;t Own Me: A Bad Boy Mafia Romance ...,book made cry laughed book great book get two ...,0.471405
9,A19LXU1TT72OKX,B01FRQ4Z8E,Dietrich (Bear Shifter Dating Agency Romance) ...,received arc honest review best book read writ...,0.471405


In [104]:
top_5= get_recommendations(df_full,keywords)

In [105]:
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,score
0,A3CMIEYL0TJLC2,B015QBP4XU,Werewolf Romance: Seduced By The Alpha Wolf Bo...,best book ever read looking great werewolf boo...,0.666667
1,A3CMIEYL0TJLC2,B01B52FWAY,THAT MAN TRILOGY: A Sexy Romantic Comedy - Kin...,finally get read book best book,0.612372
2,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,0.544331
3,A2EZZ9UTFNNKRE,B00ZO50HZO,Hot &amp; Bothered (A Hostile Operations Team ...,oh gosh best series ever loved book much rest ...,0.544331
4,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,0.522233
5,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,0.518476
6,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,0.5
7,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,0.5
8,A3MQ4814P5LBTG,B018PPWK0E,The Way Back To Me (The Way Book 1) - Kindle e...,cant find words days completing book still can...,0.495074
9,A30TZBEYTQK723,B01A1E8WO0,Big Rock (Big Rock Book 1) - Kindle edition,book ok wasnt originality book like book invol...,0.493264


# Rating Effect Added

In [18]:
def get_rating_weight(rating, q):
#         if rating > 5 or rating < 0:
#             return None
#         else:
        m = (2*q) / 5 #10 because rating varies between 0 and 10
        b = -q
        return (m*rating) + b

In [19]:
def calculate_final_score(cs, r):
        amount = (cs / 100) * r

        return cs + amount

In [54]:
def get_recommendations_rat(df,keywords):

        score_dict = {}

        for index, row in df.iterrows():
            cs_score = cosine_similarity_of(row['word_without_stop'], keywords)

            rating = row['overall']
            
            rating_contribution = get_rating_weight(rating,10)

            final_score = calculate_final_score(cs_score, rating_contribution)

            score_dict[index] = final_score
            
        #sort cities by score and index.
        sorted_scores = sorted(score_dict.items(), key=operator.itemgetter(1), reverse=True)

        counter = 0

        #create an empty results data frame.
        resultDF = pd.DataFrame(columns=('reviewerID', 'asin','title','word_without_stop','overall', 'score'))

        #get highest scored 5 cities.
        for i in sorted_scores:
            #print index and score of the city.
            #print(i[0], i[1])
            resultDF = resultDF.append({'reviewerID': df.iloc[i[0]]['reviewerID'], 'asin': df.iloc[i[0]]['asin'],
                                        'title': df.iloc[i[0]]['title'],
                                        'word_without_stop': df.iloc[i[0]]['word_without_stop'],
                                        'overall': df.iloc[i[0]]['overall'],
                                        'score': i[1]}, ignore_index=True)
            counter += 1

            if counter>9:
                break

#         #convert DF to json.
#         json_result = json.dumps(resultDF.to_dict('records'))
        return resultDF

In [55]:
keywords = "best book ever"

In [76]:
top_5= get_recommendations_rat(df,keywords)

In [75]:
top_5= get_recommendations_rat(df_full,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A3CMIEYL0TJLC2,B015QBP4XU,Werewolf Romance: Seduced By The Alpha Wolf Bo...,best book ever read looking great werewolf boo...,5,0.733333
1,A3CMIEYL0TJLC2,B01B52FWAY,THAT MAN TRILOGY: A Sexy Romantic Comedy - Kin...,finally get read book best book,5,0.67361
2,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.598764
3,A2EZZ9UTFNNKRE,B00ZO50HZO,Hot &amp; Bothered (A Hostile Operations Team ...,oh gosh best series ever loved book much rest ...,5,0.598764
4,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.574456
5,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.570323
6,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.55
7,A3MQ4814P5LBTG,B018PPWK0E,The Way Back To Me (The Way Book 1) - Kindle e...,cant find words days completing book still can...,5,0.544581
8,A2F5K1M79GW649,B01ACB72TS,Brothers Black: Wyatt the Heartbreaker - Kindl...,book book world great man like wyattthis book ...,5,0.540089
9,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.538888


In [77]:
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.598764
1,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.574456
2,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.570323
3,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.55
4,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.538888
5,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,4,0.53
6,A1L9WQBSQ5SEFE,B0066HBLUO,CAPTURING THE COWBOY&#39;S HEART - Kindle edition,loved book enjoyable read im going let go book...,5,0.521267
7,AZYERRDY2VW61,B00629ZTOU,Texas Secrets: The Gallaghers of Morning Star ...,love book super book,5,0.518545
8,A15EMPO8TCH2RB,B01D4RY4B6,You Don&#39;t Own Me: A Bad Boy Mafia Romance ...,book made cry laughed book great book get two ...,5,0.518545
9,A19LXU1TT72OKX,B01FRQ4Z8E,Dietrich (Bear Shifter Dating Agency Romance) ...,received arc honest review best book read writ...,5,0.518545


# rating count added

In [24]:
def get_rating_weight_with_quantity(rating, c, T, q):
        if rating > 5 or rating < 0:
            return None
        else:
            m = (2*q) / 5 #10 because rating varies between 0 and 10
            b = -q
            val = (m*rating) + b

            M = math.exp((-T*0.68)/c)

            return val * M

In [58]:
def get_recommendations_rat_count(df,keywords):

        score_dict = {}

        for index, row in df.iterrows():
            cs_score = cosine_similarity_of(row['word_without_stop'], keywords)

            rating = row['overall']
            rating_count = row['Book_Count']
            threshold = 50
            
            rating_contribution = get_rating_weight_with_quantity(rating,rating_count,threshold,10)

            final_score = calculate_final_score(cs_score, rating_contribution)

            score_dict[index] = final_score
            
        #sort cities by score and index.
        sorted_scores = sorted(score_dict.items(), key=operator.itemgetter(1), reverse=True)

        counter = 0

        #create an empty results data frame.
        resultDF = pd.DataFrame(columns=('reviewerID', 'asin','title','word_without_stop','overall', 'score'))

        #get highest scored 5 cities.
        for i in sorted_scores:
            #print index and score of the city.
            #print(i[0], i[1])
            resultDF = resultDF.append({'reviewerID': df.iloc[i[0]]['reviewerID'], 'asin': df.iloc[i[0]]['asin'],
                                        'title': df.iloc[i[0]]['title'],
                                        'word_without_stop': df.iloc[i[0]]['word_without_stop'],
                                        'overall': df.iloc[i[0]]['overall'],
                                        'score': i[1]}, ignore_index=True)
            counter += 1

            if counter>9:
                break

#         #convert DF to json.
#         json_result = json.dumps(resultDF.to_dict('records'))
        return resultDF

In [78]:
keywords = "best book ever"
top_5= get_recommendations_rat_count(df_full,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A3CMIEYL0TJLC2,B015QBP4XU,Werewolf Romance: Seduced By The Alpha Wolf Bo...,best book ever read looking great werewolf boo...,5,0.714278
1,A3CMIEYL0TJLC2,B01B52FWAY,THAT MAN TRILOGY: A Sexy Romantic Comedy - Kin...,finally get read book best book,5,0.659611
2,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.593178
3,A2EZZ9UTFNNKRE,B00ZO50HZO,Hot &amp; Bothered (A Hostile Operations Team ...,oh gosh best series ever loved book much rest ...,5,0.583206
4,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.565557
5,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.560052
6,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.543871
7,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.536694
8,A2F5K1M79GW649,B01ACB72TS,Brothers Black: Wyatt the Heartbreaker - Kindl...,book book world great man like wyattthis book ...,5,0.53432
9,A3MQ4814P5LBTG,B018PPWK0E,The Way Back To Me (The Way Book 1) - Kindle e...,cant find words days completing book still can...,5,0.533559


In [59]:
keywords = "best book ever"
top_5= get_recommendations_rat_count(df,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.593178
1,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.565557
2,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.560052
3,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.543871
4,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.536694
5,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,4,0.526426
6,A1L9WQBSQ5SEFE,B0066HBLUO,CAPTURING THE COWBOY&#39;S HEART - Kindle edition,loved book enjoyable read im going let go book...,5,0.51566
7,AZYERRDY2VW61,B00629ZTOU,Texas Secrets: The Gallaghers of Morning Star ...,love book super book,5,0.514811
8,A34QXHSIS1IFNS,B00HWKW7K2,Forty 2 Days (Billionaire Banker Series) - Kin...,book best needed great read get,5,0.510221
9,A15EMPO8TCH2RB,B01D4RY4B6,You Don&#39;t Own Me: A Bad Boy Mafia Romance ...,book made cry laughed book great book get two ...,5,0.509621


# Positive Effect

In [66]:
def get_recommendations_rat_count_pos(df,keywords):

        score_dict = {}

        for index, row in df.iterrows():
            cs_score = cosine_similarity_of(row['word_without_stop'], keywords)

            rating = row['overall']
            rating_count = row['Book_Count']
            
            if row['positive']==True:
                pos_dif = (7-rating)/2
                pos_rat = rating + pos_dif
            else:
                pos_dif = rating/2
                pos_rat = rating-pos_dif
            
            rating_contribution =get_rating_weight_with_quantity(pos_rat,rating_count,50,10)

            final_score = calculate_final_score(cs_score, rating_contribution)

            score_dict[index] = final_score
            
        #sort cities by score and index.
        sorted_scores = sorted(score_dict.items(), key=operator.itemgetter(1), reverse=True)

        counter = 0

        #create an empty results data frame.
        resultDF = pd.DataFrame(columns=('reviewerID', 'asin','title','word_without_stop','overall', 'score'))

        #get highest scored 5 cities.
        for i in sorted_scores:
            #print index and score of the city.
            #print(i[0], i[1])
            resultDF = resultDF.append({'reviewerID': df.iloc[i[0]]['reviewerID'], 'asin': df.iloc[i[0]]['asin'],
                                        'title': df.iloc[i[0]]['title'],
                                        'word_without_stop': df.iloc[i[0]]['word_without_stop'],
                                        'overall': df.iloc[i[0]]['overall'],
                                        'score': i[1]}, ignore_index=True)
            counter += 1

            if counter>9:
                break

#         #convert DF to json.
#         json_result = json.dumps(resultDF.to_dict('records'))
        return resultDF

In [67]:
df['positive'] = df['overall'].apply(lambda x: True if x>=3 else False)

In [80]:
df_full['positive'] = df_full['overall'].apply(lambda x: True if x>=3 else False)

In [81]:
df_full['negative'] = df_full['overall'].apply(lambda x: True if x<3 else False)

In [33]:
df['negative'] = df['overall'].apply(lambda x: True if x<3 else False)

In [82]:
keywords = "best book ever"
top_5= get_recommendations_rat_count_pos(df_full,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A3CMIEYL0TJLC2,B015QBP4XU,Werewolf Romance: Seduced By The Alpha Wolf Bo...,best book ever read looking great werewolf boo...,5,0.733323
1,A3CMIEYL0TJLC2,B01B52FWAY,THAT MAN TRILOGY: A Sexy Romantic Comedy - Kin...,finally get read book best book,5,0.678507
2,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.612717
3,A2EZZ9UTFNNKRE,B00ZO50HZO,Hot &amp; Bothered (A Hostile Operations Team ...,oh gosh best series ever loved book much rest ...,5,0.598755
4,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.582887
5,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.576683
6,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.561419
7,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.555412
8,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,4,0.552851
9,A2F5K1M79GW649,B01ACB72TS,Brothers Black: Wyatt the Heartbreaker - Kindl...,book book world great man like wyattthis book ...,5,0.551652


In [68]:
keywords = "best book ever"
top_5= get_recommendations_rat_count_pos(df,keywords)
top_5

Unnamed: 0,reviewerID,asin,title,word_without_stop,overall,score
0,A1PIHRVCTCJQFL,B018RKHYMQ,The Stocking Was Hung (The Holidays #1) - Kind...,great christmas best years day picked book bes...,5,0.612717
1,A3CMIEYL0TJLC2,B01D9JB7DW,The Alien King&#39;s Baby: Sci-fi Alien Romanc...,best book wonderful time reading new author go...,5,0.582887
2,A1O2LQUYDVPOCJ,B00VPAHRLO,The Soccer Mom&#39;s Bad Boy - Kindle edition,wonderful always jordan silver best sexiest wr...,5,0.576683
3,A3T5H007CVA99X,B00FR30D16,Her Best Match: A Clean Billionaire Romance (T...,saw book best romance book read like romance c...,5,0.561419
4,A3PYB2CTKK93MG,B00EV9LSJI,Tears of Tess (Monsters in the Dark Book 1) - ...,book one best dark romances ever read love gen...,5,0.555412
5,A3CMIEYL0TJLC2,B01FIGE8YE,Beauty and the Blitz,good book far ready read next one soon best book,4,0.552851
6,A1L9WQBSQ5SEFE,B0066HBLUO,CAPTURING THE COWBOY&#39;S HEART - Kindle edition,loved book enjoyable read im going let go book...,5,0.532372
7,AZYERRDY2VW61,B00629ZTOU,Texas Secrets: The Gallaghers of Morning Star ...,love book super book,5,0.532174
8,A34QXHSIS1IFNS,B00HWKW7K2,Forty 2 Days (Billionaire Banker Series) - Kin...,book best needed great read get,5,0.525748
9,A15EMPO8TCH2RB,B01D4RY4B6,You Don&#39;t Own Me: A Bad Boy Mafia Romance ...,book made cry laughed book great book get two ...,5,0.524907
