### <span style='color:purple'>Cleaning the Dataframe</span>

In [2]:
import pandas as pd
# Load the raw reviews and discard the columns the recommenders never read.
df = pd.read_csv("./ready_files/random_500_reviews_df.csv")
df = df.drop(columns=["designer", "perfume_group", "main_accords", "all_notes"])

# Strip surrounding whitespace from the perfume names.
# The .str accessor leaves missing values as NaN instead of raising like a per-row lambda would.
df['perfume_name'] = df['perfume_name'].str.strip()

# NOTE(review): 'review_test' looks like it may be a typo for 'review_text'; rename() silently
# ignores keys that are not present, so confirm this against the CSV header.
df = df.rename(columns={'review_test': 'review',
                        'customer-id': 'customer_id'})

# shuffling the dataframe, keeping it at its original size
# (fixed seed so that Restart & Run All reproduces the same row order)
df = df.sample(frac=1, random_state=42)

# cleaning the reviews text: skip the first 20 characters, trim whitespace, remove newlines
# presumably the first 20 characters are a fixed-length prefix in the raw text — TODO confirm
df['review'] = (
    df['review']
    .str[20:]
    .str.strip()
    .str.replace('\n', '', regex=False)
)


### <span style='color:purple'>Vader Sentiment analysis, to create sentiment column </span> 
Results range from -1 to 1, similar to TextBlob, with 0 being neutral. VADER performs better than TextBlob. It is designed to analyse text from social media. <br /> 
[About Vader, and Using it](https://medium.com/analytics-vidhya/simplifying-social-media-sentiment-analysis-using-vader-in-python-f9e6ec6fc52f)

In [3]:
# !pip install vaderSentiment

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# A single analyser instance is reused for every review.
analyser = SentimentIntensityAnalyzer()

# Score each review and keep only the 'compound' entry of the returned scores.
Vader_sentiment = [
    analyser.polarity_scores(review_text)['compound']
    for review_text in df['review']
]

# sentiment is a float between -1 and 1, 0 is neutral
df['vader_sentiment'] = Vader_sentiment


### <span style='color:purple'>Creating A User Based Recommender System</span> Based on cosine similarity

In [4]:
def user_based_recommender_df(df):
    """
    Returns a data frame of cosine similarity values of the users.

    The input is pivoted to one row per customer and one column per perfume
    (pivot_table's default aggregation averages repeated customer/perfume pairs),
    missing cells are filled with 0, and the customer rows are compared pairwise.

    Parameters:
    -----------
    df:
        the data frame of the users' ids, the perfume name, and the sentiment analysis.
        Required columns: "customer_id", "perfume_name", and "vader_sentiment".
        Any other column (for example "review") is ignored and may be absent.

    Returns:
    --------
    pandas.DataFrame
        Square data frame whose index and columns are both the customer ids;
        cell [a, b] is the cosine similarity between users a and b.
    """
    from scipy import sparse
    from sklearn.metrics.pairwise import cosine_similarity

    # pivot_table only reads the three columns named here, so nothing has to be dropped first
    pivot_user = df.pivot_table(index='customer_id', columns='perfume_name', values='vader_sentiment')

    # perfumes a user never reviewed become 0, the same value as a neutral sentiment
    pivot_user_sparse = sparse.csr_matrix(pivot_user.fillna(0))

    recommender_user = cosine_similarity(pivot_user_sparse)  # outputs dense matrix by default.

    return pd.DataFrame(recommender_user,
                        index=pivot_user.index,
                        columns=pivot_user.index)



In [5]:
# WHICH PERFUMES SHOULD WE RECOMMEND TO THE NEW/ENTERED USER
#-------------------------------------------------------------
def recommend_perfumes(q):
    """
    Returns a set of perfume names that user q has not yet reviewed and that the most
    similar other user liked (reviewed with a positive sentiment).

    Reads the module-level data frame `df` and builds the user similarity matrix with
    `user_based_recommender_df(df)`.

    Parameters:
    -----------
    q:
        User ID from available ones. See df['customer_id'] for available values

    Returns:
    --------
    set
        Perfume names to recommend; empty when there is no other user, or when the most
        similar user liked nothing that q has not already reviewed.

    Raises:
    -------
    KeyError
        If q is not a label of the similarity matrix.
    """

    # similarity of every user to q; q's own entry is removed explicitly instead of assuming
    # it sorts first (ties, or a zero self-similarity, would otherwise select q itself)
    similarity_to_q = user_based_recommender_df(df)[q].drop(q)
    if similarity_to_q.empty:
        return set()  # q is the only user, nobody to borrow recommendations from

    # find most similar user (the first one in index order when several share the maximum)
    most_similar_user = similarity_to_q.idxmax()

    # perfumes the most similar user liked (positive sentiment)
    liked_by_similar = df.loc[
        (df['customer_id'] == most_similar_user) & (df['vader_sentiment'] > 0),
        'perfume_name',
    ]

    # perfumes already reviewed by q, our "new" user
    q_reviewed = df.loc[df['customer_id'] == q, 'perfume_name']

    # recommend what q has not tried yet
    return set(liked_by_similar).difference(q_reviewed)



### <span style='color:purple'>Creating An Item Based Recommender System</span>

In [6]:
def item_based_recommender_df(df):
    """
    Build the perfume-to-perfume cosine similarity table.

    Parameters:
    -----------
    df:
        data frame holding at least the columns "customer_id", "perfume_name" and
        "vader_sentiment"; only these three are read.

    Returns:
    --------
    pandas.DataFrame
        Square data frame whose index and columns are both the perfume names.
    """
    from scipy import sparse
    from sklearn.metrics.pairwise import cosine_similarity

    # one row per perfume, one column per customer, sentiment as the cell value
    perfume_by_customer = df.pivot_table(
        values='vader_sentiment',
        index='perfume_name',
        columns='customer_id',
    )

    # missing customer/perfume pairs are stored as 0 in the sparse matrix
    sentiment_matrix = sparse.csr_matrix(perfume_by_customer.fillna(0))

    # pairwise similarity between the perfume rows (dense result by default)
    similarity = cosine_similarity(sentiment_matrix)

    # label both axes with the perfume names so the table can be searched by name
    perfume_names = perfume_by_customer.index
    return pd.DataFrame(similarity, index=perfume_names, columns=perfume_names)



In [7]:
# SEARCHING ON THE ITEM-BASED RECOMMENDER
#------------------------------------------
def similar_perfumes(liked_perfume, top_n=20):
    """
    Returns the perfumes most similar to `liked_perfume`, most similar first.

    Reads the module-level data frame `df` and builds the perfume similarity table with
    `item_based_recommender_df(df)`.

    Parameters:
    -----------
    liked_perfume:
        Name of the perfume you want to find similar perfumes to. MUST BE ENTERED EXACTLY AS IT IS, as a string.
        For an idea what to enter, type df['perfume_name']
    top_n:
        Maximum number of perfumes to return (20 by default).

    Returns:
    --------
    pandas.Series
        Similarity values indexed by perfume name and named after `liked_perfume`;
        `liked_perfume` itself is never included.

    Raises:
    -------
    KeyError
        If liked_perfume is not a label of the similarity table.
    """
    similarity_to_liked = item_based_recommender_df(df)[liked_perfume]

    # remove the perfume itself by label rather than by position: with tied similarity
    # values it is not guaranteed to be the first entry after sorting
    other_perfumes = similarity_to_liked.drop(liked_perfume)

    return other_perfumes.sort_values(ascending=False).head(top_n)


### <span style='color:purple'> The Ready-to-use Function, combining everything </span>

In [8]:
# THE ONE EASY-TO-USE FUNCTION NEEDED NOW
#-----------------------------------------
def for_biggest_reviewers(user_id):
    """
    This function combines everything together. It first tries the user-based recommender and
    prints the perfumes the most similar user liked. If that set is empty, it prints, for each of
    the (up to) three perfumes this user rated highest, the three most similar perfumes from the
    item-based recommender.

    Reads the module-level data frame `df`. Output is printed; nothing is returned.

    Parameters:
    -----------
    user_id:
        Pick from the user_id columns for users in the system. Type df['customer_id'] to see possible ones.
    """

    # computed once: every call to recommend_perfumes rebuilds the whole user similarity matrix
    recommendations = recommend_perfumes(user_id)

    if recommendations:
        print(recommendations)
        return

    # get the top 3 liked perfume names
    top_liked = (
        df.loc[df['customer_id'] == user_id]
        .sort_values(by='vader_sentiment', ascending=False)
        .head(3)['perfume_name']
    )

    # run the item-based recommender for each of the top liked perfumes
    for perfume in top_liked:
        print(similar_perfumes(perfume).head(3))

***

#### <span style='color:purple'> Usage Examples </span>

In [9]:
# print recommendations for one specific customer id
for_biggest_reviewers(user_id=1020788)

{'Cuir Amethyste Giorgio Armani for women and men'}


In [10]:
# print recommendations for another specific customer id
for_biggest_reviewers(user_id=28013)

{'Allure Homme Edition Blanche Chanel for men', 'Pi Neo Givenchy for men', 'Dior Homme Intense 2007 Christian Dior for men'}


In [11]:
# draw one customer id at random (no seed, so it differs on every run),
# show which id was drawn, then print the recommendations for her/him
random_user = df['customer_id'].sample(n=1).iloc[0]
print(random_user)
for_biggest_reviewers(user_id=random_user)

66262
perfume_name
L'Eau d'Issey Summer 2018 Issey Miyake for women           0.194002
Eau de Parfum With Holiday Decor Issey Miyake for women    0.173453
212 Ice Carolina Herrera for women                         0.135198
Name: Hypnose Hypnotizing Elixir Lancome for women, dtype: float64


In [12]:
# draw one customer id at random (no seed, so it differs on every run),
# show which id was drawn, then print the recommendations for her/him
random_user = df['customer_id'].sample(n=1).iloc[0]
print(random_user)
for_biggest_reviewers(user_id=random_user)

7262
{'Amouage Epic Woman Amouage for women', 'Acqua di Gioia Essenza Giorgio Armani for women', 'Hermessence Santal Massoïa Hermès for women and men', "Safran Troublant L'Artisan Parfumeur for women and men", 'Cuir Amethyste Giorgio Armani for women and men', "Guerlain Chant d'Aromes Guerlain for women", 'Vanisia Creed for women', 'Ombre de Hyacinth Tom Ford for women and men', 'Reflection Woman Amouage for women', 'Aqua Allegoria Herba Fresca Guerlain for women and men', 'Flowerbomb La Vie en Rose 2011 Viktor&Rolf for women', 'Jonquille de Nuit Tom Ford for women and men', "L'eau d'Issey Eau de Parfum Issey Miyake for women", 'Woman Donna Karan for women', 'Pure DKNY Verbena Donna Karan for women', "Infusion d'Iris Eau de Parfum Absolue Prada for women", 'So Elixir Purple Eau de Parfum Yves Rocher for women', 'By Dolce&Gabbana for women', 'Amazone (1974) Hermès for women'}


In [13]:
# draw one customer id at random (no seed, so it differs on every run),
# show which id was drawn, then print the recommendations for her/him
random_user = df['customer_id'].sample(n=1).iloc[0]
print(random_user)
for_biggest_reviewers(user_id=random_user)

24302
perfume_name
Champ de Fleurs L'Artisan Parfumeur for women and men    0.133438
Must de Cartier Clair De Jasmin Cartier for women        0.098301
No4 Fleurs d'Oranger Prada for women and men             0.097034
Name: De Profundis Serge Lutens for women and men, dtype: float64
perfume_name
Versense Versace for women             0.094061
Euphoria Calvin Klein for women        0.073945
Diorissimo Christian Dior for women    0.070983
Name: Weekend for Women Burberry for women, dtype: float64
perfume_name
Cuir Amethyste Giorgio Armani for women and men        0.171475
Amber & Patchouli Jo Malone London for men             0.165426
Riviera Dream - Lime Ralph Lauren for women and men    0.165426
Name: Fleur de Chine Tom Ford for women and men, dtype: float64
