In [1]:
import pandas as pd
import numpy as np
import re
from fuzzywuzzy import fuzz
from scipy.spatial.distance import pdist, squareform

# Genre-ratio table that the user's profile is compared against
# (presumably one row per streaming service -- confirm against the pickle).
# NOTE: pd.read_pickle can execute arbitrary code; only load pickles from a trusted source.
genres_recommender = pd.read_pickle("Data_Hulu_Disney/genres_recommender_v2.pkl")

# CSV export link of the Google Sheet; it is read into survey_df further down.
url = "https://docs.google.com/spreadsheets/d/1VsL_x7WAcEm-pLNmCqNwrDlFjTR_PUDpiadyUYJM-AQ/export?format=csv&gid=1360685521"

# Show table passed to get_recommender_df as df_all_shows
# (that function reads its "show" column and the genre columns).
survey_shows = pd.read_pickle("Data_Hulu_Disney/survey_shows.pkl")

# Updates

#### Goals

- Update the streaming service recommender functions so that they ask for a user name, find the survey row that belongs to that user, and use that row to build the user data frame
- Have the final output print: Matches for {name}, in order to personalize it

## 1. Import data

In [2]:
# Download the survey responses from the sheet's CSV export (requires network access).
survey_df = pd.read_csv(url)


## 2. Update function

In [3]:
def get_recommender_df(survey_df, df_all_shows, survey_size=30):
    """Build the one-row genre-ratio profile of a survey respondent.

    Prompts on stdin for a user name until it matches a value in
    ``survey_df["Name"]``, takes that user's most recent survey row, extracts
    the show titles from its answers, looks those titles up in
    ``df_all_shows`` and converts the genre counts of the matched shows into
    ratios.

    Parameters
    ----------
    survey_df : pandas.DataFrame
        Survey responses with "Name" and "Timestamp" columns. Every other
        column is treated as an answer of the form "Title (YYYY) ...".
    df_all_shows : pandas.DataFrame
        Show table with a "show" column and one summable column for each
        genre listed in ``genre_columns`` below.
    survey_size : int, default 30
        Denominator used to turn genre counts into ratios (presumably the
        number of shows in the survey -- confirm against the form).

    Returns
    -------
    tuple of (pandas.DataFrame, str)
        A one-row frame indexed by "User" (index name "user") whose columns
        (columns name "genre") are the genres in alphabetical order and whose
        values are the ratios rounded to 4 decimals, plus the accepted name.

    Raises
    ------
    ValueError
        If ``survey_df`` contains no names at all, because the prompt could
        then never succeed.
    """
    known_names = set(survey_df["Name"].dropna())
    if not known_names:
        raise ValueError("survey_df contains no user names to match against.")

    # keep asking until the entered name is one of the names in the survey
    while True:
        name = input("Please enter your user name:")
        if name in known_names:
            break
        print("That user name does not match. Please try again.")

    # A name can appear several times; keep only that user's newest entry.
    # A boolean mask (not positional np.where indices fed to .loc) works for any
    # index, and reset_index gives an independent copy with unique labels.
    user_rows = survey_df.loc[survey_df["Name"] == name].reset_index(drop=True)
    timestamps = pd.to_datetime(user_rows["Timestamp"])
    latest_row = user_rows.loc[timestamps.idxmax()]

    # the timestamp and name are no longer needed, only the answers
    answers = latest_row.drop(labels=["Timestamp", "Name"])

    genre_columns = ['Crime', 'Drama', 'Thriller', 'Fantasy', 'Horror', 'Mystery', 'Comedy', 'Sci-Fi', 'Biography',
                 'Action', 'Adventure', 'Romance', 'History', 'Documentary', 'Animation', 'War', 'Sport',
                 'Family', 'Western', 'Short', 'Reality-TV', 'Musical', 'Music', 'Game-Show', 'Talk-Show', 'News']

    # strip the "(YYYY)" year marker and everything after it from each answer
    title_pattern = re.compile(r"(.*)\s\(\d{4}\).*$")
    wanted_titles = set()
    for answer in answers:
        if not isinstance(answer, str):
            # unanswered question (NaN) -- nothing to look up
            continue
        found = title_pattern.search(answer)
        # answers without a year marker are used as they are instead of crashing
        title = found.group(1) if found else answer
        wanted_titles.add(title.lower())

    # The previous fuzz.ratio(...) == 100 test only accepted identical lower-cased
    # strings, so a case-insensitive exact lookup selects the same shows without
    # comparing every title against every show.
    show_keys = df_all_shows["show"].str.lower()
    user_df = df_all_shows[show_keys.isin(wanted_titles)].reset_index(drop=True)

    # genre counts of the matched shows -> ratios, one column per genre
    genre_ratios = (user_df[genre_columns].sum() / survey_size).round(4)

    user_recommender = genre_ratios.sort_index().to_frame("User").T
    user_recommender.index.name = "user"
    user_recommender.columns.name = "genre"

    # the name is returned as well so the next function can personalize its output
    return user_recommender, name

In [4]:
# we will now need to input the name
def get_streaming_recommendation(genres_recommender, user_survey, name, top_n=5):
    """Print the streaming services whose genre profile is closest to the user's.

    The user's row is stacked under the rows of ``genres_recommender``, the
    pairwise Euclidean distances between all rows are computed, and the
    services are ranked by their distance to the row labelled "User".

    Parameters
    ----------
    genres_recommender : pandas.DataFrame
        Genre-ratio rows to compare against; the index labels are printed as
        the service names.
    user_survey : pandas.DataFrame
        One-row genre-ratio frame whose index label is "User", as returned by
        get_recommender_df.
    name : str
        Name shown in the "Matches for ..." header.
    top_n : int, default 5
        Maximum number of services to list.

    Returns
    -------
    str
        Always the empty string; the result is reported through ``print``.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
    user_recommender = pd.concat([genres_recommender, user_survey])

    distances_df = pd.DataFrame(
        squareform(pdist(user_recommender, 'euclidean')),
        index=user_recommender.index,
        columns=user_recommender.index,
    )

    # Remove the user's own row by label rather than by position: a service at
    # distance 0 ties with it, and the sort could otherwise drop that service
    # and list "User" as a match. mergesort keeps ties in their original order.
    ranked = (
        distances_df['User']
        .drop('User')
        .sort_values(kind='mergesort')
        .head(top_n)
    )

    def similarity(distance):
        # similarity percentage is 1 / (1 + distance), scaled to 0-100 and truncated
        return int(1 / (1 + distance) * 100)

    print("\n")
    # personalized header: Matches for {name}
    print(f"Matches for {name}")
    print("--------------------------")
    print("\n")

    if ranked.empty:
        print("No streaming services available to compare.")
        return ("")

    best_service, best_distance = ranked.index[0], ranked.iloc[0]
    print('\033[1m' + f"Your closest match is {best_service}"
          + f" with {similarity(best_distance)}% genre similarity." + '\033[0;0m')
    print("\n")

    # list however many further services exist instead of assuming exactly five
    if len(ranked) > 1:
        print("Your next matches are:")
        for position, (service, distance) in enumerate(ranked.iloc[1:].items(), start=2):
            print(f"{position}. {service} with {similarity(distance)}% genre similarity.")

    return ("")

## 3. Test
We will first test the get_recommender_df function and then the get_streaming_recommendation function.

In [5]:
# Prompts for a user name on stdin; returns that user's genre-ratio frame and the accepted name.
df, user = get_recommender_df(survey_df, survey_shows)

Please enter your user name:ilse


In [6]:
# Display the one-row genre-ratio frame built for the user.
df

genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,...,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Talk-Show,Thriller,War,Western
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
User,0.4,0.4,0.3333,0.0,0.8333,0.0,0.2333,0.2333,0.5333,0.0333,...,0.0,0.3,0.0333,0.0667,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Display the name that was accepted at the prompt.
user

'ilse'

In [8]:
# Print the ranked streaming-service matches for this user.
get_streaming_recommendation(genres_recommender, df, user)



Matches for ilse
--------------------------


[1mYour closest match is Disney with 64% genre similarity.[0;0m


Your next matches are:
2. Hulu with 54% genre similarity.
3. Amazon with 52% genre similarity.
4. Netflix with 52% genre similarity.
5. HBO with 50% genre similarity.


''

It works; these updates will be added to the Streaming Service Recommender v2.