In [1]:
import pandas as pd
import numpy as np
import re 
from fuzzywuzzy import fuzz
from scipy.spatial.distance import pdist, squareform

pd.set_option("display.max_columns", None)

## Streaming Service Recommender Model

#### Goals

- Test the streaming service recommender model created previously in the `06.3_streaming_service_recommender_model` notebook, using one saved test survey per streaming service.

### 1. Import data


In [2]:
# Load the recommender matrix: one row per streaming service, one column per genre
# (see the table displayed in the next cell).
# pd.read_pickle can execute arbitrary code on load, so only use pickles produced by this project.
genres_recommender = pd.read_pickle("Data_Hulu_Disney/genres_recommender_v2.pkl")

In [3]:
# Display the matrix to check that every streaming service row and genre column loaded.
genres_recommender

genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,Game-Show,History,Horror,Music,Musical,Mystery,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Talk-Show,Thriller,War,Western
streaming_service,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Amazon,0.1212,0.1228,0.1619,0.0191,0.2432,0.1252,0.2018,0.3349,0.1045,0.0574,0.0152,0.0606,0.0263,0.0096,0.0032,0.0582,0.0048,0.1021,0.055,0.0327,0.0144,0.0152,0.0072,0.0287,0.0112,0.0199
HBO,0.0769,0.0533,0.0473,0.0355,0.3905,0.1538,0.1006,0.5089,0.0414,0.0473,0.0059,0.0769,0.0118,0.0237,0.0059,0.0828,0.0296,0.0237,0.0769,0.0237,0.0,0.0533,0.0414,0.0355,0.0059,0.0
Netflix,0.1399,0.119,0.1675,0.0221,0.2736,0.1466,0.1718,0.3767,0.062,0.0571,0.0123,0.0387,0.0344,0.0178,0.0061,0.0571,0.0018,0.0883,0.0785,0.0344,0.0025,0.016,0.011,0.0479,0.0098,0.0031
Disney,0.3061,0.4286,0.4898,0.0,0.4422,0.0,0.1361,0.068,0.3469,0.034,0.0,0.0,0.0,0.0408,0.0136,0.0068,0.0,0.1156,0.0068,0.0408,0.0,0.0,0.0,0.0,0.0,0.0
Hulu,0.1607,0.1251,0.2187,0.0154,0.3536,0.1251,0.0804,0.3319,0.0727,0.0566,0.0524,0.0196,0.0294,0.0377,0.0063,0.0671,0.0098,0.167,0.0664,0.0342,0.0049,0.0147,0.0231,0.0349,0.0049,0.0049


In [4]:
# Load the per-show genre table.
# NOTE(review): all_shows is not referenced by any cell visible in this notebook — confirm it is still needed.
all_shows = pd.read_pickle("Data_Hulu_Disney/all_shows_genres_v2.pkl")

### 2. Define function get_streaming_recommendation

In [5]:
def get_streaming_recommendation(genres_recommender, user_survey):
    """Print the streaming services ranked by genre similarity to a user survey.

    The survey row is stacked under the per-service genre rows, pairwise
    Euclidean distances are computed between all rows, and every service is
    listed from nearest to farthest from the row labelled ``'User'``.

    Parameters
    ----------
    genres_recommender : pandas.DataFrame
        One row per streaming service (index = service name) and one numeric
        column per genre.
    user_survey : pandas.DataFrame or pandas.Series
        The user's genre profile. It must be labelled ``'User'`` (index label
        of the row, or the Series name) and is assumed to use the same genre
        columns as ``genres_recommender``.

    Returns
    -------
    str
        Always the empty string; the ranking is reported through ``print``.

    Raises
    ------
    ValueError
        If ``genres_recommender`` has no rows.
    KeyError
        If the combined table has no row labelled ``'User'``.

    Notes
    -----
    The printed percentage is ``int(100 - distance * 100)``. Euclidean
    distance is not bounded by 1, so the percentage can be negative for very
    dissimilar profiles.
    """
    if len(genres_recommender) == 0:
        raise ValueError("genres_recommender must contain at least one streaming service")

    # DataFrame.append accepted a named Series as a single row; keep supporting that.
    if isinstance(user_survey, pd.Series):
        user_survey = user_survey.to_frame().T

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    user_recommender = pd.concat([genres_recommender, user_survey])

    # Square matrix of pairwise distances, labelled by service name (plus 'User').
    distances_df = pd.DataFrame(
        squareform(pdist(user_recommender, 'euclidean')),
        index=user_recommender.index,
        columns=user_recommender.index,
    )

    # Drop the user's zero distance to itself by label (rather than assuming it
    # sorts first) and use a stable sort so ties keep the table's row order.
    user_distances = distances_df['User'].drop('User').sort_values(kind='mergesort')
    recommendations = list(user_distances.index)
    distances = list(user_distances.values)

    def similarity(distance):
        # Percentage of similarity: 1 minus the distance, multiplied by 100.
        return int(100 - distance * 100)

    print(f"Your closest match is {recommendations[0]} with {similarity(distances[0])}% genre similarity.")

    # List every remaining service, however many the recommender table holds.
    if len(recommendations) > 1:
        print("--------------------------")
        print("Your next matches are:")
        for position in range(1, len(recommendations)):
            print(f"{position + 1}. {recommendations[position]} with {similarity(distances[position])}% genre similarity.")

    return ""


### 3. Test

In [6]:
# Load one saved test survey per streaming service to feed into the recommender.
# NOTE(review): the Netflix/Amazon/HBO files are read from ../Data/ while the Hulu/Disney
# files are read from Data_Hulu_Disney/ — confirm both relative paths resolve from this
# notebook's working directory.
# pd.read_pickle can execute arbitrary code on load, so only use pickles produced by this project.
user_netflix = pd.read_pickle("../Data/netflix_test.pkl")
user_amazon = pd.read_pickle("../Data/amazon_test.pkl")
user_hbo = pd.read_pickle("../Data/hbo_test.pkl")
user_hulu = pd.read_pickle("Data_Hulu_Disney/hulu_test.pkl")
user_disney = pd.read_pickle("Data_Hulu_Disney/disney_test.pkl")

In [7]:
# Run the recommender on the Netflix test survey (presumably expected to rank Netflix first).
get_streaming_recommendation(genres_recommender, user_netflix)

Your closest match is Netflix with 67% genre similarity.
--------------------------
Your next matches are:
2. HBO with 65% genre similarity.
3. Amazon with 63% genre similarity.
4. Hulu with 63% genre similarity.
5. Disney with 17% genre similarity.


''

In [8]:
# Run the recommender on the Amazon test survey (presumably expected to rank Amazon first).
get_streaming_recommendation(genres_recommender, user_amazon)

Your closest match is Amazon with 75% genre similarity.
--------------------------
Your next matches are:
2. Netflix with 73% genre similarity.
3. Hulu with 71% genre similarity.
4. HBO with 65% genre similarity.
5. Disney with 32% genre similarity.


''

In [9]:
# Run the recommender on the HBO test survey (presumably expected to rank HBO first).
# The recorded output shows a negative similarity for Disney: the score is 100 - 100 * distance,
# and the Euclidean distance exceeds 1 for that pair.
get_streaming_recommendation(genres_recommender, user_hbo)

Your closest match is HBO with 60% genre similarity.
--------------------------
Your next matches are:
2. Netflix with 42% genre similarity.
3. Hulu with 37% genre similarity.
4. Amazon with 36% genre similarity.
5. Disney with -10% genre similarity.


''

In [10]:
# Run the recommender on the Hulu test survey (presumably expected to rank Hulu first).
get_streaming_recommendation(genres_recommender, user_hulu)

Your closest match is Hulu with 65% genre similarity.
--------------------------
Your next matches are:
2. HBO with 61% genre similarity.
3. Netflix with 57% genre similarity.
4. Amazon with 52% genre similarity.
5. Disney with 24% genre similarity.


''

In [11]:
# Run the recommender on the Disney test survey (presumably expected to rank Disney first).
get_streaming_recommendation(genres_recommender, user_disney)

Your closest match is Disney with 44% genre similarity.
--------------------------
Your next matches are:
2. Hulu with 17% genre similarity.
3. Amazon with 9% genre similarity.
4. Netflix with 8% genre similarity.
5. HBO with 0% genre similarity.


''

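The survey worked: for all five test surveys, the closest match is the streaming service the survey was built for. Note that the similarity percentage is computed as 100 × (1 − Euclidean distance), so it can drop below 0% for very dissimilar profiles (e.g. −10% for Disney in the HBO test). We can now create a module and notebook for the new data frames including Hulu and Disney+.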