In [1]:
import pandas as pd
import numpy as np

# Show every column when a frame is rendered (None removes the column cap),
# since the genre tables in this notebook are wide.
pd.options.display.max_columns = None

## Streaming Service Recommender
#### Goals

- Build data frame to be used for recommender

We want our final data frame to look like the following:

| streaming_service | Drama | Comedy | Animation | ... |
|-------------------|-------|--------|-----------|-----|
| **Netflix**       | 0.3765 | 0.2735 | 0.1680   | ....|
| **Amazon**        | 0.3349 | 0.2432 | 0.1619   | ... |
| ... | ... | ... |... | ...|

Our genres should be our columns and the streaming services should be the index.

### 1. Import data

In [2]:
# Load the previously built genre table; its columns are the genre labels
# (see the .columns lookup further down). Per the notebook text it already
# covers Netflix, Amazon and HBO.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
genres_recommender = pd.read_pickle("../Data/genres_recommender.pkl")

In [4]:
# Load the Hulu genre ratios; later cells read its "genre" and "ratio" columns.
# NOTE(review): this path is relative to the working directory, whereas
# genres_recommender is read from "../Data" -- confirm both resolve from the
# directory the notebook is run in.
hulu = pd.read_pickle("Data_Hulu_Disney/hulu_genres_ratio.pkl")

In [5]:
# Load the Disney genre ratios; later cells read its "genre" and "ratio" columns.
disney = pd.read_pickle("Data_Hulu_Disney/disney_genres_ratio.pkl")

### 2. Get list of total genres

We will first get the list of genres for each streaming service, then add the lists together, and finally convert the result to a set to remove duplicates.

In [6]:
# Genre labels per source: the existing table stores them as column names,
# while the Hulu and Disney frames keep them in a "genre" column.
genres = genres_recommender.columns.tolist()
hulu_genres, disney_genres = (
    source["genre"].tolist() for source in (hulu, disney)
)

In [10]:
# Union of every genre label seen across the three sources. The set removes
# duplicates; sorting here (rather than relying on set iteration order, which
# is arbitrary and can change between kernel sessions) makes the resulting
# list deterministic as soon as this cell has run.
total_genres = sorted(set(genres) | set(hulu_genres) | set(disney_genres))


In [11]:
# Put the genre labels in ascending order.
total_genres = sorted(total_genres)

In [12]:
# Number of distinct genre labels across all sources.
len(total_genres)

26

In [13]:
# Display the combined, de-duplicated genre list.
total_genres

['Action',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'Game-Show',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'News',
 'Reality-TV',
 'Romance',
 'Sci-Fi',
 'Short',
 'Sport',
 'Talk-Show',
 'Thriller',
 'War',
 'Western']

### 3. Create new data frame

#### i. Prepare each data frame

In [14]:
# Tag every Hulu row with its service name so the rows stay identifiable once
# the per-service frames are stacked together.
hulu = hulu.assign(streaming_service="Hulu")

# Keep only the three columns the pivot step uses.
hulu_genres = hulu.loc[:, ["genre", "ratio", "streaming_service"]]

# Preview the first rows.
hulu_genres.head(5)

Unnamed: 0,genre,ratio,streaming_service
0,Comedy,0.3536,Hulu
1,Drama,0.3319,Hulu
2,Animation,0.2187,Hulu
3,Reality-TV,0.167,Hulu
4,Action,0.1607,Hulu


In [16]:
# Tag every Disney row with its service name so the rows stay identifiable
# once the per-service frames are stacked together.
disney = disney.assign(streaming_service="Disney")

# Keep only the three columns the pivot step uses.
disney_genres = disney.loc[:, ["genre", "ratio", "streaming_service"]]

# Preview the first rows.
disney_genres.head(5)

Unnamed: 0,genre,ratio,streaming_service
0,Animation,0.4898,Disney
1,Comedy,0.4422,Disney
2,Adventure,0.4286,Disney
3,Family,0.3469,Disney
4,Action,0.3061,Disney


#### ii. Append data frames

In [17]:
# Stack the Hulu and Disney rows into one long frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent. reset_index() (without drop=True) is kept as in
# the original, so the previous row labels remain available as a column.
genres_features = pd.concat([hulu_genres, disney_genres]).reset_index()

#### iii. Create pivot table

In [18]:
# Reshape long -> wide: one row per streaming service, one column per genre,
# cells holding the genre ratio. pivot_table aggregates with its default
# (mean) if a service/genre pair occurs more than once. Genres a service does
# not list come out as NaN and are filled with 0.
genres_recommender_2 = pd.pivot_table(
    genres_features,
    values="ratio",
    index="streaming_service",
    columns="genre",
).fillna(0)

In [19]:
# Inspect the pivoted Hulu/Disney table (services as rows, genres as columns).
genres_recommender_2

genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,Game-Show,History,Horror,Music,Musical,Mystery,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Talk-Show,Thriller,War,Western
streaming_service,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Disney,0.3061,0.4286,0.4898,0.0,0.4422,0.0,0.1361,0.068,0.3469,0.034,0.0,0.0,0.0,0.0408,0.0136,0.0068,0.0,0.1156,0.0068,0.0408,0.0,0.0,0.0,0.0,0.0,0.0
Hulu,0.1607,0.1251,0.2187,0.0154,0.3536,0.1251,0.0804,0.3319,0.0727,0.0566,0.0524,0.0196,0.0294,0.0377,0.0063,0.0671,0.0098,0.167,0.0664,0.0342,0.0049,0.0147,0.0231,0.0349,0.0049,0.0049


We will now append `genres_recommender_2` to the `genres_recommender` data frame, which has Netflix, Amazon and HBO's genre information.

In [21]:
# Add the Hulu and Disney rows beneath the existing services.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent and aligns the columns by label the same way.
# NOTE: this rebinds genres_recommender from itself, so re-running the cell
# without reloading the pickle adds the Hulu/Disney rows a second time.
genres_recommender = pd.concat([genres_recommender, genres_recommender_2])

In [22]:
# Inspect the combined table: one row per streaming service, one column per genre.
genres_recommender

genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,Game-Show,History,Horror,Music,Musical,Mystery,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Talk-Show,Thriller,War,Western
streaming_service,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Amazon,0.1212,0.1228,0.1619,0.0191,0.2432,0.1252,0.2018,0.3349,0.1045,0.0574,0.0152,0.0606,0.0263,0.0096,0.0032,0.0582,0.0048,0.1021,0.055,0.0327,0.0144,0.0152,0.0072,0.0287,0.0112,0.0199
HBO,0.0769,0.0533,0.0473,0.0355,0.3905,0.1538,0.1006,0.5089,0.0414,0.0473,0.0059,0.0769,0.0118,0.0237,0.0059,0.0828,0.0296,0.0237,0.0769,0.0237,0.0,0.0533,0.0414,0.0355,0.0059,0.0
Netflix,0.1399,0.119,0.1675,0.0221,0.2736,0.1466,0.1718,0.3767,0.062,0.0571,0.0123,0.0387,0.0344,0.0178,0.0061,0.0571,0.0018,0.0883,0.0785,0.0344,0.0025,0.016,0.011,0.0479,0.0098,0.0031
Disney,0.3061,0.4286,0.4898,0.0,0.4422,0.0,0.1361,0.068,0.3469,0.034,0.0,0.0,0.0,0.0408,0.0136,0.0068,0.0,0.1156,0.0068,0.0408,0.0,0.0,0.0,0.0,0.0,0.0
Hulu,0.1607,0.1251,0.2187,0.0154,0.3536,0.1251,0.0804,0.3319,0.0727,0.0566,0.0524,0.0196,0.0294,0.0377,0.0063,0.0671,0.0098,0.167,0.0664,0.0342,0.0049,0.0147,0.0231,0.0349,0.0049,0.0049


### 4. Export final data frame

In [23]:
# NOTE(review): the export is commented out, so running the notebook does not
# write the combined table to disk. Uncomment to (re)generate the pickle.
# genres_recommender.to_pickle("Data_Hulu_Disney/genres_recommender_v2.pkl")

In [3]:
# Ad-hoc inspection of the Netflix genres pickle (the output below shows one
# row per show with 0/1 genre flag columns).
# NOTE(review): pandas is already imported in the first cell, and this cell's
# execution count (In [3]) is out of sequence with the cells above -- it looks
# like a scratch cell run in a separate session; confirm whether it belongs here.
import pandas as pd
df = pd.read_pickle("../Data/netflix_genres.pkl")

In [4]:
# Rows whose "Horror" flag equals 1.
df.loc[df["Horror"].eq(1)]

Unnamed: 0,show,genres,Crime,Drama,Thriller,Fantasy,Horror,Mystery,Comedy,Sci-Fi,...,Sport,Family,Western,Short,Reality-TV,Musical,Music,Game-Show,Talk-Show,News
1,Stranger Things,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,The Walking Dead,"Drama,Horror,Thriller",0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Supernatural,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,The Vampire Diaries,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
37,Penny Dreadful,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
44,The Twilight Zone,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
53,Kingdom,"Action,Drama,Horror",0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
64,Bates Motel,"Drama,Horror,Mystery",0,1,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
77,American Horror Story,"Drama,Horror,Thriller",0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88,Chilling Adventures of Sabrina,"Drama,Fantasy,Horror",0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
