# Building a Netflix recommender system leveraging node2vec

Notebook Structure

1. Import necessary libraries

2. Import required dataset and perform baseline EDA

3. Model Development

4. Defining the recommender system and testing it

# 1. Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
! pip install node2vec

Collecting node2vec
  Downloading node2vec-0.5.0-py3-none-any.whl.metadata (849 bytes)
Downloading node2vec-0.5.0-py3-none-any.whl (7.2 kB)
Installing collected packages: node2vec
Successfully installed node2vec-0.5.0


In [None]:
import networkx as nx  # create and store graph
from node2vec import Node2Vec  # To run node2vec algorithm

# 2. Import required dataset and perform baseline EDA

In [None]:
df_node2vec = pd.read_csv('netflix_titles.csv')

In [None]:
df_node2vec.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [None]:
df_node2vec.shape

(8807, 12)

In [None]:
df_node2vec.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [None]:
df_node2vec.isnull().sum()

Unnamed: 0,0
show_id,0
type,0
title,0
director,2634
cast,825
country,831
date_added,10
release_year,0
rating,4
duration,3


In [None]:
df_node2vec = df_node2vec.dropna()

In [None]:
df_node2vec.shape

(5332, 12)

In [None]:
df_node2vec[df_node2vec.title == "Dark"]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description


In [None]:
df_node2vec.isnull().sum()

Unnamed: 0,0
show_id,0
type,0
title,0
director,0
cast,0
country,0
date_added,0
release_year,0
rating,0
duration,0


In [None]:
df_node2vec.head(2)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...


In [None]:
df_node2vec.drop(
    columns=[
        "director",
        "cast",
        "country",
        "date_added",
        "release_year",
        "rating",
        "duration",
        "type",
    ],
    inplace=True,
)

In [None]:
df_node2vec.head(2)

Unnamed: 0,show_id,title,listed_in,description
7,s8,Sankofa,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,The Great British Baking Show,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...


# 3. Model Development

In [None]:
# function that will create edges for given movie title and its genres
def addToGraph(movie_name, graph):
    genres = (
        df_node2vec[df_node2vec["title"] == movie_name]["listed_in"]
        .values[0]
        .rstrip()
        .lower()
        .split(", ")
    )
    for genre in genres:
        graph.add_edge(movie_name.strip(), genre)
    return graph


# function that will create graph for all the movies name
def createGraph():
    graph = nx.Graph()
    for movie_name in df_node2vec["title"]:
        graph = addToGraph(movie_name, graph)
    return graph

In [None]:
graph = createGraph()

In [None]:
df_node2vec.head()

Unnamed: 0,show_id,title,listed_in,description
7,s8,Sankofa,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,The Great British Baking Show,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
9,s10,The Starling,"Comedies, Dramas",A woman adjusting to life after a loss contend...
12,s13,Je Suis Karl,"Dramas, International Movies",After most of her family is murdered in a terr...
24,s25,Jeans,"Comedies, International Movies, Romantic Movies",When the father of the man she loves insists t...


In [None]:
# should be 2 since two genres are associated with it
print(graph.degree()["Norm of the North: King Sized Adventure"])
# should be 1 since 1 genres are associated with it
print(graph.degree()["#realityhigh"])

2
1


In [None]:
df_node2vec[df_node2vec.title == "Norm of the North: King Sized Adventure"]

Unnamed: 0,show_id,title,listed_in,description
7596,s7597,Norm of the North: King Sized Adventure,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...


In [None]:
df_node2vec[df_node2vec.title == "#realityhigh"]

Unnamed: 0,show_id,title,listed_in,description
5277,s5278,#realityhigh,Comedies,When nerdy high schooler Dani finally attracts...


In [None]:
node2vec = Node2Vec(graph, dimensions=20, walk_length=16, num_walks=10)

Computing transition probabilities:   0%|          | 0/5373 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [01:04<00:00,  6.40s/it]


In [None]:
model = node2vec.fit(window=5, min_count=1)

In [None]:
model.wv.get_vector("The Conjuring")

array([-0.39057815, -0.05282983, -0.25896534,  0.5573635 , -0.27500632,
        0.04780074, -0.31432876,  0.96265554, -1.0825372 ,  0.6146729 ,
        0.6508771 , -0.50286454,  0.7377844 ,  0.29231125,  0.64863324,
        0.85311836,  0.96137565,  0.77027947, -0.6244784 , -0.19032896],
      dtype=float32)

In [None]:
len(model.wv.get_vector("The Conjuring"))

20

In [None]:
model.wv.get_vector("Insidious")

array([-0.45415723, -0.12105472, -0.10463304,  0.5731137 , -0.26859346,
        0.39746314, -0.4285761 ,  0.9007071 , -1.055953  ,  0.69204986,
        0.6272208 , -0.49661463,  0.40043145,  0.3004164 ,  0.7750714 ,
        0.818114  ,  0.9774349 ,  0.68617076, -0.61918545, -0.11619588],
      dtype=float32)

In [None]:
model.wv.get_vector("Jeans")

array([-0.3293333 ,  0.484418  , -0.35894462, -0.29532745, -0.23164304,
        0.18015042, -0.08787249,  0.93689215,  0.22192018,  0.3697792 ,
        0.35850728, -0.7676775 ,  0.17584178, -0.39529833, -0.0882451 ,
        0.21252935,  0.583652  , -0.30674177,  0.14374952,  0.35147154],
      dtype=float32)

# 4. Defining the recommender system and testing it

In [None]:
# generate similar movies to given genre or title
def node2vec_recommender(name):
    for node, _ in model.wv.most_similar(name):
        print(node)

### Example 1

In [None]:
node2vec_recommender("Insidious")

The Detained
The Devil Inside
The Ring
Shelter
The Vatican Tapes
The Haunting of Molly Hartley
Population 436
Clinical
House at the End of the Street
The Charnel House


In [None]:
movies_to_check = ['Insidious' , 'The Detained' , 'The Devil Inside' , 'The Ring' , 'Shelter' , 'The Vatican Tapes' , 'The Haunting of Molly Hartley' , 'Population 436' , 'Clinical',
                      'House at the End of the Street' , 'The Charnel House']
df_node2vec[df_node2vec.title.isin(movies_to_check)]

Unnamed: 0,show_id,title,listed_in,description
804,s805,Population 436,"Horror Movies, Thrillers",A census taker sent to investigate why Rockwel...
1118,s1119,Insidious,"Horror Movies, Thrillers","A family moves into a new home, where their so..."
5637,s5638,Clinical,"Horror Movies, Thrillers",A psychiatrist is plagued by flashbacks to a t...
7009,s7010,House at the End of the Street,"Horror Movies, Thrillers",Moving to a new town proves even more stressfu...
8001,s8002,Shelter,"Horror Movies, Thrillers",A forensic psychiatrist has to think outside t...
8242,s8243,The Charnel House,"Horror Movies, Thrillers",After an architect transforms a slaughterhouse...
8274,s8275,The Detained,"Horror Movies, Thrillers",Five high school students serving Saturday det...
8276,s8277,The Devil Inside,"Horror Movies, Thrillers","At once spooky and grisly, this thriller follo..."
8338,s8339,The Haunting of Molly Hartley,"Horror Movies, Thrillers",A troubled teenage girl hopes to start fresh a...
8486,s8487,The Ring,"Horror Movies, Thrillers",A reporter watches a video that supposedly set...


Good recommendation :
"Insidious" is from the genre (Horror Movies, Thrillers) , the recommended movies are also from the same genre


### Example 2

In [None]:
node2vec_recommender("The Conjuring")

Population 436
Mara
Unfriended
In the Tall Grass
The Charnel House
The Ring
Insidious
Knock Knock
The Vatican Tapes
Havenhurst


In [None]:
movies_to_check = ['The Conjuring',
'Population 436',
'Mara',
'Unfriended',
'In the Tall Grass',
'The Charnel House',
'The Ring',
'Insidious',
'Knock Knock',
'The Vatican Tapes',
'Havenhurst']
df_node2vec[df_node2vec.title.isin(movies_to_check)]

Unnamed: 0,show_id,title,listed_in,description
804,s805,Population 436,"Horror Movies, Thrillers",A census taker sent to investigate why Rockwel...
1118,s1119,Insidious,"Horror Movies, Thrillers","A family moves into a new home, where their so..."
1283,s1284,The Conjuring,"Horror Movies, Thrillers",When a family starts experiencing supernatural...
1843,s1844,Unfriended,"Horror Movies, Thrillers",Six teens whose cyberbullying drove their clas...
3450,s3451,In the Tall Grass,"Horror Movies, Thrillers","After hearing a boy's cry for help, a pregnant..."
6942,s6943,Havenhurst,"Horror Movies, Thrillers",An alcoholic with a tragic past rents an apart...
7218,s7219,Knock Knock,"Horror Movies, Thrillers",A devoted husband and father on his own for th...
7402,s7403,Mara,"Horror Movies, Thrillers",When criminal psychologist Kate Fuller investi...
8242,s8243,The Charnel House,"Horror Movies, Thrillers",After an architect transforms a slaughterhouse...
8486,s8487,The Ring,"Horror Movies, Thrillers",A reporter watches a video that supposedly set...


Good recommendation :
"The Conjuring" is from the genre (Horror Movies, Thrillers) , the recommended movies are also from the same genre

### Example 3

In [None]:
node2vec_recommender("Jeans")

Romantik Komedi 2: Bekarlığa Veda
Bewildered Bolbol
You're My Boss
10 Days in Sun City
Love, Rosie
It's Her Day
Eyyvah Eyyvah 2
Don't Go Breaking My Heart 2
Disconnect
Man Up


In [None]:
movies_to_check = ['Jeans',
'Romantik Komedi 2: Bekarlığa Veda',
'Bewildered Bolbol',
"You're My Boss",
'10 Days in Sun City',
'Love, Rosie',
"It's Her Day",
'Eyyvah Eyyvah 2',
"Don't Go Breaking My Heart 2",
'Disconnect',
'Man Up']
df_node2vec[df_node2vec.title.isin(movies_to_check)]

Unnamed: 0,show_id,title,listed_in,description
24,s25,Jeans,"Comedies, International Movies, Romantic Movies",When the father of the man she loves insists t...
1216,s1217,Romantik Komedi 2: Bekarlığa Veda,"Comedies, International Movies, Romantic Movies",Didem tries everything to get actor Cem to pro...
1836,s1837,Disconnect,"Comedies, International Movies, Romantic Movies",A group of singletons stumbles through the wil...
2049,s2050,You're My Boss,"Comedies, International Movies, Romantic Movies","Pong, a corporate assistant, poses as his comp..."
2195,s2196,It's Her Day,"Comedies, International Movies, Romantic Movies",After a man promises his fiancé a dream weddin...
3392,s3393,10 Days in Sun City,"Comedies, International Movies, Romantic Movies",After his girlfriend wins the Miss Nigeria pag...
4338,s4339,Don't Go Breaking My Heart 2,"Comedies, International Movies, Romantic Movies","Zixin is about to marry Qihong, but her galliv..."
6290,s6291,Bewildered Bolbol,"Comedies, International Movies, Romantic Movies",A man suffering from amnesia can't seem to cho...
6726,s6727,Eyyvah Eyyvah 2,"Comedies, International Movies, Romantic Movies","Hüseyin has found his father, and now has one ..."
7354,s7355,"Love, Rosie","Comedies, International Movies, Romantic Movies","Over the years, as they come and go in each ot..."


Good recommendation :
"Jeans" is from the genre (Comedies, International Movies, Romantic Movies) , the recommended movies are also from the same genre

### Example 4

In [None]:
node2vec_recommender("Sankofa")

Time Share
A Heavy Heart
El silencio es bienvenido
La diosa del asfalto
Ajeeb Daastaans
Plaire, aimer et courir vite
Jal
Manu
Komola Rocket
18 Presents


In [None]:
movies_to_check = ['Sankofa',
'Time Share',
'A Heavy Heart',
'El silencio es bienvenido',
'La diosa del asfalto',
'Ajeeb Daastaans',
'Plaire, aimer et courir vite',
'Jal',
'Manu',
'Komola Rocket',
'18 Presents']
df_node2vec[df_node2vec.title.isin(movies_to_check)]

Unnamed: 0,show_id,title,listed_in,description
7,s8,Sankofa,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
282,s283,La diosa del asfalto,"Dramas, Independent Movies, International Movies",A woman from a tough neighborhood outside Mexi...
1023,s1024,Ajeeb Daastaans,"Dramas, Independent Movies, International Movies",Four shorts explore the surprising ways in whi...
2550,s2551,18 Presents,"Dramas, Independent Movies, International Movies",A pregnant mother with terminal cancer leaves ...
2789,s2790,El silencio es bienvenido,"Dramas, Independent Movies, International Movies",As a teen languishes during a spiritless road ...
3631,s3632,Manu,"Dramas, Independent Movies, International Movies",The relationship between a painter and his adm...
4210,s4211,Komola Rocket,"Dramas, Independent Movies, International Movies",As the lives of rich and poor passengers aboar...
4361,s4362,Time Share,"Dramas, Independent Movies, International Movies",Husband and father Pedro becomes increasingly ...
5600,s5601,A Heavy Heart,"Dramas, Independent Movies, International Movies","Long past his heyday, a still-virile boxer tri..."
7117,s7118,Jal,"Dramas, Independent Movies, International Movies",A gifted young man from a drought-stricken vil...


Good recommendation :
"Sankofa" is from the genre (Dramas, Independent Movies, International Movies) , the recommended movies are also from the same genre