# Import

In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
import pandas as pd
import numpy as np

filepath = '/content/drive/My Drive/Projects/Project Netflix/netflix_titles.csv'

df = pd.read_csv(filepath)

In [21]:
#Erzeugung eines Datensatzes mit 20% der Daten
df = df.sample(int(df.shape[0]*0.2))

df.head(7)


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
5196,70295170,Movie,Queen of the Desert,Werner Herzog,"Nicole Kidman, James Franco, Damian Lewis, Jay...","United States, Morocco","April 6, 2019",2015,PG-13,128 min,Dramas,"Driven beyond the bounds of convention, advent..."
4743,81087761,Movie,Roll Red Roll,Nancy Schwartzman,,United States,"August 8, 2019",2018,TV-MA,80 min,Documentaries,This compelling documentary follows the 2012 S...
3607,80016588,Movie,Chappie,Neill Blomkamp,"Sharlto Copley, Hugh Jackman, Sigourney Weaver...","South Africa, United States","January 1, 2019",2015,R,121 min,"Action & Adventure, Sci-Fi & Fantasy",In a futuristic society where a robot police f...
5852,80044950,TV Show,The OA,,"Brit Marling, Jason Isaacs, Emory Cohen, Scott...",United States,"March 22, 2019",2019,TV-MA,2 Seasons,"TV Dramas, TV Mysteries, TV Sci-Fi & Fantasy","Seven years after vanishing from her home, a y..."
984,80038288,Movie,Umrika,Prashant Nair,"Suraj Sharma, Tony Revolori, Smita Tambe, Adil...",India,"October 1, 2016",2015,TV-14,96 min,"Dramas, Independent Movies, International Movies",An ambitious man leaves his small Indian villa...
3495,81184681,TV Show,Chhota Bheem Kung Fu Dhamaka Series,,"Pinky Pal Rajput, Sonal Kaushal, Julie Tejwani...",,"January 1, 2020",2019,TV-Y7,1 Season,Kids' TV,"From kung fu battles to run-ins with bandits, ..."
5802,80020540,TV Show,Richie Rich,,"Jake Brennan, Joshua Carlon, Lauren Taylor, Je...",United States,"May 22, 2015",2015,TV-G,2 Seasons,"Kids' TV, TV Comedies","After turning his veggies into green energy, R..."


In [22]:
#Splitten der Spalte genres in einzelne Genres
df['temp_genre'] = df['listed_in'].apply(lambda genres: genres.split(',')[0])
df['temp_genre']

5196                Dramas
4743         Documentaries
3607    Action & Adventure
5852             TV Dramas
984                 Dramas
               ...        
5703              Kids' TV
5486      British TV Shows
226               Comedies
6071             TV Dramas
1451              Comedies
Name: temp_genre, Length: 1246, dtype: object

#Input of selected movie and number of recommendations

In [50]:
selected_movie = 'Queen of the Desert'  #Input movie title
num_rec = 10 #Input number of recommendations

In [51]:
df1 = pd.DataFrame( np.concatenate( (df[['title', 'temp_genre']].values, df[['title', 'rating']].values), axis=0 ) )
df1.columns = ['title', 'value']
df1

Unnamed: 0,title,value
0,Queen of the Desert,Dramas
1,Roll Red Roll,Documentaries
2,Chappie,Action & Adventure
3,The OA,TV Dramas
4,Umrika,Dramas
...,...,...
2487,Noddy Toyland Detective,TV-Y
2488,Secrets of Great British Castles,TV-PG
2489,Arisan 2,TV-14
2490,Greenleaf,TV-14


In [52]:
df['title'].unique().shape[0] * df['temp_genre'].unique().shape[0] * df['rating'].unique().shape[0]

573804

In [53]:
# Graphen erstellen
import networkx as nx
G = nx.from_pandas_edgelist(df1, source='title', target='value')

In [54]:
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 1289
Number of edges: 2491
Average degree:   3.8650


In [55]:
import matplotlib.pyplot as plt

#plt.figure(figsize=(20, 20))
#nx.draw_networkx(G, with_labels=False, alpha=0.5, node_size=50

In [56]:
[n for n in G.neighbors(selected_movie)]

['Dramas', 'PG-13']

In [57]:
i = 0
for neighbor in G.neighbors(selected_movie):
  print(neighbor)
  i += 1
  if i == 5:
    break

Dramas
PG-13


In [58]:
G.number_of_edges()

2491

#Jaccard coeffizient

In [76]:
# Ermittlung Jaccard Coeffizient
jacc_gen = nx.jaccard_coefficient(G) #Generator für Jaccard Koeffizient

In [77]:
jacc_gen

<generator object _apply_prediction.<locals>.<genexpr> at 0x7fc454427fc0>

In [78]:
jacc_dict = {}
for u,v,p in jacc_gen:
  jacc_dict[(u, v)] = p

In [79]:
for key in jacc_dict:
  print(key, jacc_dict[key])
  break

('The Yard', 'Woodstock') 0.3333333333333333


#Adam - Adar Coeffizient

In [80]:
# Ermittlung Adamic Adar Coeffizient
adam_ad = nx.adamic_adar_index(G)  #Generator für Adamic_Adar_Index

In [81]:
adam_ad_dict = {}
for u,v,p in adam_ad:
  adam_ad_dict[(u, v)] = p

In [82]:
for key in adam_ad_dict:
  print(key, adam_ad_dict[key])
  break

('The Yard', 'Woodstock') 0.16649037667746924


In [83]:
results_adam = {}
for key in adam_ad_dict:
  if selected_movie in key:
    results_adam[key] = adam_ad_dict[key]

In [95]:
results_adam

{('The Yard', 'Queen of the Desert'): 0,
 ('Docuseries', 'Queen of the Desert'): 0,
 ('The Ultimatum', 'Queen of the Desert'): 0,
 ('Woodstock', 'Queen of the Desert'): 0,
 ('Go! Live Your Way', 'Queen of the Desert'): 0,
 ('Love In A Puff', 'Queen of the Desert'): 0,
 (nan, 'Queen of the Desert'): 0,
 ('Judwaa', 'Queen of the Desert'): 0,
 ('Rebirth', 'Queen of the Desert'): 0.18821028282068325,
 ('Memory Love', 'Queen of the Desert'): 0,
 ('Tales From the Hood 2', 'Queen of the Desert'): 0,
 ('Deathgrip', 'Queen of the Desert'): 0,
 ('The Irregular at Magic High School', 'Queen of the Desert'): 0,
 ('Larva Island', 'Queen of the Desert'): 0,
 ('Cam', 'Queen of the Desert'): 0,
 ('46', 'Queen of the Desert'): 0,
 ('The Wishing Tree', 'Queen of the Desert'): 0,
 ('Namastey London', 'Queen of the Desert'): 0.18821028282068325,
 ('The Human Factor: The Untold Story of the Bombay Film Orchestras',
  'Queen of the Desert'): 0,
 ('Simon', 'Queen of the Desert'): 0,
 ('Shark Night', 'Queen o

#Results

In [94]:
#Import der Bibliothek operator umd spezielle Funktione zu verwenden zb. itemgetter
import operator
results_adam_sorted = sorted(results_adam.items(), key=operator.itemgetter(1), reverse=True)
results_adam_sorted

[(('Center Stage', 'Queen of the Desert'), 0.43663578121915236),
 (('How to Make an American Quilt', 'Queen of the Desert'),
  0.43663578121915236),
 (('The Two Popes', 'Queen of the Desert'), 0.43663578121915236),
 (('Emo the Musical', 'Queen of the Desert'), 0.43663578121915236),
 (('Seven Pounds', 'Queen of the Desert'), 0.43663578121915236),
 (("The Time Traveler's Wife", 'Queen of the Desert'), 0.43663578121915236),
 (('Walking Out', 'Queen of the Desert'), 0.43663578121915236),
 (('Mona Lisa Smile', 'Queen of the Desert'), 0.43663578121915236),
 (('The Innocents', 'Queen of the Desert'), 0.43663578121915236),
 (('Traitor', 'Queen of the Desert'), 0.43663578121915236),
 (('The Rainmaker', 'Queen of the Desert'), 0.43663578121915236),
 (('Queen of the Desert', 'Victor'), 0.43663578121915236),
 (('Shark Night', 'Queen of the Desert'), 0.24842549839846914),
 (('The Saint', 'Queen of the Desert'), 0.24842549839846914),
 (('The Breadwinner', 'Queen of the Desert'), 0.24842549839846914)

In [85]:
results_adam_sorted[:num_rec]

[(('Center Stage', 'Queen of the Desert'), 0.43663578121915236),
 (('How to Make an American Quilt', 'Queen of the Desert'),
  0.43663578121915236),
 (('The Two Popes', 'Queen of the Desert'), 0.43663578121915236),
 (('Emo the Musical', 'Queen of the Desert'), 0.43663578121915236),
 (('Seven Pounds', 'Queen of the Desert'), 0.43663578121915236),
 (("The Time Traveler's Wife", 'Queen of the Desert'), 0.43663578121915236),
 (('Walking Out', 'Queen of the Desert'), 0.43663578121915236),
 (('Mona Lisa Smile', 'Queen of the Desert'), 0.43663578121915236),
 (('The Innocents', 'Queen of the Desert'), 0.43663578121915236),
 (('Traitor', 'Queen of the Desert'), 0.43663578121915236)]

In [86]:
results_jacc = {}
for key in jacc_dict:
  if selected_movie in key:
    results_jacc[key] = jacc_dict[key]

In [87]:
import operator
results_jacc_sorted = sorted(results_jacc.items(), key=operator.itemgetter(1), reverse=True)

results_jacc_sorted

[(('Center Stage', 'Queen of the Desert'), 1.0),
 (('How to Make an American Quilt', 'Queen of the Desert'), 1.0),
 (('The Two Popes', 'Queen of the Desert'), 1.0),
 (('Emo the Musical', 'Queen of the Desert'), 1.0),
 (('Seven Pounds', 'Queen of the Desert'), 1.0),
 (("The Time Traveler's Wife", 'Queen of the Desert'), 1.0),
 (('Walking Out', 'Queen of the Desert'), 1.0),
 (('Mona Lisa Smile', 'Queen of the Desert'), 1.0),
 (('The Innocents', 'Queen of the Desert'), 1.0),
 (('Traitor', 'Queen of the Desert'), 1.0),
 (('The Rainmaker', 'Queen of the Desert'), 1.0),
 (('Queen of the Desert', 'Victor'), 1.0),
 (('Rebirth', 'Queen of the Desert'), 0.3333333333333333),
 (('Namastey London', 'Queen of the Desert'), 0.3333333333333333),
 (('Shark Night', 'Queen of the Desert'), 0.3333333333333333),
 (('Menorca', 'Queen of the Desert'), 0.3333333333333333),
 (('The Saint', 'Queen of the Desert'), 0.3333333333333333),
 (('Ten Years', 'Queen of the Desert'), 0.3333333333333333),
 (('Mallesham', 

In [88]:
import numpy as np
erg_jacc = results_jacc_sorted[:num_rec]

erg_jacc_only = [tup[1] for tup in erg_jacc]
erg_jacc_only

erg_jacc_recom = [tup[0] for tup in erg_jacc ]
erg_jacc_rec =[tup[0] if tup[0]!=selected_movie else tup[1] for tup in erg_jacc_recom ]
erg_jacc_rec

erg_jacc_array = np.column_stack((erg_jacc_rec, erg_jacc_only))
erg_jacc_array

set_jacc = set(erg_jacc_rec)

set_jacc #Ergebnis als Menge

erg_jacc_top = [tup[1] for tup in erg_jacc]

erg_jacc_array

array([['Center Stage', '1.0'],
       ['How to Make an American Quilt', '1.0'],
       ['The Two Popes', '1.0'],
       ['Emo the Musical', '1.0'],
       ['Seven Pounds', '1.0'],
       ["The Time Traveler's Wife", '1.0'],
       ['Walking Out', '1.0'],
       ['Mona Lisa Smile', '1.0'],
       ['The Innocents', '1.0'],
       ['Traitor', '1.0']], dtype='<U32')

In [89]:
erg_adam = results_adam_sorted[:num_rec]

erg_adam_only = [tup[1] for tup in erg_adam]

erg_adam_recom = [tup[0] for tup in erg_adam ]
erg_adam_rec =[tup[0] if tup[0]!=selected_movie else tup[1] for tup in erg_adam_recom ]

erg_adam_array = np.column_stack((erg_adam_rec, erg_adam_only))
erg_adam_array

set_adam = set(erg_adam_rec)

erg_adam_array



array([['Center Stage', '0.43663578121915236'],
       ['How to Make an American Quilt', '0.43663578121915236'],
       ['The Two Popes', '0.43663578121915236'],
       ['Emo the Musical', '0.43663578121915236'],
       ['Seven Pounds', '0.43663578121915236'],
       ["The Time Traveler's Wife", '0.43663578121915236'],
       ['Walking Out', '0.43663578121915236'],
       ['Mona Lisa Smile', '0.43663578121915236'],
       ['The Innocents', '0.43663578121915236'],
       ['Traitor', '0.43663578121915236']], dtype='<U32')

In [90]:
intersec = set_adam.intersection(set_jacc)
len(intersec)

10

#Final recommendations

In [91]:
intersec #Recommendations based on both measures (jacc and adam) 

{'Center Stage',
 'Emo the Musical',
 'How to Make an American Quilt',
 'Mona Lisa Smile',
 'Seven Pounds',
 'The Innocents',
 "The Time Traveler's Wife",
 'The Two Popes',
 'Traitor',
 'Walking Out'}

In [92]:
#Proportion of common top recommendations (jacc and adam) #with respect to chosen number of recommendations (num_rec)
prop_com_rec = len(intersec)/(num_rec)*100
print(f'Proportion of common top recommendations {prop_com_rec} %')

Proportion of common top recommendations 100.0 %
