<a href="https://colab.research.google.com/github/igoekce/netflixrecommender/blob/master/Master_Netflix_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Data Preprocessing

## Import

In [1]:
# import librairies
import networkx as nx
import pandas as pd
import numpy as np
import math as math
import time 
import operator
from tabulate import tabulate

In [2]:
filepath = 'https://raw.githubusercontent.com/igoekce/netflixrecommender/master/netflix_titles.csv'
df = pd.read_csv(filepath)

##Variables

In [3]:
selected_movie = 'The Ridiculous 6'  #Input movie title
num_rec = 10 #Input max number of recommendations
# possible cols w/o title: 'type','director', 'cast', 'country', 'date_added', 'release_year', 'rating', 'duration', 'listed_in', 'description'
features = ['rating', 'cast', 'country', 'listed_in'] 
k=0.3

## Separation

In [4]:
df = df.dropna()
df = df.drop_duplicates()
df['title'].sample

<bound method NDFrame.sample of 0       Norm of the North: King Sized Adventure
4                                  #realityhigh
6                                      Automata
7            Fabrizio Copano: Solo pienso en mi
9                                   Good People
                         ...                   
6142              The Great British Baking Show
6158    Miraculous: Tales of Ladybug & Cat Noir
6167                               Sacred Games
6182                           Men on a Mission
6213                           Leyla and Mecnun
Name: title, Length: 3774, dtype: object>

In [5]:
df = df.sample(int(k*df.shape[0]), random_state=42)

In [6]:
def splitDataFrameList(df,target_column,separator):
    ''' df = dataframe to split,
    target_column = the column containing the values to split
    separator = the symbol used to perform the split
    returns: a dataframe with each entry for the target column separated, with each element moved into a new row. 
    The values in the other columns are duplicated across the newly divided rows.
    '''
    def splitListToRows(row,row_accumulator,target_column,separator):
        split_row = row[target_column].split(separator)
        for s in split_row:
            new_row = row.to_dict()
            new_row[target_column] = s
            row_accumulator.append(new_row)
    new_rows = []
    df.apply(splitListToRows,axis=1,args = (new_rows,target_column,separator))
    new_df = pd.DataFrame(new_rows)
    return new_df

In [7]:
df = splitDataFrameList(df,'cast',',')
df = splitDataFrameList(df,'country',',')
df = splitDataFrameList(df,'listed_in',',')

##Function generating Edge List

In [8]:
def eg_generator(features):
  df1 = pd.DataFrame(columns = ['title', 'value']) 
  for feature in features:
    print(feature)
    df_temp = df[['title', feature]].copy()
    df_temp.columns = ['title', 'value']
    df1 = pd.concat([df1, df_temp], axis=0)
  return df1

In [9]:
print('possible cols w/o title:', df.columns)

possible cols w/o title: Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')


In [10]:
df_model = eg_generator(features)

rating
cast
country
listed_in


In [11]:
df_model = df_model.drop_duplicates()

# Modelling

##Generating G

In [12]:
# Graphen erstellen
G = nx.from_pandas_edgelist(df_model, source='title', target='value')

## Calculation of Adar and Jaccard

In [13]:
# Ermittlung Jaccard Coeffizient
jacc_gen = nx.jaccard_coefficient(G) #Generator für Jaccard Koeffizient

In [14]:
# Ermittlung Adamic Adar Coeffizient
adam_ad = nx.adamic_adar_index(G)  #Generator für Adamic_Adar_Index

In [15]:
jacc_dict = {}
for u,v,p in jacc_gen:
  jacc_dict[(u, v)] = p

In [16]:
adam_ad_dict = {}
for u,v,p in adam_ad:
  adam_ad_dict[(u, v)] = p

In [17]:
results_jacc = {}
for key in jacc_dict:
  if selected_movie in key:
    results_jacc[key] = jacc_dict[key]

In [18]:
results_adam = {}
for key in adam_ad_dict:
  if selected_movie in key:
    results_adam[key] = adam_ad_dict[key]

##Preparing and sorting final recommendation

In [19]:
results_jacc_sorted = sorted(results_jacc.items(), key=operator.itemgetter(1), reverse=True)
results_adam_sorted = sorted(results_adam.items(), key=operator.itemgetter(1), reverse=True)

In [20]:
erg_jacc = results_jacc_sorted[:num_rec]
erg_jacc_only = [tup[1] for tup in erg_jacc]
erg_jacc_recom = [tup[0] for tup in erg_jacc ]
erg_jacc_rec =[tup[0] if tup[0]!=selected_movie else tup[1] for tup in erg_jacc_recom ]
erg_jacc_array = np.column_stack((erg_jacc_rec, erg_jacc_only))
set_jacc = set(erg_jacc_rec)
erg_jacc_top = [tup[1] for tup in erg_jacc]

In [21]:
erg_adam = results_adam_sorted[:num_rec]
erg_adam_only = [tup[1] for tup in erg_adam]
erg_adam_recom = [tup[0] for tup in erg_adam ]
erg_adam_rec =[tup[0] if tup[0]!=selected_movie else tup[1] for tup in erg_adam_recom ]
erg_adam_array = np.column_stack((erg_adam_rec, erg_adam_only))
set_adam = set(erg_adam_rec)

In [22]:
erg_jacc_array

array([["You Don't Mess with the Zohan", '0.18181818181818182'],
       ["Charlie's Angels", '0.10526315789473684'],
       ['Spy Kids 2: The Island of Lost Dreams', '0.10526315789473684'],
       ['The Amazing Praybeyt Benjamin', '0.0967741935483871'],
       ['Rembat', '0.09375'],
       ['National Treasure', '0.09090909090909091'],
       ['Bitcoin Heist', '0.09090909090909091'],
       ['Antidote', '0.08823529411764706'],
       ['Stuart Little 2', '0.08823529411764706'],
       ['Death Race: Beyond Anarchy', '0.08823529411764706']],
      dtype='<U37')

In [23]:
intersec = set_adam.intersection(set_jacc)
len(intersec)

3

In [24]:
intersec

{"Charlie's Angels",
 'Spy Kids 2: The Island of Lost Dreams',
 "You Don't Mess with the Zohan"}

In [25]:
#Proportion of common top recommendations (jacc and adam) #with respect to chosen number of recommendations (num_rec)
prop_com_rec = len(intersec)/(num_rec)*100
print(f'Proportion of common top recommendations {prop_com_rec} %')

Proportion of common top recommendations 30.0 %


# User Recommendation

In [26]:
def returnrecommendations(title, movielist, verbose=False):
  print('For the movie:' + '\n')
  print('*'*40)
  print(title)
  print('*'*40 + '\n')
  print('We can recommend the following movies:' + '\n')

  table = []

  if verbose == False:
    for movie, value in movielist:
      value = round(float(value), 2) * 100
      table.append([movie, value])

    return print(tabulate(table, headers=["Movie Name", ' Score'], tablefmt='rst'))
  
  if verbose == True:
    for movie, value in movielist:
      value = round(float(value), 2) * 100
      # check if description is available
      try:
        description = df.loc[df['title'] == movie, 'description'].values[0]
      except:
        description = 'No description available'      
      table.append([movie, value, description])

    return print(tabulate(table, headers=["Movie Name", ' Score', 'Description'], tablefmt='rst'))

In [27]:
returnrecommendations(selected_movie, erg_jacc_array, verbose=True)

For the movie:

****************************************
The Ridiculous 6
****************************************

We can recommend the following movies:

Movie Name                                Score  Description
You Don't Mess with the Zohan                18  An Israeli counterterrorism soldier with a secretly fabulous ambition to become a Manhattan hairstylist will do anything to make his dreams come true.
Charlie's Angels                             11  Three beauties use their looks, charm and martial-arts training to kick butt on the trail of stolen software in this reboot of the classic TV series.
Spy Kids 2: The Island of Lost Dreams        11  Spy kids Carmen and Juni Cortez team up with two other pint-size secret agents, Gary and Gerti Giggles, in order to take on an evil scientist.
The Amazing Praybeyt Benjamin                10  Now a celebrity, Benjie is tasked with taking care of a General's young son, who has information about a looming terrorist threat.
Rembat      

In [28]:
returnrecommendations(selected_movie, erg_adam_array, verbose=True)

For the movie:

****************************************
The Ridiculous 6
****************************************

We can recommend the following movies:

Movie Name                                Score  Description
You Don't Mess with the Zohan               384  An Israeli counterterrorism soldier with a secretly fabulous ambition to become a Manhattan hairstylist will do anything to make his dreams come true.
Charlie's Angels                            202  Three beauties use their looks, charm and martial-arts training to kick butt on the trail of stolen software in this reboot of the classic TV series.
The Rugrats Movie                           183  When his baby brother Dil is born, Tommy Pickles and his pals decide that he's too much responsibility and try to return him to the hospital.
Spy Kids 2: The Island of Lost Dreams       181  Spy kids Carmen and Juni Cortez team up with two other pint-size secret agents, Gary and Gerti Giggles, in order to take on an evil scientist.
T