# Movie Recommendation System
Create a recommendation system that suggests similar titles based on each title's cast, director, and genres.
Reference: https://www.datacamp.com/community/tutorials/recommender-systems-python

In [141]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [142]:
# Load the ratings CSV and keep only the rows that have both a director and a cast value
original_df = (
    pd.read_csv('netflix_ratings_data.csv', sep=',')
    .dropna(subset=['director', 'cast'])
)

In [143]:
# Work on an independent copy so original_df keeps every column for display,
# and discard the columns that are not used to build the similarity features
working_df = original_df.copy().drop(
    columns=['rating', 'duration', 'year', 'originalTitle', 'averageRating', 'numVotes']
)

In [144]:
# Guard: keep only rows where both director and cast are present
# (original_df was already filtered on these columns when it was loaded)
working_df = working_df.dropna(how='any', subset=['director', 'cast'])

In [145]:
# Turn each comma-separated text field into a list of individual entries
for column in ('director', 'cast', 'genres'):
    working_df[column] = working_df[column].str.split(',')

In [146]:
def clean_data(x):
    """Normalise a list of names into lowercase tokens without spaces.

    Removing the spaces collapses every multi-word name into a single token
    (e.g. "Emma Stone" -> "emmastone").

    Parameters
    ----------
    x : iterable of str, or a missing value (None / NaN)
        The entries of one cell, e.g. the list produced by splitting a
        comma-separated field.

    Returns
    -------
    list of str
        The entries with all spaces removed and lowercased, in the same
        order. An empty list when ``x`` is a missing value.
    """
    # A missing cell is a scalar (None or float NaN), not a list; iterating it
    # would raise TypeError. ``x != x`` is only true for NaN.
    if x is None or (isinstance(x, float) and x != x):
        return []
    return [name.replace(" ", "").lower() for name in x]

# Normalise every entry of the three list-valued feature columns
for field in ('director', 'cast', 'genres'):
    working_df[field] = working_df[field].apply(clean_data)


In [147]:
def create_soup(x):
    """Combine a row's cast, director and genres entries into one string.

    ``x`` is a row-like mapping whose 'cast', 'director' and 'genres' values
    are sequences of strings. Each field is joined with single spaces, and the
    three resulting strings are joined with single spaces in that order.
    """
    feature_fields = ('cast', 'director', 'genres')
    return ' '.join(' '.join(x[field]) for field in feature_fields)

In [148]:
# Build one text document per title by applying create_soup to every row
working_df['soup'] = working_df.apply(create_soup, axis='columns')

In [149]:
# Inspect the generated documents (single-column frame)
working_df.loc[:, ['soup']]

Unnamed: 0,soup
1,samibouajila tracygotoas samueljouy nabihaakka...
3,katesiegel zachgilford hamishlinklater henryth...
4,vanessahudgens kimikoglenn jamesmarsden sofiac...
5,kofighanaba oyafunmikeogunlano alexandraduah n...
6,melissamccarthy chriso'dowd kevinkline timothy...
...,...
6414,santoshjuvekar siddharthchandekar sachitpatil ...
6416,markruffalo jakegyllenhaal robertdowneyjr. ant...
6417,jesseeisenberg woodyharrelson emmastone abigai...
6418,timallen courteneycox chevychase katemara ryan...


In [150]:
# Convert every soup string into a vector of token counts (English stop words removed)
# NOTE(review): CountVectorizer's default token pattern treats punctuation as a
# separator, so tokens such as "chriso'dowd" or "sci-fi" are split into pieces —
# confirm this is intended.
soup_documents = working_df['soup']
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(soup_documents)

In [151]:
# Pairwise cosine similarity between all titles (with Y omitted, X is compared with itself)
cosine_sim2 = cosine_similarity(count_matrix)


In [152]:
# Renumber the rows 0..n-1 (the old index is kept as a column) so that row
# labels line up with the row positions of the similarity matrix
working_df = working_df.reset_index(drop=False)
# Lookup table: title -> row position
indices = pd.Series(data=working_df.index, index=working_df['title'])

# Write a function to return the recommendation list

In [153]:
def get_recommendations(title, cosine_sim, top_n=10):
    """Return the titles most similar to ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the module-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity scores; row ``i`` holds the scores of the title
        at position ``i`` against every title.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of the module-level ``original_df``, ordered from
        most to least similar. The queried title itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.

    Notes
    -----
    Rows are selected from ``original_df`` by position, so this assumes
    ``original_df`` has the same row order as the data used to build
    ``cosine_sim`` — confirm if the preprocessing changes.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions;
    # use the first occurrence instead of failing later on a 2-D score block
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Get the pairwise similarity scores of all movies with that movie.
    # The movie itself is excluded by position rather than by assuming it
    # sorts first: another title with an identical score (e.g. the same cast,
    # director and genres) may tie with it and come earlier in the ordering.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Sort the movies based on the similarity scores (stable: ties keep row order)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Get the movie indices of the top_n most similar movies
    movie_indices = [position for position, _ in sim_scores[:top_n]]

    # Return the most similar movies
    return original_df.iloc[movie_indices]

In [156]:
# Example: titles most similar to 'dark skies'
get_recommendations(title='dark skies', cosine_sim=cosine_sim2)

Unnamed: 0,show_id,type,title,director,cast,rating,duration,originalTitle,year,genres,averageRating,numVotes
1797,s2560,Movie,becoming,Nadia Hallgren,Michelle Obama,PG,89 min,Becoming,2020,"Drama,Horror,Sci-Fi",4.7,1192
4623,s6398,Movie,cabin fever,Travis Zariwny,"Gage Golightly, Matthew Daddario, Samuel Davis...",R,98 min,Cabin Fever,2016,"Horror,Sci-Fi,Thriller",3.7,9988
4772,s6601,Movie,devil's gate,Clay Staub,"Amanda Schull, Milo Ventimiglia, Shawn Ashmore...",TV-MA,94 min,Devil's Gate,2017,"Horror,Sci-Fi,Thriller",5.1,6176
3455,s4813,Movie,tau,Federico D'Alessandro,"Maika Monroe, Ed Skrein, Gary Oldman",R,98 min,Tau,2018,"Sci-Fi,Thriller",5.8,30732
3596,s5029,Movie,mute,Duncan Jones,"Alexander Skarsgård, Paul Rudd, Justin Theroux",TV-MA,127 min,Mute,2018,"Mystery,Sci-Fi,Thriller",5.4,34581
4845,s6712,Movie,event horizon,Paul W.S. Anderson,"Laurence Fishburne, Sam Neill, Kathleen Quinla...",R,96 min,Event Horizon,1997,"Horror,Sci-Fi,Thriller",6.6,173627
6075,s8359,Movie,the incident,Isaac Ezban,"Raúl Méndez, Nailea Norvind, Hernán Mendoza, H...",TV-MA,100 min,El incidente,2014,"Horror,Sci-Fi,Thriller",6.3,3678
664,s1000,Movie,stowaway,Joe Penna,"Anna Kendrick, Toni Collette, Daniel Dae Kim, ...",TV-MA,116 min,Stowaway,2021,"Adventure,Sci-Fi,Thriller",5.6,44336
4489,s6209,Movie,backcountry,Adam MacDonald,"Missy Peregrym, Jeff Roop, Eric Balfour, Nicho...",NR,92 min,Backcountry,2014,"Comedy,Horror,Sci-Fi",5.8,5
2204,s3103,Movie,sweetheart,J.D. Dillard,"Kiersey Clemons, Emory Cohen, Hanna Mangan Law...",PG-13,83 min,Wyspa przetrwania,2019,"Adventure,Horror,Sci-Fi",5.8,7991
