# Movie Recommendation System
Build a content-based recommendation system that suggests similar titles based on their cast, director, and genres.
Reference: https://www.datacamp.com/community/tutorials/recommender-systems-python

In [30]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [31]:
# Load the ratings export and keep only the titles that list both a director
# and a cast.
original_df = (
    pd.read_csv('netflix_ratings_data.csv', sep=',')
    .dropna(subset=['director', 'cast'])
)

In [32]:
# Work on a copy so original_df keeps every column; the columns listed here
# are removed from the working copy only.
unused_columns = ['rating', 'duration', 'year', 'originalTitle', 'averageRating', 'numVotes']
working_df = original_df.copy().drop(columns=unused_columns)

In [33]:
# Rows without a director or cast were already removed when the CSV was
# loaded, so this only acts as a safeguard before the string operations below.
required_columns = ['director', 'cast']
working_df = working_df.dropna(subset=required_columns)

In [34]:
# Turn each comma-separated string into a list of individual entries
for column in ('director', 'cast', 'genres'):
    working_df[column] = working_df[column].str.split(',')

In [35]:
def clean_data(x):
    """Lower-case every entry of ``x`` and remove all spaces from it.

    Parameters
    ----------
    x : list of str, or a missing value
        The entries of one cell, e.g. the names obtained by splitting a
        comma-separated string.

    Returns
    -------
    list of str
        The normalised entries, in their original order. A missing cell
        (``None`` or a float NaN) yields an empty list.
    """
    # Splitting a missing cell with .str.split leaves it as NaN, which cannot
    # be iterated; treat it as "no entries" instead of raising TypeError.
    if x is None or (isinstance(x, float) and x != x):
        return []
    return [entry.replace(" ", "").lower() for entry in x]

# Normalise every list-valued feature column with clean_data
for feature in ['director', 'cast', 'genres']:
    working_df[feature] = working_df[feature].apply(clean_data)


In [36]:
def create_soup(x):
    """Build the space-separated feature string for one row.

    The entries of ``x['cast']``, ``x['director']`` and ``x['genres']`` are
    joined with single spaces, in that order.
    """
    parts = [' '.join(x[field]) for field in ('cast', 'director', 'genres')]
    return ' '.join(parts)

In [37]:
# Build the feature string for every title, one row at a time
soup_by_row = working_df.apply(create_soup, axis=1)
working_df['soup'] = soup_by_row

In [38]:
# Inspect the cleaned list-valued columns next to each title
working_df.loc[:, ['title', 'director', 'cast', 'genres']]

Unnamed: 0,title,director,cast,genres
1,ganglands,[julienleclercq],"[samibouajila, tracygotoas, samueljouy, nabiha...","[action, crime, drama]"
3,midnight mass,[mikeflanagan],"[katesiegel, zachgilford, hamishlinklater, hen...","[drama, fantasy, horror]"
4,my little pony: a new generation,"[robertcullen, joséluisucha]","[vanessahudgens, kimikoglenn, jamesmarsden, so...","[adventure, animation, comedy]"
5,sankofa,[hailegerima],"[kofighanaba, oyafunmikeogunlano, alexandradua...",[drama]
6,the starling,[theodoremelfi],"[melissamccarthy, chriso'dowd, kevinkline, tim...","[comedy, drama]"
...,...,...,...,...
6414,zenda,[avadhootgupte],"[santoshjuvekar, siddharthchandekar, sachitpat...","[drama, thriller]"
6416,zodiac,[davidfincher],"[markruffalo, jakegyllenhaal, robertdowneyjr.,...","[crime, drama, mystery]"
6417,zombieland,[rubenfleischer],"[jesseeisenberg, woodyharrelson, emmastone, ab...","[action, comedy, horror]"
6418,zoom,[peterhewitt],"[timallen, courteneycox, chevychase, katemara,...","[action, adventure, comedy]"


In [39]:
# Inspect the combined feature string of each title
working_df.loc[:, ['soup']]

Unnamed: 0,soup
1,samibouajila tracygotoas samueljouy nabihaakka...
3,katesiegel zachgilford hamishlinklater henryth...
4,vanessahudgens kimikoglenn jamesmarsden sofiac...
5,kofighanaba oyafunmikeogunlano alexandraduah n...
6,melissamccarthy chriso'dowd kevinkline timothy...
...,...
6414,santoshjuvekar siddharthchandekar sachitpatil ...
6416,markruffalo jakegyllenhaal robertdowneyjr. ant...
6417,jesseeisenberg woodyharrelson emmastone abigai...
6418,timallen courteneycox chevychase katemara ryan...


In [40]:
# Count how often each token occurs in every title's feature string.
# NOTE(review): CountVectorizer's default token_pattern only keeps runs of two
# or more word characters, so punctuation inside a squashed name (such as an
# apostrophe) splits it into several tokens -- confirm this is intended.
count = CountVectorizer(stop_words='english')
soup_corpus = working_df['soup']
count_matrix = count.fit_transform(soup_corpus)

In [41]:
# Pairwise cosine similarity between all rows of count_matrix; with a single
# argument the matrix is compared against itself.
cosine_sim2 = cosine_similarity(count_matrix)


In [42]:
# Renumber the rows 0..n-1 (the previous index is kept as a column), then map
# each title to its new row position.
working_df = working_df.reset_index()
title_labels = working_df['title']
indices = pd.Series(data=working_df.index, index=title_labels)

# Write function to return the recommendation list

In [43]:
def get_recommendations(title, cosine_sim, top_n=10):
    """Return the rows of ``original_df`` most similar to ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the module-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the similarity of the
        title at row position ``i`` to every other row position.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``original_df``, selected by position and
        ordered from most to least similar. The requested title itself is
        never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``top_n`` is negative.

    Notes
    -----
    Rows are taken with ``original_df.iloc``, so ``original_df`` must have the
    same row order as the data ``cosine_sim`` was computed from.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # A title that occurs more than once makes the lookup return several
    # positions; use the first one so the rest of the function sees a scalar.
    matches = np.atleast_1d(indices[title])
    idx = int(matches[0])

    # Pair each row position with its pairwise similarity to the requested
    # title. The title itself is left out by position rather than by assuming
    # it sorts first: another row with an equal score and a lower position
    # would otherwise take its place.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]

    # Most similar first; the sort is stable, so ties keep their row order
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row positions of the top_n most similar titles
    movie_indices = [pos for pos, _ in sim_scores[:top_n]]

    return original_df.iloc[movie_indices]

In [45]:
# Show the full record of the title used as the query below
title_mask = original_df['title'] == 'before i wake'
original_df.loc[title_mask]

Unnamed: 0,show_id,type,title,director,cast,rating,duration,originalTitle,year,genres,averageRating,numVotes,ranking
3638,s5092,Movie,before i wake,Mike Flanagan,"Kate Bosworth, Thomas Jane, Jacob Tremblay, An...",PG-13,97 min,Before I Wake,2016,"Drama,Fantasy,Horror",6.2,45365,Positive


In [44]:
# Titles most similar to the query, based on the similarity matrix above
get_recommendations(title='before i wake', cosine_sim=cosine_sim2)

Unnamed: 0,show_id,type,title,director,cast,rating,duration,originalTitle,year,genres,averageRating,numVotes,ranking
3,s6,TV Show,midnight mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",TV-MA,1 Season,Midnight Mass,2021,"Drama,Fantasy,Horror",7.7,100194,Positive
254,s354,Movie,the haunting in connecticut 2: ghosts of georgia,Tom Elkins,"Abigail Spencer, Chad Michael Murray, Katee Sa...",R,101 min,The Haunting in Connecticut 2: Ghosts of Georgia,2013,"Drama,Horror,Mystery",5.3,17158,Medium
165,s230,Movie,the old ways,Christopher Alender,"Brigitte Kali Canales, Andrea Cortes, Julia Ve...",TV-MA,90 min,The Old Ways,2020,"Drama,Fantasy,Horror",5.4,5603,Medium
3354,s4686,TV Show,ghoul,Patrick Graham,"Radhika Apte, Manav Kaul, Ratnabali Bhattachar...",TV-MA,1 Season,Ghoul,2018,"Drama,Fantasy,Horror",7.1,13711,Positive
2672,s3758,Movie,the 3rd eye 2,Rocky Soraya,"Jessica Mila, Bianca Hello, Nabilah Ayu, Sophi...",TV-MA,117 min,Mata Batin 2,2019,"Drama,Fantasy,Horror",5.4,583,Medium
3754,s5253,Movie,gerald's game,Mike Flanagan,"Carla Gugino, Bruce Greenwood, Henry Thomas, C...",TV-MA,103 min,Gerald's Game,2017,"Drama,Horror,Thriller",6.5,104267,Positive
1941,s2750,Movie,wildling,Fritz Böhm,"Bel Powley, Brad Dourif, Liv Tyler, Collin Kel...",R,93 min,Wildling,2018,"Drama,Fantasy,Horror",5.5,13722,Medium
3450,s4806,Movie,the maus,Yayo Herrero,"Alma Terzic, August Wittgenstein, Aleksandar S...",TV-MA,90 min,The Maus,2017,"Drama,Fantasy,Horror",4.7,6344,Medium
4711,s6524,Movie,compulsion,Craig Goodwill,"Analeigh Tipton, Jakob Cedergren, Marta Gastin...",TV-MA,84 min,Sadie,2016,"Drama,Fantasy,Horror",3.7,1499,Negative
5275,s7303,Movie,lifechanger,Justin McConnell,"Lora Burke, Jack Foley, Bill Oberst Jr., Elits...",TV-MA,84 min,Lifechanger,2018,"Drama,Fantasy,Horror",5.4,2419,Medium
