In [93]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

#Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

In [94]:
# Load the exported movie table (the file name indicates it already carries the
# weighted rating) and display its dimensions. Nothing is sorted in this cell.
quality_movies_path = "../tmbd_exports/quality_movs_weighted_rating.parquet"
q_movies = pd.read_parquet(quality_movies_path)
q_movies.shape

(607, 20)

In [18]:
# Reverse lookup: movie title -> row position in `q_movies`.
# Positions (0..n-1) are stored rather than the index labels because the values
# are later used to address rows of the similarity matrix and with `.iloc`,
# both of which are positional. The two only coincide when the frame's index
# happens to be a clean 0..n-1 range.
indices = pd.Series(np.arange(len(q_movies)), index=q_movies["title"])
indices.shape

(607,)

In [19]:
# Define a TF-IDF vectorizer object. Remove all English stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string: the vectorizer only accepts text documents,
# so a missing overview would make fit_transform fail. The filled series is kept
# separate so that `q_movies` itself is not mutated by this cell.
overviews = q_movies['overview'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(overviews)

# Output the shape of tfidf_matrix (rows = movies, columns = vocabulary terms)
tfidf_matrix.shape

(607, 6462)

In [20]:
# Pairwise similarity of every movie with every other movie.
# The vectorizer above is used with its default settings, which L2-normalise
# each row, so the plain dot product computed by linear_kernel is the cosine
# similarity. With a single argument the matrix is compared against itself.
cosine_sim = linear_kernel(tfidf_matrix)
cosine_sim.shape

(607, 607)

In [73]:
# Function that takes a movie title (and a `method` selecting the return type)
# as input and returns the most similar movies as a DataFrame or a Series.

def get_recommendations(title, cosine_sim=cosine_sim, method="series", top_n=10):
    """Return the movies most similar to `title`.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level `indices` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix addressed by row position. Defaults to the
        `cosine_sim` matrix that exists when this cell is executed.
    method : str
        "df" returns the matching rows of `q_movies`; any other value returns
        only their "title" column.
    top_n : int
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame, pandas.Series or str
        The recommendations ordered from most to least similar, or the string
        "Title not found." when `title` is not present in `indices`.
    """
    # Check if the movie is part of the database. `is None` matters here: for a
    # title shared by several movies `.get` returns a Series, and testing
    # `series == None` inside an `if` raises ValueError.
    match = indices.get(title)
    if match is None:
        return "Title not found."

    # Get the row position of the movie; for a duplicated title use the first hit
    idx = int(np.atleast_1d(match)[0])

    # Rank all movies by their pairwise similarity with that movie (best first)
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Remove the requested movie by position instead of assuming it sorts first:
    # with tied scores (identical overviews) or an all-zero row (empty overview)
    # it is not guaranteed to be at the top. Then keep the `top_n` best matches.
    limit = max(int(top_n), 0)
    movie_indices = [pos for pos, _ in ranked if pos != idx][:limit]

    if method == "df":
        # Return the full rows of the most similar movies
        return q_movies.iloc[movie_indices]

    return q_movies["title"].iloc[movie_indices]

In [90]:
# Display the recommended movies for 'Ex Machina' as a DataFrame
get_recommendations(title='Ex Machina', method="df")

Unnamed: 0,budget,genres,homepage,movie_id,keywords,original_language,original_title,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,cast,director,weighted_rating
516,58000000,Comedy,http://www.theinternshipmovie.com/,116741,"job interview, loss of job, intern, reference ...",en,The Internship,Two recently laid-off men in their 40s try to ...,1.551497,2013-06-07,44000000,119.0,Released,Hiring them was a brilliant mistake.,The Internship,6.1,1658,"Owen Wilson, Vince Vaughn, Rose Byrne, Max Min...",Shawn Levy,6.055339
18,15000000,"Fantasy, Adventure, Animation, Family",http://movies.disney.com/spirited-away,129,"witch, parents kids relationship, magic, twili...",ja,千と千尋の神隠し,A ten year old girl who wanders away from her ...,118.968562,2001-07-20,274925095,125.0,Released,The tunnel led Chihiro to a mysterious town...,Spirited Away,8.3,3840,"Rumi Hiiragi, Miyu Irino, Mari Natsuki, Takash...",Hayao Miyazaki,7.624982
319,150000000,"Adventure, Comedy, Family, Fantasy",https://www.warnerbros.com/charlie-and-chocola...,118,"london england, father son relationship, choco...",en,Charlie and the Chocolate Factory,A young boy wins a tour through the most magni...,53.905592,2005-07-13,474968763,115.0,Released,Willy Wonka is semi-sweet and nuts.,Charlie and the Chocolate Factory,6.7,3624,"Johnny Depp, Freddie Highmore, David Kelly, An...",Tim Burton,6.488032
163,120000000,"Adventure, Drama, Action",http://www.lifeofpimovie.com/,87827,"ocean, shipwreck, hindu, tiger, faith",en,Life of Pi,"The story of an Indian boy named Pi, a zookeep...",51.328145,2012-11-20,609016565,127.0,Released,Believe The Unbelievable,Life of Pi,7.2,5797,"Suraj Sharma, Irrfan Khan, Ayush Tandon, Gauta...",Ang Lee,6.941891
407,30000000,"Horror, Thriller",http://www.discoverthecabininthewoods.com/,22970,"cabin, plot twist, cabin in the woods, filmed ...",en,The Cabin in the Woods,Five college friends spend the weekend at a re...,73.987775,2012-04-12,66486080,95.0,Released,If you hear a strange sound outside... have sex,The Cabin in the Woods,6.5,2263,"Kristen Connolly, Chris Hemsworth, Anna Hutchi...",Drew Goddard,6.296397
526,82500000,"Comedy, Drama, Fantasy, Romance",,9339,"regret, workaholic, heart attack, architect, d...",en,Click,A workaholic architect finds a universal remot...,41.176631,2006-06-22,237681299,107.0,Released,What If You Had A Remote... That Controlled Yo...,Click,6.0,2104,"Adam Sandler, Kate Beckinsale, Christopher Wal...",Frank Coraci,6.003972
41,180000000,"Animation, Family",http://disney.go.com/disneypictures/wall-e/,10681,romantic comedy,en,WALL·E,WALL·E is the last robot left on an Earth that...,66.390712,2008-06-22,521311860,98.0,Released,An adventure beyond the ordinar-E.,WALL·E,7.8,6296,"Ben Burtt, Elissa Knight, Jeff Garlin, Fred Wi...",Andrew Stanton,7.436359
420,15000000,"Crime, Thriller, Horror, Drama",http://www.thelasthouseontheleft.com/,18405,"lake, rape, kidnapping, psychopath, sadistic, ...",en,The Last House on the Left,When athletic teen Mari Collingwood opts to ha...,8.672,2009-03-13,32721635,113.0,Released,"If bad people hurt someone you love, how far w...",The Last House on the Left,6.5,1780,"Tony Goldwyn, Monica Potter, Sara Paxton, Garr...",Dennis Iliadis,6.267339
537,10000000,"Horror, Mystery, Thriller",,321258,"mask, suicide, fire, england, loss of loved on...",en,The Boy,A young American woman takes a job as a nanny ...,7.841,2016-01-22,73929392,98.0,Released,Every child needs to feel loved.,The Boy,5.959,3771,"Lauren Cohan, Rupert Evans, James Russell, Jim...",William Brent Bell,5.973976
17,170000000,"Action, Science Fiction, Adventure",http://marvel.com/guardians,118340,"marvel comic, spaceship, space, outer space, o...",en,Guardians of the Galaxy,"Light years from Earth, 26 years after being a...",481.098624,2014-07-30,773328629,121.0,Released,All heroes start somewhere.,Guardians of the Galaxy,7.9,9742,"Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",James Gunn,7.632664


In [91]:
# Keep the DataFrame of recommended movies in a variable, then display it
recommendations = get_recommendations(title='Ex Machina', method="df")
recommendations

Unnamed: 0,budget,genres,homepage,movie_id,keywords,original_language,original_title,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,cast,director,weighted_rating
516,58000000,Comedy,http://www.theinternshipmovie.com/,116741,"job interview, loss of job, intern, reference ...",en,The Internship,Two recently laid-off men in their 40s try to ...,1.551497,2013-06-07,44000000,119.0,Released,Hiring them was a brilliant mistake.,The Internship,6.1,1658,"Owen Wilson, Vince Vaughn, Rose Byrne, Max Min...",Shawn Levy,6.055339
18,15000000,"Fantasy, Adventure, Animation, Family",http://movies.disney.com/spirited-away,129,"witch, parents kids relationship, magic, twili...",ja,千と千尋の神隠し,A ten year old girl who wanders away from her ...,118.968562,2001-07-20,274925095,125.0,Released,The tunnel led Chihiro to a mysterious town...,Spirited Away,8.3,3840,"Rumi Hiiragi, Miyu Irino, Mari Natsuki, Takash...",Hayao Miyazaki,7.624982
319,150000000,"Adventure, Comedy, Family, Fantasy",https://www.warnerbros.com/charlie-and-chocola...,118,"london england, father son relationship, choco...",en,Charlie and the Chocolate Factory,A young boy wins a tour through the most magni...,53.905592,2005-07-13,474968763,115.0,Released,Willy Wonka is semi-sweet and nuts.,Charlie and the Chocolate Factory,6.7,3624,"Johnny Depp, Freddie Highmore, David Kelly, An...",Tim Burton,6.488032
163,120000000,"Adventure, Drama, Action",http://www.lifeofpimovie.com/,87827,"ocean, shipwreck, hindu, tiger, faith",en,Life of Pi,"The story of an Indian boy named Pi, a zookeep...",51.328145,2012-11-20,609016565,127.0,Released,Believe The Unbelievable,Life of Pi,7.2,5797,"Suraj Sharma, Irrfan Khan, Ayush Tandon, Gauta...",Ang Lee,6.941891
407,30000000,"Horror, Thriller",http://www.discoverthecabininthewoods.com/,22970,"cabin, plot twist, cabin in the woods, filmed ...",en,The Cabin in the Woods,Five college friends spend the weekend at a re...,73.987775,2012-04-12,66486080,95.0,Released,If you hear a strange sound outside... have sex,The Cabin in the Woods,6.5,2263,"Kristen Connolly, Chris Hemsworth, Anna Hutchi...",Drew Goddard,6.296397
526,82500000,"Comedy, Drama, Fantasy, Romance",,9339,"regret, workaholic, heart attack, architect, d...",en,Click,A workaholic architect finds a universal remot...,41.176631,2006-06-22,237681299,107.0,Released,What If You Had A Remote... That Controlled Yo...,Click,6.0,2104,"Adam Sandler, Kate Beckinsale, Christopher Wal...",Frank Coraci,6.003972
41,180000000,"Animation, Family",http://disney.go.com/disneypictures/wall-e/,10681,romantic comedy,en,WALL·E,WALL·E is the last robot left on an Earth that...,66.390712,2008-06-22,521311860,98.0,Released,An adventure beyond the ordinar-E.,WALL·E,7.8,6296,"Ben Burtt, Elissa Knight, Jeff Garlin, Fred Wi...",Andrew Stanton,7.436359
420,15000000,"Crime, Thriller, Horror, Drama",http://www.thelasthouseontheleft.com/,18405,"lake, rape, kidnapping, psychopath, sadistic, ...",en,The Last House on the Left,When athletic teen Mari Collingwood opts to ha...,8.672,2009-03-13,32721635,113.0,Released,"If bad people hurt someone you love, how far w...",The Last House on the Left,6.5,1780,"Tony Goldwyn, Monica Potter, Sara Paxton, Garr...",Dennis Iliadis,6.267339
537,10000000,"Horror, Mystery, Thriller",,321258,"mask, suicide, fire, england, loss of loved on...",en,The Boy,A young American woman takes a job as a nanny ...,7.841,2016-01-22,73929392,98.0,Released,Every child needs to feel loved.,The Boy,5.959,3771,"Lauren Cohan, Rupert Evans, James Russell, Jim...",William Brent Bell,5.973976
17,170000000,"Action, Science Fiction, Adventure",http://marvel.com/guardians,118340,"marvel comic, spaceship, space, outer space, o...",en,Guardians of the Galaxy,"Light years from Earth, 26 years after being a...",481.098624,2014-07-30,773328629,121.0,Released,All heroes start somewhere.,Guardians of the Galaxy,7.9,9742,"Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",James Gunn,7.632664


In [92]:
%%writefile recommender.py

def get_recommendations(title, cosine_sim, indices, q_movies, method="series", top_n=10):
    """Return the movies most similar to `title`.

    Parameters
    ----------
    title : str
        Title of the movie to find recommendations for.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix addressed by row position.
    indices : mapping or pandas.Series
        Maps a title to the row position of that movie; the value is used both
        to index `cosine_sim` and, positionally, `q_movies`.
    q_movies : pandas.DataFrame
        Movie table with a "title" column, in the same row order as `cosine_sim`.
    method : str
        "df" returns the matching rows of `q_movies`; any other value returns
        only their "title" column.
    top_n : int
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The recommendations ordered from most to least similar.

    Raises
    ------
    KeyError
        Propagated from the `indices[title]` lookup when `title` is unknown.
    """
    idx = indices[title]
    # A title shared by several movies makes a Series lookup return a Series of
    # positions instead of a scalar; fall back to the first occurrence.
    if hasattr(idx, "iloc"):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank every movie by its similarity to the requested one (best first)
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Remove the requested movie by position instead of assuming it sorts first:
    # with tied scores or an all-zero similarity row it is not guaranteed to be
    # at the top of the ranking. Then keep the `top_n` best matches.
    limit = max(int(top_n), 0)
    movie_indices = [pos for pos, _ in ranked if pos != idx][:limit]

    if method == "df":
        return q_movies.iloc[movie_indices]
    return q_movies["title"].iloc[movie_indices]

Writing recommender.py
