In [1]:
import re
import csv
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Reading ratings file
# Ignore the timestamp column
ratings = pd.read_csv('ratings.csv', encoding='latin-1', usecols=['userId', 'movieId', 'rating']).dropna()
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [3]:
# Reading movies file
movies = pd.read_csv('movies.csv', encoding='latin-1', usecols=['movieId', 'title', 'genres']).dropna()
for i in movies.index:
    name = movies.loc[i, 'title']
    if ', The' in name:
        name = name.split(',')
        name = '{} {}'.format(name[1], name[0])
    temp = re.split(r' \((\d{4})\)', name)[0].strip()
    if 'The'==temp:
        name = re.split(r' \((\d{4})\)', name)
        name = '{}{}'.format(name[0], name[2])
        movies.loc[i, 'title'] = name.strip()
    else:
        movies.loc[i, 'title'] = temp

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji,Adventure|Children|Fantasy
2,3,Grumpier Old Men,Comedy|Romance
3,4,Waiting to Exhale,Comedy|Drama|Romance
4,5,Father of the Bride Part II,Comedy


In [4]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(movies['genres'])

In [5]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [19]:
# Build a 1-dimensional array with movie titles
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])

# Function that get movie recommendations based on the cosine similarity score of movie genres
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return pd.DataFrame(titles.iloc[movie_indices])

In [20]:
genre_recommendations('Chicken Run').head(10)

Unnamed: 0,title
2269,The Rugrats Movie
3520,Saludos Amigos
3660,Chicken Run
3896,Rugrats in Paris: The Movie
5666,Looney Looney
5859,The Wild Thornberrys Movie
7482,"Bon Voyage, Charlie Brown (and Don't Come Back!)"
7773,Garfield: The Movie
8224,Shark Tale
8251,Bebe's Kids
