# Movie Recommendation

In [48]:
import re
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Read the movie catalogue from the CSV that ships alongside the notebook
# and preview the first rows.
movies_csv = 'data/movies.csv'
movies = pd.read_csv(movies_csv)
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [49]:
def clean_title(title: str) -> str:
    """Remove parenthesised four-digit years such as ``(1995)`` from a title.

    Any whitespace immediately before the year is removed together with it,
    so a year in the middle of a title does not leave a doubled space behind.
    Leading and trailing whitespace of the result is stripped.

    Args:
        title: Raw title text, e.g. ``"Toy Story (1995)"``.

    Returns:
        The title without the year marker, e.g. ``"Toy Story"``. Titles
        without a ``(YYYY)`` group are returned stripped but otherwise
        unchanged.
    """
    # `\s*` swallows the gap before "(YYYY)"; the final strip() covers the
    # case where the year was the first thing in the string.
    without_year = re.sub(r'\s*\(\d{4}\)', '', title)
    return without_year.strip()
# Normalise every catalogue title with clean_title, overwriting the
# original column, then preview the result.
movies["title"] = movies["title"].map(clean_title)
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji,Adventure|Children|Fantasy
2,3,Grumpier Old Men,Comedy|Romance
3,4,Waiting to Exhale,Comedy|Drama|Romance
4,5,Father of the Bride Part II,Comedy


In [50]:
# Replace the pipe delimiters between genres with spaces.
# `.str.replace` is the vectorized pandas form of the per-row lambda;
# regex=False is spelled out so "|" is always treated as a literal pipe
# (interpreted as a regex it would be an alternation, not a character).
movies["genres"] = movies["genres"].str.replace("|", " ", regex=False)

# One text field per movie: the title followed by its genres.
movies["combine"] = movies["title"] + " " + movies["genres"]
movies.head()

Unnamed: 0,movieId,title,genres,combine
0,1,Toy Story,Adventure Animation Children Comedy Fantasy,Toy Story Adventure Animation Children Comedy ...
1,2,Jumanji,Adventure Children Fantasy,Jumanji Adventure Children Fantasy
2,3,Grumpier Old Men,Comedy Romance,Grumpier Old Men Comedy Romance
3,4,Waiting to Exhale,Comedy Drama Romance,Waiting to Exhale Comedy Drama Romance
4,5,Father of the Bride Part II,Comedy,Father of the Bride Part II Comedy


In [51]:
# Fit a TF-IDF model on the combined title + genre text, using both
# single words and two-word phrases as features.
corpus = movies["combine"]
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(corpus)
tfidf.shape

(62423, 154860)

In [52]:
# Example query: clean it the same way as the catalogue titles, then
# project it into the fitted TF-IDF feature space.
title = clean_title("Old Men")
query_vector = vectorizer.transform([title])
query_vector.shape

(1, 154860)

In [53]:
TOP_N = 5  # how many recommendations to show

# Dot product of the query vector with every movie's TF-IDF row, flattened
# to a 1-D score array. TfidfVectorizer L2-normalises rows by default, so
# this dot product is the cosine similarity.
cosine_sim = linear_kernel(query_vector, tfidf).flatten()

# Positions of the TOP_N highest scores, best match first.
related_movie_indices = cosine_sim.argsort()[::-1][:TOP_N]

# A query that shares no vocabulary with the corpus scores 0 everywhere;
# drop such hits so arbitrary movies are never presented as "related".
related_movie_indices = related_movie_indices[cosine_sim[related_movie_indices] > 0]

result = movies.iloc[related_movie_indices]
result

Unnamed: 0,movieId,title,genres,combine
3354,3450,Grumpy Old Men,Comedy,Grumpy Old Men Comedy
32828,141820,Old Men: Robbers,Comedy,Old Men: Robbers Comedy
2,3,Grumpier Old Men,Comedy Romance,Grumpier Old Men Comedy Romance
11932,55820,No Country for Old Men,Crime Drama,No Country for Old Men Crime Drama
47016,173637,Seven Old Men and One Girl,Comedy,Seven Old Men and One Girl Comedy
