# **Data Loading**

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter('ignore')


In [2]:
# Load the movie catalogue. The path is relative to the notebook's working
# directory; the preview below shows the columns movieId, title and genres.
movies = pd.read_csv('datasets/movies.csv')
# Bare last expression so the notebook renders the first rows as a table.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
movies.shape

(9742, 3)

In [4]:
movies.columns

Index(['movieId', 'title', 'genres'], dtype='object')

In [5]:
# Load the user ratings. The path is relative to the notebook's working
# directory; the preview below shows userId, movieId, rating and timestamp.
rating = pd.read_csv('datasets/ratings.csv')
# Bare last expression so the notebook renders the first rows as a table.
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
rating.shape

(100836, 4)

# **Data Preprocessing**

In [7]:
# Left join on movieId: every row of `movies` is kept, and a movie with no
# matching row in `rating` gets NaN in the rating-side columns (userId,
# rating, timestamp). Those NaNs are why userId shows up as a float below.
df = movies.merge(rating, how="left", on="movieId")
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,964982700.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,847435000.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1106636000.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1510578000.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1305696000.0


In [8]:
# The rating timestamp is not used by the recommender, so discard that column.
df = df.drop(columns=['timestamp'])
df.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5


In [9]:
# Count missing values per column; rows produced by the left merge for movies
# without any rating show up here as missing userId / rating.
missing_per_column = df.isna().sum()
print(missing_per_column)

movieId     0
title       0
genres      0
userId     18
rating     18
dtype: int64


In [10]:
# Remove every row that has at least one missing value, then re-count the
# missing values per column to confirm none remain.
df = df.dropna(axis=0, how='any')
remaining_missing = df.isna().sum()
print(remaining_missing)

movieId    0
title      0
genres     0
userId     0
rating     0
dtype: int64


# **Data Modelling**

In [11]:
df["title"].nunique()

9719

In [12]:
df["title"].value_counts().head()

Forrest Gump (1994)                 329
Shawshank Redemption, The (1994)    317
Pulp Fiction (1994)                 307
Silence of the Lambs, The (1991)    279
Matrix, The (1999)                  278
Name: title, dtype: int64

In [13]:
# A title counts as "rare" when it has at most this many ratings.
RARE_MOVIE_MAX_RATINGS = 100

# Number of ratings per title, as a Series indexed by title.
title_counts = df["title"].value_counts()
# Kept as a DataFrame under the original name for any later cell that uses it.
comment = pd.DataFrame(title_counts)

# Filter on the Series itself instead of on a column looked up by name: the
# column that value_counts() produces is called "title" on pandas < 2.0 but
# "count" on pandas >= 2.0, so comment["title"] raises KeyError on newer
# versions. Indexing the Series works on both.
rareMovies = title_counts[title_counts <= RARE_MOVIE_MAX_RATINGS].index

# Keep only the ratings that belong to titles that are not rare.
commonMovies = df[~df["title"].isin(rareMovies)]
commonMovies["title"].nunique()

134

In [14]:
# Build the user x movie matrix: one row per userId, one column per title,
# cells hold the rating (pivot_table's default aggregation is the mean, and a
# user/title pair with no rating is left as NaN).
user_data = pd.pivot_table(
    commonMovies,
    values="rating",
    index=["userId"],
    columns=["title"],
)
user_data.shape

(597, 134)

In [15]:
def item_recommendation(movie_title, user_movie_df, top_n=10):
    """Rank the columns of ``user_movie_df`` by correlation with one movie.

    Parameters
    ----------
    movie_title : str
        Column label of the reference movie in ``user_movie_df``.
    user_movie_df : pandas.DataFrame
        Table with one column per movie; every column is correlated against
        the reference column.
    top_n : int, default 10
        How many of the highest-correlated entries to return. The default
        reproduces the previously hard-coded cut-off.

    Returns
    -------
    pandas.Series
        Correlation values indexed by column label, sorted in descending
        order and truncated to ``top_n`` entries. The reference movie is not
        removed, so it is part of the ranking (it correlates perfectly with
        itself).

    Raises
    ------
    KeyError
        If ``movie_title`` is not a column of ``user_movie_df``.
    """
    reference_ratings = user_movie_df[movie_title]
    # Column-wise correlation of every movie with the reference movie.
    correlations = user_movie_df.corrwith(reference_ratings)
    return correlations.sort_values(ascending=False).head(top_n)

In [16]:
def checkMovies(keyword, user_movie_df):
    """Return the column labels of ``user_movie_df`` that contain ``keyword``.

    Parameters
    ----------
    keyword : str
        Substring to look for; the match is case-sensitive.
    user_movie_df : pandas.DataFrame
        Table whose column labels are searched.

    Returns
    -------
    list
        Matching column labels, in the order they appear in the frame.
        Empty when nothing matches.
    """
    # Search the frame that was passed in, not a notebook-level global, so the
    # helper works for any user x movie table.
    return [col for col in user_movie_df.columns if keyword in col]

# **Results**

In [17]:
item_recommendation("Matrix, The (1999)", user_data)

title
Matrix, The (1999)       1.000000
Die Hard (1988)          0.544466
Inception (2010)         0.514767
Braveheart (1995)        0.496045
Aliens (1986)            0.470865
Lion King, The (1994)    0.444932
Monsters, Inc. (2001)    0.441205
Batman Begins (2005)     0.440338
Jurassic Park (1993)     0.427936
Fight Club (1999)        0.417196
dtype: float64

In [18]:
item_recommendation("Toy Story (1995)", user_data)

title
Toy Story (1995)                                        1.000000
Incredibles, The (2004)                                 0.643301
Finding Nemo (2003)                                     0.618701
Aladdin (1992)                                          0.611892
Monsters, Inc. (2001)                                   0.490231
Mrs. Doubtfire (1993)                                   0.446261
Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)    0.438237
American Pie (1999)                                     0.420117
Die Hard: With a Vengeance (1995)                       0.410939
E.T. the Extra-Terrestrial (1982)                       0.409216
dtype: float64

In [19]:
checkMovies("Back to the Future", user_data)

['Back to the Future (1985)']

In [20]:
checkMovies('Lord', user_data)

['Lord of the Rings: The Fellowship of the Ring, The (2001)',
 'Lord of the Rings: The Return of the King, The (2003)',
 'Lord of the Rings: The Two Towers, The (2002)']