# Item Based Recommender

* Import Dataset and Data Preprocessing

* Create User Movie DataFrame

* Create Item-Based Recommender

* Process Functionalization

# Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.width", 500)

# Import Dataset

In [2]:
# Read the movie catalogue and the user ratings from the working directory.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")
# Left join on movieId: every row of `movies` is kept and the rating columns
# are attached wherever a matching movieId exists in `ratings`.
df = pd.merge(movies, ratings, how="left", on="movieId")
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.0,4.0,944919400.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6.0,5.0,858275500.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8.0,4.0,833981900.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10.0,4.0,943497900.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11.0,4.5,1230859000.0


# Create User Movie DataFrame

In [3]:
df.shape

(20000797, 6)

In [4]:
df["title"].nunique()

27262

In [5]:
df["title"].value_counts().head()

Pulp Fiction (1994)                 67310
Forrest Gump (1994)                 66172
Shawshank Redemption, The (1994)    63366
Silence of the Lambs, The (1991)    63299
Jurassic Park (1993)                59715
Name: title, dtype: int64

In [6]:
df["title"].value_counts().tail()

Rapture (Arrebato) (1980)                    1
Education of Mohammad Hussein, The (2013)    1
Satanas (2007)                               1
Psychosis (2010)                             1
Innocence (2014)                             1
Name: title, dtype: int64

In [11]:
# Number of rows per title as a one-column frame. The column is named "title"
# explicitly: pandas >= 2.0 names the value_counts() result "count", which
# would break the rating_counts["title"] lookups used in the following cells.
rating_counts = df["title"].value_counts().rename("title").to_frame()

In [12]:
rating_counts.head()

Unnamed: 0,title
Pulp Fiction (1994),67310
Forrest Gump (1994),66172
"Shawshank Redemption, The (1994)",63366
"Silence of the Lambs, The (1991)",63299
Jurassic Park (1993),59715


In [13]:
rating_counts.tail()

Unnamed: 0,title
Rapture (Arrebato) (1980),1
"Education of Mohammad Hussein, The (2013)",1
Satanas (2007),1
Psychosis (2010),1
Innocence (2014),1


In [14]:
# Titles with fewer than 1000 ratings are going to be excluded; preview them.
rating_counts.loc[rating_counts["title"].lt(1000)].head()

Unnamed: 0,title
"Bear, The (Ours, L') (1988)",999
Rosewood (1997),999
Ted (2012),999
One Night at McCool's (2001),999
Marked for Death (1990),998


In [15]:
rating_counts[rating_counts["title"] < 1000].shape

(24103, 1)

In [19]:
# Index of titles whose rating count is below 1000.
rare_movies = rating_counts.loc[rating_counts["title"].lt(1000)].index

In [20]:
rare_movies[0:5]

Index(['Bear, The (Ours, L') (1988)', 'Rosewood (1997)', 'Ted (2012)', 'One Night at McCool's (2001)', 'Marked for Death (1990)'], dtype='object')

In [21]:
# Keep only the rows whose title is not among the rarely rated ones.
common_movies = df.loc[~df["title"].isin(rare_movies)]

In [22]:
common_movies.shape

(17766015, 6)

In [23]:
df.shape

(20000797, 6)

In [25]:
common_movies["title"].nunique()

3159

In [26]:
df["title"].nunique()

27262

In [28]:
# User x title matrix of ratings: one row per userId, one column per title.
# Cells with no rating for that (user, title) pair are NaN.
user_movie_df = common_movies.pivot_table(values="rating", index="userId", columns="title")

In [29]:
user_movie_df.shape

(138493, 3159)

# Create Item-Based Recommender

In [33]:
# Correlate every title's rating column with the ratings of one reference movie.
# The title string and its rating Series get separate names so that
# `movie_name` is not rebound from a str to a Series inside the cell.
movie_name = "12 Angry Men (1957)"
movie_ratings = user_movie_df[movie_name]
user_movie_df.corrwith(movie_ratings).sort_values(ascending=False).head()

title
12 Angry Men (1957)                    1.000000
Witness for the Prosecution (1957)     0.503740
Inherit the Wind (1960)                0.440727
City Lights (1931)                     0.412992
Mr. Smith Goes to Washington (1939)    0.412903
dtype: float64

In [35]:
# Pick a random title. The fixed random_state makes the draw repeatable, so a
# fresh "Restart & Run All" selects the same movie every time.
movie_name = pd.Series(user_movie_df.columns).sample(1, random_state=42).values[0]
# Separate name for the rating Series so `movie_name` stays a plain title string.
movie_ratings = user_movie_df[movie_name]
user_movie_df.corrwith(movie_ratings).sort_values(ascending=False).head()

title
Richard III (1995)                                 1.000000
Futurama: The Beast with a Billion Backs (2008)    0.761937
Chorus, The (Choristes, Les) (2004)                0.665781
A-Team, The (2010)                                 0.571577
Half Nelson (2006)                                 0.564896
dtype: float64

In [36]:
# Find titles by keyword so that a movie can be picked for a recommendation.
def check_film(keyword, user_movie_df):
    """Return the column labels of ``user_movie_df`` that contain ``keyword``.

    Each label is tested with Python's ``in`` operator, so for string labels
    this is a case-sensitive substring match.

    Args:
        keyword: Text to look for inside each column label.
        user_movie_df: Object exposing a ``columns`` iterable of labels.

    Returns:
        list: Matching labels, in the order they appear in ``columns``.
    """
    matching_titles = []
    for title in user_movie_df.columns:
        if keyword in title:
            matching_titles.append(title)
    return matching_titles

In [39]:
check_film("Batman", user_movie_df)

['Batman & Robin (1997)',
 'Batman (1989)',
 'Batman Begins (2005)',
 'Batman Forever (1995)',
 'Batman Returns (1992)',
 'Batman: Mask of the Phantasm (1993)']

In [40]:
# Correlate every title's rating column with the ratings of the chosen movie.
# The title string and its rating Series get separate names so that
# `movie_name` is not rebound from a str to a Series inside the cell.
movie_name = "Batman Forever (1995)"
movie_ratings = user_movie_df[movie_name]
user_movie_df.corrwith(movie_ratings).sort_values(ascending=False).head()

title
Batman Forever (1995)                                1.000000
Batman & Robin (1997)                                0.620899
Ferngully: The Last Rainforest (1992)                0.456663
Lara Croft Tomb Raider: The Cradle of Life (2003)    0.447672
Universal Soldier: The Return (1999)                 0.445979
dtype: float64

# Process Functionalization

In [41]:
def data_prerp(data_movie, data_rating, on="movieId"):
    """Load the movie and rating CSV files and left-join them.

    Args:
        data_movie: Path (or anything ``pd.read_csv`` accepts) of the movies file.
        data_rating: Path (or anything ``pd.read_csv`` accepts) of the ratings file.
        on: Column name(s) shared by both files and used as the join key.

    Returns:
        pd.DataFrame: Every row of the movies file, with the rating columns
        attached where the key matches (left join).
    """
    movie_frame = pd.read_csv(data_movie)
    rating_frame = pd.read_csv(data_rating)
    return pd.merge(movie_frame, rating_frame, how="left", on=on)

In [42]:
df = data_prerp("movies.csv", "ratings.csv")

In [43]:
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.0,4.0,944919400.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6.0,5.0,858275500.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8.0,4.0,833981900.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10.0,4.0,943497900.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11.0,4.5,1230859000.0


In [44]:
def create_user_movie_df(dataframe, min_ratings=1000):
    """Build the user x title rating matrix from the merged ratings frame.

    Titles that occur in fewer than ``min_ratings`` rows are dropped before
    pivoting, which keeps the matrix narrow.

    Args:
        dataframe: Frame with at least ``userId``, ``title`` and ``rating`` columns.
        min_ratings: Minimum number of rows a title must have to be kept.
            Defaults to 1000, the threshold that was previously hard-coded.

    Returns:
        pd.DataFrame: One row per ``userId``, one column per kept title,
        ratings as values and NaN where a user has no rating for a title.
    """
    # Filter on the counts Series itself instead of looking up a "title"
    # column: pandas >= 2.0 names the value_counts() result "count", so the
    # old rating_counts["title"] lookup raises KeyError there.
    rating_counts = dataframe["title"].value_counts()
    rare_movies = rating_counts[rating_counts < min_ratings].index
    common_movies = dataframe[~dataframe["title"].isin(rare_movies)]
    return common_movies.pivot_table(index=["userId"], columns=["title"], values="rating")

In [45]:
user_movie_df = create_user_movie_df(df)

In [46]:
def item_based_recommender(movie_name, user_movie_df, head=10):
    """Rank titles by how strongly their ratings correlate with one movie.

    Args:
        movie_name: Column label of the reference movie in ``user_movie_df``.
        user_movie_df: User x title rating matrix (one column per title).
        head: Number of top-ranked titles to return.

    Returns:
        pd.Series: Correlation of each column with the reference column,
        sorted in descending order and cut to the first ``head`` entries.
        The reference movie is not removed, so it appears in the result too.
    """
    reference_ratings = user_movie_df[movie_name]
    correlations = user_movie_df.corrwith(reference_ratings)
    ranked = correlations.sort_values(ascending=False)
    return ranked.head(head)

In [47]:
item_based_recommender("Batman Forever (1995)", user_movie_df)

title
Batman Forever (1995)                                1.000000
Batman & Robin (1997)                                0.620899
Ferngully: The Last Rainforest (1992)                0.456663
Lara Croft Tomb Raider: The Cradle of Life (2003)    0.447672
Universal Soldier: The Return (1999)                 0.445979
Teenage Mutant Ninja Turtles III (1993)              0.445910
Catwoman (2004)                                      0.442391
Flintstones in Viva Rock Vegas, The (2000)           0.441362
Three Men and a Little Lady (1990)                   0.440241
Don't Say a Word (2001)                              0.438625
dtype: float64