## Item-Based Collaborative Filtering
**Item-item collaborative filtering is a type of recommendation system based on the similarity between items, where that similarity is calculated from the ratings users have given to those items.**

In [1]:
# Import pandas and let DataFrame displays show up to 20 columns before truncating:
import pandas as pd
pd.set_option('display.max_columns', 20)

In [2]:
# Read the movie table and the ratings table from the Kaggle input directory:
movie, rating = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/movie-lens-dataset/movies.csv',
        '/kaggle/input/movie-lens-dataset/ratings.csv',
    )
)

In [3]:
# Attach the ratings to the movie table on movieId; the left join keeps every movie row.
df = pd.merge(movie, rating, on="movieId", how="left")
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,964982700.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,847435000.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1106636000.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1510578000.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1305696000.0


In [4]:
# Number of distinct film titles in the merged table
df.loc[:, "title"].nunique()

9737

In [5]:
# Number of merged rows (i.e. ratings) per film title; show the five most-rated titles.
df["title"].value_counts().iloc[:5]

Forrest Gump (1994)                 329
Shawshank Redemption, The (1994)    317
Pulp Fiction (1994)                 307
Silence of the Lambs, The (1991)    279
Matrix, The (1999)                  278
Name: title, dtype: int64

In [6]:
# Keep only the films that have more than MIN_RATINGS ratings.
MIN_RATINGS = 100

# Ratings per title as a one-column frame. The column is named "title"
# explicitly: pandas >= 2.0 names the value_counts() result "count", so
# relying on the implicit name made comment_counts["title"] raise KeyError.
comment_counts = df["title"].value_counts().rename("title").to_frame()

rare_movies = comment_counts[comment_counts["title"] <= MIN_RATINGS].index

# Drop every row that belongs to a rarely rated film:
common_movies = df[~df["title"].isin(rare_movies)]

In [7]:
# Build the user-movie frame: one row per userId, one column per title, ratings as values.
user_movie_df = pd.pivot_table(
    common_movies,
    values="rating",
    index=["userId"],
    columns=["title"],
)

user_movie_df.head(2)

title,2001: A Space Odyssey (1968),Ace Ventura: Pet Detective (1994),Aladdin (1992),Alien (1979),Aliens (1986),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",American Beauty (1999),American History X (1998),American Pie (1999),Apocalypse Now (1979),...,True Lies (1994),"Truman Show, The (1998)",Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Twister (1996),Up (2009),"Usual Suspects, The (1995)",WALL·E (2008),Waterworld (1995),Willy Wonka & the Chocolate Factory (1971),X-Men (2000)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,,,,4.0,,,5.0,5.0,,4.0,...,,,,3.0,,5.0,,,5.0,5.0
2.0,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Ratings each user gave to The Matrix (NaN where the user has no rating for it)
movie_name = user_movie_df["Matrix, The (1999)"]
movie_name.head()

userId
1.0    5.0
2.0    NaN
3.0    NaN
4.0    1.0
5.0    NaN
Name: Matrix, The (1999), dtype: float64

In [9]:
# Correlation of every film's rating column with the selected film, strongest ten first:
(
    user_movie_df
    .corrwith(movie_name)
    .sort_values(ascending=False)
    .head(10)
)

title
Matrix, The (1999)       1.000000
Die Hard (1988)          0.544466
Inception (2010)         0.514767
Braveheart (1995)        0.496045
Aliens (1986)            0.470865
Lion King, The (1994)    0.444932
Monsters, Inc. (2001)    0.441205
Batman Begins (2005)     0.440338
Jurassic Park (1993)     0.427936
Fight Club (1999)        0.417196
dtype: float64

In [10]:
# Same ranking, this time against the ratings column of Inception:
movie_name = user_movie_df["Inception (2010)"]
(
    user_movie_df
    .corrwith(movie_name)
    .sort_values(ascending=False)
    .head(10)
)

title
Inception (2010)                                             1.000000
Interview with the Vampire: The Vampire Chronicles (1994)    0.789059
Dances with Wolves (1990)                                    0.734847
Firm, The (1993)                                             0.722633
Saving Private Ryan (1998)                                   0.700970
Full Metal Jacket (1987)                                     0.607265
Ocean's Eleven (2001)                                        0.581871
Heat (1995)                                                  0.565437
Braveheart (1995)                                            0.555414
Outbreak (1995)                                              0.549277
dtype: float64

In [11]:
# Let's make a function:

def create_user_movie_df(
    movies_path='/kaggle/input/movie-lens-dataset/movies.csv',
    ratings_path='/kaggle/input/movie-lens-dataset/ratings.csv',
    min_ratings=100,
):
    """Build the user-by-title rating matrix used for item-based filtering.

    The movie and rating CSV files are read, left-joined on ``movieId``, and
    every title that appears in ``min_ratings`` or fewer merged rows is
    dropped. The remaining rows are pivoted into a wide frame.

    Parameters
    ----------
    movies_path : str or path-like
        CSV file with at least the ``movieId`` and ``title`` columns.
    ratings_path : str or path-like
        CSV file with at least the ``userId``, ``movieId`` and ``rating``
        columns.
    min_ratings : int
        Titles need strictly more than this many rows to be kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``userId``, one column per kept ``title``, holding the
        ``rating`` values (aggregated with ``pivot_table``'s default, the
        mean). Cells without a rating are NaN.
    """
    movie = pd.read_csv(movies_path)
    rating = pd.read_csv(ratings_path)
    df = movie.merge(rating, how="left", on="movieId")

    # Work on the counts Series directly: its name differs between pandas
    # versions ("title" before 2.0, "count" from 2.0 on), so wrapping it in a
    # DataFrame and selecting the "title" column is not portable.
    rating_counts = df["title"].value_counts()
    rare_movies = rating_counts[rating_counts <= min_ratings].index

    common_movies = df[~df["title"].isin(rare_movies)]
    return common_movies.pivot_table(index=["userId"], columns=["title"], values="rating")

# Rebuild the user-movie frame through the helper defined above in this cell.
user_movie_df = create_user_movie_df()

In [12]:
def item_based_recommender(movie_name, user_movie_df, top_n=10):
    """Rank titles by how strongly their ratings correlate with one title.

    Parameters
    ----------
    movie_name : str
        Column label of the reference title in ``user_movie_df``.
    user_movie_df : pandas.DataFrame
        Wide rating frame with one column per title.
    top_n : int
        Number of entries to return (default 10).

    Returns
    -------
    pandas.Series
        The ``top_n`` highest correlations, indexed by title and sorted in
        descending order. The reference title is not removed, so it is
        normally the first entry with a correlation of 1.0.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of ``user_movie_df``.
    """
    if movie_name not in user_movie_df.columns:
        raise KeyError(f"{movie_name!r} is not a column of user_movie_df")

    # Keep the title string and its ratings column under separate names.
    target_ratings = user_movie_df[movie_name]
    correlations = user_movie_df.corrwith(target_ratings)
    return correlations.sort_values(ascending=False).head(top_n)


In [13]:
# Top correlated titles for The Matrix, computed through the helper function:
item_based_recommender("Matrix, The (1999)", user_movie_df)

title
Matrix, The (1999)       1.000000
Die Hard (1988)          0.544466
Inception (2010)         0.514767
Braveheart (1995)        0.496045
Aliens (1986)            0.470865
Lion King, The (1994)    0.444932
Monsters, Inc. (2001)    0.441205
Batman Begins (2005)     0.440338
Jurassic Park (1993)     0.427936
Fight Club (1999)        0.417196
dtype: float64