# This is an Item Based Collaborative Filtering Recommender System that uses two datasets to predict similar anime based on the similarity of the target anime's user rating with the other. These predictions will be further used to find the most popular anime by observing their number of ratings (votes).

### **Instructions**
1. Change the intended values in the initialization before starting the kernel.
2. Follow Cell > Run All to execute the recommender system.
3. The result will be shown at the bottom of this notebook after the execution is completed.
4. Repeat the program execution by following Cell > Run All or Kernel > Restart & Run All.

# **Initialization & Testing**

In [1]:
import time
import numpy as np
import pandas as pd

In [2]:
start_time = time.time()               # Start Time of Program Execution
anime_dataset_name = "anime.csv"       # Anime Dataset
rating_dataset_name = "rating.csv"     # User Dataset
target_anime_id = 1                    # anime_id of the Target
n = 10                                 # Amount of Results Needed
min = 100                              # Minimum Votes Required

In [3]:
anime_dataset = pd.read_csv(anime_dataset_name)
rating_dataset = pd.read_csv(rating_dataset_name)

In [4]:
anime_dataset.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [5]:
rating_dataset.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [6]:
sorted_anime = anime_dataset.sort_values(by=['anime_id'], ignore_index = True)
sorted_anime.head(n = 10)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,8.4,137636
2,6,Trigun,"Action, Comedy, Sci-Fi",TV,26,8.32,283069
3,7,Witch Hunter Robin,"Action, Drama, Magic, Mystery, Police, Superna...",TV,26,7.36,64905
4,8,Beet the Vandel Buster,"Adventure, Fantasy, Shounen, Supernatural",TV,52,7.06,9848
5,15,Eyeshield 21,"Action, Comedy, Shounen, Sports",TV,145,8.08,83648
6,16,Hachimitsu to Clover,"Comedy, Drama, Josei, Romance",TV,24,8.18,130646
7,17,Hungry Heart: Wild Striker,"Comedy, Shounen, Slice of Life, Sports",TV,52,7.74,13469
8,18,Initial D Fourth Stage,"Action, Cars, Drama, Seinen, Sports",TV,24,8.24,41584
9,19,Monster,"Drama, Horror, Mystery, Police, Psychological,...",TV,74,8.72,247562


In [7]:
sorted_rating = rating_dataset.sort_values(by=['user_id', 'anime_id'], ignore_index = True)
sorted_rating.head(n = 10)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
5,1,355,-1
6,1,356,-1
7,1,442,-1
8,1,487,-1
9,1,846,-1


# **Correlation of Items based on Users**

## **Steps**
1. Create merged dataset based on the anime_id.
2. Create rating_anime_matrix.
3. Create series using target anime which will be used to test the correlation with other animes.
4. Sort the series to get the anime with highest correlation against target anime.
5. Count the number of rating of the animes to get the most popular anime.

## STEP 1: Create merged dataset based on the anime_id.

In [8]:
data = pd.merge(sorted_rating, sorted_anime, on = 'anime_id', sort = True)
data

Unnamed: 0,user_id,anime_id,rating_x,name,genre,type,episodes,rating_y,members
0,13,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1,19,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
2,21,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
3,23,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
4,32,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
...,...,...,...,...,...,...,...,...,...
7813722,53492,34475,5,Monster Strike: Rain of Memories,"Action, Fantasy, Game",ONA,1,6.58,313
7813723,70547,34475,9,Monster Strike: Rain of Memories,"Action, Fantasy, Game",ONA,1,6.58,313
7813724,70768,34475,-1,Monster Strike: Rain of Memories,"Action, Fantasy, Game",ONA,1,6.58,313
7813725,48766,34476,-1,Platonic Chain: Ansatsu Jikkouchuu,"Sci-Fi, Slice of Life",Special,1,1.67,51


## STEP 2: Create rating_anime_matrix.

In [9]:
rating_anime_matrix = data.pivot_table(index = 'user_id', columns = ['anime_id'], values = 'rating_x', fill_value = -1)
rating_anime_matrix

anime_id,1,5,6,7,8,15,16,17,18,19,...,34283,34324,34325,34349,34358,34367,34412,34475,34476,34519
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
5,-1,-1,8,-1,-1,6,-1,6,6,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73512,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
73513,9,8,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
73514,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
73515,10,10,10,-1,-1,-1,-1,-1,-1,9,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


## STEP 3: Create series using target anime which will be used to test the correlation with other animes.

In [10]:
# If encounter an exception means that the target_anime_id is not exist.
ratings = rating_anime_matrix[target_anime_id]
ratings

user_id
1        -1
2        -1
3        -1
4        -1
5        -1
         ..
73512    -1
73513     9
73514    -1
73515    10
73516    -1
Name: 1, Length: 73515, dtype: int64

In [11]:
similar_animes = rating_anime_matrix.corrwith(ratings)
similar_animes

anime_id
1        1.000000
5        0.562078
6        0.455518
7        0.209106
8        0.050991
           ...   
34367    0.006371
34412         NaN
34475    0.009471
34476         NaN
34519         NaN
Length: 11197, dtype: float64

In [12]:
similar_animes = pd.DataFrame(similar_animes, columns = ['correlation'])
similar_animes

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1,1.000000
5,0.562078
6,0.455518
7,0.209106
8,0.050991
...,...
34367,0.006371
34412,
34475,0.009471
34476,


## STEP 4: Sort the series to get the anime with highest correlation against target anime.

In [13]:
sorted_similar_animes = similar_animes.sort_values(by = 'correlation', ascending = False)
sorted_similar_animes

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1,1.000000
5,0.562078
205,0.458533
6,0.455518
227,0.413058
...,...
34173,
34358,
34412,
34476,


## STEP 5: Count the number of rating of the animes to get the most popular anime.

In [14]:
rating_votes = pd.DataFrame(data.groupby('anime_id')['rating_x'].count())
rating_votes

Unnamed: 0_level_0,rating_x
anime_id,Unnamed: 1_level_1
1,15509
5,6927
6,11077
7,2629
8,413
...,...
34367,5
34412,1
34475,4
34476,1


In [15]:
similar_animes_ratings = sorted_similar_animes.join(rating_votes['rating_x']).sort_values(by = 'correlation', ascending = False)
similar_animes_ratings

Unnamed: 0_level_0,correlation,rating_x
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.000000,15509
5,0.562078,6927
205,0.458533,14068
6,0.455518,11077
227,0.413058,13152
...,...,...
34173,,1
34358,,1
34412,,1
34476,,1


In [16]:
similar_popular_animes = sorted_similar_animes.loc[similar_animes_ratings['rating_x'] >= min].dropna()
n_results = similar_popular_animes.loc[similar_popular_animes.index != target_anime_id].head(n)
n_results

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
5,0.562078
205,0.458533
6,0.455518
227,0.413058
30,0.387421
47,0.381951
43,0.375667
467,0.364145
889,0.353996
2001,0.341886


In [17]:
print(f'Target Anime ({target_anime_id}):')
target_anime = sorted_anime.loc[sorted_anime['anime_id'] == target_anime_id]
target_anime

Target Anime (1):


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824


In [18]:
print(f'The {n} Most Similar Popular Anime:')
results_anime = sorted_anime.loc[sorted_anime['anime_id'].isin(n_results.index.tolist())].reset_index(drop = True)
results_anime

The 10 Most Similar Popular Anime:


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,8.4,137636
1,6,Trigun,"Action, Comedy, Sci-Fi",TV,26,8.32,283069
2,30,Neon Genesis Evangelion,"Action, Dementia, Drama, Mecha, Psychological,...",TV,26,8.32,461946
3,43,Ghost in the Shell,"Action, Mecha, Police, Psychological, Sci-Fi, ...",Movie,1,8.34,223036
4,47,Akira,"Action, Adventure, Horror, Military, Sci-Fi, S...",Movie,1,8.15,215897
5,205,Samurai Champloo,"Action, Adventure, Comedy, Historical, Samurai...",TV,26,8.5,390076
6,227,FLCL,"Action, Comedy, Dementia, Mecha, Parody, Sci-Fi",OVA,6,8.06,305165
7,467,Ghost in the Shell: Stand Alone Complex,"Action, Mecha, Military, Police, Sci-Fi, Seinen",TV,26,8.47,194491
8,889,Black Lagoon,"Action, Seinen",TV,12,8.17,332562
9,2001,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962


In [19]:
print (f"Execution Time: {time.time() - start_time} seconds")

Execution Time: 534.1490397453308 seconds


In [None]:
# End of Program