## A Recommendation System for Anime movies is built using Collaborative Filtering.

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
# The python file collaborative_filtering contains the algorithm
from collaborative_filtering import *

### The anime and rating datasets have been imported using pandas

In [2]:
# Row/column count of the anime table: there are over 12000 anime titles.
anime.shape

(12294, 7)

In [3]:
# Row/column count of the rating table: there are over 7 million ratings.
rating.shape

(7813737, 3)

In [4]:
# The anime dataset lists the anime titles with their features: genre, type, number of episodes,
# average rating and member count.
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [5]:
# Preview the rating table: each row holds a user_id, an anime_id and that user's rating.
# NOTE(review): the rating column is blank for the rows shown — presumably missing values; confirm how
# "not rated" entries are encoded where the table is loaded.
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,


### The user and anime datasets have been merged

In [6]:
# Preview the merged table: each row pairs a user_id with an anime name and that user's rating.
combined.head()

Unnamed: 0,user_id,name,user_rating
0,1,Naruto,
1,3,Naruto,8.0
2,5,Naruto,6.0
3,6,Naruto,
4,10,Naruto,


In [7]:
# Only the user ratings are considered, not the item features, because this notebook builds a purely
# collaborative filtering model.
combined = combined[['user_id', 'name', 'user_rating']]

# For computational reasons the algorithm is run only on users whose id is at most MAX_USER_ID.
# Raise this value to include more users (at the cost of memory and run time).
MAX_USER_ID = 20000
combined_small = combined[combined.user_id <= MAX_USER_ID]
combined_small.head()

Unnamed: 0,user_id,name,user_rating
0,1,Naruto,
1,3,Naruto,8.0
2,5,Naruto,6.0
3,6,Naruto,
4,10,Naruto,


In [8]:
# Row/column count of the reduced table after restricting to the selected user ids.
combined_small.shape

(2065588, 3)

### A pivot table is created to convert the dataframe into a format suitable for the algorithm

In [9]:
# Reshape the long ratings table into a wide matrix with one row per user_id and one column per anime
# name; cells hold user_rating, and user/anime pairs with no rating are left as NaN.
collab = pd.pivot_table(
    combined_small,
    values='user_rating',
    index=['user_id'],
    columns=['name'],
)

In [10]:
# Normalize each user's ratings: subtract that user's mean rating and divide by that user's rating
# range (max - min). Some users may be more generous in their ratings and some less generous;
# normalization puts them on a comparable scale.
# The per-user statistics are computed once and applied with axis-aligned arithmetic, which gives the
# same values as a row-wise apply without calling a Python lambda for every user.
user_mean = collab.mean(axis=1)
user_range = collab.max(axis=1) - collab.min(axis=1)

# fillna(0): unrated cells (NaN) become 0. Users whose ratings are all identical have a zero range,
# so their whole row is 0/0 = NaN and also becomes all zeros here.
# .T: switch to one row per anime and one column per user.
collab_normalize = (
    collab.sub(user_mean, axis=0)
    .div(user_range, axis=0)
    .fillna(0)
    .T
)

# Users (columns) left with no non-zero entry carry no usable rating information and are removed.
collab_normalize = collab_normalize.loc[:, (collab_normalize != 0).any(axis=0)]

In [11]:
# The normalized matrix is mostly zeros, so it is converted to SciPy's compressed sparse row (CSR)
# representation, which stores only the non-zero entries.

collab_sparse = sp.sparse.csr_matrix(collab_normalize.to_numpy())

In [12]:
# Display the sparse matrix summary: shape, dtype and number of stored elements.
collab_sparse

<9013x17353 sparse matrix of type '<class 'numpy.float64'>'
	with 1660773 stored elements in Compressed Sparse Row format>

### Cosine Similarity is computed to find correlation 

In [13]:
# Item similarity: cosine similarity between the rows of collab_sparse, i.e. between anime titles.
item_similarity = cosine_similarity(collab_sparse)
# Wrap the similarity matrix in a DataFrame labelled by anime name on both axes.
collab_item_sim = pd.DataFrame(
    data=item_similarity,
    index=collab_normalize.index,
    columns=collab_normalize.index,
)

In [14]:
# User similarity: cosine similarity between the columns of collab_sparse, i.e. between users.
# This must be computed before the DataFrame below is built from it; otherwise a fresh kernel
# raises NameError on user_similarity.
user_similarity = cosine_similarity(collab_sparse.T)
# Converting the user similarity into a pandas dataframe labelled by user id on both axes
collab_user_sim = pd.DataFrame(user_similarity, index = collab_normalize.columns, columns = collab_normalize.columns)

### Finding movies similar to the given movie 

In [15]:
# Print the anime titles most similar to the given title.
# NOTE(review): similar_movies is not defined in this notebook — presumably it is provided by the
# collaborative_filtering star import; confirm which similarity table it reads.
similar_movies('Fate/Zero')

Similar anime to Fate/Zero are 

Anime 1 is Fate/Zero 2nd Season
Anime 2 is Fate/stay night: Unlimited Blade Works
Anime 3 is Fate/stay night: Unlimited Blade Works 2nd Season
Anime 4 is Fate/stay night: Unlimited Blade Works - Prologue
Anime 5 is Steins;Gate
Anime 6 is Fullmetal Alchemist: Brotherhood
Anime 7 is Psycho-Pass
Anime 8 is Kara no Kyoukai 5: Mujun Rasen
Anime 9 is Code Geass: Hangyaku no Lelouch
Anime 10 is Code Geass: Hangyaku no Lelouch R2
Anime 11 is Kara no Kyoukai 7: Satsujin Kousatsu (Kou)
Anime 12 is Monogatari Series: Second Season
Anime 13 is Suzumiya Haruhi no Shoushitsu
Anime 14 is Tengen Toppa Gurren Lagann
Anime 15 is Bakemonogatari
Anime 16 is Kara no Kyoukai 3: Tsuukaku Zanryuu
Anime 17 is Kiseijuu: Sei no Kakuritsu
Anime 18 is Mahou Shoujo Madoka★Magica
Anime 19 is One Punch Man
Anime 20 is Hunter x Hunter (2011)


###  Finding users that have similar tastes

In [16]:
# Print the users with the most similar tastes, together with a similarity score for each.
# NOTE(review): the argument is presumably a user_id, and similar_users presumably comes from the
# collaborative_filtering star import — confirm.
similar_users(10)

Users with similar tastes are 

Other users are 504, How Similar = 0.87
Other users are 11944, How Similar = 0.80
Other users are 18837, How Similar = 0.75
Other users are 4149, How Similar = 0.68
Other users are 726, How Similar = 0.68
Other users are 17508, How Similar = 0.67
Other users are 16952, How Similar = 0.67
Other users are 15741, How Similar = 0.67
Other users are 8949, How Similar = 0.66
Other users are 9794, How Similar = 0.64


### Finding the rating for the anime

In [17]:
# Returns a single rating value for the given anime title and number.
# NOTE(review): presumably the second argument is a user_id and the result is a predicted rating for
# that user; user_anime_rating is not defined in this notebook (likely from collaborative_filtering) — confirm.
user_anime_rating('Code Geass: Hangyaku no Lelouch', 11)

8.834884278162885