In [12]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [13]:
df_anime = pd.read_csv('anime.csv', usecols=['anime_id', 'name'], dtype={'anime_id': 'int32', 'name': 'str'})
df_ratings = pd.read_csv('rating.csv', dtype={'anime_id': 'int32', 'user_id': 'int32', 'rating':'float32'})[['anime_id', 'user_id', 'rating']]


In [3]:
# Sparse Matrix: Creates a matrix with animes as the rows, users as columns, and ratings as the entries
# Creating table that'll be converted to sparse matrix; anime_id for rows and user_id for cols
anime_users = df_ratings.pivot_table(index='anime_id',columns='user_id',values='rating').fillna(0)
# convert dataframe of anime features to scipy sparse matrix
anime_user_matrix = csr_matrix(anime_users.values)

user_id   1      2      3      4      5      6      7      8      9      \
anime_id                                                                  
1           0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
5           0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
6           0.0    0.0    0.0   -1.0    8.0    0.0    0.0    0.0    0.0   
7           0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
8           0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
...         ...    ...    ...    ...    ...    ...    ...    ...    ...   
34367       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
34412       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
34475       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
34476       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
34519       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

user_id   10     ...  73

In [14]:
# Formulas for getting distance between the vectors: Euclidean Distance, Manhattan Distance, Minkowski Distance
# Cosine Similarity calculates the percentage of similarity
# Algorithms: Brute force, ball tree, and KD tree
NUM_RECOMMENDED = 15
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=NUM_RECOMMENDED)   

In [9]:
# Training the model
model_knn.fit(anime_user_matrix)

NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=15)

In [21]:
def get_anime_id(name):
    anime_details = process.extractOne(name, df_anime['name'])
    # Ex: ('Naruto: Shippuden', 91, 615) gives the name, accuracy, and id number
    id = anime_details[2]
    return id

In [72]:
def get_anime_names(indices):
    names = [str(df_anime['name'][x]) for x in indices[0][1:]]
    return names


In [74]:
# Recommender(movie_name) => List of Recommended Anime
def recommender(anime_name, data_matrix, model, n_recommendations):
    model.fit(data_matrix)
    id = get_anime_id(anime_name)
    print(f'Recommended Anime For {anime_name}(ID {id}) ->')
    distances, indices = model.kneighbors(data_matrix[id], n_neighbors=n_recommendations)
    recommended = get_anime_names(indices)
    print("\n".join(recommended))
recommender('Naruto', anime_user_matrix, model_knn, 20)

Recommended Anime For Naruto(ID 841) ->
Wasurenagumo
Urayasu Tekkin Kazoku
Kuruneko Season 2
Mobile Suit Gundam: Char&#039;s Counterattack
Precure All Stars Movie DX2: Kibou no Hikari☆Rainbow Jewel wo Mamore!
Lupin III: The Last Job
SKET Dance: SD Character Flash Anime
Genshiken Nidaime OVA
School Rumble
Shinmai Maou no Testament Specials
Maria-sama ga Miteru 4th Specials
Star Driver: Kagayaki no Takuto
One Piece: Glorious Island
Rose of Versailles Movie
Doraemon Movie 13: Nobita to Kumo no Oukoku
Yes! Precure 5 Movie: Kagami no Kuni no Miracle Daibouken!
Doraemon: The Day When I Was Born
The Cockpit
Shaman King
