In [1]:
import numpy as np
import pandas as pd

from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import KNNBasic,KNNWithMeans,KNNWithZScore,KNNBaseline

In [2]:
animes = pd.read_csv('anime.csv')
ratings = pd.read_csv('rating.csv')

In [3]:
animes = animes[animes['type'] == 'TV']
animes

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
...,...,...,...,...,...,...,...
11104,34522,"Wake Up, Girls! Shin Shou","Drama, Music",TV,Unknown,,381
11106,34467,Yami Shibai 4th Season,"Dementia, Horror, Supernatural",TV,Unknown,,1838
11107,32615,Youjo Senki,"Magic, Military",TV,Unknown,,6652
11110,34284,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no Shou,"Drama, Fantasy, Magic, Slice of Life",TV,6,,2593


In [4]:
ratings.rating.replace({-1: 0}, inplace = True)
ratings.dropna(axis = 0, inplace=True)

In [5]:
ratings = ratings.groupby('anime_id').filter(lambda x : (x['anime_id'].count()>=50).any())

In [6]:
ratings

Unnamed: 0,user_id,anime_id,rating
0,1,20,0
1,1,24,0
2,1,79,0
3,1,226,0
4,1,241,0
...,...,...,...
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9


In [7]:
def get_title_from_index(movie_id):
    title = animes[animes['name'] == movie_id]
    title = list(title['name'])
    return title[0]

def get_index_from_title(title):
    anime_id = animes[animes['name'] == title]
    anime_id = list(anime_id['anime_id'])
    return anime_id[0]

In [8]:
ratings_dict =  {'Fullmetal Alchemist: Brotherhood':10,
            'Dragon Ball Z':10,
            'Code Geass: Hangyaku no Lelouch R2':8,
            'Steins;Gate':6,
            'Tengen Toppa Gurren Lagann':8,
             'Hunter x Hunter (2011)':10,
             'Boku no Hero Academia':8,
             'Berserk':9,
             'Death Note':9,
             'Monster':8
                }

user_id = 99999

ids = []

for movie_titles in ratings_dict.keys():
    x = get_index_from_title(movie_titles)
    ids.append(x)
    
id_list = [user_id] * len(ratings_dict)

user_ratings = list(zip(id_list,ids, ratings_dict.values()))

user_ratings = pd.DataFrame(user_ratings, columns=ratings.columns)
user_ratings

Unnamed: 0,user_id,anime_id,rating
0,99999,5114,10
1,99999,813,10
2,99999,2904,8
3,99999,9253,6
4,99999,2001,8
5,99999,11061,10
6,99999,31964,8
7,99999,33,9
8,99999,1535,9
9,99999,19,8


In [9]:
ratings = pd.concat([ratings, user_ratings])
ratings

Unnamed: 0,user_id,anime_id,rating
0,1,20,0
1,1,24,0
2,1,79,0
3,1,226,0
4,1,241,0
...,...,...,...
5,99999,11061,10
6,99999,31964,8
7,99999,33,9
8,99999,1535,9


In [10]:
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(ratings[['user_id', 'anime_id', 'rating']], reader)

In [11]:
svd = SVD(verbose=True,n_epochs=10)

In [12]:
np.random.seed(1)

#trainset, testset = train_test_split(data, test_size=.20)

full_trainset = data.build_full_trainset()

svd.fit(full_trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x257e2e4c3c8>

In [13]:
#predictions = svd.test(testset)
#accuracy.rmse(predictions)

In [14]:
anime_ids = list(animes.anime_id)
len(anime_ids)

3787

In [15]:
animes_not_rated = np.setdiff1d(anime_ids,ids)
print(len(animes_not_rated))
animes_not_rated

3777


array([    1,     6,     7, ..., 34503, 34522, 34525])

In [16]:
rated_df = user_ratings.copy()

user_predictions = []

for i in ids:
    rating = svd.predict(uid=user_id, iid=i)
    user_predictions.append(rating[3])    
    
rated_df['Predicted Rating'] = user_predictions
rated_df.index = rated_df['user_id']
rated_df = pd.merge(animes,rated_df,on='anime_id')
rated_df.drop(['user_id'], axis = 1, inplace = True)

rated_df

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,rating_y,Predicted Rating
0,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,10,9.056166
1,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,6,7.625526
2,11061,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855,10,9.14327
3,2904,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888,8,8.018362
4,2001,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962,8,7.891715
5,19,Monster,"Drama, Horror, Mystery, Police, Psychological,...",TV,74,8.72,247562,8,8.109274
6,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917,9,9.111089
7,33,Berserk,"Action, Adventure, Demons, Drama, Fantasy, Hor...",TV,25,8.4,226430,9,8.8374
8,31964,Boku no Hero Academia,"Action, Comedy, School, Shounen, Super Power",TV,13,8.36,282002,8,7.6183
9,813,Dragon Ball Z,"Action, Adventure, Comedy, Fantasy, Martial Ar...",TV,291,8.32,375662,10,9.429856


In [17]:
unrated = []

for i in animes_not_rated:
    rating = svd.predict(uid=user_id, iid=i)
    unrated.append(rating[3])
    
new_ratings = list(zip(animes_not_rated, unrated))

In [18]:
new_ratings

[(1, 8.626831321625751),
 (6, 8.70251074083588),
 (7, 6.8706289821495385),
 (8, 6.909181522052074),
 (15, 7.631302073729442),
 (16, 7.898583632445978),
 (17, 7.77578480996305),
 (18, 7.556330929671054),
 (20, 8.216019320640724),
 (21, 6.535712734646543),
 (22, 7.619746252557312),
 (23, 6.535712734646543),
 (24, 8.074851861619184),
 (25, 7.1340192491294045),
 (26, 6.8939885321288505),
 (27, 7.17718607868099),
 (28, 7.813830830147721),
 (29, 7.348791322481491),
 (30, 8.354357637680058),
 (45, 8.433121941134866),
 (48, 7.795250374133962),
 (50, 7.840291067805743),
 (52, 8.059019229073778),
 (53, 7.192308274092364),
 (55, 6.324735705732598),
 (56, 5.818495885930215),
 (57, 8.328769347847206),
 (58, 6.6753316106299625),
 (59, 7.181914334765454),
 (60, 7.715988003477227),
 (61, 6.867734498729575),
 (62, 7.1506445615540235),
 (63, 6.349784138330826),
 (64, 6.735894724363054),
 (65, 6.841629669743783),
 (66, 8.015704077460226),
 (67, 7.992035902700133),
 (68, 7.160190073227775),
 (69, 6.497106

In [19]:
new_ratings = pd.DataFrame(new_ratings, columns=['anime_id', 'Predicted Rating'])
new_ratings

Unnamed: 0,anime_id,Predicted Rating
0,1,8.626831
1,6,8.702511
2,7,6.870629
3,8,6.909182
4,15,7.631302
...,...,...
3772,34501,6.535713
3773,34502,6.535713
3774,34503,6.535713
3775,34522,6.535713


In [20]:
new_ratings = pd.merge(animes,new_ratings,on='anime_id')
new_ratings

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,Predicted Rating
0,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,8.356970
1,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,7.850056
2,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351,8.767205
3,15417,Gintama&#039;: Enchousen,"Action, Comedy, Historical, Parody, Samurai, S...",TV,13,9.11,81109,8.451647
4,4181,Clannad: After Story,"Drama, Fantasy, Romance, Slice of Life, Supern...",TV,24,9.06,456749,8.677988
...,...,...,...,...,...,...,...,...
3772,34522,"Wake Up, Girls! Shin Shou","Drama, Music",TV,Unknown,,381,6.535713
3773,34467,Yami Shibai 4th Season,"Dementia, Horror, Supernatural",TV,Unknown,,1838,6.535713
3774,32615,Youjo Senki,"Magic, Military",TV,Unknown,,6652,6.535713
3775,34284,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no Shou,"Drama, Fantasy, Magic, Slice of Life",TV,6,,2593,6.535713


In [21]:
new_ratings.sort_values(by='Predicted Rating', ascending=False, inplace=True)
new_ratings.head(20)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,Predicted Rating
219,7791,K-On!!,"Comedy, Music, School, Slice of Life",TV,26,8.14,246276,9.106341
203,223,Dragon Ball,"Adventure, Comedy, Fantasy, Martial Arts, Shou...",TV,153,8.16,316102,8.981772
54,5941,Cross Game,"Comedy, Drama, Romance, School, Sports",TV,50,8.53,53108,8.981661
27,31043,Boku dake ga Inai Machi,"Mystery, Psychological, Seinen, Supernatural",TV,12,8.65,402381,8.859494
42,5028,Major S5,"Comedy, Drama, Romance, Sports",TV,25,8.58,28653,8.849693
47,170,Slam Dunk,"Comedy, Drama, School, Shounen, Sports",TV,101,8.56,82570,8.843435
5,918,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,201,9.04,336376,8.794026
74,12531,Sakamichi no Apollon,"Drama, Josei, Music, Romance, School",TV,12,8.48,146592,8.777541
2,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351,8.767205
399,5680,K-On!,"Comedy, Music, School, Slice of Life",TV,13,7.87,386048,8.760558
