In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
import time

In [2]:
# Read the anime catalogue and the user ratings from CSV files in the working directory.
animes, ratings = (pd.read_csv(csv_path) for csv_path in ('anime.csv', 'rating.csv'))

In [3]:
# Restrict the catalogue to rows whose 'type' column is exactly 'TV'.
animes = animes.loc[animes['type'].eq('TV')]
animes

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
...,...,...,...,...,...,...,...
11104,34522,"Wake Up, Girls! Shin Shou","Drama, Music",TV,Unknown,,381
11106,34467,Yami Shibai 4th Season,"Dementia, Horror, Supernatural",TV,Unknown,,1838
11107,32615,Youjo Senki,"Magic, Military",TV,Unknown,,6652
11110,34284,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no Shou,"Drama, Fantasy, Magic, Slice of Life",TV,6,,2593


In [4]:
# Recode the -1 sentinel to 0 and drop rows with missing values.
# NOTE(review): -1 presumably marks "watched but not rated" - confirm against the dataset docs.
# The result is rebound instead of calling `ratings.rating.replace(..., inplace=True)`:
# that chained in-place call acts on a temporary column object and leaves `ratings`
# unchanged when pandas Copy-on-Write is active.
ratings = (
    ratings
    .assign(rating=ratings['rating'].replace({-1: 0}))
    .dropna(axis=0)
)

In [5]:
# Count the non-null entries per user, then order users from most to fewest ratings.
user_counts = (
    ratings
    .groupby('user_id')
    .count()
    .sort_values(by='rating', ascending=False)
)
user_counts

Unnamed: 0_level_0,anime_id,rating
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
48766,10227,10227
42635,3747,3747
53698,2905,2905
57620,2702,2702
59643,2633,2633
...,...,...
11323,1,1
48775,1,1
11328,1,1
48743,1,1


In [6]:
# user_counts is already sorted by rating count, so its first 10000 index labels
# are the ids of the most active users.
top_users = user_counts.index[:10000].tolist()

len(top_users)

10000

In [7]:
# Keep only the rating rows written by one of the selected top users.
ratings = ratings.loc[lambda frame: frame['user_id'].isin(top_users)]
ratings

Unnamed: 0,user_id,anime_id,rating
302,5,6,8
303,5,15,6
304,5,17,6
305,5,18,6
306,5,20,6
...,...,...,...
7813330,73507,8231,5
7813331,73507,8348,5
7813332,73507,8440,7
7813333,73507,8769,8


In [8]:
def get_title_from_index(anime_id, anime_df=None):
    """Return the title of the anime whose ``anime_id`` column equals ``anime_id``.

    Parameters
    ----------
    anime_id : int
        Identifier to look up in the ``'anime_id'`` column.
    anime_df : pandas.DataFrame, optional
        Catalogue with ``'anime_id'`` and ``'name'`` columns. When omitted, the
        module-level ``animes`` frame is used.

    Returns
    -------
    str
        The ``'name'`` value of the first matching row.

    Raises
    ------
    IndexError
        If no row has the requested ``anime_id``.
    """
    catalogue = animes if anime_df is None else anime_df
    # Match on the id column; the previous version compared the 'name' column
    # against an undefined variable and could never succeed.
    matches = catalogue.loc[catalogue['anime_id'] == anime_id, 'name']
    if matches.empty:
        raise IndexError('no anime with anime_id {!r}'.format(anime_id))
    return matches.iloc[0]

def get_index_from_title(title, anime_df=None):
    """Return the ``anime_id`` of the anime whose ``name`` column equals ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the ``'name'`` column.
    anime_df : pandas.DataFrame, optional
        Catalogue with ``'anime_id'`` and ``'name'`` columns. When omitted, the
        module-level ``animes`` frame is used.

    Returns
    -------
    int
        The ``'anime_id'`` value of the first matching row.

    Raises
    ------
    IndexError
        If no row has the requested title. The exception type matches what the
        bare ``list(...)[0]`` lookup raised before, but now carries the title.
    """
    catalogue = animes if anime_df is None else anime_df
    matches = catalogue.loc[catalogue['name'] == title, 'anime_id']
    if matches.empty:
        raise IndexError('no anime named {!r}'.format(title))
    # tolist() yields plain Python scalars, as the previous list(...) conversion did.
    return matches.tolist()[0]

In [9]:
# Ratings entered for the new user, keyed by anime title.
ratings_dict = {
    'Fullmetal Alchemist: Brotherhood': 10,
    'Dragon Ball Z': 10,
    'Code Geass: Hangyaku no Lelouch R2': 8,
    'Steins;Gate': 6,
    'Tengen Toppa Gurren Lagann': 8,
    'Hunter x Hunter (2011)': 10,
    'Boku no Hero Academia': 8,
    'Berserk': 9,
    'Death Note': 9,
    'Monster': 8,
}

# Id assigned to the new user.
user_id = 99999

# Resolve each rated title to its anime_id, in dictionary order.
ids = [get_index_from_title(rated_title) for rated_title in ratings_dict]

# One copy of the user id per rated title.
id_list = [user_id for _ in ratings_dict]

# One (user_id, anime_id, rating) row per title, using the same column labels as `ratings`.
user_ratings = pd.DataFrame(
    [
        (uid, anime_id, score)
        for uid, anime_id, score in zip(id_list, ids, ratings_dict.values())
    ],
    columns=ratings.columns,
)
user_ratings

Unnamed: 0,user_id,anime_id,rating
0,99999,5114,10
1,99999,813,10
2,99999,2904,8
3,99999,9253,6
4,99999,2001,8
5,99999,11061,10
6,99999,31964,8
7,99999,33,9
8,99999,1535,9
9,99999,19,8


In [10]:
# Append the new user's ratings. Rows already present for the same user id(s) are
# removed first, so re-running this cell does not stack duplicate copies onto
# `ratings`; on a first run the filter removes nothing.
ratings = pd.concat([
    ratings[~ratings['user_id'].isin(user_ratings['user_id'])],
    user_ratings,
])
ratings

Unnamed: 0,user_id,anime_id,rating
302,5,6,8
303,5,15,6
304,5,17,6
305,5,18,6
306,5,20,6
...,...,...,...
5,99999,11061,10
6,99999,31964,8
7,99999,33,9
8,99999,1535,9


In [11]:
# Inner-join catalogue rows with rating rows on anime_id. Both frames carry a
# 'rating' column, so the result holds rating_x (from animes) and rating_y (from ratings).
combined = animes.merge(ratings, on='anime_id', suffixes=('_x', '_y'))
combined

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,user_id,rating_y
0,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,17,10
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,38,10
2,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,43,4
3,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,46,9
4,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,129,9
...,...,...,...,...,...,...,...,...,...
2521262,7808,Zukkoke Knight: Don De La Mancha,"Adventure, Comedy, Historical, Romance",TV,23,6.47,172,1822,6
2521263,7808,Zukkoke Knight: Don De La Mancha,"Adventure, Comedy, Historical, Romance",TV,23,6.47,172,48766,0
2521264,7808,Zukkoke Knight: Don De La Mancha,"Adventure, Comedy, Historical, Romance",TV,23,6.47,172,49127,6
2521265,7808,Zukkoke Knight: Don De La Mancha,"Adventure, Comedy, Historical, Romance",TV,23,6.47,172,51693,7


In [12]:
# User x title matrix of rating_y values (mean per cell); missing cells become 0.
anime_mat = (
    combined
    .pivot_table(values='rating_y', index='user_id', columns='name', aggfunc='mean')
    .fillna(0)
)
anime_mat

name,.hack//Roots,.hack//Sign,.hack//Tasogare no Udewa Densetsu,0-sen Hayato,009-1,07-Ghost,11eyes,12-sai.: Chicchana Mune no Tokimeki,12-sai.: Chicchana Mune no Tokimeki 2nd Season,2020 Nyeon Ujuui Wonder Kiddy,...,Zukkoke Knight: Don De La Mancha,Zumomo to Nupepe,ef: A Tale of Melodies.,ef: A Tale of Memories.,gdgd Fairies,gdgd Fairies 2,iDOLM@STER Xenoglossia,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0
38,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,7.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73499,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0
73502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,9.0,10.0,0.0,0.0,0.0,0.0,10.0,10.0
73503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,8.0
73507,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,9.0


In [13]:
# Compressed-sparse-row copy of the user x title matrix.
anime_mat_sparse = csr_matrix(anime_mat.to_numpy())
anime_mat_sparse

<9996x3399 sparse matrix of type '<class 'numpy.float64'>'
	with 2058791 stored elements in Compressed Sparse Row format>

In [14]:
# Pairwise cosine similarity between the rows (users) of the sparse matrix,
# returned as a dense array (the defaults, written out).
cosine_sim = cosine_similarity(anime_mat_sparse, Y=None, dense_output=True)

In [15]:
# NOTE(review): user_based_recomm does not read this module-level value.
# Presumably it was meant as the number of similar users - confirm or remove.
k = 10

In [16]:
def user_based_recomm(k=10, n_recs=20):
    """Recommend titles to ``user_id`` from the ratings of its most similar users.

    Reads the module-level ``cosine_sim``, ``anime_mat``, ``user_id``,
    ``ratings_dict`` and ``animes``.

    Each title's score is the mean, over the ``k`` most similar users, of
    (that user's rating * that user's cosine similarity to ``user_id``).
    Titles a neighbour has no rating for count as 0 because ``anime_mat`` is
    zero-filled. Titles listed in ``ratings_dict`` are excluded from the result.

    Parameters
    ----------
    k : int, default 10
        Number of most-similar users to use.
    n_recs : int, default 20
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Rows of ``animes`` with an added ``'mean rating'`` column, sorted by
        that column in descending order, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    KeyError
        If ``user_id`` has no row in ``anime_mat``.
    """
    if k < 1:
        raise ValueError('k must be at least 1, got {!r}'.format(k))
    if user_id not in anime_mat.index:
        raise KeyError('user_id {!r} has no row in anime_mat'.format(user_id))

    # cosine_sim was computed from anime_mat's rows, so its rows and columns follow
    # anime_mat.index. Take only the target user's column rather than wrapping
    # the full N x N array in a DataFrame.
    position = anime_mat.index.get_loc(user_id)
    user_sims = pd.Series(cosine_sim[:, position], index=anime_mat.index)

    # User-to-user cosine similarity. The target user is dropped by label: slicing
    # [1:k+1] after sorting assumed it always sorts first, which fails when another
    # user ties with its self-similarity.
    neighbours = user_sims.drop(user_id).sort_values(ascending=False).head(k)

    # Scale each neighbour's ratings by its similarity, then average per title.
    weighted = anime_mat.loc[neighbours.index].fillna(0).mul(neighbours, axis=0)
    scores = weighted.mean(axis=0)

    # Remove titles the user has already rated.
    watched = list(ratings_dict.keys())
    scores = scores[~scores.index.isin(watched)]

    recos = pd.DataFrame({'name': scores.index, 'mean rating': scores.to_numpy()})

    # Attach catalogue metadata; only titles present in `animes` are kept.
    recos = pd.merge(animes, recos, on='name')
    recos = recos.sort_values(by='mean rating', ascending=False).reset_index(drop=True)

    return recos.head(n_recs)

In [17]:
# Call the recommender with no arguments and display the frame it returns.
df = user_based_recomm()

df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,mean rating
0,6,Trigun,"Action, Comedy, Sci-Fi",TV,26,8.32,283069,1.221625
1,1575,Code Geass: Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",TV,25,8.83,715151,1.059099
2,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",TV,51,8.33,600384,0.953255
3,392,Yuu☆Yuu☆Hakusho,"Action, Comedy, Demons, Fantasy, Martial Arts,...",TV,112,8.47,195017,0.879045
4,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229,0.801306
5,136,Hunter x Hunter,"Action, Adventure, Shounen, Super Power",TV,62,8.48,166255,0.769938
6,263,Hajime no Ippo,"Comedy, Drama, Shounen, Sports",TV,75,8.83,157670,0.757421
7,857,Air Gear,"Action, Comedy, Ecchi, Shounen, Sports",TV,25,7.69,194611,0.731873
8,245,Great Teacher Onizuka,"Comedy, Drama, School, Shounen, Slice of Life",TV,43,8.77,268487,0.716518
9,226,Elfen Lied,"Action, Drama, Horror, Psychological, Romance,...",TV,13,7.85,623511,0.695365
