In [1]:
import numpy as np
import pandas as pd



In [2]:
rating_df = pd.read_csv('../../data/anime/rating.csv')

In [3]:
rating_df = rating_df[(rating_df['rating'] > 0) & (rating_df['user_id'] != 42653)]

In [4]:
rating_df

Unnamed: 0,user_id,anime_id,rating
47,1,8074,10
81,1,11617,10
83,1,11757,10
101,1,15451,10
153,2,11771,10
...,...,...,...
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9


In [8]:
anime_df = pd.read_csv('../../data/anime/anime.csv')

# Pivot ratings

In [5]:
r_df = rating_df.reset_index().pivot(index='user_id', columns='anime_id', values='rating').fillna(0) # slow

In [9]:
r_df

anime_id,1,5,6,7,8,15,16,17,18,19,...,34238,34239,34240,34252,34283,34324,34325,34349,34367,34475
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,8.0,0.0,0.0,6.0,0.0,6.0,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73513,9.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73515,10.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
r = r_df.to_numpy()

## Model-Based CF (svd)

In [11]:
rating_mean = np.mean(r, axis=1)
r_demeaned = r - rating_mean.reshape(-1, 1)

In [12]:
from scipy.sparse.linalg import svds

In [13]:
U, sigma, Vt = svds(r_demeaned, k=20) # slow

In [14]:
sigma = np.diag(sigma)

In [15]:
preds = np.dot(np.dot(U, sigma), Vt) + rating_mean.reshape(-1, 1) # slow

In [16]:
pred_df = pd.DataFrame(preds, columns=r_df.columns)

In [17]:
pred_df

anime_id,1,5,6,7,8,15,16,17,18,19,...,34238,34239,34240,34252,34283,34324,34325,34349,34367,34475
0,0.089580,0.047646,0.099810,0.012229,-0.003883,-0.022197,-0.210999,-0.008665,0.038016,0.006859,...,-0.005819,0.003069,-0.062132,0.003280,0.000879,0.001889,0.000351,0.002417,0.002380,0.002822
1,-0.022364,-0.024853,-0.025923,-0.040448,0.007292,0.105692,0.028727,0.031682,0.067349,0.060930,...,-0.000887,-0.002281,-0.019833,-0.002644,-0.001730,-0.002390,-0.002397,-0.002203,-0.002365,-0.002433
2,-1.060832,-1.041978,-0.637602,-0.231377,0.134907,0.740965,-0.274167,0.199829,0.184385,0.322335,...,0.006195,-0.005582,0.357138,-0.008975,-0.009779,-0.004203,0.008490,-0.006798,-0.007129,-0.005055
3,3.438751,1.610084,3.335405,0.308518,0.302808,2.516022,1.853274,0.702383,1.210717,2.514673,...,0.021639,-0.023569,0.297816,-0.025057,-0.016289,-0.011872,0.003365,-0.019720,-0.018891,-0.021881
4,2.973376,1.706210,2.037864,0.396708,0.040940,0.081795,-0.334462,0.108283,0.288207,0.490681,...,0.045626,-0.043061,0.875342,-0.035410,-0.013402,-0.022022,-0.004508,-0.025891,-0.027263,-0.041955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69594,0.514331,0.309438,0.413956,0.293898,0.000143,-0.121581,0.385831,-0.019764,-0.041938,0.213500,...,-0.008750,-0.007958,0.022841,-0.007633,-0.010055,-0.008305,-0.008639,-0.007926,-0.008197,-0.008525
69595,2.768507,1.628949,1.786033,0.457885,0.022244,0.034066,0.707464,-0.014031,0.173089,0.339161,...,-0.012198,-0.010695,0.132717,-0.011140,-0.016425,-0.011858,-0.011570,-0.011363,-0.012107,-0.011035
69596,0.068889,0.072560,0.018157,0.012274,-0.001247,-0.013499,0.021001,-0.006580,-0.013538,-0.002999,...,-0.000350,-0.001233,0.014203,-0.001242,-0.001542,-0.001090,-0.000927,-0.000989,-0.001139,-0.001111
69597,7.125935,3.773151,5.915998,2.027060,0.115315,0.317967,0.413548,0.014931,0.508228,2.821812,...,-0.034482,-0.015058,-0.347330,-0.014578,-0.003261,-0.007963,-0.033608,-0.008566,-0.010940,-0.009641


In [18]:
def recommend_movie(pred_df, user_id, movie_df, origin_rating_df, num=5):
    user_index = user_id - 1
    sorted_user_preds = pred_df.iloc[user_index].sort_values(ascending=False)
    
    # existing rating dataset, for comparison
    existing_user_ratings = origin_rating_df[origin_rating_df['user_id'] == user_id]
    existing_ratings_df = existing_user_ratings.merge(movie_df, how='left', left_on='anime_id', right_on='anime_id'). \
                          sort_values(['rating'], ascending=False)
    
    print(f"User {user_id} has already rated {existing_ratings_df.shape[0]} movies")
    
    recommends = (movie_df[~movie_df['anime_id'].isin(existing_user_ratings['anime_id'])]). \
                 merge(sorted_user_preds.reset_index(), how='left', left_on='anime_id', right_on='anime_id'). \
                 rename(columns={user_index: 'Predictions'}). \
                 sort_values('Predictions', ascending=False). \
                 iloc[:num]
        
    return existing_ratings_df, recommends

In [34]:
anime_df = anime_df.rename(columns={'rating': 'old_rating'})
already_rated, user_preds = recommend_movie(pred_df, 426, anime_df, rating_df, num=10)

User 426 has already rated 10 movies


In [35]:
already_rated

Unnamed: 0,user_id,anime_id,rating,name,genre,type,episodes,old_rating,members
2,426,6211,10,Tokyo Magnitude 8.0,Drama,TV,11,8.19,121349
8,426,21105,10,Love Stage!!,"Comedy, Romance, Shounen Ai",TV,10,7.69,83397
9,426,23441,10,Love Stage!! OVA,"Comedy, Romance, Shounen Ai",OVA,1,7.74,23631
3,426,6702,9,Fairy Tail,"Action, Adventure, Comedy, Fantasy, Magic, Sho...",TV,175,8.22,584590
4,426,9926,9,Sekaiichi Hatsukoi,"Comedy, Drama, Romance, Shounen Ai",TV,12,8.15,94820
6,426,11123,9,Sekaiichi Hatsukoi 2,"Comedy, Drama, Romance, Shounen Ai",TV,12,8.31,69253
7,426,13125,9,Shinsekai yori,"Drama, Horror, Mystery, Sci-Fi, Supernatural",TV,25,8.53,288376
0,426,20,8,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
1,426,442,8,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571
5,426,9982,8,Fairy Tail OVA,"Comedy, Ecchi, Fantasy, Magic, Shounen",OVA,5,7.83,83421


In [36]:
user_preds

Unnamed: 0,anime_id,name,genre,type,episodes,old_rating,members,Predictions
15,199,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,1,8.93,466254,8.184818
35,431,Howl no Ugoku Shiro,"Adventure, Drama, Fantasy, Romance",Movie,1,8.74,333186,6.982521
24,164,Mononoke Hime,"Action, Adventure, Fantasy",Movie,1,8.81,339556,6.113251
114,523,Tonari no Totoro,"Adventure, Comedy, Supernatural",Movie,1,8.48,271484,5.836887
40,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917,4.850615
237,512,Majo no Takkyuubin,"Adventure, Comedy, Drama, Fantasy, Magic, Romance",Movie,1,8.27,152331,4.01966
169,513,Tenkuu no Shiro Laputa,"Adventure, Fantasy, Romance, Sci-Fi",Movie,1,8.38,151061,3.914698
116,572,Kaze no Tani no Nausicaä,"Adventure, Fantasy",Movie,1,8.47,143273,3.467404
86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229,3.452228
755,226,Elfen Lied,"Action, Drama, Horror, Psychological, Romance,...",TV,13,7.85,623511,3.393225
