In [94]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
! cp /content/drive/My\ Drive/kaggle.json /root/.kaggle/

In [0]:
! kaggle datasets download -d CooperUnion/anime-recommendations-database -p /content/drive/My\ Drive/Recommendation\ Systems/Anime\ Recommendation/data/ --unzip

Downloading anime-recommendations-database.zip to /content/drive/My Drive/Recommendation Systems/Anime Recommendation/data
 36% 9.00M/25.0M [00:00<00:01, 14.5MB/s]
100% 25.0M/25.0M [00:00<00:00, 28.7MB/s]


In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import defaultdict
import pickle as pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error

In [0]:
DATA_DIR='/content/drive/My Drive/Recommendation Systems/Anime Recommendation/data/'

In [0]:
anime_df = pd.read_csv(DATA_DIR+'anime.csv')
rating_df = pd.read_csv(DATA_DIR+'rating.csv')

In [98]:
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [99]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [0]:
def anime_id_name_mapping(df,anime_id):
  return df[df['anime_id']==anime_id]['name'].values[0]

In [0]:
#Function that computes the root mean squared error (or RMSE)
def rmse(y_true, y_pred):
  return np.sqrt(mean_squared_error(y_true, y_pred))

# **Recommendation using Collaborative filtering**

In [0]:
#Creating a rating matrix on entire rating dataframe to fetch the information on the movies watched by a user but not rated it
r_matrix = pd.pivot_table(rating_df,index='user_id',columns='anime_id',values='rating')

In [9]:
r_matrix.head()

anime_id,1,5,6,7,8,15,16,17,18,19,20,22,23,24,25,26,27,28,29,30,31,32,33,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,...,33906,33907,33908,33909,33910,33911,33912,33914,33915,33934,33964,33978,33979,33998,33999,34000,34015,34036,34048,34078,34085,34103,34107,34119,34136,34173,34238,34239,34240,34252,34283,34324,34325,34349,34358,34367,34412,34475,34476,34519
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,,,,,,,,,,,-1.0,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,8.0,,,6.0,,6.0,6.0,,6.0,5.0,,1.0,,,,,,1.0,,,,,,7.0,,8.0,,,,,,,,,,7.0,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [122]:
#Create a dict where key is the userid and value=list of movies that the user has watched but not rated it
usr_watched_not_rated = defaultdict(list)
users = r_matrix.index
for user_id in tqdm(users):
  mids = list(r_matrix.loc[user_id][r_matrix.loc[user_id]==-1].index)
  usr_watched_not_rated[user_id] = mids

100%|██████████| 38779/38779 [00:41<00:00, 943.07it/s]


In [0]:
with open(DATA_DIR+'usr_watched_not_rated.pkl','wb') as f:
  pickle.dump(usr_watched_not_rated,f)

In [0]:
with open(DATA_DIR+'usr_watched_not_rated.pkl','rb') as f:
  usr_watched_not_rated = pickle.load(f)

In [0]:
# Considering only 40,000 users due to memory issues while creating rating matrix

rating_df = rating_df[rating_df['user_id']<=40000]

In [104]:
#Before splitting into train and test, we should remove the users which have only one entry in rating dataframe. 
#This is because, we're stratifying based on user_id. So a user_id should present atleast two times in the data. 
#So one will be in train and the other will be in test

'''
classes, y_indices = np.unique(y, return_inverse=True)
class_counts = np.bincount(y_indices)
user_to_bremoved = np.nonzero(class_counts<2)[0]
'''

user_rating_cnt = rating_df.groupby(['user_id']).count()
user_id_cnt_lt_2 = user_rating_cnt[user_rating_cnt['anime_id']<2].index.values

indexes_to_drop = []
for uid in tqdm(list(user_id_cnt_lt_2)):
  idxs = list(rating_df[rating_df['user_id']==uid].index.values)
  indexes_to_drop.extend(idxs)

rating_df = rating_df.drop(index=indexes_to_drop)

100%|██████████| 1221/1221 [00:06<00:00, 189.28it/s]


In [0]:
#Split the rating dataframe into train and test set and stratify along user_id to make sure that users in train = users in test

X = rating_df.copy()
y = rating_df['user_id']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,stratify=y, random_state=42)

In [106]:
print('No of users in train set: ',X_train['user_id'].nunique())
print('No of users in test set: ',X_test['user_id'].nunique())

No of users in train set:  38779
No of users in test set:  37924


In [0]:
#Build r_matrix using pivot_table method on X_train

r_matrix = X_train.pivot_table(values='rating',index='user_id',columns='anime_id')

In [108]:
r_matrix.head()

anime_id,1,5,6,7,8,15,16,17,18,19,20,22,23,24,25,26,27,28,29,30,31,32,33,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,...,33696,33708,33709,33712,33713,33734,33740,33741,33750,33775,33779,33791,33798,33808,33846,33871,33902,33905,33909,33910,33911,33912,33934,33964,33979,34015,34036,34048,34085,34103,34107,34238,34239,34240,34283,34324,34325,34349,34367,34475
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,,,,,,,,,,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,8.0,,,6.0,,6.0,6.0,,6.0,5.0,,1.0,,,,,,1.0,,,,,,7.0,,8.0,,,,,,,,,,7.0,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
with open(DATA_DIR+'r_matrix_with_NaN.pkl','wb') as f:
  pickle.dump(r_matrix,f)

In [0]:
with open(DATA_DIR+'r_matrix_with_NaN.pkl','rb') as f:
  r_matrix=pickle.load(f)

In [0]:
#Replacing -1 with 0's
r_matrix = r_matrix.replace(to_replace=-1.0,value=0)
#Filling NaN with 0's
r_matrix = r_matrix.fillna(value = 0)

In [110]:
r_matrix.head()

anime_id,1,5,6,7,8,15,16,17,18,19,20,22,23,24,25,26,27,28,29,30,31,32,33,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,...,33696,33708,33709,33712,33713,33734,33740,33741,33750,33775,33779,33791,33798,33808,33846,33871,33902,33905,33909,33910,33911,33912,33934,33964,33979,34015,34036,34048,34085,34103,34107,34238,34239,34240,34283,34324,34325,34349,34367,34475
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,8.0,0.0,0.0,6.0,0.0,6.0,6.0,0.0,6.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### User-User similarity

In [0]:
'''
# Method which takes user_id and recommends the anime which similar users liked the most
def usr_usr_similarity(user_id,n_movies=10):
  n_sim_users = n_movies 
  #If user in training data
  if user_id in r_matrix.index:
    #Calculate the user similarities
    for 
    
    usr_similarities = cosine_similarity(r_matrix.loc[user_id].values.reshape(1,-1),r_matrix.drop(index=[user_id]))[0]
    top_n_sim_users = np.argsort(usr_similarities)[::-1][:n_sim_users]
    movie_ids = r_matrix.loc[top_n_sim_users].idxmax(axis=1).values
    movies = [anime_id_name_mapping(anime_df,movie_id) for movie_id in movie_ids]
    
    return movies
'''

In [0]:
'''
#Anime recommendation for user_id=25
usr_usr_similarity(25)
'''

['Cowboy Bebop',
 'Kuroshitsuji II',
 'Code Geass: Hangyaku no Lelouch',
 'Wild Arms: Twilight Venom',
 'Bleach',
 'Steins;Gate',
 'Sora no Otoshimono: Forte',
 'Shinryaku! Ika Musume',
 'Rurouni Kenshin: Meiji Kenkaku Romantan',
 'Clannad']

In [0]:
#Method which takes the user_id, anime_id and outputs the predicted rating
def user_user_similarity(user_id,anime_id):
  n_sim_users = 100
  usr_similarity_dict=defaultdict(list)
  pred_rating = 0
  #If anime_id is present in training data
  if anime_id in r_matrix:
    if user_id in r_matrix.index:
      for user in r_matrix.index:
        if user!=user_id:
          usr_similarity_dict[user] = cosine_similarity(r_matrix.loc[user_id].values.reshape(1,-1),r_matrix.loc[user].values.reshape(1,-1))[0][0]
      
      usr_similarity = [(k,v) for (k,v) in usr_similarity_dict.items()]
      usr_similarity_sorted = sorted(usr_similarity,key=lambda x:x[1],reverse=True)
      top_n_sim_users = [x[0] for x in usr_similarity_sorted[:n_sim_users]]
      #print('Computer top similar users')
      anime_rating = 0
      no_users_rated = 0
      for i,usr in enumerate(top_n_sim_users):
        if r_matrix.loc[usr][anime_id]!=0:
          anime_rating += r_matrix.loc[usr][anime_id]
          no_users_rated += 1
        #if i%10 ==0:
          #print('anime rating: ',anime_rating)
      #print('no of users rated ',no_users_rated)
      if no_users_rated == 0:
        pred_rating = 5.0
      else:
        pred_rating = anime_rating/no_users_rated
      #print('anime rating',anime_rating)
      #print('pred_rating ',pred_rating)
    else:
      anim_ratings = r_matrix[anime_id].values
      pred_rating = anim_ratings.sum()/sum(anim_ratings!=0)
  else:
    pred_rating = 5.0
  
  return pred_rating

In [0]:
#Function to compute the RMSE score obtained on the testing set by a model
def score(test_data,cf_model):
  #Construct a list of user-movie tuples from the testing dataset
  print('Preparing test data ...')
  id_pairs = zip(test_data['user_id'], test_data['anime_id'])
  #Predict the rating for every user-movie tuple
  print('Predicting ratings....')
  y_pred = np.array([cf_model(user, anime) for (user, anime) in id_pairs])
  #Extract the actual ratings given by the users in the test data
  y_true = np.array(test_data['rating'])
  #Return the final RMSE score
  return rmse(y_true, y_pred)

In [0]:
X_test_copy = X_test.copy()
X_test_copy = X_test_copy.drop(index=list(X_test_copy[X_test_copy['rating']==-1].index.values))

In [93]:
score(X_test_copy.iloc[:10],user_user_similarity)

Preparing test data ...
Predicting ratings....


1.6990862650582066

In [0]:
#Recommend movies to a user

def recommend_anime(user,model):
  all_anime = r_matrix.columns.values
  anime_with_zero_ratings = r_matrix.loc[user][r_matrix.loc[user]==0].index.values
  anime_rating_dict = defaultdict(float)
  for anime in anime_with_zero_ratings:
    anime_rating_dict[anime] = model(user,anime)
  anime_rating = [(k,v) for k,v in anime_rating_dict.items()]
  anime_rating_sorted = sorted(anime_rating,key=lambda x:x[1],reverse=True)
  top_20_recommended_anime = [anime_id_name_mapping(anime_df,k) for (k,v) in anime_rating_sorted][:20]
  return top_20_recommended_anime

In [0]:
recommend_anime(20,user_user_similarity)