# Imports
Just the standard libraries. Jikanpy is a Python wrapper for Jikan, the unofficial MyAnimeList (MAL) API.

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import torch
from jikanpy import Jikan
jikan = Jikan()

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Only ask for the GPU name when CUDA was actually selected: on a CPU-only
# machine torch.cuda.current_device() raises instead of returning a value.
device, (torch.cuda.get_device_name(torch.cuda.current_device()) if device.type == "cuda" else "CPU only")

(device(type='cuda'), 'NVIDIA GeForce RTX 3070 Laptop GPU')

# The recommender
A pipelined implementation that takes care of the boring processing work. <br>
Given a username, it outputs the top n 'seen' and 'unseen' recommendations. The 'seen' recommendations show how the model thinks the user would rate everything they have already rated.<br>
It first checks whether the user has already been trained on; if not, it fetches their rating details from MAL, trains user-specific weights, and appends them to the existing user_matrix.

In [3]:
class Recommender:
    """Matrix-factorisation anime recommender backed by files on disk.

    Predictions are computed as ``anime_matrix @ user_vector``, rescaled with
    the per-anime ``a_stds`` / ``a_avs`` tensors. Users that have no column in
    ``user_matrix`` yet are fetched through the module-level ``jikan`` client,
    trained, and appended to the user matrix, the mask and the uid mapping,
    all of which are written back to the paths given to the constructor.
    """

    def __init__(
        self,
        anime_list,
        user_list,
        aid_mapping,
        uid_mapping,
        anime_matrix,
        user_matrix,
        a_avs,
        a_stds,
        mask,
        device=None
    ):
        """Load all tables and tensors from disk.

        Args:
            anime_list: CSV path; must provide ``anime_id`` and ``title`` columns.
            user_list: CSV path; must provide ``username`` and ``user_id`` columns.
                Rewritten when a new username is fetched.
            aid_mapping: CSV path; must provide ``anime_id`` and ``aid`` columns.
            uid_mapping: CSV path; must provide ``uid`` and ``user_id`` columns.
                Rewritten when a new user is trained.
            anime_matrix: path of a saved tensor, used as (n_anime, n_latent).
            user_matrix: path of a saved tensor, used as (n_latent, n_users).
                Rewritten when a new user is trained.
            a_avs: path of a saved tensor added to the raw predictions.
            a_stds: path of a saved tensor multiplied into the raw predictions.
            mask: path of a saved tensor with one column per user; non-zero
                marks an anime the user rated. Rewritten when a new user is trained.
            device: optional ``map_location`` for the tensors. When omitted the
                tensors stay where ``anime_matrix`` was saved and every other
                model tensor is moved onto that same device.
        """
        self.al = pd.read_csv(anime_list)
        self.ul = pd.read_csv(user_list)
        self.ul_dir = user_list
        self.aidm = pd.read_csv(aid_mapping)
        self.uidm = pd.read_csv(uid_mapping)
        self.uidm_dir = uid_mapping
        self.anime_matrix = torch.load(anime_matrix, map_location=device)
        # Use the device of the loaded model rather than a notebook global so
        # the class works on its own and all tensors are guaranteed to agree.
        self.device = self.anime_matrix.device
        self.user_matrix = torch.load(user_matrix, map_location=self.device)
        self.um_dir = user_matrix
        self.a_avs = torch.load(a_avs, map_location=self.device)
        self.a_stds = torch.load(a_stds, map_location=self.device)
        # The mask is only sliced per user, so it is kept on the CPU.
        self.mask = torch.load(mask, map_location="cpu")
        self.mask_dir = mask
        self.latent_features = self.user_matrix.shape[0]

    def fetch_user(self, username, ntop):
        """Download a user's ratings, train their features and recommend.

        Only list entries with ``watching_status == 2`` and a non-zero score
        are used, and only for anime present in the aid mapping.

        Args:
            username: MAL username to fetch.
            ntop: maximum number of rows in each returned frame.

        Returns:
            ``(unseen, seen)`` DataFrames, see ``get_anime_for_userfeatures``.

        Raises:
            ValueError: if the user has no usable rating for any known anime.
        """
        print("User not found. Fetching from MAL...")
        known_ids = self.ul.loc[self.ul["username"] == username, "user_id"]
        if known_ids.empty:
            user_id = jikan.user(username=username)['user_id']
            self.ul.loc[len(self.ul)] = {"username": username, "user_id": user_id}
            self.ul.to_csv(self.ul_dir, index=False)
        else:
            user_id = known_ids.iloc[0]
        # Normalise to a plain int whatever the source (API value or pandas scalar).
        user_id = int(user_id)

        animelist = jikan.user(username=username, request='animelist')['anime']
        print("Fetched!")
        prefs = pd.DataFrame(
            [
                {'anime_id': anime['mal_id'], 'my_score': anime['score']}
                for anime in animelist
                if anime['watching_status'] == 2 and anime['score'] != 0
            ],
            columns=['anime_id', 'my_score'],
        )
        if prefs.empty:
            raise ValueError(f"{username} has no rated anime to train on")

        # Inner merge against this instance's mapping (not a notebook global)
        # keeps only anime the model knows and translates anime_id -> aid.
        rated = pd.merge(prefs, self.aidm[['anime_id', 'aid']])[['aid', 'my_score']].sort_values(by='aid')
        if rated.empty:
            raise ValueError(f"{username} has no rated anime to train on")

        # One row per anime in the model; unrated anime get score 0.
        n_anime = self.anime_matrix.shape[0]
        all_aids = pd.Index(np.arange(0, n_anime, 1), name='aid')
        dense = rated.set_index('aid').reindex(all_aids).reset_index().fillna(0)
        trainable = torch.tensor(dense['my_score'].values).reshape(-1, 1)
        return self.train_user_id(user_id, trainable, ntop)

    def train_user_id(self, user_id, scores, ntop, epochs=10000):
        """Fit a latent vector for one user and persist it.

        Args:
            user_id: id stored in the uid mapping for the new column.
                It must be present in the user list CSV.
            scores: (n_anime, 1) tensor of raw scores, 0 meaning "not rated".
            ntop: maximum number of rows in each returned frame.
            epochs: number of SGD steps.

        Returns:
            ``(unseen, seen)`` DataFrames, see ``get_anime_for_userfeatures``.

        Side effects:
            Appends a column to ``self.user_matrix`` and ``self.mask``, a row to
            ``self.uidm``, and writes all three back to their source paths.
        """
        print("Training features for user...")
        new_user = torch.randn(self.latent_features, 1, requires_grad=True, device=self.device)
        optimiser = torch.optim.SGD([{'params': new_user}], lr=2e-3, momentum=0.9)
        # Only the user vector is trained; detach so no gradient is accumulated
        # on the shared anime matrix.
        anime = self.anime_matrix.detach()
        scores = scores.to(self.device)
        mask = torch.where(scores > 0, 1, 0)
        # Standardise the rated entries; unrated entries stay at 0.
        scores = mask * (scores - self.a_avs) / self.a_stds

        for _ in range(epochs):
            optimiser.zero_grad()
            loss = torch.norm(mask * torch.matmul(anime, new_user) - scores) + 2e-3 * torch.norm(new_user)
            loss.backward()
            optimiser.step()

        print("Trained! Calculating top anime...")
        # Drop the autograd graph before storing the vector.
        new_user = new_user.detach()
        self.mask = torch.cat((self.mask, mask.cpu()), dim=1)
        self.uidm.loc[len(self.uidm.index)] = {"uid": self.user_matrix.shape[1], "user_id": user_id}
        self.user_matrix = torch.cat((self.user_matrix, new_user), dim=1)
        torch.save(self.user_matrix, self.um_dir)
        # Persist the full mask; saving only the new column would overwrite
        # every previously stored user.
        torch.save(self.mask, self.mask_dir)
        self.uidm.to_csv(self.uidm_dir, index=False)
        return self.get_anime_for_userfeatures(new_user, mask, ntop)

    def _top_predictions(self, preds, ntop):
        """Rank an (n_anime, 1) prediction tensor and attach anime ids and titles.

        Rows with a prediction <= 0 (which includes masked-out entries) are dropped.
        """
        frame = pd.DataFrame(preds.detach().cpu().numpy()).reset_index()
        frame.columns = ['aid', 'prediction']
        merged = pd.merge(pd.merge(frame, self.aidm)[['prediction', 'anime_id']], self.al[['anime_id', 'title']])
        ranked = merged.sort_values(by='prediction', ascending=False)
        return ranked.loc[ranked['prediction'] > 0].head(ntop)

    def get_anime_for_userfeatures(self, user, mask, ntop):
        """Predict scores for one user vector and split them by the mask.

        Args:
            user: tensor with ``latent_features`` elements.
            mask: tensor with one element per anime, 1 where the user rated it.
            ntop: maximum number of rows in each returned frame.

        Returns:
            ``(unseen, seen)``: DataFrames with ``prediction``, ``anime_id`` and
            ``title`` columns, sorted by descending prediction.
        """
        mask_seen = mask.reshape(-1, 1).to(self.device)
        mask_unseen = 1 - mask_seen
        preds = torch.matmul(self.anime_matrix, user.reshape(-1, 1).to(self.device)).reshape(-1, 1)
        preds = preds * self.a_stds + self.a_avs

        unseen = self._top_predictions(mask_unseen * preds, ntop)
        seen = self._top_predictions(mask_seen * preds, ntop)
        return unseen, seen

    def get_anime_for_username(self, username, ntop):
        """Return ``(unseen, seen)`` recommendations for a username.

        Uses the stored features when the user already has a column in the
        user matrix; otherwise falls back to ``fetch_user``.
        """
        user_ids = self.ul.loc[self.ul['username'] == username, 'user_id']
        if user_ids.empty:
            return self.fetch_user(username, ntop)
        user_id = user_ids.iloc[0]

        uids = self.uidm.loc[self.uidm['user_id'] == user_id, 'uid']
        if uids.empty:
            return self.fetch_user(username, ntop)
        print("User found! Calculating top anime...")
        uid = int(uids.iloc[0])

        return self.get_anime_for_userfeatures(self.user_matrix[:, uid], self.mask[:, uid], ntop)
        
    

In [4]:
# Folder holding the trained model artefacts for this dataset variant.
directory = "MAL Dataset/a5000u100k"

# The two catalogue CSVs live next to the dataset folder; every other
# constructor argument is a file inside `directory`.
recom = Recommender(
    anime_list="MAL Dataset/AnimeList.csv",
    user_list="MAL Dataset/ul.csv",
    **{
        argument: os.path.join(directory, filename)
        for argument, filename in (
            ("aid_mapping", 'aid_mapping.csv'),
            ("uid_mapping", 'uid_mapping - Copy.csv'),
            ("anime_matrix", 'anime_matrix.pt'),
            ("user_matrix", 'user_matrix - Copy.pt'),
            ("a_avs", 'a_avs.pt'),
            ("a_stds", 'a_stds.pt'),
            ("mask", 'mask - Copy.pt'),
        )
    },
)

In [5]:
# `np.warnings` was removed in NumPy 1.24 and VisibleDeprecationWarning now
# lives in `numpy.exceptions`; look it up in whichever place this NumPy has it
# and register the filter through the standard-library `warnings` module.
visible_deprecation = getattr(getattr(np, "exceptions", np), "VisibleDeprecationWarning")
warnings.filterwarnings('ignore', category=visible_deprecation)

In [8]:
# u: recommendations among anime the user has not rated; s: predictions for anime they did rate.
u, s = recom.get_anime_for_username(username="Green_Fish", ntop=20)
u

User not found. Fetching from MAL...
Fetched!
Training features for user...
Trained! Calculating top anime...


Unnamed: 0,prediction,anime_id,title
1957,9.452149,28977,Gintama°
390,9.343551,918,Gintama
410,9.332602,9969,Gintama&#039;
431,9.219964,15417,Gintama&#039;: Enchousen
2255,9.19229,34096,Gintama.
230,9.080018,15335,Gintama Movie 2: Kanketsu-hen - Yorozuya yo Ei...
1689,9.044483,820,Ginga Eiyuu Densetsu
107,8.979399,5114,Fullmetal Alchemist: Brotherhood
448,8.972966,4181,Clannad: After Story
2259,8.934178,36838,Gintama.: Shirogane no Tamashii-hen


In [9]:
# Predicted scores for the anime the user from the previous cell has already rated.
s

Unnamed: 0,prediction,anime_id,title
179,9.076328,11061,Hunter x Hunter (2011)
150,8.980632,9253,Steins;Gate
340,8.853635,32281,Kimi no Na wa.
81,8.706251,2904,Code Geass: Hangyaku no Lelouch R2
1481,8.628623,28851,Koe no Katachi
325,8.559837,30276,One Punch Man
60,8.497375,1575,Code Geass: Hangyaku no Lelouch
354,8.487936,33486,Boku no Hero Academia 2nd Season
274,8.393371,20583,Haikyuu!!
1,8.364041,19,Monster


In [10]:
# Same query for a second user; only the unseen recommendations are displayed.
u, s = recom.get_anime_for_username(username="dark_dragon007", ntop=20)
u

User not found. Fetching from MAL...
Fetched!
Training features for user...
Trained! Calculating top anime...


Unnamed: 0,prediction,anime_id,title
330,9.750292,31181,Owarimonogatari
2424,9.744391,34240,Shelter
195,9.69438,11981,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...
4979,9.661111,34376,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no S...
2411,9.588607,24687,Mushishi Zoku Shou: Odoro no Michi
2528,9.587846,14807,Kara no Kyoukai: Mirai Fukuin
542,9.576916,1004,Kanojo to Kanojo no Neko
662,9.537531,770,Pale Cocoon
4275,9.512987,25537,Fate/stay night Movie: Heaven&#039;s Feel - I....
2040,9.486119,4280,Kara no Kyoukai 4: Garan no Dou


In [11]:
# Same query for a third user; only the unseen recommendations are displayed.
u, s = recom.get_anime_for_username(username="Acidremix", ntop=20)
u

User not found. Fetching from MAL...
Fetched!
Training features for user...
Trained! Calculating top anime...


Unnamed: 0,prediction,anime_id,title
2259,9.7079,36838,Gintama.: Shirogane no Tamashii-hen
1957,9.67722,28977,Gintama°
2255,9.676025,34096,Gintama.
2057,9.523924,24701,Mushishi Zoku Shou 2nd Season
230,9.394535,15335,Gintama Movie 2: Kanketsu-hen - Yorozuya yo Ei...
410,9.312644,9969,Gintama&#039;
431,9.29901,15417,Gintama&#039;: Enchousen
41,9.248134,578,Hotaru no Haka
2758,9.242245,26055,JoJo no Kimyou na Bouken: Stardust Crusaders 2...
15,9.239453,199,Sen to Chihiro no Kamikakushi
