In [None]:
import numpy as np
import pandas as pd

# Load the raw list entries; later cells read the 'user', 'anime_id', 'score' and 'status' columns.
data = pd.read_csv('data.csv')

In [None]:
# Alternative subsets of `data`, kept for experimentation (uncomment one to use it).
#
# Every entry whose score is above zero:
#no_zero_score_data = data.loc[data['score'] > 0]
#
# Entries, zero scores included, with a status of 1, 2, or 6 (see MAL-Scraper):
#status = {1, 2, 6}
#WCPtW_data = data.loc[data['status'].isin(status)]
#
# Entries, zero scores included, with a status of 1 or 2:
#WC_data = data.loc[data['status'] <= 2]

# Subset used by the cells below: status 2 and a score above zero.
is_status_2 = data['status'] == 2
has_positive_score = data['score'] > 0
C_data = data.loc[is_status_2 & has_positive_score]

In [None]:
from lightfm.data import Dataset
from lightfm import LightFM

# Register every user and anime id found in C_data with a fresh LightFM Dataset,
# which assigns each of them an internal index.
dataset = Dataset()
dataset.fit(users=C_data['user'].values, items=C_data['anime_id'].values)

In [None]:
# Pair up (user, anime_id, score) for every row of C_data, then build the
# interactions matrix together with the matching weights matrix.
rating_triples = zip(C_data['user'].values,
                     C_data['anime_id'].values,
                     C_data['score'].values)
interactions, weights = dataset.build_interactions(rating_triples)

In [None]:
# Final recommender, trained on every filtered interaction with the scores passed as
# sample weights. random_state seeds the model (same seed value as the KFold split
# below) so that a rerun of the notebook is intended to reproduce the same
# recommendations instead of a fresh random initialisation each time.
model = LightFM(loss='bpr', random_state=1)
model.fit(interactions, sample_weight=weights, epochs=10)

In [None]:
from lightfm.evaluation import auc_score
from sklearn.model_selection import KFold
import statistics


# Per-fold mean AUC on the training and the held-out interactions.
train_auc_list = []
test_auc_list = []

# 5-fold split over the rows of C_data. Both the split and every fold's model are
# seeded so the reported AUC values are intended to be repeatable across reruns.
kf = KFold(n_splits = 5, shuffle = True, random_state = 1)
for train, test in kf.split(C_data):

    # `train` / `test` are positional row indices into C_data.
    C_train = C_data.iloc[train]
    C_test = C_data.iloc[test]

    # Training interactions matrix and its weights matrix. The shared `dataset`
    # (fitted on all of C_data) is reused so both matrices have the same shape.
    (train_interactions, train_weights) = dataset.build_interactions(
        zip(C_train['user'].values,
            C_train['anime_id'].values,
            C_train['score'].values))
    # Held-out interactions matrix and its weights matrix (the weights are not
    # used by the evaluation below).
    (test_interactions, test_weights) = dataset.build_interactions(
        zip(C_test['user'].values,
            C_test['anime_id'].values,
            C_test['score'].values))

    # A fresh, seeded model per fold so no state carries over between folds.
    train_model = LightFM(loss='bpr', random_state=1)
    train_model.fit(train_interactions, sample_weight=train_weights, epochs=10)

    train_auc = auc_score(train_model, train_interactions).mean()
    train_auc_list.append(train_auc)
    # Passing train_interactions lets the metric leave the already-seen training
    # pairs out of the test ranking.
    test_auc = auc_score(train_model, test_interactions, train_interactions=train_interactions).mean()
    test_auc_list.append(test_auc)

In [None]:
# Average the per-fold AUC values collected by the cross-validation loop.
n_train_folds = float(len(train_auc_list))
n_test_folds = float(len(test_auc_list))
avg_train_auc_list = sum(train_auc_list) / n_train_folds
avg_test_auc_list = sum(test_auc_list) / n_test_folds

print('Average collaborative filtering train AUC: ' + str(avg_train_auc_list))
print('Average collaborative filtering test AUC: ' + str(avg_test_auc_list))

In [None]:
# Lookup tables taken from the fitted dataset.
mappings = dataset.mapping()
# username -> LightFM user index
users = mappings[0]
# LightFM anime index -> MAL anime id (the inverse of mappings[2])
animes = dict(zip(mappings[2].values(), mappings[2].keys()))
# Every LightFM anime index as an array
animes_list = np.array(list(animes))

In [None]:
from operator import itemgetter

def getTopN(user, n):
    """Return the MAL ids of the `n` anime the model scores highest for `user`.

    Only anime for which the user has no row in C_data are considered.

    Args:
        user: Username; must be a key of the `users` mapping.
        n: Maximum number of recommendations to return.

    Returns:
        A list of MAL anime ids ordered from highest to lowest predicted score.
        It holds fewer than `n` ids when fewer candidates exist, and is empty
        when `n` is not positive.

    Raises:
        KeyError: If `user` is not present in `users`.
    """
    user_id = users[user]

    # Translate the user's MAL anime ids into LightFM indices. A new array is built
    # rather than overwriting the values pulled out of the DataFrame.
    item_index = mappings[2]
    watched_mal_ids = C_data.loc[C_data['user'] == user, 'anime_id'].values
    known_positives = np.array([item_index[mal_id] for mal_id in watched_mal_ids],
                               dtype=np.int64)

    # Candidates are all known anime minus the ones already on the user's list.
    unwatched_anime = np.setdiff1d(animes_list, known_positives)
    if n <= 0 or unwatched_anime.size == 0:
        return []

    scores = np.asarray(model.predict(user_id, unwatched_anime))

    # Higher predicted score means a stronger recommendation, so rank descending.
    # Slicing (instead of indexing n times) tolerates n larger than the candidate set.
    best_first = np.argsort(-scores, kind='stable')[:n]

    return [animes[index] for index in unwatched_anime[best_first]]

In [None]:
from bs4 import BeautifulSoup
import requests
import time

def getTitle(anime_ids, retries=10, delay=5, timeout=10):
    """Fetch the title and cover image URL of each anime from its MyAnimeList page.

    Args:
        anime_ids: Iterable of MAL anime ids.
        retries: Maximum number of attempts per anime.
        delay: Seconds to wait between failed attempts.
        timeout: Seconds to wait for the HTTP response before treating the
            attempt as failed.

    Returns:
        A list of (title, url, image_url) tuples in input order. An anime whose
        page could not be fetched or parsed within `retries` attempts is left
        out, so the list can be shorter than `anime_ids`.
    """
    base_url = "https://myanimelist.net/anime/"
    titles_list = []

    for anime_id in anime_ids:
        url = base_url + str(anime_id)

        for attempt in range(retries):
            try:
                page = requests.get(url, timeout=timeout)
                soup = BeautifulSoup(page.text, "lxml")
                # The title is the part of <title> before the first " - ".
                title = soup.title.text.split(" - ")[0].strip('\n')
                image_url = soup.find('img', attrs={"class": 'ac'})['src']

            # RequestException covers connection errors and timeouts.
            # AttributeError / TypeError / KeyError mean the expected <title> or
            # <img class="ac" src=...> was not in the returned page; that is
            # retried too instead of aborting the whole batch.
            except (requests.RequestException, AttributeError, TypeError, KeyError):
                # No point in waiting once the last attempt has failed.
                if attempt < retries - 1:
                    time.sleep(delay)
                continue

            titles_list.append((title, url, image_url))
            break

    return titles_list
        

In [None]:
def getRecommendations(user, n=5):
    """Return title information for the `n` anime picked by getTopN for `user`.

    The ids produced by getTopN(user, n) are passed straight to getTitle, and
    getTitle's result is returned unchanged.
    """
    return getTitle(getTopN(user, n))


In [None]:
def testing():
    user = "FinalReality56"
    topN = getTopN(user, 10)
    print(topN)

    start = time.time()
    a = getTitle(topN) 
    print(a)
    print(len(a))
    end = time.time()
    print(end - start)

# Run the smoke test when this cell executes; it calls getTitle, which issues live HTTP requests.
testing()