In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics.pairwise import cosine_similarity
import jaro

from tqdm import tqdm
import pickle

from loguru import logger

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
pd.set_option('display.max_columns', None)

In [4]:
anime_selected = pd.read_csv('data/anime_selected.csv')

In [5]:
anime_synopsis_emb = pd.read_csv('data/anime_synopsis_emb_2.csv')

In [6]:
rating_selected = pd.read_csv('data/rating_selected.csv')

In [7]:
lightfm_emb_df = pd.read_csv('data/lightfm_emb_df_50e.csv')

In [8]:
anime_emb = lightfm_emb_df.merge(anime_synopsis_emb, on = 'anime_id')

In [9]:
user_df = pd.read_csv('data/user-preferences.csv')

In [10]:
with open('models/LightGBM-v5.pickle', 'rb') as file:
    model = pickle.load(file)

In [11]:
def recommend(user_input, model, ratings_df, user_df, anime_emb, anime_sel, new_user_preferences = {}):
    
    if type(user_input) is int:
        
        logger.debug('Getting recommendations for user')
        
        u_id = user_input
        
        if u_id in user_df.user_id.values:
            
            logger.debug('Getting recommendations for existing user')
            user_emb = user_df[user_df['user_id'] == u_id]
            lgb_dataset = pd.DataFrame(np.repeat(user_emb.values, len(anime_emb), axis = 0), columns = user_emb.columns).\
            join(anime_emb)

            logger.debug('Predicting rating')
            preds_rating = model.predict(lgb_dataset.drop(['user_id', 'anime_id'], axis = 1))
            lgb_dataset['predicted_rating'] = preds_rating

            emb_cols = [col for col in lgb_dataset.columns if 'emb' in col]

            user_ratings = ratings_df[ratings_df['user_id'] == u_id]

            logger.debug('Selecting anime watched by user')
            watched = lgb_dataset[lgb_dataset['anime_id'].\
                                 isin(user_ratings.anime_id.values)][['anime_id', 'predicted_rating']+emb_cols].\
            merge(user_ratings[['anime_id', 'rating']], on = 'anime_id').\
            merge(anime_sel[['anime_id', 'Name']], on = 'anime_id').\
            sort_values(by = 'rating', ascending = False)
            
            if watched['rating'].max() >= 7:
            
                user_favorite_emb = np.mean(watched[watched['rating'] == watched['rating'].max()][emb_cols].values, axis = 0)

                logger.debug('Calculating top 1000')
                top_1000_user = lgb_dataset[~lgb_dataset['anime_id'].\
                                         isin(user_ratings.anime_id.values)][['anime_id', 'predicted_rating']+emb_cols].\
                merge(anime_sel[['anime_id', 'Name']], on = 'anime_id').\
                sort_values(by = 'predicted_rating', ascending = False).iloc[:1000, :]

                top_1000_user['score'] = cosine_similarity([user_favorite_emb], top_1000_user[emb_cols].values)[0]

                logger.debug('Selecting top 10')
                top_10_user = top_1000_user.sort_values(by = ['score', 'predicted_rating'], ascending = False).\
                iloc[:10, :][['anime_id', 'Name', 'predicted_rating', 'score']].\
                sort_values(by = 'predicted_rating', ascending = False)
            
            else:
                
                lgb_dataset = lgb_dataset.merge(anime_sel[['anime_id', 'Name']], on = 'anime_id')

                logger.debug('Selecting top 10')
                top_10_user = lgb_dataset.sort_values(by = 'predicted_rating', ascending = False).\
                iloc[:10, :][['anime_id', 'Name', 'predicted_rating']].\
                sort_values(by = 'predicted_rating', ascending = False)

            watched = watched[['anime_id', 'Name', 'rating', 'predicted_rating']]
            
            return watched, top_10_user
        
        elif u_id not in user_df.user_id.values:
            
            if len(new_user_preferences) > 0:
                logger.debug('Getting recommendations for new user with preferences')

                user_dict = {}
                for k in user_df.columns:
                    if k in new_user_preferences:
                        user_dict[k] = new_user_preferences[k]
                    else:
                        user_dict[k] = [0]
                
                relevant_anime = []
                for anime_id, genres in zip(anime_sel['anime_id'], anime_sel['Genres'].values):
                    if set(genres.split(',')).intersection(set(new_user_preferences.keys())):
                        relevant_anime.append(anime_id)                             
                          
            else:
                logger.debug('Getting recommendations for new user without preferences')
                user_dict = {}
                for k in user_df.columns:
                    user_dict[k] = [0]
                    
                relevant_anime = []

            user_emb = pd.DataFrame.from_dict(user_dict)
            lgb_dataset = pd.DataFrame(np.repeat(user_emb.values, len(anime_emb), axis = 0), columns = user_emb.columns).\
            join(anime_emb)
            
            if len(relevant_anime) > 1:
                lgb_dataset = lgb_dataset[lgb_dataset['anime_id'].isin(relevant_anime)]

            logger.debug('Predicting rating')
            preds_rating = model.predict(lgb_dataset.drop(['user_id', 'anime_id'], axis = 1))
            lgb_dataset['predicted_rating'] = preds_rating

            lgb_dataset = lgb_dataset.merge(anime_sel[['anime_id', 'Name']], on = 'anime_id')

            logger.debug('Selecting top 10')
            top_10_user = lgb_dataset.sort_values(by = 'predicted_rating', ascending = False).\
            iloc[:10, :][['anime_id', 'Name', 'predicted_rating']].\
            sort_values(by = 'predicted_rating', ascending = False)

            return None, top_10_user
        
    elif type(user_input) is str:
        
        logger.debug('Searching for similar anime')
        
        a_name = user_input
        anime_df = anime_sel[['anime_id', 'Name', 'Genres', 'Score']].merge(anime_emb, on = 'anime_id')
        
        emb_cols = [col for col in anime_df.columns if 'emb' in col]
        
        if a_name not in anime_df['Name'].values:
            
            similar_names = []
            
            for name in anime_selected['Name'].values:
                score = jaro.jaro_winkler_metric(a_name, name)
                if score > 0.9:
                    similar_names.append((name, score))
            if len(similar_names) == 0:
                logger.debug('Anime not found in our database! Please check back later')
                return None, 'Anime not found in our database! Please check back later'
            else:
                similar_names.sort(key = lambda x: x[1], reverse = True)
                a_name = similar_names[0][0]
            
        logger.debug('Requested anime found in database')

        anime_request = anime_df[anime_df['Name'] == a_name]
        anime_search = anime_df[~anime_df['Name'].isin([a_name])]
        anime_search['score'] = cosine_similarity(anime_request[emb_cols].values, anime_search[emb_cols].values)[0]

        logger.debug('Found new anime for you')

        anime_search = anime_search.sort_values(by = 'score', ascending = False).iloc[:10, :][['anime_id', 'Name', 'Genres', 'Score']]

        return a_name, anime_search

In [12]:
new_user_preferences = {'Hentai': [1]}

In [13]:
watched, recs = recommend(252525252000, model, rating_selected, user_df, anime_emb, anime_selected, new_user_preferences)

2023-01-22 11:18:46.208 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:18:46.209 | DEBUG    | __main__:recommend:64 - Getting recommendations for new user with preferences
2023-01-22 11:18:46.278 | DEBUG    | __main__:recommend:93 - Predicting rating
2023-01-22 11:18:46.342 | DEBUG    | __main__:recommend:99 - Selecting top 10


In [14]:
watched

In [15]:
recs

Unnamed: 0,anime_id,Name,predicted_rating
287,3918,Resort Boin,5.741782
660,10779,Eroge! H mo Game mo Kaihatsu Zanmai,5.562331
144,2866,Ane☆Haramix,5.555424
600,9322,HHH Triple Ecchi,5.519064
347,4502,Stringendo+Accelerando Ultimatum Sera,5.504671
668,11321,Nee Summer!,5.503282
238,3559,Stringendo: Angel-tachi no Private Lesson,5.502744
536,6893,Oni Chichi,5.497214
27,1401,Bible Black: New Testament,5.453221
681,11879,Oni Chichi: Re-born,5.445516


In [16]:
new_user_preferences = {'School': [1], 'Romance': [0.4]}

In [17]:
watched, recs = recommend(252525252000, model, rating_selected, user_df, anime_emb, anime_selected, new_user_preferences)

2023-01-22 11:19:11.312 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:19:11.313 | DEBUG    | __main__:recommend:64 - Getting recommendations for new user with preferences
2023-01-22 11:19:11.377 | DEBUG    | __main__:recommend:93 - Predicting rating
2023-01-22 11:19:11.397 | DEBUG    | __main__:recommend:99 - Selecting top 10


In [18]:
watched

In [19]:
recs

Unnamed: 0,anime_id,Name,predicted_rating
122,32281,Kimi no Na wa.,6.828395
46,5081,Bakemonogatari,6.640729
135,34822,Tsuki ga Kirei,6.388173
141,35466,ReLIFE: Kanketsu-hen,6.385452
93,21185,Baby Steps,6.152159
149,37786,Yagate Kimi ni Naru,6.110218
38,3750,Maria-sama ga Miteru 4th,5.953403
0,183,Whistle!,5.734248
4,401,Rurouni Kenshin: Meiji Kenkaku Romantan - Seis...,5.629359
151,37956,3D Kanojo: Real Girl 2nd Season,5.535871


In [20]:
new_user_preferences = {'Military': [1], 'Action': [0.9], 'Adventure': [0.4]}

In [21]:
watched, recs = recommend(252525252000, model, rating_selected, user_df, anime_emb, anime_selected, new_user_preferences)

2023-01-22 11:19:21.025 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:19:21.027 | DEBUG    | __main__:recommend:64 - Getting recommendations for new user with preferences
2023-01-22 11:19:21.098 | DEBUG    | __main__:recommend:93 - Predicting rating
2023-01-22 11:19:21.332 | DEBUG    | __main__:recommend:99 - Selecting top 10


In [22]:
watched

In [23]:
recs

Unnamed: 0,anime_id,Name,predicted_rating
359,820,Ginga Eiyuu Densetsu,7.861911
0,1,Cowboy Bebop,7.273682
4342,38524,Shingeki no Kyojin Season 3 Part 2,7.154719
17,44,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,7.135634
227,457,Mushishi,7.124447
3153,21939,Mushishi Zoku Shou,7.009389
3262,24701,Mushishi Zoku Shou 2nd Season,7.00563
2631,11061,Hunter x Hunter (2011),6.985788
4218,37521,Vinland Saga,6.985468
15,33,Kenpuu Denki Berserk,6.979776


In [24]:
new_user_preferences = {}

In [25]:
watched, recs = recommend(252525252000, model, rating_selected, user_df, anime_emb, anime_selected, new_user_preferences)

2023-01-22 11:19:25.872 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:19:25.874 | DEBUG    | __main__:recommend:79 - Getting recommendations for new user without preferences
2023-01-22 11:19:25.894 | DEBUG    | __main__:recommend:93 - Predicting rating
2023-01-22 11:19:26.610 | DEBUG    | __main__:recommend:99 - Selecting top 10


In [26]:
watched

In [27]:
recs

Unnamed: 0,anime_id,Name,predicted_rating
739,820,Ginga Eiyuu Densetsu,7.865002
5460,9253,Steins;Gate,7.767338
2908,3297,Aria the Origination,7.6595
11081,35180,3-gatsu no Lion 2nd Season,7.655907
7867,22135,Ping Pong the Animation,7.552529
831,918,Gintama,7.543455
0,1,Cowboy Bebop,7.537644
3897,5114,Fullmetal Alchemist: Brotherhood,7.524498
7835,21939,Mushishi Zoku Shou,7.514788
8288,24701,Mushishi Zoku Shou 2nd Season,7.505011


In [28]:
watched, recs = recommend(1452, model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:19:29.521 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:19:29.522 | DEBUG    | __main__:recommend:11 - Getting recommendations for existing user
2023-01-22 11:19:29.539 | DEBUG    | __main__:recommend:16 - Predicting rating
2023-01-22 11:19:30.267 | DEBUG    | __main__:recommend:24 - Selecting anime watched by user
2023-01-22 11:19:30.307 | DEBUG    | __main__:recommend:35 - Calculating top 1000
2023-01-22 11:19:30.381 | DEBUG    | __main__:recommend:43 - Selecting top 10


In [29]:
watched

Unnamed: 0,anime_id,Name,rating,predicted_rating
5,2471,Doraemon (1979),7,4.752722
3,801,Koukaku Kidoutai: Stand Alone Complex 2nd GIG,4,6.569627
2,527,Pokemon,3,3.998979
9,16498,Shingeki no Kyojin,2,6.033495
0,1,Cowboy Bebop,1,7.280251
1,20,Naruto,1,4.873409
4,1535,Death Note,1,6.657106
6,6547,Angel Beats!,1,4.717061
7,11061,Hunter x Hunter (2011),1,7.218934
8,14719,JoJo no Kimyou na Bouken (TV),1,5.927222


In [30]:
recs

Unnamed: 0,anime_id,Name,predicted_rating,score
255,283,Akage no Anne,5.71464,0.690376
1345,1491,Ginga Tetsudou 999,5.610054,0.672331
3812,4935,Ikkyuu-san,5.233734,0.682497
1955,2150,Tanoshii Muumin Ikka,5.158935,0.685525
1161,1279,Taiyou no Ko Esteban,5.133096,0.701949
205,232,Cardcaptor Sakura,5.008584,0.665842
479,516,Keroro Gunsou,4.946539,0.702589
1418,1572,Jungle Taitei,4.83521,0.696017
3074,3545,Kochira Katsushikaku Kameari Kouenmae Hashutsujo,4.833291,0.704631
1922,2116,Captain Tsubasa,4.750194,0.710168


In [31]:
watched, recs = recommend(77, model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:19:57.616 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:19:57.617 | DEBUG    | __main__:recommend:11 - Getting recommendations for existing user
2023-01-22 11:19:57.634 | DEBUG    | __main__:recommend:16 - Predicting rating
2023-01-22 11:19:58.376 | DEBUG    | __main__:recommend:24 - Selecting anime watched by user
2023-01-22 11:19:58.415 | DEBUG    | __main__:recommend:35 - Calculating top 1000
2023-01-22 11:19:58.486 | DEBUG    | __main__:recommend:43 - Selecting top 10


In [32]:
watched

Unnamed: 0,anime_id,Name,rating,predicted_rating
9,1535,Death Note,10,8.997077
16,11981,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...,10,8.960831
4,322,Paradise Kiss,10,8.545106
5,339,Serial Experiments Lain,10,8.887404
15,10620,Mirai Nikki,10,7.714446
14,9756,Mahou Shoujo Madoka★Magica,10,8.970703
8,877,Nana,10,8.984224
1,19,Monster,10,9.551063
10,1575,Code Geass: Hangyaku no Lelouch,10,8.861192
12,5114,Fullmetal Alchemist: Brotherhood,10,9.244865


In [33]:
recs

Unnamed: 0,anime_id,Name,predicted_rating,score
3464,4181,Clannad: After Story,9.296981,0.861862
2041,2251,Baccano!,9.175737,0.864979
177,205,Samurai Champloo,9.084072,0.855355
1802,2001,Tengen Toppa Gurren Lagann,9.057436,0.893775
2620,2904,Code Geass: Hangyaku no Lelouch R2,9.037893,0.85783
1090,1210,NHK ni Youkoso!,9.014538,0.880369
835,934,Higurashi no Naku Koro ni,8.814241,0.851522
4392,6211,Tokyo Magnitude 8.0,8.718044,0.855871
8629,28223,Death Parade,8.636511,0.85912
3491,4224,Toradora!,8.633241,0.8556


In [34]:
watched, recs = recommend(4050, model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:12.897 | DEBUG    | __main__:recommend:5 - Getting recommendations for user
2023-01-22 11:20:12.898 | DEBUG    | __main__:recommend:11 - Getting recommendations for existing user
2023-01-22 11:20:12.915 | DEBUG    | __main__:recommend:16 - Predicting rating
2023-01-22 11:20:13.671 | DEBUG    | __main__:recommend:24 - Selecting anime watched by user
2023-01-22 11:20:13.709 | DEBUG    | __main__:recommend:35 - Calculating top 1000
2023-01-22 11:20:13.779 | DEBUG    | __main__:recommend:43 - Selecting top 10


In [35]:
watched

Unnamed: 0,anime_id,Name,rating,predicted_rating
0,934,Higurashi no Naku Koro ni,10,8.903886
3,22789,Barakamon,9,9.042247
4,32828,Amaama to Inazuma,9,8.311512
2,21511,Kantai Collection: KanColle,8,7.840949
1,16592,Danganronpa: Kibou no Gakuen to Zetsubou no Ko...,7,7.774751


In [36]:
recs

Unnamed: 0,anime_id,Name,predicted_rating,score
9,19,Monster,9.510989,0.789292
3475,4181,Clannad: After Story,9.375802,0.788428
1388,1535,Death Note,9.05787,0.790921
1712,1889,Higurashi no Naku Koro ni Kai,8.953438,0.965028
313,339,Serial Experiments Lain,8.94757,0.820276
1383,1530,Kanon (2006),8.83166,0.767815
1975,2167,Clannad,8.710588,0.786107
2646,2924,ef: A Tale of Memories.,8.707864,0.790614
4521,6547,Angel Beats!,8.558232,0.777259
4981,7724,Shiki,8.555955,0.779236


In [37]:
recommend('Neon Genesis Evangelion', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:21.314 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:21.382 | DEBUG    | __main__:recommend:130 - Requested anime found in database
2023-01-22 11:20:21.445 | DEBUG    | __main__:recommend:136 - Found new anime for you


Unnamed: 0,anime_id,Name,Genres,Score
21,32,Neon Genesis Evangelion: The End of Evangelion,"Sci-Fi, Dementia, Psychological, Drama, Mecha",8.51
202,227,FLCL,"Action, Sci-Fi, Comedy, Parody, Dementia, Psyc...",8.03
313,339,Serial Experiments Lain,"Dementia, Drama, Mystery, Psychological, Sci-F...",8.03
20,31,Neon Genesis Evangelion: Death & Rebirth,"Drama, Mecha, Psychological, Sci-Fi",7.45
1814,2001,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",8.66
2518,2759,Evangelion: 1.0 You Are (Not) Alone,"Action, Sci-Fi, Psychological, Drama, Mecha",8.07
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",8.78
3234,3784,Evangelion: 2.0 You Can (Not) Advance,"Action, Sci-Fi, Psychological, Drama, Mecha",8.35
297,323,Mousou Dairinin,"Mystery, Dementia, Police, Psychological, Supe...",7.68
23,43,Koukaku Kidoutai,"Action, Mecha, Police, Psychological, Sci-Fi, ...",8.29


In [38]:
recommend('Black Lagoon', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:23.922 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:23.992 | DEBUG    | __main__:recommend:130 - Requested anime found in database
2023-01-22 11:20:24.053 | DEBUG    | __main__:recommend:136 - Found new anime for you


Unnamed: 0,anime_id,Name,Genres,Score
1375,1519,Black Lagoon: The Second Barrage,"Action, Seinen",8.19
3803,4901,Black Lagoon: Roberta's Blood Trail,"Action, Seinen",8.05
4187,5682,Phantom: Requiem for the Phantom,"Action, Drama, Seinen, Thriller",8.01
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",8.24
181,205,Samurai Champloo,"Action, Adventure, Comedy, Historical, Samurai...",8.5
1837,2025,Darker than Black: Kuro no Keiyakusha,"Action, Sci-Fi, Mystery, Super Power",8.12
244,270,Hellsing,"Action, Horror, Supernatural, Vampire, Seinen",7.5
716,790,Ergo Proxy,"Psychological, Mystery, Sci-Fi",7.92
241,267,Gungrave,"Action, Drama, Sci-Fi, Seinen, Super Power",7.87
1648,1818,Claymore,"Action, Adventure, Super Power, Demons, Supern...",7.78


In [39]:
recommend('Cowboy Bebop', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:26.257 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:26.329 | DEBUG    | __main__:recommend:130 - Requested anime found in database
2023-01-22 11:20:26.389 | DEBUG    | __main__:recommend:136 - Found new anime for you


Unnamed: 0,anime_id,Name,Genres,Score
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",8.24
181,205,Samurai Champloo,"Action, Adventure, Comedy, Historical, Samurai...",8.5
1814,2001,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",8.66
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",8.39
374,400,Seihou Bukyou Outlaw Star,"Action, Sci-Fi, Adventure, Space, Comedy",7.87
2053,2251,Baccano!,"Action, Comedy, Historical, Mystery, Supernatural",8.42
3897,5114,Fullmetal Alchemist: Brotherhood,"Action, Military, Adventure, Comedy, Drama, Ma...",9.19
178,202,Wolf's Rain,"Action, Adventure, Drama, Fantasy, Mystery, Sc...",7.82
303,329,Planetes,"Drama, Romance, Sci-Fi, Seinen, Space",8.31
202,227,FLCL,"Action, Sci-Fi, Comedy, Parody, Dementia, Psyc...",8.03


In [40]:
recommend('Kimagure Orange☆Road', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:29.906 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:29.976 | DEBUG    | __main__:recommend:130 - Requested anime found in database
2023-01-22 11:20:30.038 | DEBUG    | __main__:recommend:136 - Found new anime for you


Unnamed: 0,anime_id,Name,Genres,Score
2240,2454,Kimagure Orange☆Road: Ano Hi ni Kaeritai,"Comedy, Drama, Romance, Shounen, Slice of Life",7.65
1315,1453,Maison Ikkoku,"Slice of Life, Comedy, Drama, Romance, Seinen",8.18
1910,2098,"Shin Kimagure Orange☆Road: Soshite, Ano Natsu ...","Ecchi, Slice of Life, Fantasy, Magic, Comedy, ...",7.43
2244,2458,Kimagure Orange☆Road OVA,"Magic, Comedy, Romance, School, Drama",7.44
2972,3394,Kimagure Orange☆Road: Shounen Jump Special,"Romance, Drama, Shounen",6.61
2246,2460,Maison Ikkoku: Kanketsu-hen,"Comedy, Drama, Romance, Seinen",7.23
590,630,Mahoutsukai Tai! (TV),"Sci-Fi, Comedy, Magic, Romance, School",6.78
1744,1920,Urusei Yatsura Movie 1: Only You,"Action, Adventure, Comedy, Romance, Drama, Sci-Fi",7.08
1177,1293,Urusei Yatsura,"Action, Sci-Fi, Adventure, Comedy, Drama, Romance",7.69
526,563,DNA²,"Sci-Fi, Comedy, Romance",6.68


In [41]:
recommend('Shoujo Kakumei Utena', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:35.649 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:35.718 | DEBUG    | __main__:recommend:130 - Requested anime found in database
2023-01-22 11:20:35.777 | DEBUG    | __main__:recommend:136 - Found new anime for you


Unnamed: 0,anime_id,Name,Genres,Score
661,721,Princess Tutu,"Comedy, Drama, Fantasy, Magic, Mystery, Romance",8.15
411,441,Shoujo Kakumei Utena: Adolescence Mokushiroku,"Dementia, Drama, Fantasy, Romance, Shoujo, Sho...",7.6
361,387,Haibane Renmei,"Slice of Life, Mystery, Psychological, Drama, ...",7.99
313,339,Serial Experiments Lain,"Dementia, Drama, Mystery, Psychological, Sci-F...",8.03
19,30,Neon Genesis Evangelion,"Action, Sci-Fi, Dementia, Psychological, Drama...",8.32
343,369,Boogiepop wa Warawanai,"Psychological, Supernatural, Dementia, Mystery...",7.17
122,145,Kareshi Kanojo no Jijou,"Comedy, Drama, Romance, School, Shoujo, Slice ...",7.61
158,182,Tenkuu no Escaflowne,"Adventure, Psychological, Romance, Fantasy, Mecha",7.7
207,232,Cardcaptor Sakura,"Adventure, Comedy, Drama, Magic, Romance, Fant...",8.16
297,323,Mousou Dairinin,"Mystery, Dementia, Police, Psychological, Supe...",7.68


In [42]:
recommend('Utena', model, rating_selected, user_df, anime_emb, anime_selected)

2023-01-22 11:20:37.218 | DEBUG    | __main__:recommend:108 - Searching for similar anime
2023-01-22 11:20:37.452 | DEBUG    | __main__:recommend:124 - Anime not found in our database! Please check back later


'Anime not found in our database! Please check back later'

## Measure precision@10 and recall@10

In [32]:
def get_metrics(user_input, model, ratings_df, user_df, anime_emb, anime_sel, new_user_preferences = {}):
    
    #logger.debug('Getting recommendations for user')
    if type(user_input) is int:
        
        u_id = user_input
        
        if u_id in user_df.user_id.values:
            
            #logger.debug('Getting recommendations for existing user')
            user_emb = user_df[user_df['user_id'] == u_id]
            lgb_dataset = pd.DataFrame(np.repeat(user_emb.values, len(anime_emb), axis = 0), columns = user_emb.columns).\
            join(anime_emb)

            #logger.debug('Predicting rating')
            preds_rating = model.predict(lgb_dataset.drop(['user_id', 'anime_id'], axis = 1))
            lgb_dataset['predicted_rating'] = preds_rating

            emb_cols = [col for col in lgb_dataset.columns if 'emb' in col]

            user_ratings = ratings_df[ratings_df['user_id'] == u_id]

            #logger.debug('Selecting anime watched by user')
            watched = lgb_dataset[lgb_dataset['anime_id'].\
                                 isin(user_ratings.anime_id.values)][['anime_id', 'predicted_rating']+emb_cols].\
            merge(user_ratings[['anime_id', 'rating']], on = 'anime_id').\
            merge(anime_sel[['anime_id', 'Name']], on = 'anime_id').\
            sort_values(by = 'rating', ascending = False)
            
            if watched['rating'].max() >= 7:
            
                user_favorite_emb = np.mean(watched[watched['rating'] == watched['rating'].max()][emb_cols].values, axis = 0)

                #logger.debug('Calculating top 1000')
                top_1000_user = lgb_dataset[['anime_id', 'predicted_rating']+emb_cols].\
                merge(anime_sel[['anime_id', 'Name']], on = 'anime_id').\
                sort_values(by = 'predicted_rating', ascending = False).iloc[:1000, :]

                top_1000_user['score'] = cosine_similarity([user_favorite_emb], top_1000_user[emb_cols].values)[0]

                #logger.debug('Selecting top 10')
                top_10_user = top_1000_user.sort_values(by = ['score', 'predicted_rating'], ascending = False).\
                iloc[:10, :][['anime_id', 'Name', 'predicted_rating', 'score']].\
                sort_values(by = 'predicted_rating', ascending = False)
            
            else:
                
                lgb_dataset = lgb_dataset.merge(anime_sel[['anime_id', 'Name']], on = 'anime_id')

                #logger.debug('Selecting top 10')
                top_10_user = lgb_dataset.sort_values(by = 'predicted_rating', ascending = False).\
                iloc[:10, :][['anime_id', 'Name', 'predicted_rating']].\
                sort_values(by = 'predicted_rating', ascending = False)

            watched = watched[['anime_id', 'Name', 'rating', 'predicted_rating']]
            
            favs = watched[['anime_id', 'Name', 'rating', 'predicted_rating']][(watched['rating']==watched['rating'].max())|(watched['rating']==watched['rating'].max()-1)]
    
            num_common = len(list(set(favs['Name'].values).intersection(top_10_user['Name'].values)))
    
            precision = num_common/10
            recall = num_common/len(favs)
            
            return precision, recall

In [33]:
user_id_test = np.load('user_id_test.npy')

In [34]:
precision_list = []
recall_list = []

for i in tqdm(list(user_id_test)):
    us_input = int(i)
    precision, recall = get_metrics(us_input, model, rating_selected, user_df, anime_emb, anime_selected)
    precision_list.append(precision)
    recall_list.append(recall)

100%|██████████| 2621/2621 [42:17<00:00,  1.03it/s]


In [35]:
np.mean(precision_list)

0.4448683708508203

In [36]:
np.mean(recall_list)

0.18564694505717275