# Walkthrough of User-Based Recommendation Module

In [1]:
# !pip install fuzzywuzzy
# !pip install python-Levenshtein

In [2]:
import sys
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from joypy import joyplot
import sweetviz as sv
from tqdm import tqdm
from collections import ChainMap
import sklearn
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from scipy import sparse
pd.set_option("max_columns", 200)
from fuzzywuzzy import fuzz

# Import the recommendation module
from ubfilter import UserBasedFiltering



---
## Initialize User-based recommendation module
---

In [3]:
# set querying user_id
QUERY_USER_ID = 2000

In [4]:
# Initialize
ubf = UserBasedFiltering()

# Load titles data for checking purposes
df_titles = pd.read_csv("/mnt/disks/sdb/home/dy0904k/assets/titles_2000p.csv")

### Test: Recommendation from user_id

In [5]:
# Query by user_id

# calculate similarity and similar user ids
top_10_similar_user_ids = ubf.get_similar_users_from_user_id(start_col=1, dist_metric="cosine_similarity", query_user_id=QUERY_USER_ID)
print(top_10_similar_user_ids)

# Work out the average ratio of overlapping titles
print(ubf.evaluate_by_overlap_titles(top_10_similar_user_ids))

# make recommendation
recommended_titles = ubf.recommend_unread_titles(10, top_10_similar_user_ids, method="refer_popularity")

# show recommendations
display(df_titles[df_titles["title_id"].isin(recommended_titles)].head(3))


[3594, 1272, 4854, 138, 8311, 1480, 8884, 8265, 4062, 358]
0.15751964101492064


Unnamed: 0,title_id,title_english,title_romaji,type,duration,start_year,chapters,volume,publishing_status,country,adult,genres,average_score,mean_score,popularity,favorites,score_10,score_20,score_30,score_40,score_50,score_60,score_70,score_80,score_90,score_100,count_CURRENT,count_PLANNING,count_COMPLETED,count_DROPPED,count_PAUSED,ranking_RATED,ranking_POPULAR,synopsis,cover_image_url
3,30013,One Piece,ONE PIECE,MANGA,,1997.0,,,RELEASING,JP,False,"['Action', 'Adventure', 'Comedy', 'Fantasy']",91.0,91.0,113083,23264,394.0,58.0,109.0,177.0,453.0,805.0,2170.0,4773.0,12472.0,29853.0,80409,15977,4344,3211,9142,1.0,1.0,"As a child, Monkey D. Luffy was inspired to be...",{'large': 'https://s4.anilist.co/file/anilistc...
9,104578,Attack on Titan Season 3 Part 2,Shingeki no Kyojin 3 Part 2,ANIME,24.0,2019.0,,,FINISHED,JP,False,"['Action', 'Drama', 'Fantasy', 'Mystery']",90.0,90.0,340480,20389,775.0,136.0,274.0,460.0,1282.0,2536.0,8866.0,26721.0,70960.0,93286.0,9756,25181,303207,912,1424,1.0,2.0,The battle to retake Wall Maria begins now! Wi...,{'large': 'https://s4.anilist.co/file/anilistc...
15,11061,Hunter x Hunter (2011),HUNTER×HUNTER (2011),ANIME,24.0,2011.0,,,FINISHED,JP,False,"['Action', 'Adventure', 'Fantasy']",89.0,89.0,488883,54771,1027.0,253.0,438.0,782.0,2140.0,3951.0,12943.0,32075.0,75488.0,105797.0,71230,78737,296691,10182,32043,1.0,1.0,A new adaption of the manga of the same name b...,{'large': 'https://s4.anilist.co/file/anilistc...


### Test: Recommendation from list of title_ids

In [6]:
# Query by title_ids

# example query title_id list
ex_titles_action = [30002, 105778, 53390, 87216, 85486, 30656, 30642, 31706, 31133, 30025]
ex_titles_romance = [72451, 97852, 85135, 101583, 87395, 59211, 132182, 30145, 41514, 86481]

top_10_similar_user_ids = ubf.get_similar_users_from_titles(ex_titles_romance)
print(top_10_similar_user_ids)

# Work out the average ratio of overlapping titles
print(ubf.evaluate_by_overlap_titles(top_10_similar_user_ids))

# make recommendation
recommended_titles = ubf.recommend_unread_titles(10, top_10_similar_user_ids, query_user_id=QUERY_USER_ID, method="refer_others")

# show recommendations
display(df_titles[df_titles["title_id"].isin(recommended_titles)].head(3))


[9392 2866 9494 2154 5626 8157 8612 1434 8839 7633]
0.3838791368593074


Unnamed: 0,title_id,title_english,title_romaji,type,duration,start_year,chapters,volume,publishing_status,country,adult,genres,average_score,mean_score,popularity,favorites,score_10,score_20,score_30,score_40,score_50,score_60,score_70,score_80,score_90,score_100,count_CURRENT,count_PLANNING,count_COMPLETED,count_DROPPED,count_PAUSED,ranking_RATED,ranking_POPULAR,synopsis,cover_image_url
3718,6347,Baka and Test - Summon the Beasts,Baka to Test to Shoukanjuu,ANIME,24.0,2010.0,,,FINISHED,JP,False,"['Comedy', 'Romance']",71.0,71.0,79324,1139,277.0,243.0,624.0,1147.0,2603.0,4974.0,10011.0,10181.0,5216.0,2458.0,2220,19212,52409,3202,2281,7.0,3.0,"The story centers around Akihisa Yoshii, the ""...",{'large': 'https://s4.anilist.co/file/anilistc...
3775,9790,Heaven's Lost Property the Movie: The Angeloid...,Sora no Otoshimono: Tokeijikake no Angeloid,ANIME,96.0,2011.0,,,FINISHED,JP,False,"['Comedy', 'Drama', 'Ecchi', 'Romance', 'Sci-Fi']",71.0,72.0,18457,138,74.0,79.0,162.0,295.0,682.0,1161.0,2393.0,2252.0,1273.0,766.0,243,3979,13983,111,141,17.0,7.0,Movie adaptation of the Sora no Otoshimono man...,{'large': 'https://s4.anilist.co/file/anilistc...
4124,15451,,High School DxD New,ANIME,27.0,2013.0,,,FINISHED,JP,False,"['Action', 'Comedy', 'Ecchi', 'Fantasy', 'Roma...",71.0,71.0,118832,1887,761.0,739.0,1457.0,2489.0,5060.0,8921.0,17768.0,15831.0,8724.0,6973.0,2949,8766,103854,1752,1511,14.0,7.0,The devilish haremking Issei Hyoudou is back f...,{'large': 'https://s4.anilist.co/file/anilistc...


### Test: Recommendation from a title, but refering to title-user matrix

- Insight: the more popular the title, it comes up with more accurate results

In [7]:
# Query by title_id

# query_title_id = 15 # eye shield 21 (popular, while back, american football manga) -> recommends SLAM DUNK, Major, Hajime no Ippo(boxing), so that's pretty good
query_title_id = 105778 # Chainsaw man (popular, recent, dark fantasy) -> SPYxFAMILY (recent) , Jujutsu Kaisen(popular recent title, dark fantasy)
# query_title_id = 87395 # Grand Blue (popular, recent, comedy, romance, ) -> Kaguya-sam (popular recent rom-com), ReLife (recent romance but no comedy), so not sure about this one

res = ubf.recommend_from_other_user_histories(query_title_id)
print(res)

# show recommendations
display(df_titles[df_titles["title_id"].isin(res)])


[108556  87170 101517  87423  87216 117195 140960 120760 132029 113415]


Unnamed: 0,title_id,title_english,title_romaji,type,duration,start_year,chapters,volume,publishing_status,country,adult,genres,average_score,mean_score,popularity,favorites,score_10,score_20,score_30,score_40,score_50,score_60,score_70,score_80,score_90,score_100,count_CURRENT,count_PLANNING,count_COMPLETED,count_DROPPED,count_PAUSED,ranking_RATED,ranking_POPULAR,synopsis,cover_image_url
46,140960,SPY x FAMILY,SPY×FAMILY,ANIME,24.0,2022.0,,,FINISHED,JP,False,"['Action', 'Comedy', 'Slice of Life', 'Superna...",88.0,88.0,204605,15916,326.0,61.0,81.0,132.0,528.0,989.0,4662.0,14522.0,31530.0,24779.0,75411,39742,86553,1082,1817,2.0,1.0,Everyone has a part of themselves they cannot ...,{'large': 'https://s4.anilist.co/file/anilistc...
70,113415,JUJUTSU KAISEN,Jujutsu Kaisen,ANIME,24.0,2020.0,,,FINISHED,JP,False,"['Action', 'Drama', 'Supernatural']",87.0,87.0,464435,39901,945.0,243.0,414.0,750.0,2157.0,4231.0,16622.0,47633.0,91695.0,73209.0,83950,61681,301532,5098,12174,1.0,1.0,"A boy fights... for ""the right death.""<br>\n<b...",{'large': 'https://s4.anilist.co/file/anilistc...
100,108556,SPY x FAMILY,SPY×FAMILY,MANGA,,2019.0,,,RELEASING,JP,False,"['Action', 'Comedy', 'Slice of Life', 'Superna...",86.0,86.0,98224,10387,62.0,14.0,51.0,57.0,272.0,390.0,1989.0,5871.0,9836.0,6097.0,60612,28709,1756,1219,5928,2.0,1.0,The master spy codenamed &lt;Twilight&gt; has ...,{'large': 'https://s4.anilist.co/file/anilistc...
132,101517,Jujutsu Kaisen,Jujutsu Kaisen,MANGA,,2018.0,,,RELEASING,JP,False,"['Action', 'Drama', 'Supernatural']",85.0,85.0,114091,13319,162.0,47.0,116.0,171.0,530.0,904.0,3208.0,7946.0,14297.0,9826.0,74047,26174,4095,2172,7603,4.0,3.0,Although Yuji Itadori looks like your average ...,{'large': 'https://s4.anilist.co/file/anilistc...
260,117195,[Oshi no Ko],[Oshi no Ko],MANGA,,2020.0,,,RELEASING,JP,False,"['Drama', 'Mystery', 'Psychological', 'Superna...",83.0,83.0,35053,2953,44.0,21.0,47.0,83.0,147.0,250.0,763.0,2060.0,3097.0,1752.0,17674,14741,248,766,1624,7.0,3.0,Gorou is a gynecologist and idol fan who’s in ...,{'large': 'https://s4.anilist.co/file/anilistc...
483,87216,Demon Slayer: Kimetsu no Yaiba,Kimetsu no Yaiba,MANGA,,2016.0,208.0,23.0,FINISHED,JP,False,"['Action', 'Adventure', 'Drama', 'Supernatural']",81.0,81.0,123619,11142,288.0,169.0,459.0,841.0,1786.0,3085.0,7621.0,13322.0,16973.0,13793.0,25900,21524,69708,2128,4359,9.0,1.0,The setting is Taisho era Japan. Tanjirou is a...,{'large': 'https://s4.anilist.co/file/anilistc...
667,87423,The Promised Neverland,Yakusoku no Neverland,MANGA,,2016.0,181.0,20.0,FINISHED,JP,False,"['Drama', 'Fantasy', 'Horror', 'Mystery', 'Psy...",80.0,80.0,92175,6278,195.0,95.0,272.0,571.0,1045.0,2092.0,4843.0,8485.0,9706.0,7050.0,23546,25187,33426,3550,6466,16.0,2.0,"Emma, Norman and Ray are the brightest kids at...",{'large': 'https://s4.anilist.co/file/anilistc...
748,132029,Dandadan,Dandadan,MANGA,,2021.0,,,RELEASING,JP,False,"['Action', 'Comedy', 'Drama', 'Romance', 'Sci-...",79.0,80.0,27269,1394,26.0,20.0,36.0,50.0,153.0,275.0,885.0,2098.0,1867.0,700.0,14475,10832,207,591,1164,8.0,1.0,"Ghosts, monsters, aliens, teen romance, battle...",{'large': 'https://s4.anilist.co/file/anilistc...
1257,87170,Fire Punch,Fire Punch,MANGA,,2016.0,83.0,8.0,FINISHED,JP,False,"['Action', 'Drama', 'Mystery', 'Psychological'...",77.0,77.0,40228,2448,93.0,59.0,142.0,233.0,505.0,966.0,2389.0,3756.0,3436.0,1605.0,4583,15721,17467,1045,1412,31.0,5.0,"Orphaned siblings Agni and Luna, like the Ice ...",{'large': 'https://s4.anilist.co/file/anilistc...
1272,120760,Kaiju No.8,Kaijuu 8-gou,MANGA,,2020.0,,,RELEASING,JP,False,"['Action', 'Adventure', 'Sci-Fi']",77.0,77.0,40631,1779,44.0,25.0,69.0,120.0,363.0,663.0,2094.0,3633.0,2548.0,906.0,23134,13843,505,1029,2120,37.0,1.0,A man working a job far removed from his child...,{'large': 'https://s4.anilist.co/file/anilistc...


---
# Extras
---

## Compare different methods of user similarities calculation

Develop a custom similarity evaluation metric by checking overlapping titles in media_list

In [8]:
# Get the media_list df
df_mlist = pd.read_csv("/mnt/disks/sdb/home/dy0904k/assets/media_list_all_users.csv")

In [9]:
# calculate similarity and similar user ids
top_10_similar_user_ids = ubf.get_similar_users_from_user_id(start_col=1, dist_metric="cosine_similarity", query_user_id=QUERY_USER_ID)

# Work out the average ratio of overlapping titles
ubf.evaluate_by_overlap_titles(top_10_similar_user_ids, query_user_id=QUERY_USER_ID)

0.3244751036399858

In [10]:
# Calculate user similarities: Euclidean

# calculate similarity and similar user ids
# need to add ascending=True
top_10_similar_user_ids = ubf.get_similar_users_from_user_id(start_col=1, dist_metric="euclidean_distances", query_user_id=QUERY_USER_ID, ascending=True)

# Work out the average ratio of overlapping titles
ubf.evaluate_by_overlap_titles(top_10_similar_user_ids, query_user_id=QUERY_USER_ID)

0.3467422785584834

In [11]:
# Calculate user similarities: Manhattan

# calculate similarity and similar user ids
top_10_similar_user_ids = ubf.get_similar_users_from_user_id(start_col=1, dist_metric="manhattan_distances", query_user_id=QUERY_USER_ID, ascending=True)

# Work out the average ratio of overlapping titles
ubf.evaluate_by_overlap_titles(top_10_similar_user_ids, query_user_id=QUERY_USER_ID)

0.359142546660906

---
### Personal task list
---

#### ToDo

- [x] Calculate user similarities
    - [x] Query by user_id -> users with similar taste -> recommend
    - [x] Query by picking out the favorite titles -> users with similar taste -> recommend
- [x] Similarity Evaluation: average of title overlap ratio of top 10 similar users
    - [x] cosine -> 0.256
    - [x] euclidean -> 0.231
    - [x] manhattan -> 0.275
- [x] Recommendation
    - [x] Try item-user matrix recommendation
    - [x] Work out what the querying user has not yet seen or watched. From that list:
        - [x] a) Pick the most popular titles
        - [x] b) From top 10 similar users, which title have you not seen/read but the others have?
- [x] Refactor
    - [x] Run recommendation but show matching df, so we can see how good it might be
    - [x] Separate sections for scaled & not scaled codes
    - [x] Convert methods into one class -> UserBasedFiltering
    - [x] Refactor UserBasedFiltering: 
        - [x] move the kNN.fit() to init and assign it to `self`

#### Other
- [x] Extra: Convert media list to network visualization -> too many edges to be visualized