## Concepts

From one current user find best correlated user

Recommend 5 best rated anime from the best correlated user

Use the best rated anime from current user and recommend 5 more 

Total 10 recommendation is provided 

## Import Library

In [107]:
#pip install pandas
import pandas as pd
import string
import re
import datetime

## Variables

In [108]:
start_time = datetime.datetime.now()
target_user_id = 0
target_vote_number = 2000
numberOfResult = 5

## Loading Dataset
▪ Loading anime.csv and rating.csv into DataFrame

In [109]:
anime_data = pd.read_csv('anime.csv')
anime_data = pd.DataFrame(anime_data)
anime_data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [110]:
rating_data = pd.read_csv('rating.csv')
rating_data = pd.DataFrame(rating_data)
rating_data = rating_data[:5000000]

new_rating_data = pd.read_csv('newRating.csv')
rating_data = pd.DataFrame(rating_data)

rating_data = pd.concat([rating_data, new_rating_data])
rating_data

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
74,75005,10620,9
75,75005,22319,10
76,75005,15689,10
77,75005,33286,4


# Data Preprocessing

## Change variable type to appropraite data type

In [111]:
#read datatype of each variable
anime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [112]:
anime_data['episodes'] = anime_data['episodes'].replace('Unknown', -1)
anime_data = anime_data.astype({"name":"string","genre":"string","type":"string","episodes":"int64"})
anime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  string 
 2   genre     12232 non-null  string 
 3   type      12269 non-null  string 
 4   episodes  12294 non-null  int64  
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(3), string(3)
memory usage: 672.5 KB


In [113]:
#all datatype is good
rating_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000079 entries, 0 to 78
Data columns (total 3 columns):
 #   Column    Dtype
---  ------    -----
 0   user_id   int64
 1   anime_id  int64
 2   rating    int64
dtypes: int64(3)
memory usage: 152.6 MB


## Rename column

In [114]:
anime_data = anime_data.rename(columns={"rating": "average_rating"})
anime_data.head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [115]:
rating_data = rating_data.rename(columns={"rating": "user_rating"})
rating_data.head(3)

Unnamed: 0,user_id,anime_id,user_rating
0,1,20,-1
1,1,24,-1
2,1,79,-1


## Check Missing Value

In [116]:
rating_data.isna().sum()

user_id        0
anime_id       0
user_rating    0
dtype: int64

In [117]:
anime_data.isna().sum()

anime_id            0
name                0
genre              62
type               25
episodes            0
average_rating    230
members             0
dtype: int64

In [118]:
anime_data.dropna(axis=0, inplace= True)
anime_data.isna().sum()

anime_id          0
name              0
genre             0
type              0
episodes          0
average_rating    0
members           0
dtype: int64

## Check Duplicates

In [119]:
#check duplicate anime
duplicated_anime = anime_data[anime_data.duplicated()].shape[0]
print(f'count of duplicated anime: {duplicated_anime}')

count of duplicated anime: 0


In [120]:
#check duplicate rating
duplicated_rating = rating_data[rating_data.duplicated()].shape[0]
print(f'count of duplicated rating: {duplicated_rating}')

count of duplicated rating: 1


In [121]:
rating_data.drop_duplicates(keep='first',inplace=True)
duplicated_rating = rating_data[rating_data.duplicated()].shape[0]
print(f'count of duplicated rating after removing: {duplicated_rating}')

count of duplicated rating after removing: 0


## Clean Text

In [122]:
def clean_text(text):
    # remove sybmols and other words
    text = re.sub(r'<[^>]*>', '', text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
    #text = re.sub('[^a-zA-Z0-9\s]','', text)
  
    # remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text

In [123]:
#before clean text
pd.DataFrame(anime_data.loc[anime_data['anime_id'] == 9969])

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [124]:
anime_data.loc[:, 'name'] = anime_data['name'].apply(clean_text)

In [125]:
#after clean text 28977 9969 15417
pd.DataFrame(anime_data.loc[anime_data['anime_id'] == 9969])

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
4,9969,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


## Merge Dataset by anime_id

In [126]:
raw_merged_data = pd.merge(rating_data, anime_data, on='anime_id', sort = True)
raw_merged_data.sort_values(by='anime_id', ascending= False)

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
4999993,13954,34475,6,Monster Strike Rain of Memories,"Action, Fantasy, Game",ONA,1,6.58,313
4999992,47765,34367,6,Long Riders Recap,"Comedy, Shounen, Slice of Life, Sports",Special,1,5.97,582
4999991,30565,34367,6,Long Riders Recap,"Comedy, Shounen, Slice of Life, Sports",Special,1,5.97,582
4999990,47421,34349,-1,Diamond no Ace Second Season OVA,"Comedy, School, Shounen, Sports",OVA,2,7.25,1885
4999989,40821,34349,-1,Diamond no Ace Second Season OVA,"Comedy, School, Shounen, Sports",OVA,2,7.25,1885
...,...,...,...,...,...,...,...,...,...
6272,29917,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6271,29903,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6270,29899,1,8,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6269,29895,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824


## Drop Unnessary Row
Drop the row that less than min_vote_number

In [127]:
#get number of rating for each anime
grouped_data = pd.DataFrame(raw_merged_data.groupby('anime_id')['user_rating'].count()).sort_values(by='user_rating', ascending= False)
grouped_data.describe()

Unnamed: 0,user_rating
count,9897.0
mean,505.202991
std,1420.236138
min,1.0
25%,7.0
50%,45.0
75%,305.0
max,25637.0


In [128]:
#get the anime that fulfilled the constaint which is less than mean value
trimmed_data = grouped_data[grouped_data['user_rating'] > grouped_data.mean()[0]]
#trim the data
raw_merged_data = raw_merged_data.loc[raw_merged_data['anime_id'].isin(trimmed_data.index.values.tolist())]
raw_merged_data

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
0,13,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1,19,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
2,21,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
3,23,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
4,32,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
...,...,...,...,...,...,...,...,...,...
4999940,47536,34240,10,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999941,47678,34240,-1,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999942,47701,34240,-1,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999943,47702,34240,7,Shelter,"Music, Sci-Fi",Music,1,8.38,71136


# PART 1: User Based Correlation

## Read target user_id from user

In [129]:
target_user_id = int(input('Enter target user_id (75000 to 75005 is the new added user): '))
while target_user_id not in rating_data['user_id'].unique():
    target_user_id = int(input('Enter valid target user_id (75000 to 75005 is the new added user): '))

## Find rating_anime_matrix

In [130]:
rating_anime_matrix = raw_merged_data.pivot_table(index='user_id', columns=['anime_id'], values = 'user_rating', fill_value= -1)
rating_anime_matrix.head()

anime_id,1,5,6,7,15,16,18,19,20,22,...,32668,32681,32729,32828,32935,32998,33028,33558,34103,34240
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,-1,-1,-1,-1,-1,-1,-1,-1,8,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
5,-1,-1,8,-1,6,-1,6,-1,6,5,...,-1,-1,-1,7,-1,-1,-1,-1,-1,-1


## Find anime_rating_matrix

In [131]:
anime_rating_matrix = rating_anime_matrix.transpose()
anime_rating_matrix.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,47846,47847,47848,47849,75000,75001,75002,75003,75004,75005
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,6.0,9.0,-1.0,-1.0,-1.0,-1.0,-1.0
5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
6,-1.0,-1.0,-1.0,-1.0,8.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
7,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
15,-1.0,-1.0,-1.0,-1.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


## Find the information of target_user_id

In [132]:
anime_rating_user = anime_rating_matrix[target_user_id]
pd.DataFrame(anime_rating_user.sort_values(ascending= False)).rename(columns={target_user_id: f"user_id {target_user_id}'s rating"})

Unnamed: 0_level_0,user_id 75001's rating
anime_id,Unnamed: 1_level_1
5114,9.0
11061,9.0
1535,9.0
136,8.0
14719,8.0
...,...
3228,-1.0
3226,-1.0
3225,-1.0
3221,-1.0


## Find Correlation between target_user_id with all the other users

In [133]:
similar_users = anime_rating_matrix.corrwith(anime_rating_user).dropna()
similar_users

user_id
1       -0.002746
2       -0.001372
3        0.098112
5        0.115442
7        0.058415
           ...   
75001    1.000000
75002   -0.004323
75003    0.154151
75004   -0.005424
75005   -0.004732
Length: 45115, dtype: float64

## Sort and Format into Data Frame

In [134]:
sorted_similar_users = pd.DataFrame(similar_users, columns=['correlation']).sort_values(ascending= False, by= 'correlation')
sorted_similar_users

Unnamed: 0_level_0,correlation
user_id,Unnamed: 1_level_1
75001,1.000000
47354,0.740115
9639,0.620118
29006,0.565519
43533,0.565519
...,...
19593,-0.039100
10307,-0.039976
22987,-0.042602
10826,-0.046088


## Find the best correlated user

In [135]:
#most similar user is the user that at index number 1
most_similar_user = sorted_similar_users.index.values
most_similar_user = most_similar_user[1]
most_similar_user

47354

## Find 5 best rated anime from the user that have highest correlation value

In [136]:
#most similar user is the user that at index number 1
most_similar_user = sorted_similar_users.index.values
most_similar_user = most_similar_user[1]
most_similar_user

#sort value by rating
target_user = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)

user_recommendation = raw_merged_data[raw_merged_data['user_id'] == most_similar_user]
user_recommendation = user_recommendation.sort_values(by= 'user_rating', ascending= False)

#Trim the result exist in user's rating
user_recommendation = user_recommendation[~user_recommendation['anime_id'].isin(target_user['anime_id'].tolist())]

In [137]:
#validate the number of recommendation fulfill requirement, if not, move to the second highest correlation user
n = 1
while(len(user_recommendation) < 5):
    n = n + 1
    most_similar_user = sorted_similar_users.index.values
    most_similar_user = most_similar_user[n]
    most_similar_user
    
    target_user = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)
    user_recommendation = raw_merged_data[raw_merged_data['user_id'] == most_similar_user]
    user_recommendation = user_recommendation.sort_values(by= 'user_rating', ascending= False)
    user_recommendation = user_recommendation[~user_recommendation['anime_id'].isin(target_user['anime_id'].tolist())]
    
user_recommendation = user_recommendation[:numberOfResult]
user_recommendation

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
475925,38352,269,10,Bleach,"Action, Comedy, Shounen, Super Power, Supernat...",TV,366,7.95,624055
248491,38352,137,9,Hunter x Hunter OVA,"Action, Adventure, Shounen, Super Power",OVA,8,8.41,53168
42643,38352,20,-1,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
250216,38352,138,-1,Hunter x Hunter Greed Island,"Action, Adventure, Shounen, Super Power",OVA,8,8.33,57029
251904,38352,139,-1,Hunter x Hunter Greed Island Final,"Action, Adventure, Shounen, Super Power",OVA,14,8.41,55787


In [138]:
user_recommendation_list = user_recommendation['anime_id'].tolist()
user_recommendation_list

[269, 137, 20, 138, 139]

# PART 2: Item Based Correlation

## Use the user's highest rated anime to recommend other related anime

In [139]:
source_user_anime_rating = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)
pd.DataFrame(source_user_anime_rating.head(10))

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
1216237,75001,1535,9,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
2110435,75001,5114,9,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
3249376,75001,11061,9,Hunter x Hunter 2011,"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
247187,75001,136,8,Hunter x Hunter,"Action, Adventure, Shounen, Super Power",TV,62,8.48,166255
3721627,75001,14719,8,JoJo no Kimyou na Bouken TV,"Action, Adventure, Shounen, Supernatural, Vampire",TV,26,8.51,190197
4301560,75001,20899,8,JoJo no Kimyou na Bouken Stardust Crusaders,"Action, Adventure, Drama, Shounen, Supernatural",TV,24,8.25,120089
4648289,75001,26055,8,JoJo no Kimyou na Bouken Stardust Crusaders 2n...,"Action, Adventure, Drama, Shounen, Supernatural",TV,24,8.6,93657


In [140]:
top_rated_anime_id = source_user_anime_rating['anime_id'].tolist()[0]
top_rated_anime_id

1535

In [141]:
ratings = rating_anime_matrix[top_rated_anime_id]
pd.DataFrame(ratings).rename(columns={top_rated_anime_id: f"{top_rated_anime_id}'s rating"})

Unnamed: 0_level_0,1535's rating
user_id,Unnamed: 1_level_1
1,-1
2,-1
3,10
4,-1
5,4
...,...
75001,9
75002,-1
75003,-1
75004,-1


In [142]:
similar_animes = rating_anime_matrix.corrwith(ratings)
similar_animes = pd.DataFrame(similar_animes, columns=['correlation'])
similar_animes

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1,0.186201
5,0.111462
6,0.163407
7,0.062110
15,0.093198
...,...
32998,0.093589
33028,0.060904
33558,0.043390
34103,0.054228


## Identify the most correlated anime

In [143]:
sorted_similar_anime = pd.DataFrame(similar_animes, columns=['correlation']).sort_values(by= 'correlation', ascending= False)
sorted_similar_anime

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1535,1.000000
1575,0.389486
2904,0.372149
226,0.340129
5114,0.332932
...,...
9441,0.012667
1110,0.011173
6586,0.009670
521,0.008952


## Eliminate the source anime

In [144]:
sorted_similar_anime = sorted_similar_anime[1:]
sorted_similar_anime

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1575,0.389486
2904,0.372149
226,0.340129
5114,0.332932
16498,0.316129
...,...
9441,0.012667
1110,0.011173
6586,0.009670
521,0.008952


## Ensure the identified anime is popular
Higher number of votes means more popular

In [145]:
#Get number of rating for each anime
rating_votes = pd.DataFrame(raw_merged_data.groupby('anime_id')['user_rating'].count())
rating_votes=rating_votes.rename(columns={'user_rating': 'rating_count'})

KeyboardInterrupt: 

In [None]:
similar_animes_ratings = sorted_similar_anime.join(rating_votes['rating_count']).sort_values(by = 'correlation', ascending = False)
similar_animes_ratings

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444149,15504
2904,0.368395,15663
1575,0.363553,17718
9253,0.361184,13579
16498,0.340635,20603
...,...,...
2148,0.001416,1023
719,0.000911,689
521,0.000674,805
1110,-0.001450,521


## Get the anime that have higher votes and have higher correlation

In [None]:
similar_popular_animes = similar_animes_ratings.loc[similar_animes_ratings['rating_count']>=target_vote_number].dropna()
similar_popular_animes.head()

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444149,15504
2904,0.368395,15663
1575,0.363553,17718
9253,0.361184,13579
16498,0.340635,20603


In [None]:
#trim the result exist in user's rating
similar_popular_animes = similar_popular_animes[~similar_popular_animes.index.isin(target_user['anime_id'].tolist())]
similar_popular_animes.head()

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444149,15504
2904,0.368395,15663
16498,0.340635,20603
10087,0.339758,9925
1535,0.332932,25637


In [None]:
#make it a list
most_similar_popular_animes = similar_popular_animes[:numberOfResult]
most_similar_popular_animes_list = most_similar_popular_animes.index.to_list()
most_similar_popular_animes_list

[121, 2904, 16498, 10087, 1535]

# Result Showing

In [None]:
final_recommendation_result = user_recommendation_list.copy()
final_recommendation_result += most_similar_popular_animes_list.copy()
final_recommendation_result

[2904, 1535, 16498, 1210, 18679, 121, 2904, 16498, 10087, 1535]

## Show Source User's Rating

In [None]:
#target_user = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)
pd.DataFrame(source_user_anime_rating)

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
2110434,75000,5114,10,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2830802,75000,9253,10,SteinsGate,"Sci-Fi, Thriller",TV,24,9.17,673572
4989857,75000,32935,10,Haikyuu Karasuno Koukou VS Shiratorizawa Gakue...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
9407,75000,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1257730,75000,1575,9,Code Geass Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",TV,25,8.83,715151
3249375,75000,11061,9,Hunter x Hunter 2011,"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
4836678,75000,30276,9,One Punch Man,"Action, Comedy, Parody, Sci-Fi, Seinen, Super ...",TV,12,8.82,552458
247186,75000,136,8,Hunter x Hunter,"Action, Adventure, Shounen, Super Power",TV,62,8.48,166255
1411132,75000,2001,8,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962
1974841,75000,4565,8,Tengen Toppa Gurren Lagann Movie Lagannhen,"Action, Mecha, Sci-Fi, Space, Super Power",Movie,1,8.64,82253


## Show User Correlation Table

In [None]:
sorted_similar_users

Unnamed: 0_level_0,correlation
user_id,Unnamed: 1_level_1
75000,1.000000
4699,0.494599
8581,0.456285
29476,0.447517
39488,0.446689
...,...
34321,-0.040832
6361,-0.041038
37351,-0.041923
1497,-0.043427


## Show Most Similar User's Rating
- The user might not be the most correlated if the number of rated anime is less than 5

In [None]:
most_similar_user_df = raw_merged_data.loc[raw_merged_data['user_id'] == most_similar_user].sort_values(by= 'user_rating', ascending= False)
most_similar_user_df.head(10)

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
1616376,4699,2904,10,Code Geass Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
2818709,4699,9253,10,SteinsGate,"Sci-Fi, Thriller",TV,24,9.17,673572
1193075,4699,1535,10,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
1400088,4699,2001,9,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962
3915425,4699,16498,9,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
2095388,4699,5114,9,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
1241671,4699,1575,9,Code Geass Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",TV,25,8.83,715151
839,4699,1,8,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1097486,4699,1210,8,NHK ni Youkoso,"Comedy, Drama, Psychological, Romance",TV,24,8.4,291228
4130170,4699,18679,8,Kill la Kill,"Action, Comedy, School, Super Power",TV,24,8.23,508118


## Show Target Anime's information 

In [None]:
target_search_anime = anime_data.loc[anime_data['anime_id'] == top_rated_anime_id]
pd.DataFrame(target_search_anime)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
1,5114,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665


## Show Anime Correlation Table

In [None]:
most_similar_popular_animes_df = anime_data.loc[anime_data['anime_id'].isin(most_similar_popular_animes_list)]
most_similar_popular_animes_df

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
13,2904,Code Geass Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
40,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
94,10087,FateZero,"Action, Fantasy, Supernatural",TV,13,8.51,453630
200,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",TV,51,8.33,600384


## Show Final Recommendation Result
- The recommendation might not enought 10 if there is duplicated anime

In [None]:
#Drop the duplicated anime
recommended_anime = anime_data.loc[anime_data['anime_id'].isin(final_recommendation_result)]
recommended_anime.drop_duplicates(subset=['anime_id'], keep='first')

pd.DataFrame(recommended_anime)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
13,2904,Code Geass Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
40,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
94,10087,FateZero,"Action, Fantasy, Supernatural",TV,13,8.51,453630
156,1210,NHK ni Youkoso,"Comedy, Drama, Psychological, Romance",TV,24,8.4,291228
200,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",TV,51,8.33,600384
281,18679,Kill la Kill,"Action, Comedy, School, Super Power",TV,24,8.23,508118


In [None]:
end_time = datetime.datetime.now()
# Calculate the time elapsed
elapsed_time = end_time - start_time

# Print the elapsed time in seconds
print("Elapsed time in seconds:", elapsed_time.total_seconds())

Elapsed time in seconds: 55.520153
