## Import Library

In [1]:
#pip install pandas
import pandas as pd
import numpy as np
import string
import re
import datetime

## Variables

In [2]:
start_time = datetime.datetime.now()
target_user_id = 0
target_vote_number = 2000
numberOfResult = 5

## Loading Dataset
- Loading anime.csv, rating.csv and newRating.csv into DataFrame

In [3]:
anime_data = pd.read_csv('anime.csv')
anime_data = pd.DataFrame(anime_data)
anime_data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [4]:
rating_data = pd.read_csv('rating.csv')
rating_data = pd.DataFrame(rating_data)
rating_data = rating_data[:5000000]

new_rating_data = pd.read_csv('newRating.csv')
rating_data = pd.DataFrame(rating_data)

rating_data = pd.concat([rating_data, new_rating_data])
rating_data

Unnamed: 0,user_id,anime_id,rating
0,1.0,20.0,-1.0
1,1.0,24.0,-1.0
2,1.0,79.0,-1.0
3,1.0,226.0,-1.0
4,1.0,241.0,-1.0
...,...,...,...
113,75009.0,18617.0,5.0
114,75009.0,25781.0,9.0
115,75009.0,5081.0,7.0
116,75009.0,29755.0,8.0


# Data Preprocessing

## Change variable type to appropraite data type

In [5]:
#anime file datatype
anime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [6]:
anime_data['episodes'] = anime_data['episodes'].replace('Unknown', -1)
anime_data = anime_data.astype({"name":"string","genre":"string","type":"string","episodes":"int64"})
anime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  string 
 2   genre     12232 non-null  string 
 3   type      12269 non-null  string 
 4   episodes  12294 non-null  int64  
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(3), string(3)
memory usage: 672.5 KB


In [7]:
#rating file datatype
rating_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000118 entries, 0 to 117
Data columns (total 3 columns):
 #   Column    Dtype  
---  ------    -----  
 0   user_id   float64
 1   anime_id  float64
 2   rating    float64
dtypes: float64(3)
memory usage: 152.6 MB


In [8]:
rating_data = rating_data.replace([np.inf, -np.inf, np.nan], 0)
rating_data = rating_data.astype({"user_id":"int64", "anime_id":"int64", "rating":"int64"})
rating_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000118 entries, 0 to 117
Data columns (total 3 columns):
 #   Column    Dtype
---  ------    -----
 0   user_id   int64
 1   anime_id  int64
 2   rating    int64
dtypes: int64(3)
memory usage: 152.6 MB


## Rename column

In [9]:
anime_data = anime_data.rename(columns={"rating": "average_rating"})
anime_data.head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [10]:
rating_data = rating_data.rename(columns={"rating": "user_rating"})
rating_data.head(3)

Unnamed: 0,user_id,anime_id,user_rating
0,1,20,-1
1,1,24,-1
2,1,79,-1


## Check Missing Value

In [11]:
rating_data.isna().sum()

user_id        0
anime_id       0
user_rating    0
dtype: int64

In [12]:
anime_data.isna().sum()

anime_id            0
name                0
genre              62
type               25
episodes            0
average_rating    230
members             0
dtype: int64

In [13]:
anime_data.dropna(axis=0, inplace= True)
anime_data.isna().sum()

anime_id          0
name              0
genre             0
type              0
episodes          0
average_rating    0
members           0
dtype: int64

## Check Duplicates

In [14]:
#check duplicate anime
duplicated_anime = anime_data[anime_data.duplicated()].shape[0]
print(f'count of duplicated anime: {duplicated_anime}')

count of duplicated anime: 0


In [15]:
#check duplicate rating
duplicated_rating = rating_data[rating_data.duplicated()].shape[0]
print(f'count of duplicated rating: {duplicated_rating}')

count of duplicated rating: 2


In [16]:
rating_data.drop_duplicates(keep='first',inplace=True)
duplicated_rating = rating_data[rating_data.duplicated()].shape[0]
print(f'count of duplicated rating after removing: {duplicated_rating}')

count of duplicated rating after removing: 0


## Clean Text

In [17]:
def clean_text(text):
    # remove sybmols and other words
    text = re.sub(r'<[^>]*>', '', text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
  
    # remove punctuation
    #text = text.translate(str.maketrans('', '', string.punctuation))
    return text

In [18]:
#before clean text
pd.DataFrame(anime_data.loc[anime_data['anime_id'] == 9969])

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [19]:
anime_data.loc[:, 'name'] = anime_data['name'].apply(clean_text)

In [20]:
#after clean text 28977 9969 15417
pd.DataFrame(anime_data.loc[anime_data['anime_id'] == 9969])

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
4,9969,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


## Merge Dataset by anime_id

In [21]:
raw_merged_data = pd.merge(rating_data, anime_data, on='anime_id', sort = True)
raw_merged_data.sort_values(by='anime_id', ascending= False)

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
5000030,13954,34475,6,Monster Strike: Rain of Memories,"Action, Fantasy, Game",ONA,1,6.58,313
5000029,47765,34367,6,Long Riders Recap,"Comedy, Shounen, Slice of Life, Sports",Special,1,5.97,582
5000028,30565,34367,6,Long Riders Recap,"Comedy, Shounen, Slice of Life, Sports",Special,1,5.97,582
5000027,47421,34349,-1,Diamond no Ace: Second Season OVA,"Comedy, School, Shounen, Sports",OVA,2,7.25,1885
5000026,40821,34349,-1,Diamond no Ace: Second Season OVA,"Comedy, School, Shounen, Sports",OVA,2,7.25,1885
...,...,...,...,...,...,...,...,...,...
6272,29917,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6271,29903,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6270,29899,1,8,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
6269,29895,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824


## Drop Unnessary Row
Drop the row that less than rating number's mean value

In [22]:
#get number of rating for each anime
grouped_data = pd.DataFrame(raw_merged_data.groupby('anime_id')['user_rating'].count()).sort_values(by='user_rating', ascending= False)
grouped_data.describe()

Unnamed: 0,user_rating
count,9897.0
mean,505.206729
std,1420.249029
min,1.0
25%,7.0
50%,45.0
75%,305.0
max,25637.0


In [23]:
#get the anime that fulfilled the constaint which is more than mean value
trimmed_data = grouped_data[grouped_data['user_rating'] > grouped_data.mean()[0]]
#trim the data
raw_merged_data = raw_merged_data.loc[raw_merged_data['anime_id'].isin(trimmed_data.index.values.tolist())]
raw_merged_data

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
0,13,1,-1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1,19,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
2,21,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
3,23,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
4,32,1,10,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
...,...,...,...,...,...,...,...,...,...
4999977,47536,34240,10,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999978,47678,34240,-1,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999979,47701,34240,-1,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
4999980,47702,34240,7,Shelter,"Music, Sci-Fi",Music,1,8.38,71136


# PART 1: User Based Correlation

## Read target user_id from user

In [24]:
target_user_id = int(input('Enter target user_id (75000 to 75009 is the new added user): '))
while target_user_id not in rating_data['user_id'].unique():
    target_user_id = int(input('Enter valid target user_id (75000 to 75009 is the new added user): '))

## Find rating_anime_matrix

In [25]:
rating_anime_matrix = raw_merged_data.pivot_table(index='user_id', columns=['anime_id'], values = 'user_rating', fill_value= -1)
rating_anime_matrix.head()

anime_id,1,5,6,7,15,16,18,19,20,22,...,32668,32681,32729,32828,32935,32998,33028,33558,34103,34240
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,-1,-1,-1,-1,-1,-1,-1,-1,8,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
5,-1,-1,8,-1,6,-1,6,-1,6,5,...,-1,-1,-1,7,-1,-1,-1,-1,-1,-1


## Find anime_rating_matrix

In [26]:
anime_rating_matrix = rating_anime_matrix.transpose()
anime_rating_matrix.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,75000,75001,75002,75003,75004,75005,75006,75007,75008,75009
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,9.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
6,-1.0,-1.0,-1.0,-1.0,8.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
7,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
15,-1.0,-1.0,-1.0,-1.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


## Find the information of target_user_id

In [27]:
anime_rating_user = anime_rating_matrix[target_user_id]
pd.DataFrame(anime_rating_user.sort_values(ascending= False)).rename(columns={target_user_id: f"user_id {target_user_id}'s rating"})

Unnamed: 0_level_0,user_id 75000's rating
anime_id,Unnamed: 1_level_1
5114,10.0
9253,10.0
32935,10.0
1,9.0
11061,9.0
...,...
3228,-1.0
3226,-1.0
3225,-1.0
3221,-1.0


## Find Correlation between target_user_id with all the other users

In [28]:
similar_users = anime_rating_matrix.corrwith(anime_rating_user).dropna()
similar_users

user_id
1       -0.003736
2       -0.001867
3        0.058397
5        0.058647
7        0.044381
           ...   
75005   -0.006439
75006    0.322018
75007   -0.005257
75008    0.258323
75009    0.254479
Length: 45119, dtype: float64

## Sort and Format into Data Frame

In [29]:
sorted_similar_users = pd.DataFrame(similar_users, columns=['correlation']).sort_values(ascending= False, by= 'correlation')
sorted_similar_users

Unnamed: 0_level_0,correlation
user_id,Unnamed: 1_level_1
75000,1.000000
4699,0.494599
8581,0.456285
29476,0.447517
39488,0.446689
...,...
34321,-0.040832
6361,-0.041038
37351,-0.041923
1497,-0.043427


## Find the best correlated user

In [30]:
#most similar user is the user that at index number 1
most_similar_user = sorted_similar_users.index.values
most_similar_user = most_similar_user[1]
most_similar_user

4699

## Find 5 best rated anime from the user that have highest correlation value

In [31]:
#most similar user is the user that at index number 1
most_similar_user = sorted_similar_users.index.values
most_similar_user = most_similar_user[1]

#sort value by rating
user_recommendation = raw_merged_data[raw_merged_data['user_id'] == most_similar_user]
user_recommendation = user_recommendation.sort_values(by= 'user_rating', ascending= False)

#Find target_user's rated anime
target_user = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)

#Trim the result exist in user's rating
user_recommendation = user_recommendation[~user_recommendation['anime_id'].isin(target_user['anime_id'].tolist())]

In [32]:
#validate the number of recommendation fulfill requirement, if not, move to the second highest correlation user
n = 1
while(len(user_recommendation) < 5):
    n = n + 1
    #most similar user is the user that at index number n
    most_similar_user = sorted_similar_users.index.values
    most_similar_user = most_similar_user[n]

    #sort value by rating
    user_recommendation = raw_merged_data[raw_merged_data['user_id'] == most_similar_user]
    user_recommendation = user_recommendation.sort_values(by= 'user_rating', ascending= False)

    #Find target_user's rated anime
    target_user = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)

    #Trim the result exist in user's rating
    user_recommendation = user_recommendation[~user_recommendation['anime_id'].isin(target_user['anime_id'].tolist())]
    
user_recommendation = user_recommendation[:numberOfResult]
user_recommendation

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
1616383,4699,2904,10,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
1193081,4699,1535,10,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
3915445,4699,16498,9,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
1097492,4699,1210,8,NHK ni Youkoso!,"Comedy, Drama, Psychological, Romance",TV,24,8.4,291228
4130192,4699,18679,8,Kill la Kill,"Action, Comedy, School, Super Power",TV,24,8.23,508118


In [33]:
user_recommendation_list = user_recommendation['anime_id'].tolist()
user_recommendation_list

[2904, 1535, 16498, 1210, 18679]

# PART 2: Item Based Correlation

## Use the user's highest rated anime to recommend other related anime

In [34]:
source_user_anime_rating = raw_merged_data.loc[raw_merged_data['user_id'] == target_user_id].sort_values(by= 'user_rating', ascending= False)
pd.DataFrame(source_user_anime_rating.head(10))

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
2110444,75000,5114,10,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2830815,75000,9253,10,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4989894,75000,32935,10,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
9407,75000,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1257736,75000,1575,9,Code Geass: Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",TV,25,8.83,715151
3249393,75000,11061,9,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
4836711,75000,30276,9,One Punch Man,"Action, Comedy, Parody, Sci-Fi, Seinen, Super ...",TV,12,8.82,552458
247188,75000,136,8,Hunter x Hunter,"Action, Adventure, Shounen, Super Power",TV,62,8.48,166255
1411138,75000,2001,8,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962
1974850,75000,4565,8,Tengen Toppa Gurren Lagann Movie: Lagann-hen,"Action, Mecha, Sci-Fi, Space, Super Power",Movie,1,8.64,82253


In [35]:
top_rated_anime_id = source_user_anime_rating['anime_id'].tolist()[0]
top_rated_anime_id

5114

In [36]:
ratings = rating_anime_matrix[top_rated_anime_id]
pd.DataFrame(ratings).rename(columns={top_rated_anime_id: f"{top_rated_anime_id}'s rating"})

Unnamed: 0_level_0,5114's rating
user_id,Unnamed: 1_level_1
1,-1
2,-1
3,10
4,-1
5,-1
...,...
75005,-1
75006,8
75007,-1
75008,-1


In [37]:
similar_animes = rating_anime_matrix.corrwith(ratings)
similar_animes = pd.DataFrame(similar_animes, columns=['correlation'])
similar_animes

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
1,0.240698
5,0.136156
6,0.207397
7,0.062925
15,0.111955
...,...
32998,0.114893
33028,0.074868
33558,0.079880
34103,0.069608


## Identify the most correlated anime

In [38]:
sorted_similar_anime = pd.DataFrame(similar_animes, columns=['correlation']).sort_values(by= 'correlation', ascending= False)
sorted_similar_anime

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
5114,1.000000
121,0.444110
2904,0.368360
1575,0.363516
9253,0.361173
...,...
2148,0.001413
719,0.000908
521,0.000672
1110,-0.001451


## Eliminate the source anime

In [39]:
sorted_similar_anime = sorted_similar_anime[1:]
sorted_similar_anime

Unnamed: 0_level_0,correlation
anime_id,Unnamed: 1_level_1
121,0.444110
2904,0.368360
1575,0.363516
9253,0.361173
16498,0.340596
...,...
2148,0.001413
719,0.000908
521,0.000672
1110,-0.001451


## Ensure the identified anime is popular
Higher number of votes means more popular

In [40]:
#Get number of rating for each anime
rating_votes = pd.DataFrame(raw_merged_data.groupby('anime_id')['user_rating'].count())
rating_votes=rating_votes.rename(columns={'user_rating': 'rating_count'})

In [41]:
similar_animes_ratings = sorted_similar_anime.join(rating_votes['rating_count']).sort_values(by = 'correlation', ascending = False)
similar_animes_ratings

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444110,15504
2904,0.368360,15663
1575,0.363516,17718
9253,0.361173,13581
16498,0.340596,20603
...,...,...
2148,0.001413,1023
719,0.000908,689
521,0.000672,805
1110,-0.001451,521


## Get the anime that have higher votes and have higher correlation

In [42]:
similar_popular_animes = similar_animes_ratings.loc[similar_animes_ratings['rating_count']>=target_vote_number].dropna()
similar_popular_animes

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444110,15504
2904,0.368360,15663
1575,0.363516,17718
9253,0.361173,13581
16498,0.340596,20603
...,...,...
4814,0.018440,3137
2923,0.017166,2799
149,0.009796,2572
243,0.008719,2261


In [43]:
#trim the result exist in user's rating
similar_popular_animes = similar_popular_animes[~similar_popular_animes.index.isin(target_user['anime_id'].tolist())]
similar_popular_animes

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444110,15504
2904,0.368360,15663
16498,0.340596,20603
10087,0.339668,9926
1535,0.332886,25637
...,...,...
4814,0.018440,3137
2923,0.017166,2799
149,0.009796,2572
243,0.008719,2261


In [44]:
#make it a list
most_similar_popular_animes = similar_popular_animes[:numberOfResult]
most_similar_popular_animes_list = most_similar_popular_animes.index.to_list()
most_similar_popular_animes_list

[121, 2904, 16498, 10087, 1535]

# Result Showing

In [45]:
#combine the user-based and item-based result
final_recommendation_result = user_recommendation_list.copy()
final_recommendation_result += most_similar_popular_animes_list.copy()
final_recommendation_result

[2904, 1535, 16498, 1210, 18679, 121, 2904, 16498, 10087, 1535]

## Show Source User's Rating

In [46]:
pd.DataFrame(source_user_anime_rating)

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
2110444,75000,5114,10,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2830815,75000,9253,10,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4989894,75000,32935,10,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
9407,75000,1,9,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
1257736,75000,1575,9,Code Geass: Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",TV,25,8.83,715151
3249393,75000,11061,9,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
4836711,75000,30276,9,One Punch Man,"Action, Comedy, Parody, Sci-Fi, Seinen, Super ...",TV,12,8.82,552458
247188,75000,136,8,Hunter x Hunter,"Action, Adventure, Shounen, Super Power",TV,62,8.48,166255
1411138,75000,2001,8,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27,8.78,562962
1974850,75000,4565,8,Tengen Toppa Gurren Lagann Movie: Lagann-hen,"Action, Mecha, Sci-Fi, Space, Super Power",Movie,1,8.64,82253


## Show User Correlation Table

In [47]:
sorted_similar_users

Unnamed: 0_level_0,correlation
user_id,Unnamed: 1_level_1
75000,1.000000
4699,0.494599
8581,0.456285
29476,0.447517
39488,0.446689
...,...
34321,-0.040832
6361,-0.041038
37351,-0.041923
1497,-0.043427


## Show Most Similar User's Rating
- The user might not be the most correlated if the number of rated anime is less than 5

In [48]:
most_similar_user_df = raw_merged_data.loc[raw_merged_data['user_id'] == most_similar_user].sort_values(by= 'user_rating', ascending= False)
most_similar_user_df.head(10)
user_recommendation

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,average_rating,members
1616383,4699,2904,10,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
1193081,4699,1535,10,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
3915445,4699,16498,9,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
1097492,4699,1210,8,NHK ni Youkoso!,"Comedy, Drama, Psychological, Romance",TV,24,8.4,291228
4130192,4699,18679,8,Kill la Kill,"Action, Comedy, School, Super Power",TV,24,8.23,508118


## Show Target Anime's information 
- Anime is picked from the source user's top rating

In [49]:
target_search_anime = anime_data.loc[anime_data['anime_id'] == top_rated_anime_id]
pd.DataFrame(target_search_anime)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665


## Show Anime Correlation Table

In [50]:
similar_popular_animes

Unnamed: 0_level_0,correlation,rating_count
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
121,0.444110,15504
2904,0.368360,15663
16498,0.340596,20603
10087,0.339668,9926
1535,0.332886,25637
...,...,...
4814,0.018440,3137
2923,0.017166,2799
149,0.009796,2572
243,0.008719,2261


In [51]:
# Item-based recommended result
most_similar_popular_animes_df = anime_data.loc[anime_data['anime_id'].isin(most_similar_popular_animes_list)]
most_similar_popular_animes_df

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
13,2904,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
40,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
94,10087,Fate/Zero,"Action, Fantasy, Supernatural",TV,13,8.51,453630
200,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",TV,51,8.33,600384


## Show Final Recommendation Result
- The recommendation might not enought 10 if there is duplicated anime

In [52]:
#Drop the duplicated anime
recommended_anime = anime_data.loc[anime_data['anime_id'].isin(final_recommendation_result)]
recommended_anime.drop_duplicates(subset=['anime_id'], keep='first')

pd.DataFrame(recommended_anime)

Unnamed: 0,anime_id,name,genre,type,episodes,average_rating,members
13,2904,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888
40,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",TV,37,8.71,1013917
86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
94,10087,Fate/Zero,"Action, Fantasy, Supernatural",TV,13,8.51,453630
156,1210,NHK ni Youkoso!,"Comedy, Drama, Psychological, Romance",TV,24,8.4,291228
200,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",TV,51,8.33,600384
281,18679,Kill la Kill,"Action, Comedy, School, Super Power",TV,24,8.23,508118


In [53]:
end_time = datetime.datetime.now()
# Calculate the time elapsed
elapsed_time = end_time - start_time

# Print the elapsed time in seconds
print("Elapsed time in seconds:", elapsed_time.total_seconds())

Elapsed time in seconds: 60.498275
