In [1]:
# Import relevant libraries 

import pandas as pd
import numpy as np


from sklearn.metrics import ndcg_score
from sklearn.metrics import mean_absolute_error
from surprise import accuracy
from surprise import Reader
from surprise import KNNWithMeans
from surprise import Dataset
from surprise.model_selection import KFold
from scipy import sparse

In [2]:
anime = pd.read_csv('dataset/anime.csv')
rating = pd.read_csv('dataset/rating_fix.csv')
anime

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [3]:
rating.rating.replace({-1: np.nan}, regex=True, inplace = True)
rating

Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,
...,...,...,...
7813732,73515,16512,7.0
7813733,73515,17187,9.0
7813734,73515,22145,10.0
7813735,73516,790,9.0


In [4]:
# Join the two dataframes on the anime_id columns

merged = rating.merge(anime, left_on = 'anime_id', right_on = 'anime_id', suffixes= ['_user', ''])
merged.head()

Unnamed: 0,user_id,anime_id,rating_user,name,genre,type,episodes,rating,members
0,1,20,,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
1,3,20,8.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
2,5,20,6.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
3,6,20,,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
4,10,20,,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297


In [5]:
merged.rename(columns = {'rating_user':'user_rating'}, inplace = True)

In [6]:
merged=merged[['user_id','anime_id', 'user_rating', 'name']]
merged.rename(columns = {'user_rating': 'rating'}, inplace = True)
merged_sub = merged[merged.user_id < 200]

In [7]:
merged_sub

Unnamed: 0,user_id,anime_id,rating,name
0,1,20,,Naruto
1,3,20,8.0,Naruto
2,5,20,6.0,Naruto
3,6,20,,Naruto
4,10,20,,Naruto
...,...,...,...,...
7023433,198,31890,,Nurse Witch Komugi-chan R
7023550,198,32214,,Koukaku no Pandora
7023933,198,32313,,Concrete Revolutio: Choujin Gensou - The Last ...
7024268,198,32526,,Love Live! Sunshine!!


In [8]:
merged_sub = merged_sub[~merged_sub.rating.isnull()]
merged_sub = merged_sub.reset_index(drop=True)
merged_sub

Unnamed: 0,user_id,anime_id,rating,name
0,3,20,8.0,Naruto
1,5,20,6.0,Naruto
2,21,20,8.0,Naruto
3,28,20,9.0,Naruto
4,34,20,9.0,Naruto
...,...,...,...,...
12260,196,254,8.0,Jungle wa Itsumo Hare nochi Guu Deluxe
12261,196,1279,7.0,Taiyou no Ko Esteban
12262,196,10765,8.0,Hikaru no Go Special
12263,198,15807,10.0,Ro-Kyu-Bu!: Tomoka no Ichigo Sundae


In [9]:
# film yang pernah dirating user 20
merged_sub[merged_sub.user_id == 20]

Unnamed: 0,user_id,anime_id,rating,name
485,20,6205,10.0,Kämpfer
508,20,6500,10.0,Seikon no Qwaser
579,20,6682,9.0,11eyes
692,20,7739,10.0,11eyes: Momoiro Genmutan
1003,20,10073,10.0,Seikon no Qwaser II
1006,20,10076,10.0,Kämpfer für die Liebe
3553,20,10620,10.0,Mirai Nikki (TV)
4022,20,249,10.0,InuYasha
4659,20,5680,10.0,K-On!
5319,20,12581,10.0,Asa made Jugyou Chu!


In [10]:
def get_ndcg(predictions, k_highest_scores=None):
    
    uids = [int(p.uid) for p in predictions ]
    iids = [int(p.iid) for p in predictions ]
    r_uis = [p.r_ui for p in predictions ]
    ests = [p.est for p in predictions ]
    
    assert(len(uids) == len(iids) == len(r_uis) == len(ests) )    
    
    sparse_preds = sparse.coo_matrix( (ests, (uids , iids )) )
    sparse_vals = sparse.coo_matrix( (r_uis, (uids , iids )) )
    
    dense_preds = sparse_preds.toarray()
    dense_vals = sparse_vals.toarray()
    
    return ndcg_score(y_true= dense_vals , y_score= dense_preds, k=k_highest_scores)

In [11]:
# fungsi untuk menampilkan daftar film anime yang telah ditonton oleh user
# dan menampilkan sejumlah film anime rekomendasi hasil dari keluaran proses Collaborative Filtering 
# item-based maupun user-based
def recommend_movies(user, num_recommended_movies, data_train, prediction):
    print('The list of the Movies that (ID User:{}) Has Watched \n'.format(user))
    datas = data_train.df
    index_datas = datas.index[datas['user_id'] == user]
    datas = datas.loc[index_datas]
    
    for i in range(len(datas)):
        data = datas.iloc[i]
        
        if(data['user_id'] == user):
            index_anime = merged_sub.index[merged_sub['anime_id'] == data['anime_id']]
            data_anime = merged_sub.loc[index_anime]
            print('({}) {} - Rating:{}'.format(data_anime['anime_id'].iloc[0], data_anime['name'].iloc[0], data_anime['rating'].iloc[0]))
    
    x = 0
    global list_recom
    list_recom = []
    print('\n')        
    print('The list of the Recommended Movies for (ID User:{}) \n'.format(user))

    for i in range(len(prediction)):
        user_id = prediction[i].uid

        if(user_id == user):
            anime_id = prediction[i].iid
            details = prediction[i].details

            if (details['was_impossible'] == False):
                
                
                if(x < num_recommended_movies):
                    predicted_rating = prediction[i].est
                    index_anime = merged_sub.index[merged_sub['anime_id'] == anime_id]
                    try:
                        x += 1
                        data_anime = merged_sub.loc[index_anime]
                        recom = [data_anime['anime_id'].iloc[0], data_anime['name'].iloc[0], predicted_rating]
                        list_recom.append(recom)
                    except:
                        continue
 
    if(x == 0):
        print('No Movies to recommended for (ID User:{}) \n'.format(user))
    else:
        print('\n');
        list_recom.sort(key=lambda row: (row[2], row[0], row[1]), reverse=True)
        j = 1
        for anime_id, name, pred_rating in list_recom:
            print('{}. ({}) {} - Predicted Rating:{:.1f}'.format(str(j), anime_id, name, pred_rating))
            j += 1

In [12]:
# fungsi untuk menampilkan daftar film anime yang memiliki similarity dengan film anime yang ditentukan
# dan menampilkan sejumlah film anime rekomendasi hasil dari keluaran proses Collaborative Filtering 
# item-based
def recommend_movies_by_anime_id(anime_idx, num_recommended_movies, datas, prediction):
    list_userid = []
    print('The list of the ID User that Has Watched this (Anime ID: {})'.format(anime_idx))

    index_datas = datas.index[datas['anime_id'] == anime_idx]
 
    datas = datas.loc[index_datas]
    data = datas.iloc[0]

    index_anime = merged_sub.index[merged_sub['anime_id'] == anime_idx]
    data_anime = merged_sub.loc[index_anime]
    print('({}) {} - Rating:{}\n'.format(data_anime['anime_id'].iloc[0], data_anime['name'].iloc[0], data_anime['rating'].iloc[0]))
    
    for i in range(len(datas)):
        data = datas.iloc[i]
        
        if(data['anime_id'] == anime_idx):
            list_userid.append(data['user_id'])

    
    x = 0
    list_recom = []
     
    print('The list of the Recommended Movies that similarity to (Anime ID: {}) \n'.format(anime_idx))
    
    for i in range(len(prediction)):
        user_id = prediction[i].uid

        if(user_id in list_userid):

            anime_id = prediction[i].iid
            details = prediction[i].details
            
            if (details['was_impossible'] == False):
                
                
                if(x < num_recommended_movies):
                    predicted_rating = prediction[i].est
                    index_anime = merged_sub.index[merged_sub['anime_id'] == anime_id]
                    try:
                        x += 1
                        data_anime = merged_sub.loc[index_anime]
                        recom = [data_anime['anime_id'].iloc[0], data_anime['name'].iloc[0], predicted_rating]
                        list_recom.append(recom)
                    except:
                        continue
 
    if(x == 0):
        print('No Movies to recommended that similarity to (Anime ID: {}) \n'.format(anime_idx))
    else:

        list_recom.sort(key=lambda row: (row[2], row[0], row[1]), reverse=True)

        j = 0
        for anime_id, name, pred_rating in list_recom:
            print('{}. ({}) {} - Predicted Rating:{:.1f}'.format(str(j), anime_id, name, pred_rating))
            j += 1
            
    print('\n') 

In [13]:
# mendapatkan daftar rating dan daftar estimasi score dari film anime (sesuai id-nya) 
# dari hasil prediksi Collaborative Filtering
# untuk NDCG dan MAE

def get_predictions_and_rating(iid, prediction):
    global list_rating
    global list_score_est
    list_rating = []
    list_score_est = []
    
    for i in range(len(prediction)):
        anime_id = prediction[i].iid
        details = prediction[i].details

        if (anime_id == iid):
            predicted_rating = prediction[i].r_ui
            score_est = prediction[i].est
            
            list_rating.append(predicted_rating)
            list_score_est.append(score_est)
            
    return list_rating, list_score_est



In [14]:
#testing data
rating_dataset = merged_sub.copy()
rating_dataset = rating_dataset[["user_id","anime_id",  "rating"]]
reader = Reader(line_format='user item rating', rating_scale=(1, 10))
test_rating =Dataset.load_from_df(rating_dataset,reader)
anti_set = test_rating.build_full_trainset().build_anti_testset()

In [15]:
best_prediction_MAE= None
best_prediction_NDCG= None
best_prediction= None
best_MAE= 2
best_NDCG= 0
MAE_prediction = 2
NDCG_prediction= 0
kf = KFold(n_splits=5)
sim_options = {
    'name':'cosine',
    "user_based": False,  # Compute similarities between users
}
algo = KNNWithMeans(k=5,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE :
            best_MAE= Hasil_MAE
            best_algo_MAE = algo
            MAE_prediction = Hasil_MAE
    if score > best_NDCG :
        best_NDCG= score
        best_algo_NDCG = algo
        NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1681
NDCG Score 0.7982
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1547
NDCG Score 0.7946
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1625
NDCG Score 0.8050
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1739
NDCG Score 0.7748
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1711
NDCG Score 0.8030


In [16]:
print('MAE Score {:.4f}'.format(best_MAE))

MAE Score 1.1547


In [17]:
print('NDC Score {:.4f}'.format(best_NDCG))


NDC Score 0.8050


In [18]:
best_MAE2= 2
best_NDCG2= 0
kf = KFold(n_splits=5)
sim_options = {
    'name':'cosine',
    "user_based": True,  # Compute similarities between users
}
algo = KNNWithMeans(k=5,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE2 :
            best_MAE2= Hasil_MAE
            if best_MAE2 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG2 :
        best_NDCG2= score
        if best_NDCG2 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1110
NDCG Score 0.7881
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0815
NDCG Score 0.7849
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0672
NDCG Score 0.8003
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0683
NDCG Score 0.7794
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0752
NDCG Score 0.8300


In [19]:
print('MAE Score {:.4f}'.format(best_MAE2))

MAE Score 1.0672


In [20]:
print('NDC Score {:.4f}'.format(best_NDCG2))


NDC Score 0.8300


In [21]:
best_MAE3= 2
best_NDCG3= 0
kf = KFold(n_splits=10)
sim_options = {
    'name':'cosine',
    "user_based": False,  # Compute similarities between users
}
algo = KNNWithMeans(k=5,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE3 :
            best_MAE3= Hasil_MAE
            if best_MAE3 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG3 :
        best_NDCG3= score
        if best_NDCG3 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1472
NDCG Score 0.7165
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2037
NDCG Score 0.7305
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1195
NDCG Score 0.7261
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1557
NDCG Score 0.7544
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1934
NDCG Score 0.7436
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1421
NDCG Score 0.7247
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0932
NDCG Score 0.7586
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2056
NDCG Score 0.7264
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1370
NDCG Score 0.7085
Computing the cosine similarity matri

In [22]:
print('MAE Score {:.4f}'.format(best_MAE3))

MAE Score 1.0932


In [23]:
print('NDC Score {:.4f}'.format(best_NDCG3))

NDC Score 0.7586


In [24]:
best_MAE4= 2
best_NDCG4= 0
kf = KFold(n_splits=10)
sim_options = {
    'name':'cosine',
    "user_based": True,  # Compute similarities between users
}
algo = KNNWithMeans(k=5,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE4 :
            best_MAE4= Hasil_MAE
            if best_MAE4 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG4 :
        best_NDCG4= score
        if best_NDCG4 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0703
NDCG Score 0.6994
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1029
NDCG Score 0.7502
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1102
NDCG Score 0.7576
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0330
NDCG Score 0.7001
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0660
NDCG Score 0.7134
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0464
NDCG Score 0.7178
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0557
NDCG Score 0.7262
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0576
NDCG Score 0.7325
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1235
NDCG Score 0.7471
Computing the cosine similarity matri

In [25]:
print('MAE Score {:.4f}'.format(best_MAE4))

MAE Score 1.0270


In [26]:
print('NDC Score {:.4f}'.format(best_NDCG4))

NDC Score 0.7576


In [27]:
best_MAE5= 2
best_NDCG5= 0
kf = KFold(n_splits=10)
sim_options = {
    'name':'cosine',
    "user_based": False,  # Compute similarities between users
}
algo = KNNWithMeans(k=50,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE5 :
            best_MAE5= Hasil_MAE
            if best_MAE5 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG5 :
        best_NDCG5= score
        if best_NDCG5 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1125
NDCG Score 0.7273
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0862
NDCG Score 0.7082
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0712
NDCG Score 0.6972
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1299
NDCG Score 0.7145
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0855
NDCG Score 0.7390
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0695
NDCG Score 0.7411
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0829
NDCG Score 0.7433
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0814
NDCG Score 0.7299
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1039
NDCG Score 0.7443
Computing the cosine similarity matri

In [28]:
print('MAE Score {:.4f}'.format(best_MAE5))

MAE Score 1.0467


In [29]:
print('NDC Score {:.4f}'.format(best_NDCG5))

NDC Score 0.7443


In [30]:
best_MAE6= 2
best_NDCG6= 0
kf = KFold(n_splits=10)
sim_options = {
    'name':'cosine',
    "user_based": True,  # Compute similarities between users
}
algo = KNNWithMeans(k=50,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE6 :
            best_MAE6= Hasil_MAE
            if best_MAE6 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG6 :
        best_NDCG6= score
        if best_NDCG6 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0643
NDCG Score 0.7146
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0591
NDCG Score 0.7294
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0492
NDCG Score 0.7488
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0191
NDCG Score 0.7227
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0747
NDCG Score 0.7042
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0735
NDCG Score 0.7285
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0549
NDCG Score 0.7398
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0765
NDCG Score 0.7381
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0509
NDCG Score 0.7241
Computing the cosine similarity matri

In [31]:
print('MAE Score {:.4f}'.format(best_MAE6))

MAE Score 1.0191


In [32]:
print('NDC Score {:.4f}'.format(best_NDCG6))

NDC Score 0.7488


In [33]:
best_MAE7= 2
best_NDCG7= 0
kf = KFold(n_splits=5)
sim_options = {
    'name':'cosine',
    "user_based": False,  # Compute similarities between users
}
algo = KNNWithMeans(k=50,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE7 :
            best_MAE7= Hasil_MAE
            if best_MAE7 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG7 :
        best_NDCG7= score
        if best_NDCG7 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1139
NDCG Score 0.8063
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1058
NDCG Score 0.8058
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0728
NDCG Score 0.8017
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1056
NDCG Score 0.8274
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.1283
NDCG Score 0.8051


In [34]:
print('MAE Score {:.4f}'.format(best_MAE7))

MAE Score 1.0728


In [35]:
print('NDC Score {:.4f}'.format(best_NDCG7))

NDC Score 0.8274


In [36]:
best_MAE8= 2
best_NDCG8= 0
kf = KFold(n_splits=5)
sim_options = {
    'name':'cosine',
    "user_based": True,  # Compute similarities between users
}
algo = KNNWithMeans(k=50,sim_options=sim_options)
for trainset, testset in kf.split(test_rating):
    algo.fit(trainset)
    prediction = algo.test(testset)
    Hasil_MAE = accuracy.mae(prediction, verbose=True)
    score = get_ndcg(prediction);
    print('NDCG Score {:.4f}'.format(score))
    if Hasil_MAE < best_MAE8 :
            best_MAE8= Hasil_MAE
            if best_MAE8 < MAE_prediction :
                best_algo_MAE = algo
                MAE_prediction = Hasil_MAE
    if score > best_NDCG8 :
        best_NDCG8= score
        if best_NDCG8 > NDCG_prediction :
            best_algo_NDCG = algo
            NDCG_prediction= score

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0661
NDCG Score 0.8015
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0810
NDCG Score 0.8160
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0531
NDCG Score 0.8171
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0564
NDCG Score 0.8171
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.0807
NDCG Score 0.7845


In [37]:
print('MAE Score {:.4f}'.format(best_MAE8))

MAE Score 1.0531


In [38]:
print('NDC Score {:.4f}'.format(best_NDCG8))

NDC Score 0.8171


In [39]:
print('MAE Score {:.4f}'.format(MAE_prediction))

MAE Score 1.0191


In [40]:
print('NDC Score {:.4f}'.format(NDCG_prediction))

NDC Score 0.8300


In [41]:
best_prediction_MAE = best_algo_MAE.test(anti_set)
best_prediction_NDCG = best_algo_NDCG.test(anti_set)

In [42]:
recommend_movies(20,5,test_rating,best_prediction_MAE)

The list of the Movies that (ID User:20) Has Watched 

(6205) Kämpfer - Rating:8.0
(6500) Seikon no Qwaser - Rating:10.0
(6682) 11eyes - Rating:9.0
(7739) 11eyes: Momoiro Genmutan - Rating:10.0
(10073) Seikon no Qwaser II - Rating:10.0
(10076) Kämpfer für die Liebe - Rating:8.0
(10620) Mirai Nikki (TV) - Rating:8.0
(249) InuYasha - Rating:1.0
(5680) K-On! - Rating:1.0
(12581) Asa made Jugyou Chu! - Rating:3.0
(8876) Koe de Oshigoto! The Animation - Rating:7.0
(243) Gravitation - Rating:4.0
(853) Ouran Koukou Host Club - Rating:7.0
(819) Boku wa Imouto ni Koi wo Suru - Rating:10.0
(193) Maburaho - Rating:8.0
(325) Peach Girl - Rating:8.0
(1639) Boku no Pico - Rating:9.0
(3375) Kirepapa. - Rating:10.0
(5391) Pico to Chico - Rating:9.0


The list of the Recommended Movies for (ID User:20) 



1. (20) Naruto - Predicted Rating:9.8
2. (24) School Rumble - Predicted Rating:9.2
3. (226) Elfen Lied - Predicted Rating:9.2
4. (79) Shuffle! - Predicted Rating:8.9
5. (241) Girls Bravo: First Seaso

In [43]:
recommend_movies(20,5,test_rating,best_prediction_NDCG)

The list of the Movies that (ID User:20) Has Watched 

(6205) Kämpfer - Rating:8.0
(6500) Seikon no Qwaser - Rating:10.0
(6682) 11eyes - Rating:9.0
(7739) 11eyes: Momoiro Genmutan - Rating:10.0
(10073) Seikon no Qwaser II - Rating:10.0
(10076) Kämpfer für die Liebe - Rating:8.0
(10620) Mirai Nikki (TV) - Rating:8.0
(249) InuYasha - Rating:1.0
(5680) K-On! - Rating:1.0
(12581) Asa made Jugyou Chu! - Rating:3.0
(8876) Koe de Oshigoto! The Animation - Rating:7.0
(243) Gravitation - Rating:4.0
(853) Ouran Koukou Host Club - Rating:7.0
(819) Boku wa Imouto ni Koi wo Suru - Rating:10.0
(193) Maburaho - Rating:8.0
(325) Peach Girl - Rating:8.0
(1639) Boku no Pico - Rating:9.0
(3375) Kirepapa. - Rating:10.0
(5391) Pico to Chico - Rating:9.0


The list of the Recommended Movies for (ID User:20) 



1. (20) Naruto - Predicted Rating:10.0
2. (226) Elfen Lied - Predicted Rating:9.0
3. (79) Shuffle! - Predicted Rating:8.9
4. (24) School Rumble - Predicted Rating:8.8
5. (241) Girls Bravo: First Seas

In [50]:
# film-film anime yang direkomendasikan dan memiliki similarity dengan film anime dengan ID anime: 33
# sebanyak 10 film
# Hasil rekomendasi film anime berdasarkan hasil keluaran proses Collaborative filtering item-based
recommend_movies_by_anime_id(20,10,rating_dataset,best_prediction_MAE)

The list of the ID User that Has Watched this (Anime ID: 20)
(20) Naruto - Rating:8.0

The list of the Recommended Movies that similarity to (Anime ID: 20) 

0. (846) School Rumble Ni Gakki - Predicted Rating:7.5
1. (355) Shakugan no Shana - Predicted Rating:7.4
2. (226) Elfen Lied - Predicted Rating:7.3
3. (936) Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo! - Predicted Rating:7.3
4. (24) School Rumble - Predicted Rating:7.1
5. (356) Fate/stay night - Predicted Rating:7.1
6. (487) Girls Bravo: Second Season - Predicted Rating:7.0
7. (241) Girls Bravo: First Season - Predicted Rating:6.5
8. (442) Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo! - Predicted Rating:6.3
9. (79) Shuffle! - Predicted Rating:6.1




In [51]:
# film-film anime yang direkomendasikan dan memiliki similarity dengan film anime dengan ID anime: 33
# sebanyak 10 film
# Hasil rekomendasi film anime berdasarkan hasil keluaran proses Collaborative filtering item-based
recommend_movies_by_anime_id(20,10,rating_dataset,best_prediction_NDCG)

The list of the ID User that Has Watched this (Anime ID: 20)
(20) Naruto - Rating:8.0

The list of the Recommended Movies that similarity to (Anime ID: 20) 

0. (846) School Rumble Ni Gakki - Predicted Rating:7.8
1. (24) School Rumble - Predicted Rating:7.7
2. (356) Fate/stay night - Predicted Rating:7.5
3. (355) Shakugan no Shana - Predicted Rating:7.4
4. (936) Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo! - Predicted Rating:7.2
5. (226) Elfen Lied - Predicted Rating:6.7
6. (442) Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo! - Predicted Rating:6.5
7. (79) Shuffle! - Predicted Rating:6.1
8. (241) Girls Bravo: First Season - Predicted Rating:5.5
9. (487) Girls Bravo: Second Season - Predicted Rating:4.2




In [44]:
# film-film anime yang direkomendasikan dan memiliki similarity dengan film anime dengan ID anime: 33
# sebanyak 10 film
# Hasil rekomendasi film anime berdasarkan hasil keluaran proses Collaborative filtering item-based
recommend_movies_by_anime_id(33,10,rating_dataset,best_prediction_MAE)

The list of the ID User that Has Watched this (Anime ID: 33)
(33) Berserk - Rating:7.0

The list of the Recommended Movies that similarity to (Anime ID: 33) 

0. (846) School Rumble Ni Gakki - Predicted Rating:7.9
1. (355) Shakugan no Shana - Predicted Rating:7.8
2. (936) Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo! - Predicted Rating:7.7
3. (24) School Rumble - Predicted Rating:7.5
4. (487) Girls Bravo: Second Season - Predicted Rating:7.4
5. (1546) Negima!? - Predicted Rating:7.2
6. (241) Girls Bravo: First Season - Predicted Rating:6.9
7. (1836) Shuffle! Memories - Predicted Rating:6.8
8. (442) Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo! - Predicted Rating:6.7
9. (79) Shuffle! - Predicted Rating:6.5




In [45]:
# film-film anime yang direkomendasikan dan memiliki similarity dengan film anime dengan ID anime: 33
# sebanyak 10 film
# Hasil rekomendasi film anime berdasarkan hasil keluaran proses Collaborative filtering item-based
recommend_movies_by_anime_id(33,10,rating_dataset,best_prediction_NDCG)

The list of the ID User that Has Watched this (Anime ID: 33)
(33) Berserk - Rating:7.0

The list of the Recommended Movies that similarity to (Anime ID: 33) 

0. (846) School Rumble Ni Gakki - Predicted Rating:8.6
1. (24) School Rumble - Predicted Rating:8.2
2. (355) Shakugan no Shana - Predicted Rating:8.0
3. (936) Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo! - Predicted Rating:7.6
4. (1546) Negima!? - Predicted Rating:7.1
5. (442) Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo! - Predicted Rating:6.9
6. (1836) Shuffle! Memories - Predicted Rating:6.7
7. (79) Shuffle! - Predicted Rating:6.5
8. (241) Girls Bravo: First Season - Predicted Rating:5.9
9. (487) Girls Bravo: Second Season - Predicted Rating:4.6




In [46]:
# perhitungan Mean Absolute Error dari hasil prediksi dan nilai rating awal untuk film anime id = 20
# untuk Collaborative Filtering item-based
user_iid = 20
try:
    pred_ratings,scores = get_predictions_and_rating(user_iid,best_prediction_MAE)
    pred_scores_arr = np.asarray([scores])
    ratings_arr = np.asarray([pred_ratings])


    print('Ratings = {}\n'.format(ratings_arr))
    print('Predicted = {}\n'.format(pred_scores_arr))

    MAE = mean_absolute_error(ratings_arr, pred_scores_arr)
    print('MAE Value = {:.4f}'.format(MAE))
except Exception as e:
    print(e)
    print('MAE Value = N/A')

Ratings = [[7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.8

In [47]:
# perhitungan Mean Absolute Error dari hasil prediksi dan nilai rating awal untuk film anime id = 20
# untuk Collaborative Filtering item-based
user_iid = 20
try:
    pred_ratings,scores = get_predictions_and_rating(user_iid,best_prediction_NDCG)
    pred_scores_arr = np.asarray([scores])
    ratings_arr = np.asarray([pred_ratings])


    print('Ratings = {}\n'.format(ratings_arr))
    print('Predicted = {}\n'.format(pred_scores_arr))

    MAE = mean_absolute_error(ratings_arr, pred_scores_arr)
    print('MAE Value = {:.4f}'.format(MAE))
except Exception as e:
    print(e)
    print('MAE Value = N/A')

Ratings = [[7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.8

In [55]:
# perhitungan NDCG Score dari hasil prediksi untuk film anime_id = 20
# untuk Collaborative Filtering item-based
user_iid = 44
pred_ratings,scores = get_predictions_and_rating(user_iid,best_prediction_MAE)
pred_ratings = np.asarray([pred_ratings])

print('Predicted Ratings = {}\n'.format(pred_ratings))
scores = np.asarray([scores])
print('Estimated Score = {}\n'.format(scores))
try:
    ndcg = ndcg_score(pred_ratings, scores)
    print('NDCG Score = {:.4f}'.format(ndcg))
except Exception as e:
    print(e)
    print('NDCG Score = N/A')

Predicted Ratings = [[7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80684876 7.80684876 7.80684876 7.80684876
  7.80684876 7.80684876 7.80

In [None]:
# perhitungan NDCG Score dari hasil prediksi untuk film anime_id = 20
# untuk Collaborative Filtering item-based
user_iid = 20
pred_ratings,scores = get_predictions_and_rating(user_iid,best_prediction_NDCG)
pred_ratings = np.asarray([pred_ratings])

print('Predicted Ratings = {}\n'.format(pred_ratings))
scores = np.asarray([scores])
print('Estimated Score = {}\n'.format(scores))
try:
    ndcg = ndcg_score(pred_ratings, scores)
    print('NDCG Score = {:.4f}'.format(ndcg))
except Exception as e:
    print(e)
    print('NDCG Score = N/A')