In [None]:
import pandas as pd
import numpy as np
import os
import joblib
import time
from tqdm.auto import tqdm
from natsort import natsorted

import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import mutual_info_score as SimMI

In [8]:
# Root directory that every dataset/model/result path below is joined onto.
# Google Colab (uncomment and set when running there):
# path = ''

# Local
path = ''

# --- MovieLens 1M dataset ---
# Folder holding the fold files, the field separator used by those files,
# the column names assigned when reading them, and the matrix dimensions.
pathDataset = 'Datasets/ml-1m'
sep = '::'
names = ['user_id', 'item_id', 'rating']
totalUser = 6040
totalItem = 3706

# --- MovieLens 100K dataset (uncomment this group and comment the one above) ---
# pathDataset = 'Datasets/ml-100k'
# names = ['user_id', 'item_id', 'rating', 'timestamp']
# sep = '\t'
# totalUser = 943
# totalItem = 1682


# Name of the cross-validation fold; used in output folder and file names.
nameFold = '1Fold'
# File names of the saved collaborative-filtering models: [user-based, item-based].
pathCF = ['user_k1.joblib', 'item_k1.joblib']

# Function  

## Mean Rating

In [4]:
def calculate_mean(rating):
    """Return the mean of the non-zero ratings of every row of ``rating``.

    Parameters
    ----------
    rating : pd.DataFrame
        Rating matrix in which 0 means "not rated". Rows are users for the
        user-based model, or items when the transposed matrix is passed.

    Returns
    -------
    pd.DataFrame
        Single-column frame (column label ``0``) indexed like ``rating``;
        rows without any rating get a mean of 0.0.
    """
    rated_total = rating.sum(axis=1)
    rated_count = np.count_nonzero(rating, axis=1)
    row_mean = rated_total / rated_count
    # A row without ratings produces 0/0 = NaN; report its mean as 0.0.
    row_mean[np.isnan(row_mean)] = 0.0
    return pd.DataFrame(row_mean, index=rating.index)

## Mean Centered Rating

In [47]:
def calculate_mean_centered(rating: pd.DataFrame, mean: pd.DataFrame) -> pd.DataFrame:
    """Subtract each row's mean from its observed ratings.

    Parameters
    ----------
    rating : pd.DataFrame
        Rating matrix in which 0 means "not rated".
    mean : pd.DataFrame
        One mean per row of ``rating`` (e.g. the output of ``calculate_mean``).

    Returns
    -------
    pd.DataFrame
        Same index/columns as ``rating``; rated cells hold ``rating - mean``,
        unrated cells stay 0.0.
    """
    values = rating.to_numpy()
    # Column vector so that every row is shifted by its own mean.
    row_means = mean.to_numpy().reshape(-1, 1)

    # Only cells that carry a rating are centred; the rest remain zero.
    centered = np.where(values != 0, values - row_means, 0.0)

    return pd.DataFrame(centered, index=rating.index, columns=rating.columns)

## Similarity Function MI

In [50]:
def SimilarityMI(rating):
    """Build the pairwise mutual-information similarity matrix of the rows.

    Parameters
    ----------
    rating : pd.DataFrame
        Rating matrix; every row (user, or item for the transposed matrix)
        is treated as a label vector and compared with every other row.

    Returns
    -------
    pd.DataFrame
        Square matrix indexed and labelled by ``rating.index``. The diagonal
        holds each row's mutual information with itself.

    Notes
    -----
    Mutual information is symmetric, so each pair is scored once and
    mirrored across the diagonal. The loop is still O(n^2) calls to
    ``SimMI`` and is slow on large matrices.
    """
    # Pull the rows out once; slicing the DataFrame inside the double loop
    # would dominate the run time.
    values = rating.to_numpy()
    n = len(values)
    mat_sim = np.zeros((n, n), dtype=float)
    for i in range(n):  # user/item
        row_i = values[i]
        for j in range(i, n):  # user/item, upper triangle only
            nilai_sim = SimMI(row_i, values[j])
            mat_sim[i, j] = nilai_sim
            mat_sim[j, i] = nilai_sim
    return pd.DataFrame(mat_sim, index=rating.index, columns=rating.index)

## Prediksi UCF dan ICF

In [84]:
def predict(datas, mean, mean_centered, similarity, user=3, item=2, tetangga=2, jenis='user'):
    """Predict the rating of ``item`` for ``user`` with a top-k neighbourhood model.

    Parameters
    ----------
    datas : pd.DataFrame
        Rating matrix (0 = not rated). Users x items for ``jenis='user'``;
        items x users (the transpose) for ``jenis='item'``.
    mean : pd.DataFrame or pd.Series
        Mean rating per row of ``datas``.
    mean_centered : pd.DataFrame
        Mean-centred ratings, laid out like ``datas``.
    similarity : pd.DataFrame
        Square similarity matrix over the rows of ``datas``.
    user, item : hashable
        Labels of the target user and item.
    tetangga : int
        Number of most similar neighbours to use.
    jenis : {'user', 'item'}
        Which model to use.

    Returns
    -------
    list
        ``[item, predicted_rating]`` with the prediction as a Python float.

    Raises
    ------
    ValueError
        If ``jenis`` is neither ``'user'`` nor ``'item'``.
    """
    # Pick the column holding the candidate neighbours' ratings and the row
    # whose similarities/mean are used, depending on the model.
    if jenis == "user":
        kolom, target = item, user
    elif jenis == "item":
        kolom, target = user, item
    else:
        raise ValueError(f"jenis must be 'user' or 'item', got {jenis!r}")

    dt = datas.loc[:, kolom].to_numpy()
    meanC = mean_centered.loc[:, kolom].to_numpy()
    simi = similarity.loc[target, :].to_numpy()

    # Only rows that actually rated the target column can act as neighbours.
    idx_dt = np.where(dt != 0)
    nilai_mean_c = meanC[idx_dt]
    nilai_similarity = simi[idx_dt]

    # Positions of the `tetangga` most similar candidates (descending similarity).
    idx_sim = (-nilai_similarity).argsort()[:tetangga]

    # See equations 5 & 6 (prediction formula) in the paper.
    # Numerator: similarity-weighted sum of the neighbours' mean-centred ratings.
    a = np.sum(nilai_mean_c[idx_sim] * nilai_similarity[idx_sim])
    # Denominator: sum of the absolute similarities.
    b = np.abs(nilai_similarity[idx_sim]).sum()

    # Extract the target's mean as a plain scalar; `.item()` works whether
    # `mean.loc[target]` is a scalar or a one-element Series and avoids the
    # deprecated float(Series) conversion.
    mu = float(np.asarray(mean.loc[target]).item())

    # With no usable neighbours (zero denominator) fall back to the mean alone.
    hasil = mu + (a / b) if b != 0 else mu

    return [item, float(hasil)]

## Hybrid

In [8]:
def hybrid(predict_user, predict_item, r1=0.7):
    """Linearly fuse user-based and item-based predictions.

    Parameters
    ----------
    predict_user, predict_item : array-like
        Predicted ratings from the user-based and item-based models,
        aligned element by element.
    r1 : float
        Degree of fusion given to the user-based model; the item-based
        model receives ``1 - r1``.

    Returns
    -------
    np.ndarray
        ``r1 * predict_user + (1 - r1) * predict_item`` per element
        (equation 13, the hybrid formula, in the paper).
    """
    # One weight per model: [user-based, item-based].
    bobot = np.array([r1, 1 - r1])

    # Put both prediction vectors side by side, weight each column and
    # add the two weighted columns together row by row.
    berdampingan = np.column_stack((predict_user, predict_item))
    return (bobot * berdampingan).sum(axis=1)

## Evaluasi Performa

In [15]:
# Evaluasi
def precision(ground_truth, topN, n=1):
    """Precision@n in percent: share of the first ``n`` recommendations that are relevant."""
    hits = np.intersect1d(topN[:n], ground_truth)
    return 100 * (len(hits) / n)

def recall(ground_truth, topN, n=1):
    """Recall@n in percent: share of the relevant items found in the first ``n`` recommendations.

    Returns 0.0 when ``ground_truth`` is empty instead of dividing by zero.
    """
    relevant = set(ground_truth)
    if not relevant:
        # Recall is undefined without relevant items; report it as zero.
        return 0.0
    hits = np.intersect1d(topN[:n], ground_truth)
    return 100 * (len(hits) / len(relevant))

def f1Score(ground_truth, topN, n=1):
    """F1@n: harmonic mean of precision@n and recall@n (both in percent), 0 if either is zero."""
    presisi = precision(ground_truth, topN, n)
    sensitivitas = recall(ground_truth, topN, n)

    if presisi > 0 and sensitivitas > 0:
        return (2 * presisi * sensitivitas) / (presisi + sensitivitas)
    return 0

def idcg(n):
    """Ideal DCG@n: the DCG obtained when all of the first ``n`` positions are relevant."""
    posisi = np.arange(1, n + 1)
    return np.sum(1 / np.log2(1 + posisi))

def dcg(ground_truth, topN, n):
    """Discounted cumulative gain of the first ``n`` recommendations (binary relevance).

    Parameters
    ----------
    ground_truth : array-like
        Relevant item ids.
    topN : sequence
        Recommended item ids, best first. May be shorter than ``n``; the
        missing positions contribute no gain.
    n : int
        Cut-off rank.

    Returns
    -------
    float
        Sum over positions ``p = 1..n`` of ``rel_p / log2(1 + p)`` where
        ``rel_p`` is 1 if the item at rank ``p`` is in ``ground_truth``.
    """
    top = np.asarray(topN[:n])
    # Pad with zeros so a short recommendation list neither raises nor
    # broadcasts against the discount vector.
    gains = np.zeros(n, dtype=float)
    # Membership test works for plain Python sequences as well as arrays.
    gains[:len(top)] = np.isin(top, np.asarray(ground_truth))
    discounts = 1 / np.log2(1 + np.arange(1, n + 1))
    return np.sum(discounts * gains)

def ndcg(ground_truth, topN, n):
    """Normalised DCG@n: ``dcg`` of the list divided by the ideal ``idcg(n)``."""
    skor = dcg(ground_truth, topN, n)
    ideal = idcg(n)
    return skor / ideal

def AkurasiTopN(gt, topN, max_n=100):
    """Evaluate one ranked list at every cut-off from 1 to ``max_n``.

    Parameters
    ----------
    gt : array-like
        Relevant (ground-truth) item ids of one user.
    topN : sequence
        Recommended item ids, best first.
    max_n : int, default 100
        Largest cut-off to evaluate.

    Returns
    -------
    list of list
        Five lists of length ``max_n`` in this order: precision, recall,
        F1, DCG, nDCG. Entry ``k`` of each list is the metric at ``n = k + 1``.
    """
    # Order matters: downstream code addresses the metrics by position.
    metrik = (precision, recall, f1Score, dcg, ndcg)
    evTopN = [[] for _ in metrik]
    for n in range(1, max_n + 1):
        for hasil, fungsi in zip(evTopN, metrik):
            hasil.append(fungsi(ground_truth=gt, topN=topN, n=n))
    return evTopN

In [6]:
def sistemRekomendasi(rating_matrix, mean_user_df, mean_centered_user_df, similarity_user_df, mean_item_df, mean_centered_item_df, similarity_item_df, user, tetangga):
    """Predict ratings for every item ``user`` has not rated, with both models.

    Parameters
    ----------
    rating_matrix : pd.DataFrame
        Users x items training matrix (0 = not rated).
    mean_user_df, mean_centered_user_df, similarity_user_df
        User-based model components, passed through to ``predict``.
    mean_item_df, mean_centered_item_df, similarity_item_df
        Item-based model components, passed through to ``predict``.
    user : hashable
        Label of the target user in ``rating_matrix.index``.
    tetangga : int
        Neighbourhood size used by both models.

    Returns
    -------
    list of np.ndarray
        ``[prediksiUCF, prediksiICF]``; each array has shape ``(k, 2)`` with
        rows ``[item, predicted_rating]`` for the ``k`` unrated items.
    """
    # Candidate items are taken from the column labels themselves, so the
    # result is correct even if the columns are not exactly 1..N.
    baris_user = rating_matrix.loc[user, :]
    items = baris_user.index[(baris_user == 0).to_numpy()].tolist()

    # The item-based model works on the transposed matrix; build it once
    # instead of once per candidate item.
    rating_matrix_T = rating_matrix.T

    # UCF and ICF predictions
    prediksiUCF, prediksiICF = [], []
    for item in items:
        prediksiUCF.append(predict(rating_matrix, mean_user_df, mean_centered_user_df, similarity_user_df, user=user, item=item, tetangga=tetangga, jenis='user'))
        prediksiICF.append(predict(rating_matrix_T, mean_item_df, mean_centered_item_df, similarity_item_df, user=user, item=item, tetangga=tetangga, jenis='item'))

    # reshape keeps the (k, 2) layout even when there are no unrated items.
    prediksiUCF = np.array(prediksiUCF).reshape(-1, 2)
    prediksiICF = np.array(prediksiICF).reshape(-1, 2)
    return [prediksiUCF, prediksiICF]


def sistemRekomendasiEvaluasi(gts, ucfPreds, icfPreds, tetanggaU, tetanggaI, folder, r1=0):
    """Fuse UCF/ICF predictions three ways, evaluate the rankings and save the results.

    For every user the two prediction vectors are combined as
    ``HBF`` (weighted sum via ``hybrid`` with ``r1``), ``HBR`` (plain average)
    and ``HBX`` (element-wise product); items are ranked by the fused score
    and scored with ``AkurasiTopN``.

    Parameters
    ----------
    gts : sequence
        Ground-truth item ids, one entry per user.
    ucfPreds, icfPreds : sequence of np.ndarray
        Per-user ``(k, 2)`` arrays of ``[item, predicted_rating]`` rows,
        aligned with ``gts`` and with each other.
    tetanggaU, tetanggaI : int
        Neighbourhood sizes that produced the predictions; used only in the
        output file names.
    folder : iterable of str
        Output sub-folders of the form ``'<ALGO>/<scenario>'`` where
        ``<ALGO>`` is ``HBF``, ``HBR`` or ``HBX`` (case-insensitive).
        Entries with another prefix are reported and skipped.
    r1 : float
        Fusion weight of the user-based model for ``HBF``.

    Side effects
    ------------
    Writes one ``.ev`` and one ``.time`` file per entry of ``folder`` below
    ``path`` (module-level), creating the directories when missing. The
    stored time is the duration of the whole evaluation loop, i.e. it covers
    all three fusion variants together.
    """
    evaluasi_map = {'HBF': [], 'HBR': [], 'HBX': []}

    startHBF = time.time()
    for ind in range(len(gts)):
        gt = gts[ind]
        predUCF = ucfPreds[ind]
        predICF = icfPreds[ind]

        # Column 1 holds the predicted rating; make sure it is float.
        ucf = predUCF[:, 1].astype(float)
        icf = predICF[:, 1].astype(float)

        # Fused scores per hybrid variant.
        skor_hybrid = {
            'HBF': hybrid(ucf, icf, r1),
            'HBR': (ucf + icf) / 2,
            'HBX': ucf * icf,
        }

        for algoritma, skor in skor_hybrid.items():
            # Work on a copy so the caller's prediction arrays stay intact.
            sr = predUCF.copy()
            sr[:, 1] = skor
            # Top-N: item ids ordered by descending fused score.
            topN = sr[(-sr[:, 1].astype(float)).argsort()][:, 0]
            # Accuracy metrics for this user.
            evaluasi_map[algoritma].append(AkurasiTopN(gt, topN))
    endHBF = time.time()

    waktu_komputasi = endHBF - startHBF
    for full_folder_name in folder:
        algoritma_prefix = full_folder_name.split('/')[0].upper()
        if algoritma_prefix not in evaluasi_map:
            # Unknown algorithm prefix: warn and skip instead of failing with KeyError.
            print(f"Peringatan: Format nama folder salah: {full_folder_name}. Melewati.")
            continue

        ev_var = evaluasi_map[algoritma_prefix]
        # Only HBF depends on r1, so only its file names carry the weight.
        file_suffix = f'vR-{r1}_{nameFold}' if algoritma_prefix == 'HBF' else f'{nameFold}'

        folder_path_segment = full_folder_name

        # --- Evaluation results (.ev) ---
        folder_evaluasi = os.path.join(
            path,
            f'Code/HYBRID/Skenario/HB/{folder_path_segment}/{nameFold}/Evaluasi'
        )
        os.makedirs(folder_evaluasi, exist_ok=True)
        filename = os.path.join(
            folder_evaluasi,
            f'vNU-{tetanggaU}_vNI-{tetanggaI}_{file_suffix}.ev'
        )
        joblib.dump(ev_var, filename)

        # --- Computation time (.time) ---
        folder_waktu = os.path.join(
            path,
            f'Code/HYBRID/Skenario/HB/{folder_path_segment}/{nameFold}/Waktu'
        )
        os.makedirs(folder_waktu, exist_ok=True)
        filenameWaktu = os.path.join(
            folder_waktu,
            f'vNU-{tetanggaU}_vNI-{tetanggaI}_{file_suffix}.time'
        )
        joblib.dump(waktu_komputasi, filenameWaktu)

def calculateBestAkurasi(path, metric=4, n=20):
    """Collect the per-fold mean of one metric for every saved configuration.

    Expects ``path`` to contain one folder per fold (e.g. ``1Fold``), each
    with an ``Evaluasi`` sub-folder of ``*_<fold>.ev`` files whose content is
    a list of per-user ``AkurasiTopN`` results.

    Parameters
    ----------
    path : str
        Scenario folder to scan.
    metric : int, default 4
        Position of the metric inside an ``AkurasiTopN`` result
        (0 precision, 1 recall, 2 F1, 3 DCG, 4 nDCG).
    n : int, default 20
        Cut-off rank at which the metric is read.

    Returns
    -------
    dict
        Maps the configuration name (file name without the ``_<fold>.ev``
        suffix) to a list with one mean value per fold, in natural fold order.
    """
    evaluasi = {}
    for linkpathFold in tqdm(natsorted(os.listdir(path))):
        pathEvaluasiFold = os.path.join(path, linkpathFold, 'Evaluasi')
        if not os.path.isdir(pathEvaluasiFold):
            # Stray files/folders (e.g. checkpoints) are not folds; skip them.
            continue
        for linkpath in tqdm(natsorted(os.listdir(pathEvaluasiFold))):
            ev = np.array(joblib.load(os.path.join(pathEvaluasiFold, linkpath)))
            # ev is (users, metrics, cut-offs); average the chosen cell over users.
            meanEv = (ev[:, metric, n - 1]).mean(axis=0)
            # Strip the fold suffix using the folder name itself, so the key
            # does not depend on the folder's position in the listing.
            pathName = linkpath.split(f'_{linkpathFold}.ev')[0]
            evaluasi.setdefault(pathName, []).append(meanEv)
    return evaluasi

# Proses Modeling

## Prepare Data Training and Data Test

In [60]:
# Read the training ('u1.base') and test ('u1.test') rating files of the
# configured dataset, without a header row and with the column names from `names`.
# NOTE(review): the 'u1' file names are fixed while `nameFold` is configurable —
# confirm both are changed together when running another fold.
ratings_train_old = pd.read_csv(os.path.join(path, pathDataset, 'u1.base'), sep=sep, header=None, names=names)
ratings_test = pd.read_csv(os.path.join(path, pathDataset, 'u1.test'), sep=sep, header=None, names=names)

In [61]:
ratings_train_old.head()

Unnamed: 0,user_id,item_id,rating
0,1,1,5
1,1,2,3
2,1,3,3
3,1,4,4
4,1,5,5


In [62]:
# Empty users x items matrix covering every id 1..totalUser / 1..totalItem,
# so users or items missing from the training split still get an all-zero row/column.
rating_matrix = pd.DataFrame(np.zeros((totalUser, totalItem)), index=list(range(1,totalUser+1)), columns=list(range(1,totalItem+1))).rename_axis(index='user_id', columns="item_id")
rating_matrix

item_id,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
# Training split as a users x items matrix; only ids present in the
# training file appear here. Missing (unrated) cells are filled with 0.
rating_matrix_old = ratings_train_old.pivot_table(index='user_id', columns='item_id', values='rating')
rating_matrix_old = rating_matrix_old.fillna(0)

In [64]:
# Copy the training ratings into the full-size matrix, aligned on user/item labels.
# This modifies `rating_matrix` in place.
rating_matrix.update(rating_matrix_old)

### Data Training

In [65]:
rating_matrix

item_id,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,3.0,4.0,5.0,3.0,5.0,5.0,4.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.0,3.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Data Test

In [66]:
ratings_test

Unnamed: 0,user_id,item_id,rating
0,5,4,3
1,5,5,5
2,5,10,4
3,5,19,2
4,5,28,1
...,...,...,...
200036,6039,1922,4
200037,6039,2042,4
200038,6039,2044,4
200039,6039,2050,3


## User-Based

### Mean

In [67]:
# Mean of the observed (non-zero) ratings of every user.
mean_user_df = calculate_mean(rating_matrix)

### Mean Centered

In [69]:
# Each user's ratings shifted by that user's mean; unrated cells stay 0.
mean_centered_user_df = calculate_mean_centered(rating_matrix, mean_user_df)

### Similarity

In [None]:
# Pairwise user-user mutual-information similarity.
# The computation loops over every pair of users; the result is saved below
# and can be reloaded from disk instead of re-running this cell.
similarity_user_df = SimilarityMI(rating_matrix)

### Save

In [None]:
# Persist the user-based model as [mean, mean-centred ratings, similarity].
# The 'Code/Model' folder must already exist under `path`.
filename = os.path.join(path, 'Code/Model', pathCF[0])
joblib.dump([mean_user_df, mean_centered_user_df, similarity_user_df], filename)

### Load

In [71]:
# Reload the user-based model in the order it was saved:
# mean, mean-centred ratings, similarity.
mean_user_df, mean_centered_user_df, similarity_user_df = joblib.load(os.path.join(path, 'Code/Model', pathCF[0]))

In [72]:
mean_user_df

Unnamed: 0_level_0,0
user_id,Unnamed: 1_level_1
1,4.188679
2,3.713178
3,3.901961
4,4.190476
5,3.062500
...,...
6036,3.421053
6037,3.717822
6038,3.800000
6039,3.890244


In [73]:
mean_centered_user_df

item_id,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.811321,-1.188679,-1.188679,-0.188679,0.811321,-1.188679,0.811321,0.811321,-0.188679,-0.188679,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.286822,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000000,0.000000,0.000000,0.000000,1.098039,1.098039,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,0.282178,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.282178,0.000000,0.282178,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.000000,-0.890244,0.109756,0.000000,0.000000,0.109756,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
similarity_user_df

user_id,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.089182,0.002602,0.003217,0.002560,0.000062,0.002066,0.001119,0.005198,0.008979,0.014329,...,0.005695,0.003058,0.001736,0.000792,0.000152,0.000223,0.004519,0.000078,0.005698,0.004897
2,0.002602,0.198204,0.005387,0.006065,0.000153,0.004448,0.013423,0.011546,0.007912,0.011749,...,0.004076,0.002663,0.011969,0.000747,0.000903,0.003328,0.009905,0.001760,0.002266,0.014001
3,0.003217,0.005387,0.090612,0.005109,0.000060,0.003617,0.004880,0.002475,0.004391,0.011521,...,0.003232,0.003658,0.004768,0.000079,0.000147,0.000215,0.003903,0.002227,0.002774,0.007425
4,0.002560,0.006065,0.005109,0.041514,0.000025,0.000042,0.003974,0.003226,0.002529,0.005744,...,0.004618,0.003573,0.013496,0.000032,0.000060,0.000088,0.004431,0.003348,0.001118,0.008076
5,0.000062,0.000153,0.000060,0.000025,0.033118,0.000032,0.002043,0.001620,0.002130,0.000941,...,0.001161,0.000123,0.000071,0.000025,0.002052,0.002222,0.000528,0.000023,0.000097,0.001930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.000223,0.003328,0.000215,0.000088,0.002222,0.000113,0.001699,0.003600,0.002008,0.001641,...,0.001447,0.000628,0.003993,0.000948,0.000966,0.099360,0.001984,0.000084,0.000849,0.002525
6037,0.004519,0.009905,0.003903,0.004431,0.000528,0.002814,0.004416,0.005317,0.013376,0.013933,...,0.007691,0.016098,0.003778,0.004217,0.003046,0.001984,0.280177,0.001751,0.007966,0.042745
6038,0.000078,0.001760,0.002227,0.003348,0.000023,0.000040,0.000045,0.000646,0.002431,0.004536,...,0.003668,0.003330,0.002140,0.000031,0.000057,0.000084,0.001751,0.040982,0.003338,0.006151
6039,0.005698,0.002266,0.002774,0.001118,0.000097,0.003377,0.000457,0.000834,0.001174,0.012858,...,0.001692,0.006678,0.000723,0.001148,0.000237,0.000849,0.007966,0.003338,0.129440,0.008493


## Item-Based

In [75]:
# Mean of the observed (non-zero) ratings of every item (rows of the transposed matrix).
mean_item_df = calculate_mean(rating_matrix.T)

### Mean Centered

In [76]:
# Each item's ratings shifted by that item's mean; unrated cells stay 0.
mean_centered_item_df = calculate_mean_centered(rating_matrix.T, mean_item_df)

### Similarity

In [None]:
# Pairwise item-item mutual-information similarity.
# The computation loops over every pair of items; the result is saved below
# and can be reloaded from disk instead of re-running this cell.
similarity_item_df = SimilarityMI(rating_matrix.T)

### Save

In [None]:
# Persist the item-based model as [mean, mean-centred ratings, similarity].
# The 'Code/Model' folder must already exist under `path`.
filename = os.path.join(path, 'Code/Model', pathCF[1])
joblib.dump([mean_item_df, mean_centered_item_df, similarity_item_df], filename)

['Model/item_k1.joblib']

### Load

In [77]:
# Reload the item-based model in the order it was saved:
# mean, mean-centred ratings, similarity.
mean_item_df, mean_centered_item_df, similarity_item_df = joblib.load(os.path.join(path, 'Code/Model', pathCF[1]))

In [78]:
mean_item_df

Unnamed: 0_level_0,0
item_id,Unnamed: 1_level_1
1,4.389510
2,3.453682
3,4.158317
4,3.863472
5,3.860795
...,...
3702,5.000000
3703,0.000000
3704,1.000000
3705,5.000000


In [79]:
mean_centered_item_df

user_id,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.610490,0.61049,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.61049,0.61049,0.0,0.0,0.0,-0.38951,0.0,0.000000,-0.38951
2,-0.453682,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,-0.453682,0.00000
3,-1.158317,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.841683,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,-0.158317,0.00000
4,0.136528,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.136528,0.136528,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000
5,1.139205,0.00000,1.139205,0.0,0.0,0.0,0.0,0.0,0.139205,0.139205,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3702,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000
3703,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000
3704,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000
3705,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0,0.00000,0.00000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.00000


In [80]:
similarity_item_df

item_id,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.792126,0.006165,0.012374,0.005831,0.005882,0.013822,0.018222,0.023912,0.012614,0.043205,...,1.052165e-03,1.052165e-03,4.473540e-05,3.387652e-04,4.204651e-04,3.387652e-04,0.0,4.473540e-05,4.204651e-04,4.473540e-05
2,0.006165,0.350056,0.010614,0.004746,0.025565,0.013176,0.005950,0.008147,0.035580,0.021107,...,1.196300e-05,1.196300e-05,6.027026e-04,8.167777e-04,1.196300e-05,1.196300e-05,0.0,1.196300e-05,1.196300e-05,1.196300e-05
3,0.012374,0.010614,0.382571,0.006086,0.009160,0.021978,0.023839,0.011279,0.048420,0.046598,...,1.427756e-05,1.427756e-05,1.427756e-05,1.427756e-05,1.427756e-05,1.427756e-05,0.0,1.427756e-05,1.427756e-05,1.427756e-05
4,0.005831,0.004746,0.006086,0.703513,0.018594,0.008299,0.002845,0.006176,0.004953,0.007482,...,5.236514e-04,5.236514e-04,3.348883e-05,3.348883e-05,5.236514e-04,4.002134e-04,0.0,3.348883e-05,3.348883e-05,3.348883e-05
5,0.005882,0.025565,0.009160,0.018594,0.830374,0.029369,0.007875,0.013731,0.030219,0.016842,...,4.394709e-05,4.394709e-05,3.639160e-04,4.394709e-05,4.394709e-05,4.394709e-05,0.0,4.394709e-05,4.394709e-05,4.394709e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3702,0.000339,0.000012,0.000014,0.000400,0.000044,0.000064,0.000016,0.000400,0.000017,0.000044,...,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,1.606966e-03,0.0,2.741562e-08,2.741562e-08,2.741562e-08
3703,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00
3704,0.000045,0.000012,0.000014,0.000033,0.000044,0.000064,0.000016,0.000033,0.000017,0.000044,...,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,0.0,1.606966e-03,2.741562e-08,2.741562e-08
3705,0.000420,0.000012,0.000014,0.000033,0.000044,0.000301,0.000016,0.000033,0.000017,0.000958,...,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,2.741562e-08,0.0,2.741562e-08,1.606966e-03,2.741562e-08


# Recommendation

## Predict UCF, ICF, and Ground Truth

In [None]:
# Neighbourhood sizes for which UCF/ICF predictions are generated and stored.
tetanggaN = [5,10,15,18,20,25,30,40,50,100,200]

# The test users and their ground-truth items do not depend on the
# neighbourhood size, so they are extracted once, in order of first
# appearance in the test file.
# NOTE: this lookup is therefore no longer part of the timed region below.
penggunaTest = ratings_test['user_id'].unique().tolist()
gtPerUser = {u: g.tolist() for u, g in ratings_test.groupby('user_id', sort=False)['item_id']}
gtSementara = [gtPerUser[u] for u in penggunaTest]

for tetangga in tqdm(tetanggaN, leave=True):
    startHBF = time.time()
    ucfSementara = []
    icfSementara = []
    for user in tqdm(penggunaTest):
        sr = sistemRekomendasi(rating_matrix, mean_user_df, mean_centered_user_df, similarity_user_df, mean_item_df, mean_centered_item_df, similarity_item_df, user=user, tetangga=tetangga)
        ucfSementara.append(sr[0])
        icfSementara.append(sr[1])
    endHBF = time.time()

    # Ground truth, UCF and ICF predictions, and the elapsed time, each with its target file.
    keluaran = [
        (gtSementara, os.path.join(path, f'Code/HYBRID/Skenario/GT/Prediksi/{nameFold}', f'GT-{tetangga}_{nameFold}.ucf')),
        (ucfSementara, os.path.join(path, f'Code/HYBRID/Skenario/UCF/Prediksi/{nameFold}', f'vNU-{tetangga}_{nameFold}.ucf')),
        (icfSementara, os.path.join(path, f'Code/HYBRID/Skenario/ICF/Prediksi/{nameFold}', f'vNI-{tetangga}_{nameFold}.icf')),
        (endHBF-startHBF, os.path.join(path, f'Code/HYBRID/Skenario/Waktu/{nameFold}', f'Waktu-{tetangga}_{nameFold}.time')),
    ]
    for objek, filename in keluaran:
        # Create the target folder on first use so a fresh checkout can run this cell.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        joblib.dump(objek, filename)

## Evaluasi Best UCF 

In [None]:
# Vary the user-based neighbourhood size while the item-based size and the
# fusion weight stay fixed.
tetanggaNU = [5,10,15,18,20,25,30,40,50,100,200]
tetanggaI = 20
r1 = 0.8

folder = ["HBF/tetanggaU", "HBR/tetanggaU", "HBX/tetanggaU"]

# The ICF predictions depend only on the fixed tetanggaI, so load them once.
icfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/ICF/Prediksi/{nameFold}', f'vNI-{tetanggaI}_{nameFold}.icf'))

for tetanggaU in tqdm(tetanggaNU, leave=True):
    gt = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/GT/Prediksi/{nameFold}', f'GT-{tetanggaU}_{nameFold}.ucf'))
    ucfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/UCF/Prediksi/{nameFold}', f'vNU-{tetanggaU}_{nameFold}.ucf'))
    sistemRekomendasiEvaluasi(gts=gt, ucfPreds=ucfPred, icfPreds=icfPred, tetanggaU=tetanggaU,tetanggaI=tetanggaI, r1=r1, folder=folder)

In [None]:
# Gather, for every fold folder found under HBF/tetanggaU, the mean metric
# of each saved configuration (see calculateBestAkurasi).
pathEvaluasiUCF = os.path.join(path, 'Code/HYBRID/Skenario/HB/HBF', 'tetanggaU')
evaluasiUCF = calculateBestAkurasi(pathEvaluasiUCF)

VBox(children=(  0%|          | 0/5 [00:00<?, ?it/s],))

In [None]:
# One row per configuration, one column per fold.
# The fold labels are fixed to 1Fold..5Fold, so every configuration must
# have exactly five results.
dfBestEvaluasiUCF = pd.DataFrame(evaluasiUCF, index=[f'{i}Fold' for i in range(1,6)]).T

In [11]:
# Add the average across the fold columns as an extra 'Mean' column and show the table.
dfBestEvaluasiUCF['Mean'] = dfBestEvaluasiUCF.mean(axis=1)
dfBestEvaluasiUCF

Unnamed: 0,1Fold,2Fold,3Fold,4Fold,5Fold,Mean
vNU-5_vNI-20_vR-0.8,0.098346,0.159424,0.130933,0.139474,0.125347,0.130705
vNU-10_vNI-20_vR-0.8,0.086437,0.155008,0.122618,0.126387,0.126825,0.123455
vNU-15_vNI-20_vR-0.8,0.079029,0.147324,0.116669,0.113735,0.12221,0.115793
vNU-18_vNI-20_vR-0.8,0.075934,0.141957,0.113932,0.10903,0.119169,0.112004
vNU-20_vNI-20_vR-0.8,0.074572,0.13923,0.110903,0.106107,0.116494,0.109461
vNU-25_vNI-20_vR-0.8,0.071182,0.132889,0.104604,0.099544,0.113411,0.104326
vNU-30_vNI-20_vR-0.8,0.068227,0.130238,0.099533,0.094545,0.110015,0.100512
vNU-40_vNI-20_vR-0.8,0.063358,0.121857,0.091604,0.086847,0.104328,0.093599
vNU-50_vNI-20_vR-0.8,0.059313,0.115912,0.085882,0.082065,0.098931,0.088421
vNU-100_vNI-20_vR-0.8,0.047657,0.096268,0.067688,0.064883,0.080727,0.071444


## Evaluasi Best ICF

In [None]:
# Vary the item-based neighbourhood size while the user-based size and the
# fusion weight stay fixed.
tetanggaU = 5
tetanggaNI = [5,10,15,18,20,25,30,40,50,100,200]
r1 = 0.8

folder = ["HBF/tetanggaI", "HBR/tetanggaI", "HBX/tetanggaI"]

# Ground truth and UCF predictions depend only on the fixed tetanggaU, so load them once.
gt = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/GT/Prediksi/{nameFold}', f'GT-{tetanggaU}_{nameFold}.ucf'))
ucfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/UCF/Prediksi/{nameFold}', f'vNU-{tetanggaU}_{nameFold}.ucf'))

for tetanggaI in tqdm(tetanggaNI, leave=True):
    icfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/ICF/Prediksi/{nameFold}', f'vNI-{tetanggaI}_{nameFold}.icf'))
    sistemRekomendasiEvaluasi(gts=gt, ucfPreds=ucfPred, icfPreds=icfPred, tetanggaU=tetanggaU,tetanggaI=tetanggaI, r1=r1, folder=folder)

In [12]:
# Gather, for every fold folder found under HBF/tetanggaI, the mean metric
# of each saved configuration (see calculateBestAkurasi).
pathEvaluasiICF = os.path.join(path, 'Code/HYBRID/Skenario/HB/HBF', 'tetanggaI')
evaluasiICF = calculateBestAkurasi(pathEvaluasiICF)

VBox(children=(  0%|          | 0/5 [00:00<?, ?it/s],))

In [13]:
# One row per configuration, one column per fold.
# The fold labels are fixed to 1Fold..5Fold, so every configuration must
# have exactly five results.
dfBestEvaluasiICF = pd.DataFrame(evaluasiICF, index=[f'{i}Fold' for i in range(1,6)]).T

In [14]:
# Add the average across the fold columns as an extra 'Mean' column and show the table.
dfBestEvaluasiICF['Mean'] = dfBestEvaluasiICF.mean(axis=1)
dfBestEvaluasiICF

Unnamed: 0,1Fold,2Fold,3Fold,4Fold,5Fold,Mean
vNU-5_vNI-5_vR-0.8,0.099928,0.160574,0.131227,0.141168,0.127276,0.132034
vNU-5_vNI-10_vR-0.8,0.099373,0.160138,0.131239,0.140578,0.126747,0.131615
vNU-5_vNI-15_vR-0.8,0.098738,0.159773,0.130737,0.139983,0.125621,0.130971
vNU-5_vNI-18_vR-0.8,0.098072,0.159686,0.131305,0.139964,0.125149,0.130835
vNU-5_vNI-20_vR-0.8,0.098346,0.159424,0.130933,0.139474,0.125347,0.130705
vNU-5_vNI-25_vR-0.8,0.098381,0.159314,0.130995,0.138998,0.124793,0.130496
vNU-5_vNI-30_vR-0.8,0.0974,0.159283,0.130451,0.138462,0.124597,0.130039
vNU-5_vNI-40_vR-0.8,0.096947,0.158818,0.130257,0.137725,0.124158,0.129581
vNU-5_vNI-50_vR-0.8,0.096961,0.158725,0.130247,0.138075,0.124406,0.129683
vNU-5_vNI-100_vR-0.8,0.096762,0.158447,0.129953,0.138182,0.12405,0.129479


## Evaluasi Best Hybrid

In [None]:
# Vary the fusion weight r1 from 0.0 to 1.0 in steps of 0.1 with both
# neighbourhood sizes fixed.
tetanggaU = 5
tetanggaI = 5
r = np.arange(0.0, 1.1, 0.1).round(1).tolist()
folder = ["HBF/R"]

# None of the inputs depends on r1, so load them once instead of once per weight.
gt = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/GT/Prediksi/{nameFold}', f'GT-{tetanggaU}_{nameFold}.ucf'))
ucfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/UCF/Prediksi/{nameFold}', f'vNU-{tetanggaU}_{nameFold}.ucf'))
icfPred = joblib.load(os.path.join(path, f'Code/HYBRID/Skenario/ICF/Prediksi/{nameFold}', f'vNI-{tetanggaI}_{nameFold}.icf'))

for r1 in tqdm(r, leave=True):
    sistemRekomendasiEvaluasi(gts=gt, ucfPreds=ucfPred, icfPreds=icfPred, tetanggaU=tetanggaU,tetanggaI=tetanggaI, r1=r1, folder=folder)

In [15]:
# Gather, for every fold folder found under HBF/R, the mean metric
# of each saved configuration (see calculateBestAkurasi).
pathEvaluasiHBF = os.path.join(path, 'Code/HYBRID/Skenario/HB/HBF', 'R')
evaluasiHBF = calculateBestAkurasi(pathEvaluasiHBF)

VBox(children=(  0%|          | 0/5 [00:00<?, ?it/s],))

In [16]:
# One row per configuration, one column per fold.
# The fold labels are fixed to 1Fold..5Fold, so every configuration must
# have exactly five results.
dfBestEvaluasiHBF = pd.DataFrame(evaluasiHBF, index=[f'{i}Fold' for i in range(1,6)]).T

In [17]:
# Add the average across the fold columns as an extra 'Mean' column and show the table.
dfBestEvaluasiHBF['Mean'] = dfBestEvaluasiHBF.mean(axis=1)
dfBestEvaluasiHBF

Unnamed: 0,1Fold,2Fold,3Fold,4Fold,5Fold,Mean
vNU-5_vNI-5_vR-0.0,0.047829,0.054428,0.054152,0.050571,0.044302,0.050257
vNU-5_vNI-5_vR-0.1,0.052019,0.061104,0.058375,0.054818,0.048592,0.054981
vNU-5_vNI-5_vR-0.2,0.055998,0.068721,0.063518,0.06078,0.054927,0.060789
vNU-5_vNI-5_vR-0.3,0.061702,0.079906,0.069785,0.067706,0.061123,0.068045
vNU-5_vNI-5_vR-0.4,0.069371,0.093406,0.07855,0.077644,0.070166,0.077827
vNU-5_vNI-5_vR-0.5,0.078684,0.109732,0.089741,0.091471,0.083262,0.090578
vNU-5_vNI-5_vR-0.6,0.088618,0.13034,0.103476,0.109882,0.098669,0.106197
vNU-5_vNI-5_vR-0.7,0.09646,0.149722,0.120836,0.128307,0.114495,0.121964
vNU-5_vNI-5_vR-0.8,0.099928,0.160574,0.131227,0.141168,0.127276,0.132034
vNU-5_vNI-5_vR-0.9,0.101534,0.161812,0.135366,0.146037,0.133163,0.135582
