In [1]:
# import

import pandas as pd 
import numpy as np 
import math as math
import joblib
from joblib import dump
import os
import time

In [2]:
import numpy as np
import pandas as pd

def TetanggaK4(target_user, target_item, rating_matrix_np, similarity_np, k, jenis="user-based"):
    """
    Return the positions of the K nearest neighbours (users or items) by similarity.

    Candidates are limited to neighbours holding a non-zero rating that is
    relevant to the prediction:

    * ``jenis == "user-based"``: every other user with a non-zero rating for
      ``target_item``, ranked by ``similarity_np[target_user, candidate]``.
    * any other ``jenis`` value (item-based): every other item with a non-zero
      rating from ``target_user``, ranked by
      ``similarity_np[candidate, target_item]``.

    Parameters
    ----------
    target_user : int
        Row position of the target user in ``rating_matrix_np``.
    target_item : int
        Column position of the target item in ``rating_matrix_np``.
    rating_matrix_np : numpy.ndarray
        2-D array indexed as ``[user, item]``; a value of 0 means "not rated".
    similarity_np : numpy.ndarray
        2-D similarity array (user-user or item-item, matching ``jenis``).
    k : int
        Maximum number of neighbours to return.
    jenis : str
        ``"user-based"`` selects user neighbours; anything else selects item
        neighbours.

    Returns
    -------
    numpy.ndarray
        1-D integer array with at most ``k`` candidate positions, ordered from
        most to least similar. Ties keep ascending position order. The array is
        empty when there is no candidate or when ``k <= 0``.
    """
    if jenis == "user-based":
        # Users who rated the target item, excluding the target user.
        item_ratings = rating_matrix_np[:, target_item]
        posisi = np.arange(len(item_ratings))
        tetangga_idx = np.where((item_ratings != 0) & (posisi != target_user))[0]
        sim_scores = similarity_np[target_user, tetangga_idx]
    else:  # item-based
        # Items rated by the target user, excluding the target item.
        user_ratings = rating_matrix_np[target_user, :]
        posisi = np.arange(rating_matrix_np.shape[1])
        tetangga_idx = np.where((user_ratings != 0) & (posisi != target_item))[0]
        sim_scores = similarity_np[tetangga_idx, target_item]

    # Always hand back an ndarray (never a plain list) so callers can index
    # with the result unconditionally. A non-positive k means "no neighbours";
    # slicing with a negative k would silently drop the *least* similar ones.
    if k <= 0 or len(sim_scores) == 0:
        return tetangga_idx[:0]

    # Stable sort on the negated scores: descending similarity with a
    # well-defined order among equal scores.
    urutan = np.argsort(-sim_scores, kind="stable")[:k]
    return tetangga_idx[urutan]


def prediksiCF_v2_optimized(RatingMatriks, similarityFunction, mean, meanCen, user, item, k=2, jenis="user-based"):
    """
    Predict a single rating with neighbourhood-based collaborative filtering.

    The prediction is the baseline mean plus the similarity-weighted average of
    the neighbours' mean-centred ratings, normalised by the sum of absolute
    similarities.

    Parameters
    ----------
    RatingMatriks, similarityFunction, mean, meanCen
        Objects exposing ``.values`` (e.g. pandas containers) holding the
        rating matrix, the similarity matrix, the per-user or per-item means
        and the mean-centred ratings respectively.
    user, item : int
        Positional indices of the target user and item.
    k : int
        Neighbourhood size forwarded to ``TetanggaK4``.
    jenis : str
        ``"user-based"`` uses user neighbours and ``mean[user]`` as baseline;
        anything else uses item neighbours and ``mean[item]``.

    Returns
    -------
    float
        The predicted rating; the baseline mean when no neighbour exists or
        when the absolute similarities sum to zero.
    """
    berbasis_user = jenis == "user-based"

    # Pull the raw arrays once up front.
    matriks_rating = RatingMatriks.values
    matriks_sim = similarityFunction.values
    rata_rata = mean.values
    deviasi = meanCen.values

    nilai_dasar = rata_rata[user] if berbasis_user else rata_rata[item]

    idx_tetangga = TetanggaK4(user, item, matriks_rating, matriks_sim, k, jenis)
    if len(idx_tetangga) == 0:
        return float(nilai_dasar)

    # Similarity weights and mean-centred ratings of the selected neighbours.
    if berbasis_user:
        bobot = matriks_sim[user, idx_tetangga]
        selisih = deviasi[idx_tetangga, item]
    else:
        bobot = matriks_sim[idx_tetangga, item]
        selisih = deviasi[user, idx_tetangga]

    jumlah_tertimbang = np.dot(selisih, bobot)
    total_bobot = np.sum(np.abs(bobot))

    if total_bobot != 0:
        hasil = nilai_dasar + (jumlah_tertimbang / total_bobot)
    else:
        hasil = nilai_dasar
    return float(hasil)


def prediksi_semua_matriks2(RatingMatriks, similarityFunction, mean, meanCen, k=2, jenis="user-based"):
    """
    Fill every empty (zero) rating in the matrix with its CF prediction.

    Parameters
    ----------
    RatingMatriks : pandas.DataFrame
        Rating matrix indexed as ``[user, item]``; a value of 0 marks a missing
        rating. It is not modified.
    similarityFunction, mean, meanCen
        Passed through unchanged to ``prediksiCF_v2_optimized``.
    k : int
        Neighbourhood size.
    jenis : str
        ``"user-based"`` or item-based (any other value), forwarded as-is.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame with the same index and columns as
        ``RatingMatriks``; non-zero ratings are kept and zero cells are
        replaced by predictions.
    """
    # Work on a float NumPy copy. Writing float predictions cell by cell into
    # a frame that still carries the input dtype (e.g. int) relies on implicit
    # upcasting, which pandas has deprecated, and per-cell `.iloc` access is
    # far slower than plain array indexing.
    hasil_np = RatingMatriks.to_numpy(dtype=float, copy=True)

    # Positions of missing ratings, in the same row-major order as a nested
    # user/item loop. Predictions always read from the original RatingMatriks,
    # so earlier fills never influence later ones.
    for user, item in np.argwhere(hasil_np == 0):
        hasil_np[user, item] = prediksiCF_v2_optimized(
            RatingMatriks=RatingMatriks,
            similarityFunction=similarityFunction,
            mean=mean,
            meanCen=meanCen,
            user=int(user),
            item=int(item),
            k=k,
            jenis=jenis
        )

    return pd.DataFrame(hasil_np, index=RatingMatriks.index, columns=RatingMatriks.columns)


In [3]:
def simpan_semua_prediksi_k(
    RatingMatriks, similarityFunction, mean, meanCen,
    variasi_k, jenis="item-based",
    folder_output="../case/prediksiModel/RJ/ub/2"
):
    """
    Compute the full prediction matrix for each k and save each one with joblib.

    For every value in ``variasi_k`` the matrix returned by
    ``prediksi_semua_matriks2`` is dumped to ``<folder_output>/<k>.joblib`` and
    a confirmation line is printed.

    Parameters
    ----------
    RatingMatriks, similarityFunction, mean, meanCen
        Forwarded unchanged to ``prediksi_semua_matriks2``.
    variasi_k : iterable of int
        Neighbourhood sizes to evaluate.
    jenis : str
        CF variant forwarded to ``prediksi_semua_matriks2``.
    folder_output : str
        Target directory; created if missing.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): the default ``folder_output`` contains ``ub`` while the
    default ``jenis`` is ``"item-based"``; the defaults look mismatched, so
    pass both explicitly. TODO confirm the intended default.
    """
    # Create the output directory once, before any expensive computation, so
    # an unusable path fails immediately instead of after a full prediction pass.
    os.makedirs(folder_output, exist_ok=True)

    for k in variasi_k:
        # Compute the predictions for this neighbourhood size.
        prediksi = prediksi_semua_matriks2(
            RatingMatriks=RatingMatriks,
            similarityFunction=similarityFunction,
            mean=mean,
            meanCen=meanCen,
            k=k,
            jenis=jenis
        )

        # One file per k, named after the k value.
        path_file = os.path.join(folder_output, f"{k}.joblib")

        # Persist the prediction matrix.
        joblib.dump(prediksi, path_file)
        print(f"✔️ Prediksi untuk k={k} disimpan di: {path_file}")

In [12]:
# Load the precomputed inputs for this run from disk.
# The paths suggest fold 5 of the item-based ("ib") variant — TODO confirm.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
ratingMatriks = joblib.load("../case/ratingMatriks/u5.joblib")
# NOTE(review): `similartias` looks like a typo of "similaritas"; the name is
# kept because the driver cell below passes it by this name.
similartias = joblib.load("../case/sim/relevantJ2/ib/simRJFold5.joblib")
mean = joblib.load("../case/mean/ib/meanFold5.joblib")
meanCen = joblib.load("../case/meanCen/ib/meanCen5.joblib")

In [13]:
# Generate item-based predictions for each neighbourhood size in `variasi_k`
# and save one `<k>.joblib` file per size under `folder_output`.
simpan_semua_prediksi_k(
    RatingMatriks=ratingMatriks,
    similarityFunction=similartias,
    mean=mean,
    meanCen=meanCen,
    variasi_k=[5, 10, 15, 18, 20, 25, 30, 40, 50, 100, 200],
    jenis="item-based",
    folder_output="../case/prediksiModel/RJ/ib/5"
)

✔️ Prediksi untuk k=5 disimpan di: ../case/prediksiModel/RJ/ib/5\5.joblib
✔️ Prediksi untuk k=10 disimpan di: ../case/prediksiModel/RJ/ib/5\10.joblib
✔️ Prediksi untuk k=15 disimpan di: ../case/prediksiModel/RJ/ib/5\15.joblib
✔️ Prediksi untuk k=18 disimpan di: ../case/prediksiModel/RJ/ib/5\18.joblib
✔️ Prediksi untuk k=20 disimpan di: ../case/prediksiModel/RJ/ib/5\20.joblib
✔️ Prediksi untuk k=25 disimpan di: ../case/prediksiModel/RJ/ib/5\25.joblib
✔️ Prediksi untuk k=30 disimpan di: ../case/prediksiModel/RJ/ib/5\30.joblib
✔️ Prediksi untuk k=40 disimpan di: ../case/prediksiModel/RJ/ib/5\40.joblib
✔️ Prediksi untuk k=50 disimpan di: ../case/prediksiModel/RJ/ib/5\50.joblib
✔️ Prediksi untuk k=100 disimpan di: ../case/prediksiModel/RJ/ib/5\100.joblib
✔️ Prediksi untuk k=200 disimpan di: ../case/prediksiModel/RJ/ib/5\200.joblib
