In [23]:
import numpy as np
import math
from math import prod
from decimal import Decimal, getcontext
getcontext().prec = 13

---


# Functions


## Compute User & item prior


In [None]:
def compute_priors(ratings,plausible_rating, alpha=0.01, R=8):
    num_users = len(ratings)
    num_items = len(ratings[0])
    rating_values = plausible_rating

    prior_userbased = [[0 for _ in range(num_items)] for _ in rating_values]
    prior_itembased = [[0 for _ in range(num_users)] for _ in rating_values]

    y_index = 0
    for y in rating_values:
        y_index = y_index

        # Prior user-based (per item j)
        for j in range(num_items):
            count_y = 0
            count_nonzero = 0
            for u in range(num_users):
                r = ratings[u][j]   
                if r != 0:
                    count_nonzero += 1
                    if r == y:
                        count_y += 1
            prior_userbased[y_index][j] = (count_y + alpha) / (count_nonzero + alpha * R)

        # Prior item-based (per user u)
        for u in range(num_users):
            count_y = 0
            count_nonzero = 0
            for j in range(num_items):
                r = ratings[u][j]
                if r != 0:
                    count_nonzero += 1
                    if r == y:
                        count_y += 1
            prior_itembased[y_index][u] = (count_y + alpha) / (count_nonzero + alpha * R)
        y_index = y_index + 1

    return prior_userbased, prior_itembased

## Compute likelihood User


In [None]:
def compute_likelihood_userbased(ratings, u, i, y, alpha=0.01, R=8):
    num_users = len(ratings)
    num_items = len(ratings[0])
    Iu = [j for j in range(num_items) if j != i and ratings[u][j] != 0]
    product = Decimal(1.0)

    for j in Iu:
        k = ratings[u][j]
        count_joint = 0
        count_cond = 0
        for v in range(num_users):
            if ratings[v][i] == y:
                if ratings[v][j] != 0:
                    count_cond += 1
                    if ratings[v][j] == k:
                        count_joint += 1
        prob = (count_joint + alpha) / (count_cond + alpha * R)
        product *= Decimal(prob)
        # prob_list.append(prob)
    # print("======")
    # print(product, end="\n\n")

    return product


## Compute likelihood Item


In [None]:
def compute_likelihood_itembased(ratings, u, i, y, alpha=0.01, R=8):
    num_users = len(ratings)
    num_items = len(ratings[0])
    Ui = [v for v in range(num_users) if v != u and ratings[v][i] != 0]
    product = Decimal(1.0)

    for v in Ui:
        k = ratings[v][i]
        count_joint = 0
        count_cond = 0
        for j in range(num_items):
            if ratings[u][j] == y:
                if ratings[v][j] != 0:
                    count_cond += 1
                    if ratings[v][j] == k:
                        count_joint += 1
        prob = (count_joint + alpha) / (count_cond + alpha * R)
        product *= Decimal(prob)
        # print(prob)
        # print("======")
        # prob_list.append(prob)

    return product


# Predict Functions


In [None]:
def predict_rating(ratings, u, i, prior_userbased, prior_itembased,plausible_rating, alpha=0.01):
    scores = []
    all_likelihood_user = []  
    all_likelihood_item = []
    R = len(plausible_rating)  

    y_index = 0
    for y in plausible_rating:
        prior_user = prior_userbased[y_index][i]
        prior_item = prior_itembased[y_index][u]
    

        likelihood_user = compute_likelihood_userbased(ratings, u, i, y, alpha, R)
        likelihood_item = compute_likelihood_itembased(ratings, u, i, y, alpha, R)
        
        # simpan sebagai justifikasi
        all_likelihood_user.append(likelihood_user)
        all_likelihood_item.append(likelihood_item)
        
        
        len_Iu = sum(1 for j in range(len(ratings[0])) if ratings[u][j] != 0)
        len_Ui = sum(1 for v in range(len(ratings)) if ratings[v][i] != 0)

        score_item = (Decimal(prior_item) * likelihood_item) ** Decimal(1 / (1 + len_Ui)) if len_Ui > 0 else 0
        score_user = (Decimal(prior_user) * likelihood_user) ** Decimal(1 / (1 + len_Iu)) if len_Iu > 0 else 0

        score = score_user * score_item

        scores.append(score)
        y_index += 1

    predicted_rating = plausible_rating[scores.index(max(scores))]

    return predicted_rating, {
        'scores': scores,
        'likelihood_user': all_likelihood_user,
        'likelihood_item': all_likelihood_item
    }


## Load Data Full


Untuk justifikasi tain test split


In [28]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

In [29]:

def load_filmtrust_full(path):
    df = pd.read_csv(path, sep=' ', engine='python', names=['user', 'item', 'rating'])

    num_users = df['user'].nunique()
    num_items = df['item'].nunique()

    user_map = {uid: idx for idx, uid in enumerate(df['user'].unique())}
    item_map = {iid: idx for idx, iid in enumerate(df['item'].unique())}

    ratings = np.zeros((num_users, num_items))
    for _, row in df.iterrows():
        u = user_map[row['user']]
        i = item_map[row['item']]
        ratings[u][i] = row['rating']

    return ratings

ratings_full = load_filmtrust_full("./film-trust/ratings.txt")


In [30]:
ratings_full

array([[2. , 4. , 3.5, ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 4. , 0. , ..., 0. , 0. , 0. ],
       [1.5, 3. , 2. , ..., 0. , 0. , 0. ]], shape=(1508, 2071))

# Train Test Split


In [44]:
full_df = pd.read_csv('./film-trust/ratings.txt', sep=' ', names=['user', 'item', 'rating'])
user_ids = full_df['user'].unique()
item_ids = full_df['item'].unique()
user_map = {uid: idx for idx, uid in enumerate(user_ids)}
item_map = {iid: idx for idx, iid in enumerate(item_ids)}

# Split dataset
full_df['user_idx'] = full_df['user'].map(user_map)
full_df['item_idx'] = full_df['item'].map(item_map)
train_df, test_df = train_test_split(full_df[['user_idx', 'item_idx', 'rating']], test_size=0.2, random_state=42)

print(f"\nTrain set: {len(train_df)} ratings ({len(train_df)/len(full_df)*100:.1f}%)")
print(f"Test set: {len(test_df)} ratings ({len(test_df)/len(full_df)*100:.1f}%)")
train_df.to_csv('./film-trust/train.txt', sep=' ', header=False, index=False)
test_df.to_csv('./film-trust/test.txt', sep=' ', header=False, index=False)


Train set: 28397 ratings (80.0%)
Test set: 7100 ratings (20.0%)


## Get Plausible Ratings


In [None]:
temp_df = pd.read_csv("./film-trust/ratings.txt", sep=' ', engine='python', names=['rating'])
plausible_rating = temp_df['rating'].unique()
plausible_rating.sort()
plausible_rating

array([2. , 4. , 3.5, 3. , 2.5, 0.5, 1. , 1.5])

In [33]:
pR = plausible_rating
num_r = len(pR)
num_r

8

## Load train and test data


In [34]:
def load_filmtrust_train_make_matrix(path, user_map, item_map):
    df = pd.read_csv(path, sep=' ', engine='python', names=['user_idx', 'item_idx', 'rating'])

    num_users = len(user_map)
    num_items = len(item_map)

    ratings = np.zeros((num_users, num_items))
    for _, row in df.iterrows():
        u = int(row['user_idx']) 
        i = int(row['item_idx']) 
        ratings[u][i] = row['rating']

    return ratings


In [35]:
ratings_train = load_filmtrust_train_make_matrix('./film-trust/train.txt', user_map, item_map)

In [36]:
ratings_train

array([[2. , 4. , 3.5, ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 4. , 0. , ..., 0. , 0. , 0. ],
       [1.5, 3. , 2. , ..., 0. , 0. , 0. ]], shape=(1508, 2071))

In [37]:
def load_test_ratings(path):
    data = np.loadtxt(path, dtype={'names': ('u', 'i', 'r'), 'formats': (int, int, float)})
    test = np.array(
        [(int(row[0]), int(row[1]), float(row[2])) for row in data],
        dtype=[('u', int), ('i', int), ('r', float)]
    )
    return test


In [38]:
test_set = load_test_ratings("./film-trust/test.txt")

In [39]:
test_set

array([( 470,   5, 3.5), (1033, 902, 3. ), (1248, 256, 3. ), ...,
       (1082, 256, 2. ), ( 570,  11, 4. ), ( 525,   3, 3. )],
      shape=(7100,), dtype=[('u', '<i8'), ('i', '<i8'), ('r', '<f8')])

In [40]:
# y_index = 0
# all_likelihood_user = []
# all_likelihood_item = []
# for y in pR:
#     likelihood_user = compute_likelihood_userbased(ratings_train, 0, 10, y, 0.01, 8)
#     likelihood_item = compute_likelihood_itembased(ratings_train, 0, 10, y, 0.01, 8)
#     all_likelihood_user.append(likelihood_user)
#     all_likelihood_item.append(likelihood_item)
#     y_index = y_index + 1


## Precompute User & Item Prior


In [41]:

prior_userbased, prior_itembased = compute_priors(ratings_train,pR)

In [42]:
prior_userbased

[[0.0945383115732262,
  0.08093762001426455,
  0.08149996604875398,
  0.05650749416283799,
  0.05937894190101654,
  0.06516936671575847,
  0.07220724839967271,
  0.059255403173691386,
  0.052220526823000324,
  0.13445251058681185,
  0.0342651630921642,
  0.11092342342342343,
  0.07631779741845234,
  0.125,
  0.001968503937007874,
  0.0024509803921568627,
  0.08613592573130145,
  0.125,
  0.4855769230769231,
  0.1426553672316384,
  0.0032467532467532465,
  0.9351851851851851,
  0.004807692307692308,
  0.0032467532467532465,
  0.009259259259259259,
  0.0009025270758122744,
  0.4855769230769231,
  0.16611842105263158,
  0.24754901960784315,
  0.004807692307692308,
  0.09115523465703972,
  0.9351851851851851,
  0.004807692307692308,
  0.004807692307692308,
  0.009259259259259259,
  0.4855769230769231,
  0.9351851851851851,
  0.3279220779220779,
  0.004807692307692308,
  0.009259259259259259,
  0.009259259259259259,
  0.0032467532467532465,
  0.0024509803921568627,
  0.004807692307692308,
 

In [43]:
prior_itembased

[[0.11123348017621146,
  0.9351851851851851,
  0.07929058995611896,
  0.001968503937007874,
  0.009259259259259259,
  0.0032467532467532465,
  0.09115523465703972,
  0.004807692307692308,
  0.009259259259259259,
  0.001968503937007874,
  0.0032467532467532465,
  0.001968503937007874,
  0.0008278145695364238,
  0.0014124293785310734,
  0.125,
  0.11123348017621146,
  0.009259259259259259,
  0.125,
  0.11123348017621146,
  0.4855769230769231,
  0.00026260504201680677,
  0.0011013215859030838,
  0.1426553672316384,
  0.004807692307692308,
  0.14629990262901654,
  0.0032467532467532465,
  0.10534591194968554,
  0.21601941747572817,
  0.027238403451995685,
  0.04776615969581749,
  0.19881889763779528,
  0.0011013215859030838,
  0.1426553672316384,
  0.26591511936339524,
  0.19881889763779528,
  0.00041528239202657814,
  0.023444753946146705,
  0.10004990019960079,
  0.0008278145695364238,
  0.009259259259259259,
  0.19881889763779528,
  0.001968503937007874,
  0.19881889763779528,
  0.00196

In [47]:
pred, _ = predict_rating(ratings_train, 1037 ,6, prior_userbased, prior_itembased,plausible_rating = pR)
_


{'scores': [Decimal('0.1158065813251'),
  Decimal('0.1079453491863'),
  Decimal('0.1110624313170'),
  Decimal('0.1259315126142'),
  Decimal('0.1236587616836'),
  Decimal('0.02628047868470'),
  Decimal('0.04041058762262'),
  Decimal('0.05716386647242')],
 'likelihood_user': [Decimal('4.953941841579E-31'),
  Decimal('1.042355391833E-32'),
  Decimal('4.075230125741E-32'),
  Decimal('2.936780571332E-30'),
  Decimal('3.024733928038E-30'),
  Decimal('4.606333805309E-52'),
  Decimal('7.250502927260E-46'),
  Decimal('4.674104564328E-41')],
 'likelihood_item': [Decimal('1.008404276527E-1028'),
  Decimal('6.913488986211E-1068'),
  Decimal('2.401618724702E-1115'),
  Decimal('1.291350023815E-1099'),
  Decimal('2.247207083791E-1049'),
  Decimal('3.405813085030E-751'),
  Decimal('3.405813085030E-751'),
  Decimal('2.129323099769E-1118')]}

In [48]:
pred

np.float64(3.0)

## Loop prediction for each data test


In [None]:


# Prediksi dan evaluasi
from tqdm import tqdm

y_true = []
y_pred = []

for u, i, actual in tqdm(test_set):
    pred, _ = predict_rating(ratings_train, u, i, prior_userbased, prior_itembased,plausible_rating= pR)
    y_true.append(actual)
    y_pred.append(pred)

## Export to evaluation


In [None]:
import pandas as pd

df_results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
df_results.to_csv('./film-trust/predictions.csv', index=False)

In [None]:
df_test = pd.DataFrame(test_set)
df_test.to_csv('test_set.csv', index=False)

In [None]:

df_full = pd.DataFrame(ratings_full)
df_full.to_csv('ratings_full.csv', index=False)

df_train = pd.DataFrame(ratings_train)
df_train.to_csv('ratings_train.csv', index=False)

df_test = pd.DataFrame(test_set)
df_test.to_csv('test_set.csv', index=False)



In [None]:
df_prior_userbased = pd.DataFrame(prior_userbased)
df_prior_userbased.to_csv('prior_userbased.csv', index=False)

df_prior_itembased = pd.DataFrame(prior_itembased)
df_prior_itembased.to_csv('prior_itembased.csv', index=False)



---


In [None]:

y_pred = pd.read_csv('./film-trust/predictions.csv')['y_pred'].tolist()
y_true = pd.read_csv('./film-trust/predictions.csv')['y_true'].tolist()

## Using Library


In [54]:
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(y_true, y_pred)
print("MAE :", mae)


MAE : 0.9229577464788732


## Using Manual


### Mae Dari rumus PPT


In [59]:

total_error = 0
n = len(y_true)

for actual, pred in zip(y_true, y_pred):
    total_error += abs(actual - pred)

mae = total_error / n
print(f"MAE : {mae}")

MAE : 0.9229577464788732


### Mae Dari rumus Paper


In [55]:
user_data = {}

# proses penyusunan ulang rate predict & rate asli agar mudah diakses
for i in range(len(test_set)): # loop sebanyak panjang test set min (1508)
    user_id, _, actual = test_set[i] # var penampung user & rate asli
    # print(user_id,actual)
    predicted = y_pred[i] # var penampung predict rating
    if user_id not in user_data: # cek apakah user id U ada di user data agar menghindari error oengisian kosong
        user_data[user_id] = []
    
    user_data[user_id].append((actual, predicted)) # tambah rating asli $ predict ke user data


#penghitungan MAE
mae_total = 0 # MAE
for user_id in user_data: # akses setiap user pada user data
    total_abs_error = 0 # penampung nilai error mutlak/user
    ratings = user_data[user_id] # ambil nilai rating pred & actual
    
    for actual, predicted in ratings: # indexing nilai rating pred & actual setiap item pada user U
        total_abs_error += abs(actual - predicted) # r u,i (pred) - r u,i (actual)
        print(f"pred {predicted}, actual {actual}")
    
    #MAEu
    mae_user = total_abs_error / len(ratings) #hitung MAEu

    mae_total += mae_user # tamping MAEu ke MAE untuk proses perhitungan MAE nantinya
    print(f"User {user_id}: MAE = {mae_user} (dari {len(ratings)} item)")



pred 3.5, actual 3.5
pred 2.5, actual 2.5
pred 3.0, actual 2.0
pred 2.5, actual 2.0
User 470: MAE = 0.375 (dari 4 item)
pred 0.5, actual 3.0
pred 0.5, actual 4.0
pred 0.5, actual 4.0
pred 0.5, actual 3.0
pred 4.0, actual 2.5
pred 0.5, actual 3.0
pred 3.5, actual 3.5
pred 0.5, actual 4.0
pred 3.5, actual 2.5
pred 4.0, actual 3.0
pred 0.5, actual 4.0
pred 1.0, actual 4.0
pred 2.5, actual 4.0
pred 3.5, actual 3.5
pred 0.5, actual 3.0
User 1033: MAE = 2.1333333333333333 (dari 15 item)
pred 3.5, actual 3.0
pred 3.5, actual 3.0
pred 0.5, actual 3.0
pred 0.5, actual 3.5
pred 0.5, actual 1.5
pred 3.5, actual 3.0
pred 3.0, actual 3.5
pred 3.0, actual 3.0
pred 3.0, actual 3.5
pred 0.5, actual 1.0
pred 3.5, actual 3.5
pred 0.5, actual 4.0
pred 4.0, actual 0.5
pred 0.5, actual 3.5
pred 0.5, actual 2.5
pred 2.5, actual 4.0
pred 3.0, actual 3.0
pred 0.5, actual 2.0
pred 0.5, actual 1.5
pred 3.0, actual 3.0
pred 3.5, actual 4.0
pred 0.5, actual 1.0
pred 4.0, actual 1.5
pred 0.5, actual 4.0
pred 0.5, 

In [56]:
# Langkah 3: hitung MAE keseluruhan dari rata-rata MAE user
overall_mae = mae_total / len(user_data)
# print(y_pred)
print(f"MAE Tot : {overall_mae}")

MAE Tot : 0.8500413081877337
