# 1. Library


In [1]:
import numpy as np

---


# 2. Code based on formula


In [None]:
ratings = np.array([
    [np.nan, 1, 2, 2, 5, np.nan, 4, 3, 5]   , 
    [1, 5, 3, np.nan, 2, 3, 4, 3, np.nan], 
    [1, 1, 2, np.nan, 2, 4, 4, 5, np.nan], 
    [3, 2, 2, 3, np.nan, 1, 3, 2, np.nan], 
    [5, 1, 5, 5, 4, 4, 5, 2, np.nan], 
])

In [3]:
ratings.shape

(5, 9)

In [5]:
num_users, num_items = ratings.shape
num_ratings = 5 
alpha = 0.01

## 2.1 User based Prior


In [None]:
# =============================================
# STEP 1: Hitung User Prior (PUP) dan UC
# =============================================
def calculate_pup(ratings, alpha, num_ratings):
    UC = np.sum(~np.isnan(ratings), axis=1) + alpha * num_ratings
    pup = np.zeros((num_users, num_ratings + 1))  # Indeks 0 tidak digunakan
    
    for u in range(num_users):
        for r in range(1, num_ratings + 1):
            count = np.sum(ratings[u, :] == r)
            pup[u, r] = (count + alpha) / UC[u]
    
    return pup, UC

pup, UC = calculate_pup(ratings, alpha, num_ratings)

In [7]:
pup

array([[0.        , 0.14326241, 0.28510638, 0.14326241, 0.14326241,
        0.28510638],
       [0.        , 0.14326241, 0.14326241, 0.42695035, 0.14326241,
        0.14326241],
       [0.        , 0.28510638, 0.28510638, 0.00141844, 0.28510638,
        0.14326241],
       [0.        , 0.14326241, 0.42695035, 0.42695035, 0.00141844,
        0.00141844],
       [0.        , 0.12546584, 0.12546584, 0.00124224, 0.24968944,
        0.49813665]])

In [8]:
UC

array([7.05, 7.05, 7.05, 7.05, 8.05])

## 2.2 Item based Prior


In [9]:
# =============================================
# STEP 2: Hitung Item Prior (PIP) dan IC
# =============================================
def calculate_pip(ratings, alpha, num_ratings):
    IC = np.sum(~np.isnan(ratings), axis=0) + alpha * num_ratings
    pip = np.zeros((num_items, num_ratings + 1))
    
    for i in range(num_items):
        for r in range(1, num_ratings + 1):
            count = np.sum(ratings[:, i] == r)
            pip[i, r] = (count + alpha) / IC[i]
    
    return pip, IC

pip, IC = calculate_pip(ratings, alpha, num_ratings)

In [10]:
pip

array([[0.        , 0.4962963 , 0.00246914, 0.24938272, 0.00246914,
        0.24938272],
       [0.        , 0.5960396 , 0.2       , 0.0019802 , 0.0019802 ,
        0.2       ],
       [0.        , 0.0019802 , 0.5960396 , 0.2       , 0.0019802 ,
        0.2       ],
       [0.        , 0.00327869, 0.33114754, 0.33114754, 0.00327869,
        0.33114754],
       [0.        , 0.00246914, 0.4962963 , 0.00246914, 0.24938272,
        0.24938272],
       [0.        , 0.24938272, 0.00246914, 0.24938272, 0.4962963 ,
        0.00246914],
       [0.        , 0.0019802 , 0.0019802 , 0.2       , 0.5960396 ,
        0.2       ],
       [0.        , 0.0019802 , 0.3980198 , 0.3980198 , 0.0019802 ,
        0.2       ],
       [0.        , 0.00952381, 0.00952381, 0.00952381, 0.00952381,
        0.96190476]])

In [11]:
IC

array([4.05, 5.05, 5.05, 3.05, 4.05, 4.05, 5.05, 5.05, 1.05])

## 2.3 User Based Likelihood


In [12]:
# =============================================
# STEP 3: Hitung Conditional Item Probability (CIP)
# CIP[i, y, j, k] = P(item j diberi rating k | item i diberi rating y)
# =============================================
def calculate_cip(ratings, alpha, num_ratings):
    cip = np.full((num_items, num_ratings+1, num_items, num_ratings+1), alpha)
    
    for i in range(num_items):
        for y in range(1, num_ratings + 1):
            # Ambil user yang memberi rating y ke item i
            users_i_y = np.where(ratings[:, i] == y)[0]
            if len(users_i_y) == 0: continue
            
            for j in range(num_items):
                if i == j: continue
                # Hitung frekuensi rating k di item j oleh user yang sama
                ratings_j = ratings[users_i_y, j]
                for k in range(1, num_ratings + 1):
                    count = np.sum(ratings_j == k)
                    total = np.sum(~np.isnan(ratings_j))
                    cip[i, y, j, k] = (count + alpha) / (total + alpha * num_ratings)
    
    return cip

cip = calculate_cip(ratings, alpha, num_ratings)

In [13]:
cip

array([[[[0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         ...,
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ]],

        [[0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.49268293, 0.00487805, 0.00487805, 0.00487805,
          0.49268293],
         [0.01      , 0.00487805, 0.49268293, 0.49268293, 0.00487805,
          0.00487805],
         ...,
         [0.01      , 0.00487805, 0.00487805, 0.00487805, 0.9804878 ,
          0.00487805],
         [0.01      , 0.00487805, 0.0048

## 2.3 Item Based Likelihood


In [14]:
# =============================================
# STEP 4: Hitung Conditional User Probability (CUP)
# CUP[u, y, v, k] = P(user v memberi rating k | user u memberi rating y)
# =============================================
def calculate_cup(ratings, alpha, num_ratings):
    cup = np.full((num_users, num_ratings+1, num_users, num_ratings+1), alpha)
    
    for u in range(num_users):
        for y in range(1, num_ratings + 1):
            # Ambil item yang diberi rating y oleh user u
            items_u_y = np.where(ratings[u, :] == y)[0]
            if len(items_u_y) == 0: continue
            
            for v in range(num_users):
                if u == v: continue
                # Hitung frekuensi rating k oleh user v di item yang sama
                ratings_v = ratings[v, items_u_y]
                for k in range(1, num_ratings + 1):
                    count = np.sum(ratings_v == k)
                    total = np.sum(~np.isnan(ratings_v))
                    cup[u, y, v, k] = (count + alpha) / (total + alpha * num_ratings)
    
    return cup

cup = calculate_cup(ratings, alpha, num_ratings)

In [15]:
cup

array([[[[0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ]],

        [[0.01      , 0.01      , 0.01      , 0.01      , 0.01      ,
          0.01      ],
         [0.01      , 0.00952381, 0.00952381, 0.00952381, 0.00952381,
          0.96190476],
         [0.01      , 0.96190476, 0.00952381, 0.00952381, 0.00952381,
          0.00952381],
         [0.01      , 0.00952381, 0.96190476, 0.00952381, 0.00952381,
          0.00952381],
         [0.01      , 0.96190476, 0.00952381, 0.00952381, 0.00952381,
          0.00952381]],

        [[0.01      , 0.01      , 0.01      , 0.01      , 0.01    

---


# 2. Code based on Algorithm


In [6]:
import numpy as np

ratings = np.array([
    [0, 1, 2, 2, 5, 0, 4, 3, 5], 
    [1, 5, 3, 0, 2, 3, 4, 3, 0], 
    [1, 1, 2, 0, 2, 4, 4, 5, 0], 
    [3, 2, 2, 3, 0, 1, 3, 2, 0], 
    [5, 1, 5, 5, 4, 4, 5, 2, 0], 
])

alpha = 0.01
rating_values = np.array([1, 2, 3, 4, 5]) 
num_ratings = len(rating_values)
num_users, num_items = ratings.shape

PUP = np.ones((num_users, num_ratings)) * alpha  
PIP = np.ones((num_items, num_ratings)) * alpha 
UC = np.ones(num_users) * num_ratings * alpha  
IC = np.ones(num_items) * num_ratings * alpha  

CUP = np.ones((num_users, num_users, num_ratings, num_ratings)) * alpha
CIP = np.ones((num_items, num_items, num_ratings, num_ratings)) * alpha
UVC = np.ones((num_users, num_users, num_ratings)) * num_ratings * alpha
IJC = np.ones((num_items, num_items, num_ratings)) * num_ratings * alpha 

def update_model(ratings):
    for u in range(num_users):
        for i in range(num_items):
            # if ratings[u, i] == 0: 
            #     continue
            y = ratings[u, i] - 1 
            
            PUP[u, y] = (UC[u] * PUP[u, y] + 1) / (UC[u] + 1)
            UC[u] += 1
            
            PIP[i, y] = (IC[i] * PIP[i, y] + 1) / (IC[i] + 1)
            IC[i] += 1
            
            for j in range(num_items):
                if j == i or ratings[u, j] == 0:
                    continue
                k = ratings[u, j] - 1
                CIP[j, i, k, y] = (IJC[j, i, y] * CIP[j, i, k, y] + 1) / (IJC[j, i, y] + 1)
                IJC[j, i, y] += 1
            
            for v in range(num_users):
                if v == u or ratings[v, i] == 0:
                    continue
                k = ratings[v, i] - 1
                CUP[v, u, k, y] = (UVC[v, u, y] * CUP[v, u, k, y] + 1) / (UVC[v, u, y] + 1)
                UVC[v, u, y] += 1

update_model(ratings)


In [7]:
PUP

array([[0.49292683, 0.49888889, 0.13298137, 0.15042553, 0.97192868],
       [0.95285714, 0.2060396 , 0.51357212, 0.15042553, 0.66032565],
       [0.97585366, 0.46635449, 0.01      , 0.29085106, 0.4192081 ],
       [0.17363636, 0.70151818, 0.96953319, 0.01      , 0.29377003],
       [0.49292683, 0.13298137, 0.01      , 0.33727273, 0.98178504]])

---


In [None]:
import numpy as np

In [None]:
ratings = np.array([
    [0, 1, 2, 2, 5, 0, 4, 3, 5], 
    [1, 5, 3, 0, 2, 3, 4, 3, 0], 
    [1, 1, 2, 0, 2, 4, 4, 5, 0], 
    [3, 2, 2, 3, 0, 1, 3, 2, 0], 
    [5, 1, 5, 5, 4, 4, 5, 2, 0], 
], dtype=int)

In [None]:

def init_priors(num_users, num_items, num_ratings, alpha):
   
    pip = np.full((num_users, num_ratings), alpha)    
    pup = np.full((num_items, num_ratings), alpha)
    
    return pip, pup 


In [None]:
def update_item_prior(u, rating, pip, ic): 
    y = rating - 1
    pip[u][y] += 1
    ic[u] += 1
    return pip, ic

In [None]:
def update_user_prior(i, rating, pup, uc): 
    y = rating - 1 
    pup[i][y] += 1
    uc[i] += 1
    return pup, uc

In [None]:
def build_prior_model(ratings, alpha=0.01, num_ratings=5):
    num_users, num_items = ratings.shape
    
    pip, pup = init_priors(num_users, num_items, num_ratings, alpha) 

   
    ic = np.zeros(num_users) + num_ratings*alpha  
    uc = np.zeros(num_items) + num_ratings*alpha 
    
    for u in range(num_users):
        for i in range(num_items):
            rating = ratings[u][i]
            if rating == 0: 
                continue
            
            pip, ic = update_item_prior(u, rating, pip, ic)
            
            pup, uc = update_user_prior(i, rating, pup, uc)
    
    return pip, pup, ic, uc 

In [None]:
pip, pup, ic, uc = build_prior_model(ratings, alpha=0.01, num_ratings=5)

In [None]:
print(pup)
print("======")
print(uc)

In [None]:
print(pip)
print("======")
print(ic)

In [None]:
for u in range(pup.shape[0]):
    for y in range(pup.shape[1]):
        pup[u][y] /= (uc[u] )

for i in range(pip.shape[0]):
    for y in range(pip.shape[1]):
        pip[i][y] /= (ic[i] )

In [None]:
pup

In [None]:
pip

---


---


# Temporary Test (Belum Fix)


In [None]:
ratings = np.array([
    [0, 1, 2, 2, 5, 0, 4, 3, 5], 
    [1, 5, 3, 0, 2, 3, 4, 3, 0], 
    [1, 1, 2, 0, 2, 4, 4, 5, 0], 
    [3, 2, 2, 3, 0, 1, 3, 2, 0], 
    [5, 1, 5, 5, 4, 4, 5, 2, 0], 
], dtype=int)

In [None]:
def init_priors(num_users, num_items, num_ratings, alpha):
    pup = np.full((num_users, num_ratings), alpha)    
    pip = np.full((num_items, num_ratings), alpha)
    
    return pup, pip

In [None]:
def init_conditionals():
    cup = {}  # Format: (user_v, user_u, rating_y) -> probabilities
    cip = {}  # Format: (item_j, item_i, rating_y) -> probabilities
    
    return cup, cip

In [None]:
def update_itembase_prior(u, rating, pup, uc):
    y = rating - 1
    pup[u][y] += 1
    uc[u] += 1
    return pup, uc

In [None]:
def update_userbase_prior(i, rating, pip, ic):
    y = rating - 1 
    pip[i][y] += 1
    ic[i] += 1
    return pip, ic

In [None]:
def update_item_conditionals(u, i, rating, items_rated, cip, ijc, alpha, num_ratings):
    y = rating - 1
    for j in items_rated:
        if j == i: continue 
        
        # Dapatkan rating untuk item j
        rating_j = ratings[u][j] # type: ignore
        k = rating_j - 1
        
        
        key = (j, i, y)
        if key not in cip:
            cip[key] = np.full(num_ratings, alpha)
            ijc[key] = num_ratings * alpha
        
        cip[key][k] += 1
        ijc[key] += 1
    
    return cip, ijc

In [None]:
def update_user_conditionals(u, i, rating, users_rated, cup, uvc, alpha, num_ratings):
    y = rating - 1
    for v in users_rated:
        if v == u: continue 
        
        # Dapatkan rating dari user v
        rating_v = ratings[v][i] # type: ignore
        k = rating_v - 1
        
        # Update counter
        key = (v, u, y)
        if key not in cup:
            cup[key] = np.full(num_ratings, alpha)
            uvc[key] = num_ratings * alpha
        
        cup[key][k] += 1
        uvc[key] += 1
    
    return cup, uvc

In [None]:
def normalize_probabilities(pup, pip, cup, cip, uc, ic, ijc, uvc):

    for u in range(pup.shape[0]):
        for y in range(pup.shape[1]):
            pup[u][y] /= (uc[u] + 1e-9)

    for i in range(pip.shape[0]):
        for y in range(pip.shape[1]):
            pip[i][y] /= (ic[i] + 1e-9)
    
    for key in cip:
        total = ijc[key]
        cip[key] = cip[key] / (total + 1e-9)
    
    for key in cup:
        total = uvc[key]
        cup[key] = cup[key] / (total + 1e-9)
    
    return pup, pip, cup, cip

In [None]:
def build_nbcf_model(ratings, alpha=0.01, num_ratings=5):
    num_users, num_items = ratings.shape
    
    pup, pip = init_priors(num_users, num_items, num_ratings, alpha)
    cup, cip = init_conditionals()
    

    uc = np.zeros(num_users) + num_ratings*alpha  # User counter
    ic = np.zeros(num_items) + num_ratings*alpha   # Item counter
    ijc = {}  # Item-item counter
    uvc = {}  # User-user counter
    
    for u in range(num_users):
        for i in range(num_items):
            rating = ratings[u][i]
            # print(rating)
            if rating == 0: 
                continue
            
        
            pup, uc = update_userbase_prior(u, rating, pup, uc)
            pip, ic = update_itembase_prior(i, rating, pip, ic)
            
            items_rated = np.where(ratings[u] != 0)[0]  # Item yang sudah di-rating user
            cip, ijc = update_item_conditionals(u, i, rating, items_rated, cip, ijc, alpha, num_ratings)
            
            users_rated = np.where(ratings[:, i] != 0)[0]  # User yang sudah rating item ini
            cup, uvc = update_user_conditionals(u, i, rating, users_rated, cup, uvc, alpha, num_ratings)
    
    # pup, pip, cup, cip = normalize_probabilities(pup, pip, cup, cip, uc, ic, ijc, uvc)
    
    return pup, pip, cup, cip

In [None]:
pup, pip, cup, cip = build_nbcf_model(ratings, alpha=0.01)

In [None]:
cup

---
