In [11]:
import numpy as np

# Create User-Rating (Utility) Matrix

In [12]:
def create_utility_matrix(idx_row, idx_col, data, size):
    utility_matrix = np.empty(size)
    utility_matrix.fill(np.NaN)

    for i in range(len(data)):
        utility_matrix[idx_row[i], idx_col[i]] = data[i]

    return utility_matrix

In [13]:
# Fill util matrix
row = np.array([0, 0, 0, 0, 0, 
                1, 1, 1, 1, 1, 1,
                2, 2, 2, 2, 2, 2, 2, 2,
                3, 3, 3, 3, 3,
                4, 4, 4, 4, 4, 4,
                5, 5, 5, 5, 5])
col = np.array([0, 2, 5, 8, 10,
                2, 3, 6, 9, 10, 11,
                0, 1, 3, 4, 6, 8, 9, 10,
                1, 2, 4, 7, 10,
                2, 3, 4, 5, 10, 11,
                0, 2, 4, 7, 10])
data = np.array([1, 3, 5, 5, 4,
                 5, 4, 4, 2, 1, 3,
                 2, 4, 1, 2, 3, 4, 3, 5,
                 2, 4, 5, 4, 2,
                 4, 3, 4, 2, 2, 5,
                 1, 3, 3, 2, 4])

# Create utility matrix
util_matrix = create_utility_matrix(idx_row = row, 
                                    idx_col = col, 
                                    data = data, 
                                    size = (6, 12))

util_matrix

array([[ 1., nan,  3., nan, nan,  5., nan, nan,  5., nan,  4., nan],
       [nan, nan,  5.,  4., nan, nan,  4., nan, nan,  2.,  1.,  3.],
       [ 2.,  4., nan,  1.,  2., nan,  3., nan,  4.,  3.,  5., nan],
       [nan,  2.,  4., nan,  5., nan, nan,  4., nan, nan,  2., nan],
       [nan, nan,  4.,  3.,  4.,  2., nan, nan, nan, nan,  2.,  5.],
       [ 1., nan,  3., nan,  3., nan, nan,  2., nan, nan,  4., nan]])

# Distance Functions

In [14]:
def calc_similarity(x, y, types):
    x = x.copy()
    y = y.copy()
    n = len(x)

    if types == "jaccard":
        r_x = np.isnull(x)
        r_y = np.isnull(y)

        intersect_ = np.sum(~(r_x | r_y))
        union_ = np.sum(~(r_x & r_y))

        similarity = intersect_ / union_

    elif types == "cosine":
        num_ = np.dot(x, y)
        den_ = np.linalg.norm(x) * np.linalg.norm(y)

        similarity = num_/den_

    return similarity

# Find Neighbors

In [15]:
def normalize_utility_matrix(utility_matrix):
    utility_matrix = utility_matrix.copy()
    n_items = utility_matrix.shape[0]
    
    # cari mean
    m_item = np.nanmean(utility_matrix, axis = 1)

    # perbaiki row
    for i in range(n_items):
        data_i = utility_matrix[i]
        utility_matrix[i] = np.where(np.isnan(data_i), 0.0, data_i-m_item[i])

    return utility_matrix


In [16]:
def find_similarity(idx_item, utility_matrix):
    n_items, n_users = utility_matrix.shape

    # Find item-item similarity
    # iterate over rows
    target = utility_matrix[idx_item, :]

    sim_val = np.zeros(n_items)
    for i in range(n_items):
        data_i = utility_matrix[i, :]
        sim_i = calc_similarity(target, data_i, "cosine")
        sim_val[i] = sim_i

    return sim_val

# Predict Rating

In [17]:
def predict_rating(idx_user, idx_item, n_neighbors, utility_matrix):
    # overall mean item rating
    mu = np.nanmean(utility_matrix)
    b_x = np.nanmean(utility_matrix[:, idx_user]) - mu
    b_i = np.nanstd(utility_matrix[idx_item, :])
    b_xi = mu + b_x + b_i

    # find rating of targetted user
    rating_user = utility_matrix[:, idx_user]

    # Normalize utility_matrix
    util_matrix = normalize_utility_matrix(utility_matrix)

    # Find item similarity
    sim_items = find_similarity(idx_item, util_matrix)

    # Find neighbors
    idx_neighbors = np.argsort(sim_items)[::-1][1:n_neighbors+1]

    # Calculate rating
    num_ = 0
    den_ = 0
    for j in idx_neighbors:
        b_j = np.nanstd(utility_matrix[j, :])
        b_xj = mu + b_x + b_j
        num_ += sim_items[j] * (rating_user[j] - b_xj)
        den_ += sim_items[j]

    rating_user_item = b_xi + num_/den_

    return rating_user_item


# Show Times

In [18]:
idx_user = 4
idx_item = 0

pred_rating = predict_rating(idx_user = idx_user,
                             idx_item = idx_item,
                             n_neighbors = 2,
                             utility_matrix = util_matrix)
print(f"rating of user-{idx_user+1} to item-{idx_item+1}, r_({idx_item+1},{idx_user+1}) : {pred_rating:.1f}")

rating of user-5 to item-1, r_(1,5) : 3.0


In [19]:
idx_user = 0
idx_item = 0

pred_rating = predict_rating(idx_user = idx_user,
                             idx_item = idx_item,
                             n_neighbors = 2,
                             utility_matrix = util_matrix)
print(f"rating of user-{idx_user+1} to item-{idx_item+1}, r_({idx_item+1},{idx_user+1}) : {pred_rating:.1f}")

rating of user-1 to item-1, r_(1,1) : 1.8


*comment: user-6 has not been previously rated some/all of the most similar item to item-2* --> first rater

In [21]:
idx_user = 2

for i in range(6):
    pred_rating = predict_rating(idx_user = idx_user,
                                 idx_item = i,
                                 n_neighbors = 2,
                                 utility_matrix = util_matrix)
    print(f"rating of user-{idx_user+1} to item-{i+1}, r_({i+1},{idx_user+1}) : {pred_rating:.1f}")

rating of user-3 to item-1, r_(1,3) : nan
rating of user-3 to item-2, r_(2,3) : 4.2
rating of user-3 to item-3, r_(3,3) : 3.0
rating of user-3 to item-4, r_(4,3) : 4.5
rating of user-3 to item-5, r_(5,3) : 4.3
rating of user-3 to item-6, r_(6,3) : nan
