# Simple Item-based Nearest Neighbor

Berdasarkan artikel: https://blog.ariflaksito.net/2021/07/memahami-collaborative-filtering-di.html

In [468]:
import pandas as pd

In [469]:
# Load the sample rating dump: one [user, item, rating] record per line
data = [
    ['Arif', 'Item_1', 2],
    ['Arif', 'Item_3', 3],
    ['Bob', 'Item_1', 5],
    ['Bob', 'Item_2', 2],
    ['Clark', 'Item_1', 3],
    ['Clark', 'Item_2', 3],
    ['Clark', 'Item_3', 1],
    ['Don', 'Item_2', 2],
    ['Don', 'Item_3', 2],
]
ratings = pd.DataFrame(data, columns=['User', 'Item', 'Rating'])

In [470]:
# Show the ratings in long format: one row per (User, Item, Rating) record
ratings

Unnamed: 0,User,Item,Rating
0,Arif,Item_1,2
1,Arif,Item_3,3
2,Bob,Item_1,5
3,Bob,Item_2,2
4,Clark,Item_1,3
5,Clark,Item_2,3
6,Clark,Item_3,1
7,Don,Item_2,2
8,Don,Item_3,2


In [471]:
# Reshape the long ratings table into an item x user matrix
# (rows = items, columns = users); a missing rating is stored as 0.
rating_matrix = (
    ratings
    .pivot_table(index='User', columns='Item', values='Rating')
    .fillna(0)
    .T
)
rating_matrix

User,Arif,Bob,Clark,Don
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Item_1,2.0,5.0,3.0,0.0
Item_2,0.0,2.0,3.0,2.0
Item_3,3.0,0.0,1.0,2.0


In [472]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the similarity between two items
def sim(x, y):
    """Cosine similarity of two items, measured over co-rated users only.

    Parameters
    ----------
    x, y : array-like with a single row
        Rating vectors of the two items, one column per user, e.g.
        ``rating_matrix[0:1].to_numpy()``. A value of 0 means "not rated".

    Returns
    -------
    float
        Cosine similarity computed on the columns where both items have a
        non-zero rating. Returns 0.0 when no user rated both items, because
        there is then no evidence of similarity (and ``cosine_similarity``
        would raise on the resulting zero-column matrix).
    """
    # Stack both vectors into a 2-row frame: row 0 is x, row 1 is y.
    pair = pd.concat([pd.DataFrame(x), pd.DataFrame(y)])

    # True for the columns (users) in which every row is non-zero,
    # i.e. the users who rated both items.
    co_rated = pair.all()
    if not co_rated.any():
        return 0.0

    overlap = pair[pair.columns[co_rated]]
    scores = cosine_similarity(overlap)

    # scores is the 2 x 2 pairwise matrix; [0][1] is sim(x, y).
    return scores[0][1]

In [473]:
# Single-row rating arrays for the first, second and third row (item) of the matrix
i1, i2, i3 = (rating_matrix.iloc[row:row + 1].to_numpy() for row in range(3))

In [474]:
# Similarity between the first and second item rows
sim(i1, i2)

0.9037378388935386

In [475]:
# Similarity between the second and third item rows
sim(i2, i3)

0.8682431421244592

In [476]:
# Similarity between the first and third item rows
sim(i1, i3)

0.7893522173763263

In [477]:
# Item x user view: each row holds one item's ratings across all users
rating_matrix

User,Arif,Bob,Clark,Don
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Item_1,2.0,5.0,3.0,0.0
Item_2,0.0,2.0,3.0,2.0
Item_3,3.0,0.0,1.0,2.0


In [478]:
# Transposed (user x item) view: each row holds one user's ratings across all items
rating_matrix.T

Item,Item_1,Item_2,Item_3
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arif,2.0,0.0,3.0
Bob,5.0,2.0,0.0
Clark,3.0,3.0,1.0
Don,0.0,2.0,2.0


In [479]:
import numpy as np

def predict(df):
    """Predict one missing rating per user with an item-based weighted average.

    For each user (column of ``df``) that has at least one rating equal to 0,
    the FIRST such item is taken as the target and its rating is estimated as

        sum(rating_k * sim(item_k, target)) / sum(sim(item_k, target))

    over every item ``k`` the user rated (rating > 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Item x user rating matrix (rows = items, columns = users) in which
        0 marks a missing rating.

    Returns
    -------
    list
        One prediction per user that has a missing rating, in column order.
        Users without any 0 rating are skipped, so the list can be shorter
        than the number of users. Only the first unrated item of each user is
        predicted. The entry is NaN when the similarity weights sum to zero
        (for example when the user has not rated any item), instead of
        raising ZeroDivisionError.
    """
    ratings_by_item = df.to_numpy()
    n_items, n_users = ratings_by_item.shape

    # Keep each item as a single-row 2-D array, the shape `sim` is called with.
    item_rows = [ratings_by_item[k:k + 1] for k in range(n_items)]

    pred = []
    for u in range(n_users):
        user_ratings = ratings_by_item[:, u]
        unrated = np.flatnonzero(user_ratings == 0)
        if len(unrated) == 0:
            # Nothing to predict for a user who rated every item.
            continue

        target = item_rows[unrated[0]]
        weighted_sum = 0.0
        weight_total = 0.0
        for k in range(n_items):
            rating = user_ratings[k]
            if rating > 0:
                # Compute the similarity once and reuse it for both sums.
                weight = sim(item_rows[k], target)
                weighted_sum += rating * weight
                weight_total += weight

        if weight_total != 0:
            pred.append(weighted_sum / weight_total)
        else:
            # No usable neighbour item: the prediction is undefined.
            pred.append(np.nan)

    return pred


In [480]:
# Predicted ratings, one per user (column) that has a 0 entry, in column order
predict(rating_matrix)

[2.4899844588770113, 3.4286096052067627, 2.0]