<a href="https://colab.research.google.com/github/jayarnim/RS/blob/main/CollaborativeFiltering/(1)_User_Based_Collaborative_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
import numpy as np
import pandas as pd

In [None]:
path_links = "https://raw.githubusercontent.com/jayarnim/RS/main/data/links.csv"
path_movies = "https://raw.githubusercontent.com/jayarnim/RS/main/data/movies.csv"
path_ratings = "https://raw.githubusercontent.com/jayarnim/RS/main/data/ratings.csv"
path_tags = "https://raw.githubusercontent.com/jayarnim/RS/main/data/tags.csv"

In [None]:
links = pd.read_csv(path_links)
movies = pd.read_csv(path_movies)
ratings = pd.read_csv(path_ratings)
tags = pd.read_csv(path_tags)

In [None]:
# User-Item Matrix 생성
user_item_matrix = ratings.pivot_table('rating', index = 'userId', columns = 'movieId')

# 오프라인

In [None]:
# User Mean Ratings
mean_ratings = user_item_matrix.mean(axis = 0)

In [None]:
def calculate_cosine_similarity(row1, row2):
    # 공통 아이템만 추출
    common_columns = ~np.isnan(row1) & ~np.isnan(row2)

    if not common_columns.any(): return 0

    else:
        vector1 = row1[common_columns].values.reshape(1, -1)
        vector2 = row2[common_columns].values.reshape(1, -1)

    return cosine_similarity(vector1, vector2)[0][0]

In [None]:
# 각 사용자 벡터쌍에 대한 코사인 유사도 계산
n_rows = user_item_matrix.shape[0]
similarity_matrix = np.zeros((n_rows, n_rows))

for i in tqdm(range(n_rows)):
    for j in range(n_rows):
        if i != j: similarity_matrix[i, j] = calculate_cosine_similarity(user_item_matrix.iloc[i], user_item_matrix.iloc[j])
        else: similarity_matrix[i, j] = 1

100%|██████████| 671/671 [09:02<00:00,  1.24it/s]


# 온라인

In [None]:
def predict_rating(R, user_index, item_index, user_similarity):
    # 타깃 아이템에 평점을 준 사용자만 필터링
    peer_group = np.where(R[:, item_index] > 0)[0]
    peer_group = list(peer_group)

    # 타깃 사용자와 피어 그룹 사이의 코사인 유사도
    similarities = user_similarity[user_index, peer_group]

    # 타깃 아이템에 대한 피어 그룹의 평점
    ratings = R[peer_group, item_index]

    # 코사인 유사도에 따른 가중치가 적용된 평균 평점 계산
    numerator = np.dot(similarities, ratings)
    denominator = np.sum(np.abs(similarities))

    if denominator == 0: return mean_ratings.values[user_index]
    else: return mean_ratings.values[user_index] + (numerator / denominator)

In [None]:
# 결측치 0으로 채우기
user_item_matrix = user_item_matrix.fillna(0)

In [None]:
# 원본 데이터 복사
predicted_matrix = np.copy(user_item_matrix)

In [None]:
for user_index in tqdm(range(predicted_matrix.shape[0])):
    for item_index in range(predicted_matrix.shape[1]):
        if predicted_matrix[user_index, item_index] == 0:
            predicted_matrix[user_index, item_index] = predict_rating(predicted_matrix, user_index, item_index, similarity_matrix)

100%|██████████| 671/671 [09:21<00:00,  1.19it/s]


In [None]:
predicted_df = pd.DataFrame(predicted_matrix, index = user_item_matrix.index, columns = user_item_matrix.columns)
predicted_df

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,7.704260,7.155442,6.812989,6.227821,7.011640,7.751662,7.053090,6.840031,7.036424,7.276363,...,6.372470,4.372470,6.872470,4.872470,5.372470,8.872470,8.372470,8.872470,6.872470,8.872470
2,7.295730,6.794290,6.559278,5.783348,6.685935,7.274455,6.705744,7.188807,6.450786,4.000000,...,5.901869,3.901869,6.401869,4.401869,4.901869,8.401869,7.901869,8.401869,6.401869,8.401869
3,7.055094,6.585616,6.375889,5.756603,6.451532,7.070545,6.560783,7.375458,6.428775,6.618888,...,7.378818,5.435727,7.863820,5.862063,6.362063,9.847588,9.363820,9.846812,7.846812,9.878818
4,6.307194,5.881902,5.702459,5.448078,5.836708,6.365616,5.863040,7.427691,5.970623,4.000000,...,7.943758,5.948541,8.406002,6.406271,6.906271,10.388467,9.906002,10.424792,8.424792,10.443758
5,7.200961,6.796946,4.000000,6.488780,6.756524,7.274412,6.790036,8.636174,6.973699,6.782333,...,9.305648,7.375435,9.787558,7.782814,8.282814,11.754248,11.287558,11.783442,9.783442,11.805648
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,9.598952,11.813801,13.709955,17.974026,14.033010,4.000000,14.282695,22.149661,17.546008,11.541880,...,24.801839,22.926676,25.250115,23.256237,23.756237,27.225115,26.750115,27.374258,25.374258,27.301839
668,9.910945,12.149552,14.092179,18.389214,14.433327,12.904410,14.663934,22.524198,17.912816,11.857119,...,25.154520,23.281753,25.604859,23.609720,24.109720,27.579902,27.104859,27.766880,25.766880,27.654520
669,9.588163,11.733691,13.732483,17.922080,14.008277,12.562758,14.294142,22.065419,17.502829,11.508157,...,24.686957,22.811282,25.135158,23.140026,23.640026,27.111768,26.635158,27.255268,25.255268,27.186957
670,4.000000,14.227287,16.105708,20.352725,16.430903,14.961960,16.669082,24.520690,19.927887,13.981626,...,27.159765,25.285093,27.609571,25.615751,26.115751,29.584024,29.109571,29.732126,27.732126,29.659765


# 사용자 추천

In [None]:
def recommenders(user_id, K, user_item_matrix=user_item_matrix, predicted_df=predicted_df):
    target_user_vector = user_item_matrix.iloc[[user_id]]
    target_items = target_user_vector[target_user_vector == 0].columns
    sorted_items = predicted_df.loc[user_id, target_items].sort_values(ascending=False)
    top_k_items = movies[movies["movieId"].isin(sorted_items[:K].index)]["title"]
    return top_k_items

In [None]:
recommenders(10, 10)

687     Day the Sun Turned Cold, The (Tianguo niezi) (...
705        Supercop 2 (Project S) (Chao ji ji hua) (1993)
2064                                         Tango (1998)
2308              Armour of God (Long xiong hu di) (1987)
3416                            King Is Alive, The (2000)
3506                              Last Dragon, The (1985)
3595                                American Ninja (1985)
3597                  American Ninja 3: Blood Hunt (1989)
3697           Battle Creek Brawl (Big Brawl, The) (1980)
8485    Am Ende eiens viel zu kurzen Tages (Death of a...
Name: title, dtype: object