# User-Based Callobarative Filtering

#### User-Item Matrix

In [1]:
import pandas as pd
import sys

sys.path.append('..')
from src.data_preprocessing import *
from src.utils import eval_model

ratings_df = pd.read_csv('../data/ratings.csv')
movies_df = pd.read_csv('../data/movies.csv')

rating_matrix, user_map, movie_map, user_ids, movie_ids = create_rating_matrix(ratings_df)

print(rating_matrix.toarray())

[[4.  4.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 2.  0.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  5.  ... 3.  3.5 3.5]]


Train test split

In [2]:
train_matrix, test_matrix = train_test_split_by_user(rating_matrix)

No feature-tuning UBCF model results

In [3]:
from src.models import UBCF

model = UBCF()

model.fit(train_matrix)

metrcis, _, _ = eval_model(model, test_matrix)

print(metrcis)

{'rmse': 1.1691448849703214, 'mae': 0.8420452434016349}


In [4]:
models = [
    UBCF(metric='pearson', k_similar=10),
    UBCF(metric='cosine', k_similar=10),
    UBCF(metric='pearson', k_similar=100),
    UBCF(metric='cosine', k_similar=100),
]

for model in models:
    model.fit(train_matrix)
    metrics, _, _ = eval_model(model, test_matrix)
    print(f"Params: {model.k_similar}, {model.metric} -> [RMSE: {metrics['rmse']}, MAE: {metrcis['mae']}]")

Params: 10, pearson -> [RMSE: 3.653540975464964, MAE: 0.8420452434016349]
Params: 10, cosine -> [RMSE: 1.1755759404378558, MAE: 0.8420452434016349]
Params: 100, pearson -> [RMSE: 3.653540975464964, MAE: 0.8420452434016349]
Params: 100, cosine -> [RMSE: 1.169805999915558, MAE: 0.8420452434016349]
