In [1]:
import pandas as pd
from src.utils import train_test_split
from src.models.item_collaborative_filtering import ItemCollaborativeFiltering
from src.metrics import map_score, mrr_score, ndcg_score, rmse_score
from tqdm import tqdm
from src.utils import RatingMatrix

In [2]:
# Load the raw ratings file ("::"-separated, no header row) and convert the
# epoch-seconds `Timestamp` column to pandas datetimes in one chained expression.
ratings = pd.read_table(
    "../data/ratings.dat",
    sep="::",  # multi-character separator, hence the python parsing engine below
    names=["UserID", "MovieID", "Rating", "Timestamp"],
    engine="python",
).assign(Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"], unit="s"))

In [3]:
# Keep only the ratings of users with UserID <= 1000, then reshape them into a
# MovieID x UserID table (rows = movies, columns = users, cells = Rating) and
# wrap that table in the project's RatingMatrix.
ratings1 = RatingMatrix(
    ratings.loc[ratings["UserID"] <= 1000].pivot(
        index="MovieID",
        columns="UserID",
        values="Rating",
    )
)

In [4]:
# Create the item-based collaborative-filtering model with its default
# constructor arguments and fit it on `ratings1`.
# NOTE(review): the model is fitted on the very same ratings that are scored
# further down, and `train_test_split` is imported but never used in the
# visible cells -- confirm whether a hold-out split was intended.
filtering = ItemCollaborativeFiltering()
filtering.fit(ratings1)

In [5]:
# Predict a rating for every (user, movie) pair that already has an observed
# rating in `ratings1`; pairs without an observed rating are skipped and stay NaN.
#
# The underlying matrix is fetched once instead of on every inner-loop
# iteration, and the result frame is created as float so the numeric
# predictions are not stored in an object-dtype frame.
rating_matrix = ratings1.get_rating_matrix()
movie_ids = rating_matrix.index
user_ids = rating_matrix.columns

predicted_ratings = RatingMatrix(pd.DataFrame(index=movie_ids, columns=user_ids, dtype=float))
for user_id in tqdm(user_ids):
    for movie_id in movie_ids:
        # Only pairs with a known rating are predicted.
        if pd.isna(ratings1.get_rating(user_id, movie_id)):
            continue
        predicted_rating = filtering.predict(user_id, movie_id)
        # Rows are movies and columns are users, so the .loc key is (movie_id, user_id).
        predicted_ratings.matrix.loc[movie_id, user_id] = predicted_rating

100%|██████████| 1000/1000 [16:20<00:00,  1.02it/s]


In [None]:
# TODO(review): this cell called `make_binary_matrix()` with no arguments and
# discarded the result. The name is never imported or defined in the visible
# cells and the cell was never executed, so a fresh "Restart & Run All" would
# stop here with a NameError before the metrics are computed. The call is
# disabled until the helper's origin and intended use are confirmed; delete the
# cell if it is a leftover.
# make_binary_matrix()

In [6]:
# Score the predictions with each metric in turn; every metric is called as
# metric(ratings1, predicted_ratings), in the order MAP, MRR, NDCG, RMSE.
map_result, mrr_result, ndcg_result, rmse_result = (
    metric(ratings1, predicted_ratings)
    for metric in (map_score, mrr_score, ndcg_score, rmse_score)
)

In [7]:
# Report the four metric values, one per line.
print(
    f"MAP: {map_result}",
    f"MRR: {mrr_result}",
    f"NDCG: {ndcg_result}",
    f"RMSE: {rmse_result}",
    sep="\n",
)

MAP: 0.186777857142857
MRR: 0.481240873015873
NDCG: 0.31036263285669474
RMSE: 336.0719909314065
