In [1]:
import pandas as pd
from src.utils import train_test_split
from src.models.user_collaborative_filtering import UserCollaborativeFiltering
from src.metrics import map_score, mrr_score, ndcg_score, rmse_score
from tqdm import tqdm
from src.utils import RatingMatrix

In [2]:
# Load the raw ratings file ('::'-delimited, no header row) and convert the
# epoch-seconds Timestamp column to pandas datetimes in a single chain.
ratings = pd.read_csv(
    "../data/ratings.dat",
    sep="::",  # multi-character separator, hence the python parsing engine
    names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
    engine='python',
).assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'], unit='s'))

In [3]:
# Preview the first five rows to sanity-check the parsed columns and timestamps.
ratings.head(n=5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,2000-12-31 22:12:40
1,1,661,3,2000-12-31 22:35:09
2,1,914,3,2000-12-31 22:32:48
3,1,3408,4,2000-12-31 22:04:35
4,1,2355,5,2001-01-06 23:38:11


In [4]:
# Print the number of distinct users followed by the number of distinct movies.
print(*(len(ratings[column].unique()) for column in ('UserID', 'MovieID')))

6040 3706


In [5]:
# Split the ratings into a train part and a test part using the project helper,
# passing the 'Timestamp' column name as its second argument.
# NOTE(review): presumably a chronological split keyed on Timestamp — confirm in src.utils.
train_ratings, test_ratings = train_test_split(ratings, 'Timestamp')

In [6]:
# Inspect the training-split ratings of user 1000 as a MovieID-indexed series.
# NOTE(review): NaN entries presumably mark movies this user has not rated in the train split — confirm.
train_ratings.get_user_ratings(1000)

MovieID
1       5.0
2       NaN
3       NaN
4       NaN
5       NaN
       ... 
3948    NaN
3949    NaN
3950    NaN
3951    NaN
3952    NaN
Name: 1000, Length: 3651, dtype: float64

In [17]:
# Display every rating row belonging to users with UserID 100 or lower.
ratings.loc[ratings['UserID'].le(100)]

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,2000-12-31 22:12:40
1,1,661,3,2000-12-31 22:35:09
2,1,914,3,2000-12-31 22:32:48
3,1,3408,4,2000-12-31 22:04:35
4,1,2355,5,2001-01-06 23:38:11
...,...,...,...,...
12971,100,1221,3,2000-12-23 17:52:30
12972,100,2028,4,2000-12-23 17:53:08
12973,100,480,3,2000-12-23 17:54:52
12974,100,1304,3,2000-12-23 17:53:08


In [23]:
# Build a MovieID x UserID rating matrix restricted to users with UserID <= 1000
# and wrap it in the project's RatingMatrix container.
# NOTE(review): this is built from the full `ratings` table, not from the
# train/test split created earlier — confirm that is intended.
ratings1 = RatingMatrix(
    ratings.loc[ratings['UserID'].le(1000)]
    .pivot(index='MovieID', columns='UserID', values='Rating')
)

In [24]:
# Create the user-based collaborative filtering model with its default
# constructor arguments and fit it on `ratings1`.
# NOTE(review): the same `ratings1` matrix is later used as the ground truth for
# the metrics, so the evaluation is in-sample — confirm that is intended.
filtering = UserCollaborativeFiltering()
filtering.fit(ratings1)

In [26]:
# Predict a rating for every (user, movie) pair that has an observed rating in
# `ratings1`; pairs without an observed rating are skipped and stay NaN.
# NOTE(review): `filtering` was fitted on `ratings1`, so these are predictions
# for pairs the model has already seen — confirm this is the intended evaluation.
# The recorded run of this cell took about 27 minutes.

# Fetch the underlying matrix once; it was previously re-fetched on every
# iteration of both loops even though it does not change here.
rating_matrix = ratings1.get_rating_matrix()

# dtype=float: a DataFrame created from only index/columns is object-dtype,
# which keeps the stored predictions untyped; a float frame starts as all-NaN
# and stores the predictions as numbers.
predicted_ratings = RatingMatrix(
    pd.DataFrame(index=rating_matrix.index, columns=rating_matrix.columns, dtype=float)
)

for user_id in tqdm(rating_matrix.columns):
    for movie_id in rating_matrix.index:
        # Only score pairs that have a ground-truth rating to compare against.
        if pd.isna(ratings1.get_rating(user_id, movie_id)):
            continue
        predicted_rating = filtering.predict(user_id, movie_id)
        predicted_ratings.matrix.loc[movie_id, user_id] = predicted_rating

100%|██████████| 1000/1000 [27:17<00:00,  1.64s/it]


In [27]:
# Score the predictions against `ratings1` with each metric, in the order
# MAP, MRR, NDCG, RMSE; every metric receives (ground truth, predictions).
map_result, mrr_result, ndcg_result, rmse_result = (
    metric(ratings1, predicted_ratings)
    for metric in (map_score, mrr_score, ndcg_score, rmse_score)
)

In [28]:
# Report the four evaluation metrics, one per line.
# NOTE(review): the recorded RMSE (~336) is far larger than the single-digit
# ratings shown in the data preview — check rmse_score and the predictions.
print(
    f"MAP: {map_result}",
    f"MRR: {mrr_result}",
    f"NDCG: {ndcg_result}",
    f"RMSE: {rmse_result}",
    sep="\n",
)

MAP: 0.18885749999999984
MRR: 0.48265357142857146
NDCG: 0.3128752801938456
RMSE: 336.2590067201386
