In [1]:
import pandas as pd

from src.metrics import map_score, mrr_score, ndcg_score, rmse_score
from src.models.content_based_filtering import ContentBasedFilteringRecommender
from src.utils import RatingMatrix, make_binary_matrix, to_user_movie_matrix, train_test_split

In [2]:
# Load the user table. Column names are supplied explicitly through `names`,
# so the first line of the file is read as data rather than as a header.
# The multi-character "::" delimiter is parsed with the python engine.
users = pd.read_table(
    "../data/users.dat",
    sep="::",
    engine='python',
    names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'],
)

In [3]:
# Load the movie table (MovieID, Title, Genres) from the "::"-delimited file.
# The file is decoded as latin1; column names are supplied explicitly through
# `names`, so the first line is read as data rather than as a header.
movies = pd.read_table(
    "../data/movies.dat",
    sep="::",
    engine='python',
    encoding='latin1',
    names=['MovieID', 'Title', 'Genres'],
)

In [4]:
# Load the ratings and convert the raw 'Timestamp' column (interpreted as
# seconds since the epoch via unit='s') to pandas datetimes in one chained expression.
ratings = pd.read_table(
    "../data/ratings.dat",
    sep="::",
    engine='python',
    names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
).assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'], unit='s'))

# Split on the 'Timestamp' column, then wrap each part in the project's
# user/movie matrix structure.
# NOTE(review): presumably a chronological split — confirm in src.utils.train_test_split.
(train_ratings, test_ratings) = train_test_split(ratings, 'Timestamp')
user_movie_train = to_user_movie_matrix(train_ratings)
user_movie_test = to_user_movie_matrix(test_ratings)

In [5]:
# Fit the content-based recommender on the training matrix and the movie table.
# NOTE(review): 'tfidf' presumably selects the text vectorisation scheme — confirm
# against ContentBasedFilteringRecommender.train.
model = ContentBasedFilteringRecommender()
model.train(user_movie_train, movies, 'tfidf')

# Predict for the test split: the model receives a binarised copy of the test
# rating matrix together with the test users and test movies.
y_pred = model.predict(
    make_binary_matrix(user_movie_test.get_rating_matrix()),
    user_movie_test.get_users(),
    user_movie_test.get_movies(),
)

In [6]:
# Restrict the held-out rating matrix to the row labels (index) and column
# labels (columns) present in the prediction matrix, so the two matrices handed
# to the metric functions cover the same labels in the same order.
# NOTE(review): the names suggest index = movies and columns = users — confirm
# against RatingMatrix.
temp_users = y_pred.get_rating_matrix().columns
temp_movies = y_pred.get_rating_matrix().index
# One .loc[rows, cols] lookup instead of chained .loc[rows][cols]: same selection,
# without materialising an intermediate row-filtered frame.
final_test = RatingMatrix(user_movie_test.get_rating_matrix().loc[temp_movies, temp_users])

In [7]:
# Ranking metrics are evaluated with top=10; RMSE is computed without a cutoff.
map_score_value = map_score(final_test, y_pred, top=10)
mrr_score_value = mrr_score(final_test, y_pred, top=10)
ndcg_score_value = ndcg_score(final_test, y_pred, top=10)
rmse_score_value = rmse_score(final_test, y_pred)

# NOTE(review): the labels say "Baseline" although the model evaluated here is
# ContentBasedFilteringRecommender — confirm whether the wording is intentional.
# NOTE(review): the recorded RMSE (~357) looks very large for rating data;
# presumably y_pred holds scores on a different scale than the ratings — verify
# before comparing RMSE across models.
print(
    f'Baseline MAP: {map_score_value}',
    f'Baseline MRR: {mrr_score_value}',
    f'Baseline NDCG: {ndcg_score_value}',
    f'Baseline RMSE: {rmse_score_value}',
    sep='\n',
)

Baseline MAP: 0.15553108303108282
Baseline MRR: 0.377042042042042
Baseline NDCG: 0.20987471988425865
Baseline RMSE: 357.4582501464409
