# In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import sys

# Make the project root importable so that the `src` package resolves.
from pathlib import Path

# Original behaviour: assume the working directory sits two levels below the
# project root. This entry is kept (and keeps priority) for compatibility.
folder = "../../"
sys.path.append(folder)

# The relative entry above only works from one specific working directory.
# As a fallback, also add the nearest ancestor of the working directory that
# actually contains a `src` directory.
for _candidate in (Path.cwd(), *Path.cwd().parents):
    if (_candidate / "src").is_dir():
        sys.path.append(str(_candidate))
        break

from src.utils import load_data, evaluate_model

# load_data returns three objects, bound here as users, ratings and movies.
users, ratings, movies = load_data('../../data/movies.csv')

# Chronological split: after ordering by timestamp, the first 80% of the
# rating rows are used for training and the remaining 20% for testing.
ratings['Timestamp'] = pd.to_datetime(ratings['Timestamp'], unit='s')
sorted_ratings = ratings.sort_values(by='Timestamp')
split_point = int(0.8 * len(ratings))
train_ratings, test_ratings = sorted_ratings[:split_point], sorted_ratings[split_point:]

# User x movie rating table built from the training rows only; a missing
# (user, movie) pair is stored as 0.
user_item_matrix = train_ratings.pivot(
    index='UserID',
    columns='MovieID',
    values='Rating',
).fillna(0)

# Pairwise cosine similarity between the users' rating rows, labelled by
# UserID on both axes.
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

def get_user_recommendations(user_id, user_item_matrix, user_similarity_df, n_recommendations=10):
    """Recommend items the user has not rated, scored from similar users' ratings.

    Neighbours are visited from most to least similar. Each neighbour's rating
    row is added to a running total weighted by its similarity, and the walk
    stops as soon as the accumulated similarity exceeds 1. The totals are then
    normalised by the accumulated similarity and the highest-scoring items the
    target user has not rated (rating equal to 0) are returned.

    Args:
        user_id: Label of the target user.
        user_item_matrix: DataFrame indexed by user with one column per item;
            a value of 0 is treated as "not rated".
        user_similarity_df: Square DataFrame of user-user similarities,
            labelled by user on both axes.
        n_recommendations: Maximum number of items to return.

    Returns:
        An empty list when ``user_id`` is not in ``user_similarity_df``.
        Otherwise a pandas Index of item labels ordered by descending score;
        the Index is empty when no neighbour has a positive similarity.
    """
    if user_id not in user_similarity_df.index:
        return []

    # Remove the target user by label. Dropping "the first entry after
    # sorting" is wrong when another user ties with the self-similarity,
    # because the wrong user may then be discarded.
    neighbour_scores = (
        user_similarity_df[user_id].drop(user_id).sort_values(ascending=False)
    )
    user_ratings = user_item_matrix.loc[user_id]
    weighted_ratings = pd.Series(0.0, index=user_item_matrix.columns)

    sim_sum = 0.0
    for sim_user, sim_score in neighbour_scores.items():
        # Scores are sorted descending, so once one is not positive (zero,
        # negative or NaN) no later neighbour can contribute usefully.
        if not sim_score > 0:
            break
        weighted_ratings += user_item_matrix.loc[sim_user] * sim_score
        sim_sum += sim_score
        if sim_sum > 1:
            break

    # Without any positively similar neighbour the normalisation below would
    # divide by zero and rank items by NaN/inf scores.
    if not sim_sum > 0:
        return user_item_matrix.columns[:0]

    weighted_ratings = weighted_ratings / sim_sum
    # Keep only items the target user has not rated yet.
    weighted_ratings = weighted_ratings[user_ratings == 0]
    recommendations = weighted_ratings.sort_values(ascending=False).head(n_recommendations)
    return recommendations.index

# Demo: recommend for the user of the first row in the test split and show
# the title and genres of the recommended movies.
user_id = test_ratings['UserID'].iat[0]
recommended_movies = get_user_recommendations(
    user_id,
    user_item_matrix,
    user_similarity_df,
)
is_recommended = movies['MovieID'].isin(recommended_movies)
print(movies.loc[is_recommended, ['Title', 'Genres']])

# Evaluate the recommender on the test split and report the three metrics
# returned by evaluate_model.
precision, recall, f1 = evaluate_model(
    test_ratings,
    user_item_matrix,
    user_similarity_df,
    get_user_recommendations,
)
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')




# Recorded cell output: ModuleNotFoundError: No module named 'src'