In [None]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd


In [None]:
# Load the pre-processed ratings table. Later cells read its 'userId',
# 'movieId' and 'rating' columns.
ratings_csv = '/content/data/processed/clean_ratings.csv'
ratings = pd.read_csv(ratings_csv)


In [None]:
# Surprise reads the frame positionally as (user id, item id, rating), so the
# column order below matters.
rating_columns = ['userId', 'movieId', 'rating']

# NOTE(review): the 1-5 scale is declared here rather than derived from the
# file -- confirm clean_ratings.csv contains no ratings outside this range
# (e.g. half-star 0.5 values).
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[rating_columns], reader)


In [None]:
# Hold out 20% of the ratings for evaluation; the fixed random_state makes the
# split repeatable across kernel restarts.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Similarity configuration for the k-NN model: cosine similarity, computed
# between items rather than between users (user_based=False).
sim_options = dict(name='cosine', user_based=False)

# Build the basic k-NN collaborative-filtering model and train it on the
# training portion of the split only.
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)


In [None]:
# Score every held-out (user, movie) pair with the fitted model.
predictions = model.test(testset)

# accuracy.rmse prints the RMSE (verbose) and also returns it; keep the value
# so later cells can reuse it.
rmse = accuracy.rmse(predictions, verbose=True)


In [None]:
from collections import defaultdict

def get_top_n_recommendations(predictions, n=10):
    """Group predictions by user and keep each user's ``n`` best estimates.

    Args:
        predictions: Iterable of 5-item records unpacked as
            ``(uid, iid, true_rating, estimate, details)``. Only ``uid``,
            ``iid`` and ``estimate`` are used.
        n: Maximum number of items kept per user (default 10).

    Returns:
        A ``defaultdict(list)`` mapping each ``uid`` to a list of
        ``(iid, estimate)`` tuples sorted by estimate, highest first, and
        truncated to ``n`` entries. Because it is a defaultdict, indexing an
        unknown user yields (and stores) an empty list.
    """
    per_user = defaultdict(list)

    # Bucket every (item, estimate) pair under the user it was predicted for.
    for prediction in predictions:
        uid, iid, _true_r, est, _details = prediction
        per_user[uid].append((iid, est))

    # Rank each bucket by estimate (stable for ties) and trim it to n entries.
    for uid in list(per_user):
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]

    return per_user

# Rank every (user, movie) pair scored on the held-out test set and keep the
# five highest estimates per user.
# NOTE(review): `predictions` comes from `model.test(testset)`, so these are
# movies each user has already rated (their held-out ratings), not unseen
# titles. For recommendations of unseen movies, consider scoring an
# anti-testset (e.g. `trainset.build_anti_testset()`) instead.
top_n = get_top_n_recommendations(predictions, n=5)

# `Dataset.load_from_df` keeps raw ids exactly as they are typed in the
# DataFrame, so a numeric `userId` column yields integer keys and the string
# '1' would never match. Resolve the key by its string form so the lookup
# works whether the ids are stored as ints or as strings.
target_user = 1
user_id = next(
    (uid for uid in top_n if str(uid) == str(target_user)),
    target_user,
)

# `.get` avoids inserting an empty entry into the defaultdict when the user
# has no predictions.
top_n.get(user_id, [])


In [None]:
# Movie metadata used to turn recommended ids into titles.
movies = pd.read_csv('/content/data/movies.csv')

# Ids come back in ranked order (best estimate first); cast to int so they
# compare equal to the integer `movieId` column even if they were strings.
recommended_ids = [int(movie_id) for movie_id, _ in top_n[user_id]]
recommended_scores = [est for _, est in top_n[user_id]]

# Explicit dtypes keep the merge key compatible with `movies['movieId']` even
# when the user has no recommendations (empty lists).
recommendations = pd.DataFrame({
    'movieId': pd.Series(recommended_ids, dtype='int64'),
    'predicted_rating': pd.Series(recommended_scores, dtype='float64'),
})

# Filtering `movies` with `isin` would return rows in file order and lose the
# ranking; a left merge keeps the recommendation order and shows the score.
recommendations.merge(movies[['movieId', 'title']], on='movieId', how='left')
