In [1]:
import pandas as pd
import time
import numpy as np
from scipy import sparse
import warnings
from implicit_markov_chain import ImplicitMarkovChain
from model_helper import save_model
warnings.filterwarnings('ignore')

# Load the data
print("Loading MovieLens 20M data...")
# selected_users = pd.read_csv('data/users_selection.csv')
ratings = pd.read_csv('data/filtered_high_ratings_20251114_120514.csv')

# print(f"Movies: {movies.shape}")
print(f"Ratings: {ratings.shape}")
print("\nFirst few rows of ratings:")
print(ratings.head())
# print("\nRating distribution:")
# print(ratings['rating'].value_counts().sort_index())

Loading MovieLens 20M data...
Ratings: (10246362, 4)

First few rows of ratings:
   userId  movieId  rating   timestamp
0       1        2     3.5  1112486027
1       1       29     3.5  1112484676
2       1       32     3.5  1112484819
3       1       47     3.5  1112484727
4       1       50     3.5  1112484580


# Loop

In [29]:
leave_k_out = 100
n_users = 1000

all_users = ratings['userId'].unique()

iterations = 100
max_recall, max_prec = 0, 0

for iter in range(iterations):
    print(f"{iter+1}/{iterations}")
    selected_user_ids = np.random.choice(all_users, size=n_users, replace=False)

    # print(f"Selected {len(selected_user_ids)} unique users")

    filtered_ratings = ratings[ratings['userId'].isin(selected_user_ids)]
    # print(f"Original ratings shape: {ratings.shape}")
    # print(f"Filtered ratings shape: {filtered_ratings.shape}")

    filtered_ratings_sorted = filtered_ratings.sort_values(['userId', 'timestamp'])

    train_ratings = filtered_ratings_sorted.groupby('userId').apply(lambda x: x.iloc[:-leave_k_out] if len(x) > leave_k_out else pd.DataFrame()).reset_index(drop=True)
    test_ratings = filtered_ratings_sorted.groupby('userId').tail(leave_k_out)

    print(f"Training set: {train_ratings.shape[0]:,} ratings")
    print(f"Test set: {test_ratings.shape[0]:,} ratings (Leave-{leave_k_out}-out)")

    print("Training implicit feedback Markov chain...")
    model = ImplicitMarkovChain(alpha=2.0)
    start = time.time()
    model.build_transition_matrix_efficient(train_ratings, sample_fraction=1)
    print(f"{time.time() - start:.4f} seconds")

    K = 50
    total_recall = 0
    users_not_considered = 0
    recalls = []
    total_precision = 0
    user_recommendations = {}
    train_ratings_sorted = train_ratings.sort_values(['userId', 'timestamp'])

    for user_id in selected_user_ids:
        user_data = train_ratings_sorted[train_ratings_sorted['userId'] == user_id]
        user_training_samples = len(user_data)
        
        # Only consider last 5 samples
        recent_movies = user_data['movieId'].tail(1).tolist()
        
        candidate_scores = {}
        
        # Process only the last 5 movies
        for movie_id in recent_movies:
            probabilities = model.transition_matrix[model.movie_to_idx[movie_id]]
            top_indices = np.argsort(probabilities)[-(K):][::-1]
            
            for i in top_indices:
                candidate_movie_id = model.idx_to_movie[i]
                candidate_prob = probabilities[i]
                
                # Skip if user has already rated this movie
                if candidate_movie_id in user_data['movieId'].values:
                    continue
                    
                # Keep the highest probability for each movie
                if candidate_movie_id not in candidate_scores:
                    candidate_scores[candidate_movie_id] = candidate_prob
                elif candidate_prob > candidate_scores[candidate_movie_id]:
                    candidate_scores[candidate_movie_id] = candidate_prob
        
        # Get top K movies from candidate scores
        top_movies = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)[:K]
        
        if len(top_movies) < K:
            users_not_considered += 1
            # print(f"userid: {user_id} - Insufficient recommendations: {len(top_movies)}/{K} - Skipping")
            continue
            
        candidate_movies = [movie_id for movie_id, score in top_movies]
        recommended_movies = set(candidate_movies)
        user_test_ratings = test_ratings[test_ratings['userId'] == user_id]
        relevant_movies = set(user_test_ratings['movieId'].tolist())
        
        # Validation
        hits = relevant_movies.intersection(recommended_movies)
        
        user_recall = 0
        if len(relevant_movies) > 0:
            user_recall = len(hits) / len(relevant_movies)
            total_recall += user_recall    
        recalls.append(user_recall)
        
        if len(recommended_movies) > 0:
            user_precision = len(hits) / len(recommended_movies)
            total_precision += user_precision

        # print(f"userid: {user_id} - recent_movies: {len(recent_movies)} - recommendations: {len(top_movies)} - test_data: {len(relevant_movies)} - hits: {len(hits)} - recall: {user_recall:.4f}")

    # print(f"total users: {len(selected_user_ids)} - users not consid. {users_not_considered} - total_recall: {total_recall}")

    if (len(selected_user_ids) - users_not_considered) > 0:
        avg_recall = total_recall / (len(selected_user_ids) - users_not_considered)
        avg_precision = total_precision / (len(selected_user_ids) - users_not_considered)
    else:
        avg_recall = 0
        avg_precision = 0

    max_recall = max(max_recall, avg_recall)
    max_prec = max(max_prec, avg_precision)
    print(f"Recall@{K}: {avg_recall:.4f} - max: {max_recall:.4f}")
    print(f"Precision@{K}: {avg_precision:.4f} - max: {max_prec:.4f}")


1/100
Training set: 57,449 ratings
Test set: 76,045 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 423


100%|██████████| 423/423 [00:14<00:00, 29.41it/s]


16.1827 seconds
Recall@50: 0.1778 - max: 0.1778
Precision@50: 0.3557 - max: 0.3557
2/100
Training set: 59,767 ratings
Test set: 77,501 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 441


100%|██████████| 441/441 [00:15<00:00, 29.23it/s]


17.1506 seconds
Recall@50: 0.1700 - max: 0.1778
Precision@50: 0.3400 - max: 0.3557
3/100
Training set: 47,312 ratings
Test set: 74,921 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 406


100%|██████████| 406/406 [00:08<00:00, 48.55it/s]


9.6433 seconds
Recall@50: 0.1732 - max: 0.1778
Precision@50: 0.3464 - max: 0.3557
4/100
Training set: 50,344 ratings
Test set: 76,194 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 407


100%|██████████| 407/407 [00:10<00:00, 40.66it/s]


11.3961 seconds
Recall@50: 0.1819 - max: 0.1819
Precision@50: 0.3638 - max: 0.3638
5/100
Training set: 57,209 ratings
Test set: 76,266 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 433


100%|██████████| 433/433 [00:13<00:00, 32.90it/s]


14.8228 seconds
Recall@50: 0.1650 - max: 0.1819
Precision@50: 0.3300 - max: 0.3638
6/100
Training set: 51,502 ratings
Test set: 75,785 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 421


100%|██████████| 421/421 [00:12<00:00, 34.00it/s]


14.0204 seconds
Recall@50: 0.1788 - max: 0.1819
Precision@50: 0.3576 - max: 0.3638
7/100
Training set: 48,818 ratings
Test set: 75,346 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 405


100%|██████████| 405/405 [00:09<00:00, 44.34it/s]


10.5320 seconds
Recall@50: 0.1700 - max: 0.1819
Precision@50: 0.3400 - max: 0.3638
8/100
Training set: 53,814 ratings
Test set: 75,236 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 393


100%|██████████| 393/393 [00:14<00:00, 26.55it/s]


16.7859 seconds
Recall@50: 0.1729 - max: 0.1819
Precision@50: 0.3458 - max: 0.3638
9/100
Training set: 47,724 ratings
Test set: 74,825 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 400


100%|██████████| 400/400 [00:10<00:00, 38.39it/s]


11.8348 seconds
Recall@50: 0.1691 - max: 0.1819
Precision@50: 0.3383 - max: 0.3638
10/100
Training set: 52,591 ratings
Test set: 75,769 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:13<00:00, 31.20it/s]


14.6128 seconds
Recall@50: 0.1707 - max: 0.1819
Precision@50: 0.3413 - max: 0.3638
11/100
Training set: 52,100 ratings
Test set: 75,848 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 421


100%|██████████| 421/421 [00:11<00:00, 35.55it/s]


13.3929 seconds
Recall@50: 0.1759 - max: 0.1819
Precision@50: 0.3519 - max: 0.3638
12/100
Training set: 57,390 ratings
Test set: 77,299 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 432


100%|██████████| 432/432 [00:15<00:00, 28.31it/s]


17.5385 seconds
Recall@50: 0.1782 - max: 0.1819
Precision@50: 0.3565 - max: 0.3638
13/100
Training set: 48,405 ratings
Test set: 74,427 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 401


100%|██████████| 401/401 [00:09<00:00, 42.43it/s]


10.7889 seconds
Recall@50: 0.1784 - max: 0.1819
Precision@50: 0.3568 - max: 0.3638
14/100
Training set: 48,382 ratings
Test set: 75,465 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 412


100%|██████████| 412/412 [00:10<00:00, 40.22it/s]


11.8525 seconds
Recall@50: 0.1844 - max: 0.1844
Precision@50: 0.3689 - max: 0.3689
15/100
Training set: 53,679 ratings
Test set: 76,168 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 427


100%|██████████| 427/427 [00:12<00:00, 35.28it/s]


13.6188 seconds
Recall@50: 0.1456 - max: 0.1844
Precision@50: 0.2913 - max: 0.3689
16/100
Training set: 50,431 ratings
Test set: 75,415 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 404


100%|██████████| 404/404 [00:12<00:00, 33.31it/s]


13.7462 seconds
Recall@50: 0.1791 - max: 0.1844
Precision@50: 0.3582 - max: 0.3689
17/100
Training set: 56,822 ratings
Test set: 76,042 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 420


100%|██████████| 420/420 [00:13<00:00, 31.60it/s]


14.7588 seconds
Recall@50: 0.1833 - max: 0.1844
Precision@50: 0.3667 - max: 0.3689
18/100
Training set: 51,098 ratings
Test set: 75,701 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 411


100%|██████████| 411/411 [00:09<00:00, 42.48it/s]


11.0839 seconds
Recall@50: 0.1636 - max: 0.1844
Precision@50: 0.3271 - max: 0.3689
19/100
Training set: 61,179 ratings
Test set: 75,635 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 427


100%|██████████| 427/427 [00:12<00:00, 33.09it/s]


14.4383 seconds
Recall@50: 0.1565 - max: 0.1844
Precision@50: 0.3130 - max: 0.3689
20/100
Training set: 56,352 ratings
Test set: 75,509 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 423


100%|██████████| 423/423 [00:12<00:00, 33.68it/s]


14.2616 seconds
Recall@50: 0.1576 - max: 0.1844
Precision@50: 0.3152 - max: 0.3689
21/100
Training set: 52,171 ratings
Test set: 75,985 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 416


100%|██████████| 416/416 [00:10<00:00, 38.64it/s]


12.1088 seconds
Recall@50: 0.1589 - max: 0.1844
Precision@50: 0.3179 - max: 0.3689
22/100
Training set: 57,191 ratings
Test set: 77,480 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 443


100%|██████████| 443/443 [00:11<00:00, 40.17it/s]


12.5872 seconds
Recall@50: 0.1629 - max: 0.1844
Precision@50: 0.3257 - max: 0.3689
23/100
Training set: 60,024 ratings
Test set: 77,289 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 459


100%|██████████| 459/459 [00:11<00:00, 40.61it/s]


13.0705 seconds
Recall@50: 0.1636 - max: 0.1844
Precision@50: 0.3273 - max: 0.3689
24/100
Training set: 55,126 ratings
Test set: 75,597 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 404


100%|██████████| 404/404 [00:11<00:00, 36.19it/s]


12.6550 seconds
Recall@50: 0.2043 - max: 0.2043
Precision@50: 0.4086 - max: 0.4086
25/100
Training set: 50,665 ratings
Test set: 74,967 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 399


100%|██████████| 399/399 [00:09<00:00, 40.75it/s]


11.0885 seconds
Recall@50: 0.1442 - max: 0.2043
Precision@50: 0.2884 - max: 0.4086
26/100
Training set: 55,522 ratings
Test set: 76,115 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 415


100%|██████████| 415/415 [00:12<00:00, 32.36it/s]


14.3302 seconds
Recall@50: 0.1738 - max: 0.2043
Precision@50: 0.3476 - max: 0.4086
27/100
Training set: 55,073 ratings
Test set: 76,219 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 430


100%|██████████| 430/430 [00:10<00:00, 42.43it/s]


11.4285 seconds
Recall@50: 0.1792 - max: 0.2043
Precision@50: 0.3583 - max: 0.4086
28/100
Training set: 57,492 ratings
Test set: 76,733 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 427


100%|██████████| 427/427 [00:12<00:00, 33.21it/s]


14.4041 seconds
Recall@50: 0.1826 - max: 0.2043
Precision@50: 0.3653 - max: 0.4086
29/100
Training set: 55,130 ratings
Test set: 75,574 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 415


100%|██████████| 415/415 [00:12<00:00, 33.45it/s]


14.0901 seconds
Recall@50: 0.2115 - max: 0.2115
Precision@50: 0.4230 - max: 0.4230
30/100
Training set: 56,926 ratings
Test set: 76,038 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 411


100%|██████████| 411/411 [00:12<00:00, 32.99it/s]


14.2174 seconds
Recall@50: 0.1675 - max: 0.2115
Precision@50: 0.3350 - max: 0.4230
31/100
Training set: 48,009 ratings
Test set: 75,843 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 400


100%|██████████| 400/400 [00:07<00:00, 52.91it/s]


8.6394 seconds
Recall@50: 0.1518 - max: 0.2115
Precision@50: 0.3035 - max: 0.4230
32/100
Training set: 56,599 ratings
Test set: 76,806 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 433


100%|██████████| 433/433 [00:11<00:00, 38.42it/s]


12.8686 seconds
Recall@50: 0.1923 - max: 0.2115
Precision@50: 0.3845 - max: 0.4230
33/100
Training set: 53,814 ratings
Test set: 75,561 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:10<00:00, 39.12it/s]


11.7209 seconds
Recall@50: 0.1669 - max: 0.2115
Precision@50: 0.3338 - max: 0.4230
34/100
Training set: 49,175 ratings
Test set: 75,780 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:08<00:00, 46.10it/s]


10.1653 seconds
Recall@50: 0.1964 - max: 0.2115
Precision@50: 0.3927 - max: 0.4230
35/100
Training set: 53,419 ratings
Test set: 77,670 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 441


100%|██████████| 441/441 [00:09<00:00, 44.59it/s]


11.2372 seconds
Recall@50: 0.1696 - max: 0.2115
Precision@50: 0.3392 - max: 0.4230
36/100
Training set: 55,582 ratings
Test set: 76,028 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 409


100%|██████████| 409/409 [00:11<00:00, 34.98it/s]


13.4628 seconds
Recall@50: 0.1812 - max: 0.2115
Precision@50: 0.3624 - max: 0.4230
37/100
Training set: 52,392 ratings
Test set: 75,926 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 412


100%|██████████| 412/412 [00:11<00:00, 36.06it/s]


13.0925 seconds
Recall@50: 0.1576 - max: 0.2115
Precision@50: 0.3152 - max: 0.4230
38/100
Training set: 55,625 ratings
Test set: 77,494 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 451


100%|██████████| 451/451 [00:10<00:00, 42.94it/s]


12.0730 seconds
Recall@50: 0.1713 - max: 0.2115
Precision@50: 0.3426 - max: 0.4230
39/100
Training set: 52,319 ratings
Test set: 75,679 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 400


100%|██████████| 400/400 [00:13<00:00, 29.18it/s]


15.5462 seconds
Recall@50: 0.1521 - max: 0.2115
Precision@50: 0.3042 - max: 0.4230
40/100
Training set: 52,273 ratings
Test set: 76,071 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 412


100%|██████████| 412/412 [00:11<00:00, 35.17it/s]


13.0369 seconds
Recall@50: 0.1771 - max: 0.2115
Precision@50: 0.3542 - max: 0.4230
41/100
Training set: 53,018 ratings
Test set: 76,366 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 425


100%|██████████| 425/425 [00:10<00:00, 42.32it/s] 


11.4230 seconds
Recall@50: 0.1624 - max: 0.2115
Precision@50: 0.3248 - max: 0.4230
42/100
Training set: 57,738 ratings
Test set: 77,341 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 442


100%|██████████| 442/442 [00:10<00:00, 40.73it/s]


12.2113 seconds
Recall@50: 0.1653 - max: 0.2115
Precision@50: 0.3306 - max: 0.4230
43/100
Training set: 61,433 ratings
Test set: 75,699 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 421


100%|██████████| 421/421 [00:12<00:00, 33.97it/s]


13.9405 seconds
Recall@50: 0.1569 - max: 0.2115
Precision@50: 0.3138 - max: 0.4230
44/100
Training set: 53,112 ratings
Test set: 76,407 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 418


100%|██████████| 418/418 [00:13<00:00, 30.30it/s]


15.0613 seconds
Recall@50: 0.1642 - max: 0.2115
Precision@50: 0.3284 - max: 0.4230
45/100
Training set: 58,965 ratings
Test set: 76,358 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 418


100%|██████████| 418/418 [00:12<00:00, 32.40it/s]


14.3972 seconds
Recall@50: 0.1563 - max: 0.2115
Precision@50: 0.3126 - max: 0.4230
46/100
Training set: 58,342 ratings
Test set: 76,703 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 437


100%|██████████| 437/437 [00:11<00:00, 38.75it/s]


12.7166 seconds
Recall@50: 0.1950 - max: 0.2115
Precision@50: 0.3900 - max: 0.4230
47/100
Training set: 54,629 ratings
Test set: 76,678 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 422


100%|██████████| 422/422 [00:09<00:00, 44.52it/s]


11.0729 seconds
Recall@50: 0.1921 - max: 0.2115
Precision@50: 0.3842 - max: 0.4230
48/100
Training set: 54,388 ratings
Test set: 75,945 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 429


100%|██████████| 429/429 [00:10<00:00, 42.42it/s]


11.5801 seconds
Recall@50: 0.1586 - max: 0.2115
Precision@50: 0.3171 - max: 0.4230
49/100
Training set: 55,299 ratings
Test set: 75,952 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 417


100%|██████████| 417/417 [00:11<00:00, 36.17it/s]


13.4178 seconds
Recall@50: 0.1506 - max: 0.2115
Precision@50: 0.3013 - max: 0.4230
50/100
Training set: 50,520 ratings
Test set: 76,081 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 421


100%|██████████| 421/421 [00:08<00:00, 49.65it/s]


9.7350 seconds
Recall@50: 0.1783 - max: 0.2115
Precision@50: 0.3567 - max: 0.4230
51/100
Training set: 48,933 ratings
Test set: 75,602 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 398


100%|██████████| 398/398 [00:08<00:00, 48.31it/s]


9.4509 seconds
Recall@50: 0.1750 - max: 0.2115
Precision@50: 0.3500 - max: 0.4230
52/100
Training set: 60,293 ratings
Test set: 76,937 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 435


100%|██████████| 435/435 [00:11<00:00, 36.35it/s]


13.5167 seconds
Recall@50: 0.1492 - max: 0.2115
Precision@50: 0.2983 - max: 0.4230
53/100
Training set: 55,823 ratings
Test set: 75,015 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 414


100%|██████████| 414/414 [00:10<00:00, 38.06it/s]


12.2820 seconds
Recall@50: 0.1716 - max: 0.2115
Precision@50: 0.3432 - max: 0.4230
54/100
Training set: 49,043 ratings
Test set: 75,101 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 395


100%|██████████| 395/395 [00:08<00:00, 48.48it/s]


9.3002 seconds
Recall@50: 0.1900 - max: 0.2115
Precision@50: 0.3800 - max: 0.4230
55/100
Training set: 53,870 ratings
Test set: 75,363 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 399


100%|██████████| 399/399 [00:09<00:00, 42.43it/s]


10.6739 seconds
Recall@50: 0.1983 - max: 0.2115
Precision@50: 0.3967 - max: 0.4230
56/100
Training set: 53,102 ratings
Test set: 75,150 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 399


100%|██████████| 399/399 [00:08<00:00, 45.42it/s]


10.1999 seconds
Recall@50: 0.1812 - max: 0.2115
Precision@50: 0.3623 - max: 0.4230
57/100
Training set: 52,052 ratings
Test set: 74,943 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:11<00:00, 34.45it/s]


13.4420 seconds
Recall@50: 0.1621 - max: 0.2115
Precision@50: 0.3242 - max: 0.4230
58/100
Training set: 45,622 ratings
Test set: 75,033 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 386


100%|██████████| 386/386 [00:07<00:00, 48.85it/s]


9.1321 seconds
Recall@50: 0.1850 - max: 0.2115
Precision@50: 0.3700 - max: 0.4230
59/100
Training set: 49,652 ratings
Test set: 76,247 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 418


100%|██████████| 418/418 [00:07<00:00, 52.53it/s]


9.0990 seconds
Recall@50: 0.1639 - max: 0.2115
Precision@50: 0.3279 - max: 0.4230
60/100
Training set: 51,354 ratings
Test set: 77,025 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 432


100%|██████████| 432/432 [00:09<00:00, 47.83it/s]


10.2398 seconds
Recall@50: 0.1955 - max: 0.2115
Precision@50: 0.3910 - max: 0.4230
61/100
Training set: 48,632 ratings
Test set: 76,250 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 421


100%|██████████| 421/421 [00:08<00:00, 49.16it/s] 


9.7297 seconds
Recall@50: 0.1592 - max: 0.2115
Precision@50: 0.3184 - max: 0.4230
62/100
Training set: 58,514 ratings
Test set: 75,958 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 411


100%|██████████| 411/411 [00:12<00:00, 33.80it/s]


13.6692 seconds
Recall@50: 0.1821 - max: 0.2115
Precision@50: 0.3642 - max: 0.4230
63/100
Training set: 54,531 ratings
Test set: 76,936 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 431


100%|██████████| 431/431 [00:09<00:00, 45.58it/s]


10.8286 seconds
Recall@50: 0.1896 - max: 0.2115
Precision@50: 0.3791 - max: 0.4230
64/100
Training set: 54,218 ratings
Test set: 76,378 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 438


100%|██████████| 438/438 [00:10<00:00, 42.86it/s]


11.6162 seconds
Recall@50: 0.1893 - max: 0.2115
Precision@50: 0.3786 - max: 0.4230
65/100
Training set: 49,770 ratings
Test set: 75,522 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 401


100%|██████████| 401/401 [00:08<00:00, 45.97it/s]


9.9904 seconds
Recall@50: 0.1891 - max: 0.2115
Precision@50: 0.3782 - max: 0.4230
66/100
Training set: 54,081 ratings
Test set: 76,993 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 435


100%|██████████| 435/435 [00:09<00:00, 45.75it/s]


10.8312 seconds
Recall@50: 0.1920 - max: 0.2115
Precision@50: 0.3840 - max: 0.4230
67/100
Training set: 55,755 ratings
Test set: 75,527 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 406


100%|██████████| 406/406 [00:11<00:00, 35.49it/s]


12.9375 seconds
Recall@50: 0.1578 - max: 0.2115
Precision@50: 0.3156 - max: 0.4230
68/100
Training set: 54,639 ratings
Test set: 76,012 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 417


100%|██████████| 417/417 [00:09<00:00, 42.07it/s]


11.2860 seconds
Recall@50: 0.1563 - max: 0.2115
Precision@50: 0.3127 - max: 0.4230
69/100
Training set: 51,007 ratings
Test set: 76,120 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 403


100%|██████████| 403/403 [00:08<00:00, 45.62it/s]


10.0776 seconds
Recall@50: 0.1606 - max: 0.2115
Precision@50: 0.3211 - max: 0.4230
70/100
Training set: 53,229 ratings
Test set: 75,875 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 413


100%|██████████| 413/413 [00:12<00:00, 32.14it/s]


14.4416 seconds
Recall@50: 0.1859 - max: 0.2115
Precision@50: 0.3719 - max: 0.4230
71/100
Training set: 56,481 ratings
Test set: 74,385 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 400


100%|██████████| 400/400 [00:12<00:00, 31.51it/s]


14.2564 seconds
Recall@50: 0.1640 - max: 0.2115
Precision@50: 0.3280 - max: 0.4230
72/100
Training set: 57,233 ratings
Test set: 77,054 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 429


100%|██████████| 429/429 [00:12<00:00, 34.62it/s]


14.3276 seconds
Recall@50: 0.1895 - max: 0.2115
Precision@50: 0.3789 - max: 0.4230
73/100
Training set: 56,140 ratings
Test set: 77,217 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 430


100%|██████████| 430/430 [00:11<00:00, 37.17it/s]


13.5735 seconds
Recall@50: 0.1527 - max: 0.2115
Precision@50: 0.3055 - max: 0.4230
74/100
Training set: 58,001 ratings
Test set: 75,187 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:11<00:00, 36.02it/s]


12.7199 seconds
Recall@50: 0.1725 - max: 0.2115
Precision@50: 0.3450 - max: 0.4230
75/100
Training set: 53,861 ratings
Test set: 75,563 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 422


100%|██████████| 422/422 [00:10<00:00, 41.98it/s]


11.4891 seconds
Recall@50: 0.1489 - max: 0.2115
Precision@50: 0.2978 - max: 0.4230
76/100
Training set: 50,247 ratings
Test set: 76,573 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 409


100%|██████████| 409/409 [00:09<00:00, 45.31it/s]


10.4498 seconds
Recall@50: 0.1648 - max: 0.2115
Precision@50: 0.3296 - max: 0.4230
77/100
Training set: 54,833 ratings
Test set: 76,393 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 419


100%|██████████| 419/419 [00:11<00:00, 35.39it/s]


13.4577 seconds
Recall@50: 0.1786 - max: 0.2115
Precision@50: 0.3571 - max: 0.4230
78/100
Training set: 53,914 ratings
Test set: 74,665 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 396


100%|██████████| 396/396 [00:11<00:00, 34.04it/s]


13.2570 seconds
Recall@50: 0.1885 - max: 0.2115
Precision@50: 0.3770 - max: 0.4230
79/100
Training set: 56,250 ratings
Test set: 77,124 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 444


100%|██████████| 444/444 [00:10<00:00, 43.63it/s]


11.4831 seconds
Recall@50: 0.1647 - max: 0.2115
Precision@50: 0.3294 - max: 0.4230
80/100
Training set: 53,475 ratings
Test set: 75,817 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 413


100%|██████████| 413/413 [00:12<00:00, 32.21it/s]


14.7734 seconds
Recall@50: 0.1611 - max: 0.2115
Precision@50: 0.3221 - max: 0.4230
81/100
Training set: 57,640 ratings
Test set: 76,800 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 427


100%|██████████| 427/427 [00:11<00:00, 36.49it/s]


13.1063 seconds
Recall@50: 0.1550 - max: 0.2115
Precision@50: 0.3100 - max: 0.4230
82/100
Training set: 55,400 ratings
Test set: 76,173 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 415


100%|██████████| 415/415 [00:11<00:00, 37.67it/s]


12.5442 seconds
Recall@50: 0.1773 - max: 0.2115
Precision@50: 0.3545 - max: 0.4230
83/100
Training set: 55,509 ratings
Test set: 77,047 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 443


100%|██████████| 443/443 [00:09<00:00, 45.47it/s]


11.0522 seconds
Recall@50: 0.1668 - max: 0.2115
Precision@50: 0.3336 - max: 0.4230
84/100
Training set: 56,453 ratings
Test set: 76,543 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 451


100%|██████████| 451/451 [00:09<00:00, 48.54it/s]


10.5510 seconds
Recall@50: 0.1712 - max: 0.2115
Precision@50: 0.3423 - max: 0.4230
85/100
Training set: 54,974 ratings
Test set: 75,600 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 408


100%|██████████| 408/408 [00:09<00:00, 41.16it/s]


11.2102 seconds
Recall@50: 0.1629 - max: 0.2115
Precision@50: 0.3257 - max: 0.4230
86/100
Training set: 52,755 ratings
Test set: 77,614 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 437


100%|██████████| 437/437 [00:10<00:00, 43.01it/s]


11.5838 seconds
Recall@50: 0.1555 - max: 0.2115
Precision@50: 0.3109 - max: 0.4230
87/100
Training set: 55,311 ratings
Test set: 75,542 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 404


100%|██████████| 404/404 [00:11<00:00, 35.44it/s]


12.8861 seconds
Recall@50: 0.1743 - max: 0.2115
Precision@50: 0.3486 - max: 0.4230
88/100
Training set: 52,611 ratings
Test set: 75,926 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 404


100%|██████████| 404/404 [00:10<00:00, 36.99it/s]


12.3631 seconds
Recall@50: 0.1575 - max: 0.2115
Precision@50: 0.3150 - max: 0.4230
89/100
Training set: 54,423 ratings
Test set: 76,237 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 417


100%|██████████| 417/417 [00:10<00:00, 40.22it/s]


11.8746 seconds
Recall@50: 0.1480 - max: 0.2115
Precision@50: 0.2960 - max: 0.4230
90/100
Training set: 53,931 ratings
Test set: 76,485 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 431


100%|██████████| 431/431 [00:10<00:00, 41.51it/s]


11.8086 seconds
Recall@50: 0.1700 - max: 0.2115
Precision@50: 0.3400 - max: 0.4230
91/100
Training set: 51,571 ratings
Test set: 75,386 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 391


100%|██████████| 391/391 [00:12<00:00, 30.33it/s]


14.6773 seconds
Recall@50: 0.1800 - max: 0.2115
Precision@50: 0.3600 - max: 0.4230
92/100
Training set: 49,537 ratings
Test set: 75,992 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 411


100%|██████████| 411/411 [00:07<00:00, 52.44it/s]


9.0672 seconds
Recall@50: 0.1612 - max: 0.2115
Precision@50: 0.3223 - max: 0.4230
93/100
Training set: 57,285 ratings
Test set: 76,601 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 436


100%|██████████| 436/436 [00:11<00:00, 36.70it/s]


13.4204 seconds
Recall@50: 0.1415 - max: 0.2115
Precision@50: 0.2830 - max: 0.4230
94/100
Training set: 54,644 ratings
Test set: 74,508 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 387


100%|██████████| 387/387 [00:12<00:00, 30.34it/s]


14.3473 seconds
Recall@50: 0.1844 - max: 0.2115
Precision@50: 0.3689 - max: 0.4230
95/100
Training set: 57,617 ratings
Test set: 77,412 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 429


100%|██████████| 429/429 [00:11<00:00, 37.70it/s]


12.8297 seconds
Recall@50: 0.1770 - max: 0.2115
Precision@50: 0.3540 - max: 0.4230
96/100
Training set: 59,135 ratings
Test set: 77,405 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 442


100%|██████████| 442/442 [00:11<00:00, 37.62it/s]


13.3637 seconds
Recall@50: 0.1886 - max: 0.2115
Precision@50: 0.3771 - max: 0.4230
97/100
Training set: 55,480 ratings
Test set: 75,624 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 418


100%|██████████| 418/418 [00:10<00:00, 38.00it/s]


12.5294 seconds
Recall@50: 0.2010 - max: 0.2115
Precision@50: 0.4021 - max: 0.4230
98/100
Training set: 60,620 ratings
Test set: 76,425 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 434


100%|██████████| 434/434 [00:21<00:00, 20.42it/s]


23.2665 seconds
Recall@50: 0.1810 - max: 0.2115
Precision@50: 0.3620 - max: 0.4230
99/100
Training set: 52,893 ratings
Test set: 75,322 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 409


100%|██████████| 409/409 [00:13<00:00, 30.78it/s]


14.9486 seconds
Recall@50: 0.1852 - max: 0.2115
Precision@50: 0.3704 - max: 0.4230
100/100
Training set: 53,746 ratings
Test set: 76,134 ratings (Leave-100-out)
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 430


100%|██████████| 430/430 [00:13<00:00, 32.01it/s]


16.3158 seconds
Recall@50: 0.1858 - max: 0.2115
Precision@50: 0.3717 - max: 0.4230


In [20]:
leave_k_out = 100
n_users = 10000

# selected_user_ids = np.random.choice(selected_users['x'].unique(), 1000, replace=False)
all_users = ratings['userId'].unique()
selected_user_ids = np.random.choice(all_users, size=n_users, replace=False)
print(f"Selected {len(selected_user_ids)} unique users")

filtered_ratings = ratings[ratings['userId'].isin(selected_user_ids)]
print(f"Original ratings shape: {ratings.shape}")
print(f"Filtered ratings shape: {filtered_ratings.shape}")

filtered_ratings_sorted = filtered_ratings.sort_values(['userId', 'timestamp'])

train_ratings = filtered_ratings_sorted.groupby('userId').apply(lambda x: x.iloc[:-leave_k_out] if len(x) > leave_k_out else pd.DataFrame()).reset_index(drop=True)
test_ratings = filtered_ratings_sorted.groupby('userId').tail(leave_k_out)

print(f"Training set: {train_ratings.shape[0]:,} ratings")
print(f"Test set: {test_ratings.shape[0]:,} ratings")

print("Training implicit feedback Markov chain...")
model = ImplicitMarkovChain(alpha=2.0)
start = time.time()
model.build_transition_matrix_efficient(train_ratings, sample_fraction=1)
print(f"{time.time() - start:.4f} seconds")

Selected 10000 unique users
Original ratings shape: (10246362, 4)
Filtered ratings shape: (1307853, 4)
Training set: 542,327 ratings
Test set: 765,526 ratings
Training implicit feedback Markov chain...
Processing user ratings...
User Groups: 4234


100%|██████████| 4234/4234 [04:05<00:00, 17.25it/s]


253.0982 seconds


In [None]:
# from curses import window

K = 50
total_recall = 0
users_not_considered = 0
recalls = []
total_precision = 0
user_recommendations = {}
train_ratings_sorted = train_ratings.sort_values(['userId', 'timestamp'])

for user_id in selected_user_ids:
    user_data = train_ratings_sorted[train_ratings_sorted['userId'] == user_id]
    user_training_samples = len(user_data)
    
    candidate_scores = {}
    
    window_size = 0
    while len(candidate_scores) < K and window_size < len(user_data):
        # print(window_size)
        window_size += K
        additional_movies = user_data['movieId'].tail(window_size).head(K).tolist()

        
        for movie_id in additional_movies:
            probabilities = model.transition_matrix[model.movie_to_idx[movie_id]]
            top_indices = np.argsort(probabilities)[-(K):][::-1]
            
            movies_added = 0
            for i in top_indices:
                # print(f"movies added: {movies_added}")
                # if movies_added >= K:
                #     break
                    
                candidate_movie_id = model.idx_to_movie[i]
                candidate_prob = probabilities[i]
                
                # Skip if user has already rated this movie
                if candidate_movie_id in user_data['movieId'].values:
                    continue
                    
                # Keep the highest probability for each movie
                if candidate_movie_id not in candidate_scores:
                    candidate_scores[candidate_movie_id] = candidate_prob
                    movies_added += 1
                elif candidate_prob > candidate_scores[candidate_movie_id]:
                    candidate_scores[candidate_movie_id] = candidate_prob
                    
            # if len(candidate_scores) >= K:
            #     break


    top_movies = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)[:K]
    if len(top_movies) < K:
        users_not_considered += 1
        continue
    candidate_movies = [movie_id for movie_id, score in top_movies]
    recommended_movies = set(candidate_movies)
    user_test_ratings = test_ratings[test_ratings['userId'] == user_id]
    relevant_movies = set(user_test_ratings['movieId'].tolist())
    
    # validation
    hits = relevant_movies.intersection(recommended_movies)
    
    user_recall = 0
    if len(relevant_movies) > 0:
        user_recall = len(hits) / len(relevant_movies)
        total_recall += user_recall    
    recalls.append(user_recall)
    print(f"userid: {user_id} - len(top_movies): {len(top_movies)} - training_data: {len(user_data)} - test_data: {len(relevant_movies)} - window: {window_size} - hits: {len(hits)} recall: {user_recall}")

    if len(recommended_movies) > 0:
        user_precision = len(hits) / len(recommended_movies)
        total_precision += user_precision


print(f"total users: {len(selected_user_ids)} - users not consid. {users_not_considered} - total_recall: {total_recall}")
avg_recall = total_recall / (len(selected_user_ids)-users_not_considered)
avg_precision = total_precision / ((len(selected_user_ids)-users_not_considered)*K)

print("\n=== Final Results ===")
# print(f"Total users processed: {len(selected_user_ids)}")
print(f"Recall@{K}: {avg_recall:.4f}")
# print(f"Precision@{K}: {avg_precision:.4f}")

userid: 117305 - len(top_movies): 50 - training_data: 81 - test_data: 100 - window: 10 - hits: 14 recall: 0.14
userid: 28943 - len(top_movies): 50 - training_data: 45 - test_data: 100 - window: 10 - hits: 19 recall: 0.19
userid: 23301 - len(top_movies): 50 - training_data: 17 - test_data: 100 - window: 10 - hits: 34 recall: 0.34
userid: 67749 - len(top_movies): 50 - training_data: 206 - test_data: 100 - window: 10 - hits: 8 recall: 0.08
userid: 96468 - len(top_movies): 50 - training_data: 87 - test_data: 100 - window: 10 - hits: 17 recall: 0.17
userid: 67629 - len(top_movies): 50 - training_data: 148 - test_data: 100 - window: 10 - hits: 8 recall: 0.08
userid: 45958 - len(top_movies): 50 - training_data: 102 - test_data: 100 - window: 10 - hits: 3 recall: 0.03
userid: 58606 - len(top_movies): 50 - training_data: 6 - test_data: 100 - window: 10 - hits: 33 recall: 0.33
userid: 122589 - len(top_movies): 50 - training_data: 18 - test_data: 100 - window: 10 - hits: 37 recall: 0.37
userid: 1

In [27]:
K = 50
total_recall = 0
users_not_considered = 0
recalls = []
total_precision = 0
user_recommendations = {}
train_ratings_sorted = train_ratings.sort_values(['userId', 'timestamp'])

for user_id in selected_user_ids:
    user_data = train_ratings_sorted[train_ratings_sorted['userId'] == user_id]
    user_training_samples = len(user_data)
    
    # Only consider last 5 samples
    recent_movies = user_data['movieId'].tail(1).tolist()
    
    candidate_scores = {}
    
    # Process only the last 5 movies
    for movie_id in recent_movies:
        probabilities = model.transition_matrix[model.movie_to_idx[movie_id]]
        top_indices = np.argsort(probabilities)[-(K):][::-1]
        
        for i in top_indices:
            candidate_movie_id = model.idx_to_movie[i]
            candidate_prob = probabilities[i]
            
            # Skip if user has already rated this movie
            if candidate_movie_id in user_data['movieId'].values:
                continue
                
            # Keep the highest probability for each movie
            if candidate_movie_id not in candidate_scores:
                candidate_scores[candidate_movie_id] = candidate_prob
            elif candidate_prob > candidate_scores[candidate_movie_id]:
                candidate_scores[candidate_movie_id] = candidate_prob
    
    # Get top K movies from candidate scores
    top_movies = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)[:K]
    
    if len(top_movies) < K:
        users_not_considered += 1
        # print(f"userid: {user_id} - Insufficient recommendations: {len(top_movies)}/{K} - Skipping")
        continue
        
    candidate_movies = [movie_id for movie_id, score in top_movies]
    recommended_movies = set(candidate_movies)
    user_test_ratings = test_ratings[test_ratings['userId'] == user_id]
    relevant_movies = set(user_test_ratings['movieId'].tolist())
    
    # Validation
    hits = relevant_movies.intersection(recommended_movies)
    
    user_recall = 0
    if len(relevant_movies) > 0:
        user_recall = len(hits) / len(relevant_movies)
        total_recall += user_recall    
    recalls.append(user_recall)
    
    if len(recommended_movies) > 0:
        user_precision = len(hits) / len(recommended_movies)
        total_precision += user_precision

    print(f"userid: {user_id} - recent_movies: {len(recent_movies)} - recommendations: {len(top_movies)} - test_data: {len(relevant_movies)} - hits: {len(hits)} - recall: {user_recall:.4f}")

print(f"total users: {len(selected_user_ids)} - users not consid. {users_not_considered} - total_recall: {total_recall}")

if (len(selected_user_ids) - users_not_considered) > 0:
    avg_recall = total_recall / (len(selected_user_ids) - users_not_considered)
    avg_precision = total_precision / (len(selected_user_ids) - users_not_considered)
else:
    avg_recall = 0
    avg_precision = 0

print("\n=== Final Results ===")
print(f"Recall@{K}: {avg_recall:.4f}")
print(f"Precision@{K}: {avg_precision:.4f}")

userid: 58606 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 34 - recall: 0.3400
userid: 137151 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 3 - recall: 0.0300
userid: 116479 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 15 - recall: 0.1500
userid: 47743 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 18 - recall: 0.1800
userid: 58327 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 18 - recall: 0.1800
userid: 123958 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 19 - recall: 0.1900
userid: 122728 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 17 - recall: 0.1700
userid: 80029 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 23 - recall: 0.2300
userid: 111335 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 18 - recall: 0.1800
userid: 55021 - recent_movies: 1 - recommendations: 50 - test_data: 100 - hits: 30 - recall: 0.3