# 📓 Notebook 03: Model Training
## Hệ thống Recommendation Phim

**Mục tiêu:**
1. Load data từ MongoDB
2. Train/Test split (80/20)
3. Train 4 mô hình:
   - Content-Based Filtering
   - Item-Based Collaborative Filtering
   - User-Based Collaborative Filtering
   - Hybrid Model
4. Lưu trained models vào file .pkl
5. Test predictions cho sample users

---

## 1. Import Libraries

In [None]:
# Import libraries
import os
import sys
import pickle
import warnings
from datetime import datetime

# Data manipulation
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# MongoDB
from pymongo import MongoClient

# Progress bar
from tqdm import tqdm

# Put the parent of the current working directory first on the import path
# so modules living one level above the notebook can be imported.
_parent_dir = os.path.dirname(os.getcwd())
sys.path.insert(0, _parent_dir)

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

print("‚úÖ Libraries imported successfully!")

## 2. Configuration

In [None]:
# MongoDB connection target
MONGO_URI = "mongodb://localhost:27017"
DB_NAME = "movie_recommendation"

# Output folders (relative paths, resolved against the working directory)
MODELS_DIR = "../models_saved"
PROCESSED_DIR = "../data/processed"

# Train/test split settings
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Make sure both output folders exist before anything is written to them.
for _output_dir in (MODELS_DIR, PROCESSED_DIR):
    os.makedirs(_output_dir, exist_ok=True)

print(f"üìÅ Models will be saved to: {MODELS_DIR}")
print(f"üé≤ Random state: {RANDOM_STATE}")
print(f"üìä Test size: {TEST_SIZE}")

## 3. Load Data from MongoDB

In [None]:
# Connect to MongoDB
print("üîó Connecting to MongoDB...")
client = MongoClient(MONGO_URI)
# MongoClient connects lazily, so constructing it proves nothing. A cheap
# "ping" forces a round trip and makes an unreachable server fail here,
# before the success message, instead of inside the first find().
client.admin.command('ping')
db = client[DB_NAME]
print(f"‚úÖ Connected to: {DB_NAME}")

# Load data
print("\nüì• Loading data...")

# Load movies (the Mongo "_id" field is projected away)
movies_df = pd.DataFrame(list(db.movies.find({}, {'_id': 0})))
print(f"  üìΩÔ∏è Movies: {len(movies_df):,}")

# Load ratings (the Mongo "_id" field is projected away)
ratings_df = pd.DataFrame(list(db.ratings.find({}, {'_id': 0})))
print(f"  ‚≠ê Ratings: {len(ratings_df):,}")

# Fail fast with a readable message: an empty collection would otherwise
# surface later as an obscure error in the split or in a column lookup.
if movies_df.empty or ratings_df.empty:
    raise RuntimeError(
        f"No data loaded from database '{DB_NAME}': "
        f"{len(movies_df)} movies, {len(ratings_df)} ratings. "
        "Populate the 'movies' and 'ratings' collections first."
    )

print("\n‚úÖ Data loaded successfully!")

## 4. Train/Test Split

In [None]:
# Randomly partition the ratings into a training and a held-out test frame.
print("üìä Splitting data into train/test sets...")

train_ratings, test_ratings = train_test_split(
    ratings_df,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

print(f"  üìà Train set: {len(train_ratings):,} ratings ({100-TEST_SIZE*100:.0f}%)")
print(f"  üìâ Test set:  {len(test_ratings):,} ratings ({TEST_SIZE*100:.0f}%)")

# Persist both partitions as CSV (without the index) for later evaluation.
for _csv_name, _frame in (
    ('train_ratings.csv', train_ratings),
    ('test_ratings.csv', test_ratings),
):
    _frame.to_csv(os.path.join(PROCESSED_DIR, _csv_name), index=False)
print(f"\n‚úÖ Train/test data saved to {PROCESSED_DIR}")

## 5. Train Content-Based Filtering Model

In [None]:
class ContentBasedModel:
    """Content-based recommender using TF-IDF vectors of movie genres.

    ``fit`` turns every movie's genre list into a small text document,
    vectorises it with TF-IDF and stores the pairwise cosine-similarity
    matrix (one row and one column per movie). Lookups translate
    ``movieId`` values to row positions of that matrix and back.
    """

    def __init__(self):
        self.movies_df = None           # movie frame with a 0..N-1 index
        self.tfidf_matrix = None        # TF-IDF matrix, one row per movie
        self.similarity_matrix = None   # pairwise cosine similarities
        self.movie_id_to_idx = {}       # movieId -> row position
        self.idx_to_movie_id = {}       # row position -> movieId
        self.tfidf_vectorizer = None
        self.is_fitted = False

    def fit(self, movies_df):
        """Build the TF-IDF and similarity matrices from ``movies_df``.

        Args:
            movies_df: DataFrame with at least ``movieId`` and ``genres``
                columns (``title`` is read later when building results).
                ``genres`` values that are not lists are treated as empty.

        Returns:
            The fitted model itself, so calls can be chained.
        """
        print("üîß Training Content-Based Model...")

        self.movies_df = movies_df.reset_index(drop=True)

        # Rebuild the mappings from scratch so that fitting a second time
        # does not keep ids from the previous movie frame around.
        self.movie_id_to_idx = {}
        self.idx_to_movie_id = {}
        for idx, movie_id in enumerate(self.movies_df['movieId']):
            self.movie_id_to_idx[movie_id] = idx
            self.idx_to_movie_id[idx] = movie_id

        # One whitespace-joined "document" per movie for the vectorizer.
        genres_text = self.movies_df['genres'].apply(
            lambda x: ' '.join(x) if isinstance(x, list) else ''
        )

        # TF-IDF vectorization
        self.tfidf_vectorizer = TfidfVectorizer(max_features=50)
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(genres_text)

        print(f"  ‚úÖ TF-IDF Matrix: {self.tfidf_matrix.shape}")

        # Pairwise cosine similarity between all movies
        print("  Computing similarity matrix...")
        self.similarity_matrix = cosine_similarity(self.tfidf_matrix)
        print(f"  ‚úÖ Similarity Matrix: {self.similarity_matrix.shape}")

        self.is_fitted = True
        return self

    def get_similar_movies(self, movie_id, n=10):
        """Return up to ``n`` movies most similar to ``movie_id``.

        The query movie is excluded by its row position. It cannot be
        excluded by "dropping the top hit": movies with identical genres
        tie at the maximum similarity, so the query movie is not
        guaranteed to be ranked first.

        Args:
            movie_id: id of the query movie.
            n: maximum number of neighbours to return.

        Returns:
            A list of dicts with ``movieId``, ``title``, ``similarity`` and
            ``avgRating`` (0 when the column is absent), ordered by
            decreasing similarity. Ties keep ascending row order. Empty
            when ``movie_id`` is unknown.
        """
        if movie_id not in self.movie_id_to_idx:
            return []

        idx = self.movie_id_to_idx[movie_id]
        row = np.asarray(self.similarity_matrix[idx]).ravel()

        # A stable sort on the negated scores yields descending similarity
        # with ties left in ascending row order.
        ranked = np.argsort(-row, kind='stable')

        results = []
        for pos in ranked:
            if len(results) >= n:
                break
            pos = int(pos)
            if pos == idx:
                continue  # never report the query movie itself
            # Row positions match self.movies_df because fit() resets the
            # index, so a positional lookup replaces a full column scan.
            movie = self.movies_df.iloc[pos]
            results.append({
                'movieId': int(self.idx_to_movie_id[pos]),
                'title': movie['title'],
                'similarity': float(row[pos]),
                'avgRating': float(movie.get('avgRating', 0))
            })
        return results

    def recommend_for_user(self, user_id, user_ratings, n=10, exclude=None):
        """Recommend movies similar to the ones a user rated highly.

        Movies rated >= 4 are used as seeds. If there are none, the first
        five rated movies are used instead. Each candidate is scored with
        the mean similarity over the seeds that produced it.

        Args:
            user_id: unused by this model; kept so all models share one
                call shape.
            user_ratings: list of dicts with ``movieId`` and ``rating``.
            n: maximum number of recommendations.
            exclude: optional iterable of movie ids that must not be
                recommended.

        Returns:
            A list of dicts with ``movieId``, ``title``, ``score`` and
            ``avgRating``, best first. Movies present in ``user_ratings``
            are never returned.
        """
        if not user_ratings:
            return []

        # Skip both the caller's exclusions and everything already rated.
        skip = {r['movieId'] for r in user_ratings}
        skip.update(exclude or ())

        # Seeds: highly rated movies (rating >= 4), else the first five.
        liked_movies = [r['movieId'] for r in user_ratings if r['rating'] >= 4]
        if not liked_movies:
            liked_movies = [r['movieId'] for r in user_ratings][:5]

        # Accumulate similarity mass and hit counts per candidate.
        scores = {}
        for movie_id in liked_movies:
            for item in self.get_similar_movies(movie_id, n=50):
                mid = item['movieId']
                if mid in skip:
                    continue
                entry = scores.setdefault(mid, {
                    'score': 0, 'count': 0, 'title': item['title'],
                    'avgRating': item['avgRating']
                })
                entry['score'] += item['similarity']
                entry['count'] += 1

        # Rank by the average similarity to the seeds.
        recommendations = [
            {
                'movieId': mid,
                'title': data['title'],
                'score': data['score'] / data['count'],
                'avgRating': data['avgRating']
            }
            for mid, data in scores.items()
        ]
        recommendations.sort(key=lambda x: x['score'], reverse=True)
        return recommendations[:n]

print("‚úÖ ContentBasedModel class defined")

In [None]:
# Fit the content-based recommender; fit() hands back the model itself.
content_model = ContentBasedModel().fit(movies_df)

# Smoke test: list the nearest neighbours of movieId 1.
print("\nüß™ Testing Content-Based Model:")
similar = content_model.get_similar_movies(1, n=5)  # Toy Story
print("Movies similar to Toy Story (1995):")
for neighbour in similar:
    print(f"  - {neighbour['title']} (similarity: {neighbour['similarity']:.3f})")

## 6. Train Item-Based Collaborative Filtering Model

In [None]:
class ItemBasedModel:
    """Item-based collaborative filtering over co-rating patterns.

    Ratings are held in two nested dictionaries (movie -> user -> rating
    and user -> movie -> rating). Item similarities are not precomputed;
    they are evaluated on demand as the cosine of the two movies' rating
    vectors restricted to the users who rated both.
    """

    def __init__(self, k_neighbors=50, min_common_users=3):
        self.k_neighbors = k_neighbors            # neighbours used per prediction
        self.min_common_users = min_common_users  # co-raters needed for a similarity
        self.item_similarity = {}
        self.movie_users = {}  # movie_id -> {user_id: rating}
        self.user_movies = {}  # user_id -> {movie_id: rating}
        self.movies_df = None
        self.movie_id_to_title = {}
        self.is_fitted = False

    def fit(self, ratings_df, movies_df):
        """Index the ratings for later similarity and prediction queries.

        Args:
            ratings_df: DataFrame with ``userId``, ``movieId`` and
                ``rating`` columns.
            movies_df: DataFrame with ``movieId`` and ``title`` columns,
                used only to attach titles to results.

        Returns:
            The fitted model itself.
        """
        print("üîß Training Item-Based Model...")

        self.movies_df = movies_df
        self.movie_id_to_title = dict(zip(movies_df['movieId'], movies_df['title']))

        # Start from empty indexes so a re-fit never mixes two datasets.
        self.movie_users = {}
        self.user_movies = {}

        # Walk the three columns in lockstep instead of using iterrows():
        # iterrows() builds a Series per row and upcasts the ids to the
        # row's common dtype (typically float).
        print("  Building rating matrices...")
        rows = zip(ratings_df['userId'], ratings_df['movieId'], ratings_df['rating'])
        for user_id, movie_id, rating in tqdm(rows, total=len(ratings_df), desc="  Processing ratings"):
            self.movie_users.setdefault(movie_id, {})[user_id] = rating
            self.user_movies.setdefault(user_id, {})[movie_id] = rating

        print(f"  ‚úÖ Processed {len(self.movie_users):,} movies and {len(self.user_movies):,} users")

        # Similarities are computed lazily, on demand.
        self.is_fitted = True
        return self

    def _compute_similarity(self, movie1, movie2):
        """Return the cosine similarity of two movies over common raters.

        Returns 0.0 when either movie is unknown, when fewer than
        ``min_common_users`` users rated both, or when one of the rating
        vectors has zero norm.
        """
        if movie1 not in self.movie_users or movie2 not in self.movie_users:
            return 0.0

        raters1 = self.movie_users[movie1]
        raters2 = self.movie_users[movie2]
        common_users = raters1.keys() & raters2.keys()

        if len(common_users) < self.min_common_users:
            return 0.0

        # Both vectors are built by iterating the same set, so they align.
        ratings1 = np.array([raters1[u] for u in common_users])
        ratings2 = np.array([raters2[u] for u in common_users])

        norm1 = np.linalg.norm(ratings1)
        norm2 = np.linalg.norm(ratings2)
        if norm1 == 0 or norm2 == 0:
            return 0.0

        return float(np.dot(ratings1, ratings2) / (norm1 * norm2))

    def get_similar_items(self, movie_id, n=10):
        """Return up to ``n`` movies with the highest positive similarity.

        Every other known movie is compared against ``movie_id``, so one
        call costs a similarity computation per movie in the index.

        Returns:
            A list of dicts with ``movieId``, ``title`` and ``similarity``,
            best first. Empty when ``movie_id`` is unknown.
        """
        if movie_id not in self.movie_users:
            return []

        similarities = []
        for other_id in self.movie_users:
            if other_id == movie_id:
                continue
            sim = self._compute_similarity(movie_id, other_id)
            if sim > 0:
                similarities.append((other_id, sim))

        similarities.sort(key=lambda x: x[1], reverse=True)

        return [
            {
                'movieId': int(mid),
                'title': self.movie_id_to_title.get(mid, f'Movie {mid}'),
                'similarity': sim
            }
            for mid, sim in similarities[:n]
        ]

    def predict_rating(self, user_id, movie_id):
        """Predict the rating ``user_id`` would give ``movie_id``.

        The prediction is the similarity-weighted mean of the user's own
        ratings over the ``k_neighbors`` rated movies most similar to the
        target. The target itself is never used as its own neighbour. A
        falsy ``k_neighbors`` (``None`` or 0) keeps every positively
        similar movie.

        Returns:
            The predicted rating, or ``None`` when the user or movie is
            unknown or no rated movie has a positive similarity.
        """
        if user_id not in self.user_movies or movie_id not in self.movie_users:
            return None

        neighbours = []
        for rated_movie, rating in self.user_movies[user_id].items():
            if rated_movie == movie_id:
                continue  # a movie must not vote for itself
            sim = self._compute_similarity(movie_id, rated_movie)
            if sim > 0:
                neighbours.append((sim, rating))

        if not neighbours:
            return None

        # Honour k_neighbors: keep only the most similar rated movies.
        if self.k_neighbors and len(neighbours) > self.k_neighbors:
            neighbours.sort(key=lambda pair: pair[0], reverse=True)
            neighbours = neighbours[:self.k_neighbors]

        weighted_sum = 0.0
        sim_sum = 0.0
        for sim, rating in neighbours:
            weighted_sum += sim * rating
            sim_sum += sim

        return weighted_sum / sim_sum

    def recommend(self, user_id, n=10, exclude=None, max_candidates=500):
        """Recommend unrated movies with the highest predicted rating.

        To bound the cost, only ``max_candidates`` movies are scored. They
        are chosen deterministically: movies with the most raters first,
        ties broken by ascending movie id.

        Args:
            user_id: user to recommend for.
            n: maximum number of recommendations.
            exclude: optional iterable of movie ids to leave out.
            max_candidates: number of candidates to score; ``None`` scores
                all of them.

        Returns:
            A list of dicts with ``movieId``, ``title`` and
            ``predictedRating``, best first. Empty for unknown users.
        """
        if user_id not in self.user_movies:
            return []

        user_ratings = self.user_movies[user_id]
        candidates = set(self.movie_users).difference(user_ratings, exclude or ())

        ranked = sorted(candidates, key=lambda m: (-len(self.movie_users[m]), m))
        if max_candidates is not None:
            ranked = ranked[:max_candidates]

        predictions = []
        for movie_id in ranked:
            pred = self.predict_rating(user_id, movie_id)
            if pred is not None:
                predictions.append({
                    'movieId': int(movie_id),
                    'title': self.movie_id_to_title.get(movie_id, f'Movie {movie_id}'),
                    'predictedRating': pred
                })

        predictions.sort(key=lambda x: x['predictedRating'], reverse=True)
        return predictions[:n]

print("‚úÖ ItemBasedModel class defined")

In [None]:
# Fit the item-based recommender on the training ratings; fit() returns
# the model itself.
item_model = ItemBasedModel(k_neighbors=50, min_common_users=3).fit(train_ratings, movies_df)

# Smoke test: list the nearest neighbours of movieId 1.
print("\nüß™ Testing Item-Based Model:")
similar = item_model.get_similar_items(1, n=5)  # Toy Story
print("Items similar to Toy Story (1995):")
for neighbour in similar:
    print(f"  - {neighbour['title']} (similarity: {neighbour['similarity']:.3f})")

## 7. Train User-Based Collaborative Filtering Model

In [None]:
class UserBasedModel:
    """User-based collaborative filtering with Pearson user similarity.

    Ratings are indexed per user and per movie. User similarity is the
    Pearson correlation over commonly rated movies, and predictions are the
    user's mean rating plus a similarity-weighted mean of the neighbours'
    deviations from their own means.
    """

    def __init__(self, k_neighbors=50, min_common_items=5):
        self.k_neighbors = k_neighbors            # neighbours used per prediction
        self.min_common_items = min_common_items  # co-rated movies needed for a similarity
        self.user_movies = {}  # user_id -> {movie_id: rating}
        self.movie_users = {}  # movie_id -> {user_id: rating}
        self.user_mean_rating = {}
        self.movies_df = None
        self.movie_id_to_title = {}
        self.is_fitted = False

    def fit(self, ratings_df, movies_df):
        """Index the ratings and cache every user's mean rating.

        Args:
            ratings_df: DataFrame with ``userId``, ``movieId`` and
                ``rating`` columns.
            movies_df: DataFrame with ``movieId`` and ``title`` columns,
                used only to attach titles to results.

        Returns:
            The fitted model itself.
        """
        print("üîß Training User-Based Model...")

        self.movies_df = movies_df
        self.movie_id_to_title = dict(zip(movies_df['movieId'], movies_df['title']))

        # Start from empty indexes so a re-fit never mixes two datasets.
        self.user_movies = {}
        self.movie_users = {}
        self.user_mean_rating = {}

        # Walk the three columns in lockstep instead of using iterrows():
        # iterrows() builds a Series per row and upcasts the ids to the
        # row's common dtype (typically float).
        print("  Building user-movie matrices...")
        rows = zip(ratings_df['userId'], ratings_df['movieId'], ratings_df['rating'])
        for user_id, movie_id, rating in tqdm(rows, total=len(ratings_df), desc="  Processing ratings"):
            self.user_movies.setdefault(user_id, {})[movie_id] = rating
            self.movie_users.setdefault(movie_id, {})[user_id] = rating

        # Per-user mean rating, used to centre the predictions.
        for user_id, movies in self.user_movies.items():
            self.user_mean_rating[user_id] = np.mean(list(movies.values()))

        print(f"  ‚úÖ Processed {len(self.user_movies):,} users and {len(self.movie_users):,} movies")

        self.is_fitted = True
        return self

    def _compute_user_similarity(self, user1, user2):
        """Return the Pearson correlation of two users over shared movies.

        Returns 0.0 when either user is unknown, when they share fewer
        than ``min_common_items`` movies, or when one of them rated all
        shared movies identically (zero variance).
        """
        if user1 not in self.user_movies or user2 not in self.user_movies:
            return 0.0

        rated1 = self.user_movies[user1]
        rated2 = self.user_movies[user2]
        common_movies = rated1.keys() & rated2.keys()

        if len(common_movies) < self.min_common_items:
            return 0.0

        # Both vectors are built by iterating the same set, so they align.
        ratings1 = np.array([rated1[m] for m in common_movies])
        ratings2 = np.array([rated2[m] for m in common_movies])

        centred1 = ratings1 - np.mean(ratings1)
        centred2 = ratings2 - np.mean(ratings2)

        num = np.sum(centred1 * centred2)
        den = np.sqrt(np.sum(centred1**2) * np.sum(centred2**2))

        if den == 0:
            return 0.0

        return float(num / den)

    def get_similar_users(self, user_id, n=10):
        """Return up to ``n`` ``(user_id, similarity)`` pairs, best first.

        Only users with a strictly positive correlation are kept. Every
        other known user is compared, so one call costs one similarity
        computation per user in the index.
        """
        if user_id not in self.user_movies:
            return []

        similarities = []
        for other_id in self.user_movies:
            if other_id == user_id:
                continue
            sim = self._compute_user_similarity(user_id, other_id)
            if sim > 0:
                similarities.append((other_id, sim))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:n]

    def _predict_from_neighbors(self, user_id, movie_id, neighbors):
        """Predict a rating from an already computed neighbour list.

        Args:
            user_id: a user present in the index.
            movie_id: movie to score.
            neighbors: ``(other_user_id, similarity)`` pairs as returned
                by ``get_similar_users``.

        Returns:
            The user's mean rating plus the similarity-weighted mean
            deviation of the neighbours who rated ``movie_id``; just the
            user's mean when none of them did.
        """
        user_mean = self.user_mean_rating.get(user_id, 3.0)

        weighted_sum = 0.0
        sim_sum = 0.0
        for other_id, sim in neighbors:
            other_ratings = self.user_movies.get(other_id, {})
            if movie_id in other_ratings:
                other_mean = self.user_mean_rating.get(other_id, 3.0)
                weighted_sum += sim * (other_ratings[movie_id] - other_mean)
                sim_sum += abs(sim)

        if sim_sum == 0:
            return user_mean

        return user_mean + (weighted_sum / sim_sum)

    def predict_rating(self, user_id, movie_id):
        """Predict the rating ``user_id`` would give ``movie_id``.

        Uses the ``k_neighbors`` most similar users. The result is not
        clipped to the rating scale.

        Returns:
            The predicted rating, or ``None`` for an unknown user.
        """
        if user_id not in self.user_movies:
            return None

        neighbors = self.get_similar_users(user_id, n=self.k_neighbors)
        return self._predict_from_neighbors(user_id, movie_id, neighbors)

    def recommend(self, user_id, n=10, exclude=None, max_candidates=500):
        """Recommend movies the user's nearest neighbours have rated.

        The neighbour list is computed once and reused for every
        candidate. To bound the cost, only ``max_candidates`` movies are
        scored, chosen deterministically: movies rated by the most
        neighbours first, ties broken by ascending movie id.

        Args:
            user_id: user to recommend for.
            n: maximum number of recommendations.
            exclude: optional iterable of movie ids to leave out.
            max_candidates: number of candidates to score; ``None`` scores
                all of them.

        Returns:
            A list of dicts with ``movieId``, ``title`` and
            ``predictedRating`` (clipped to 0.5..5.0), best first. Empty
            for unknown users.
        """
        if user_id not in self.user_movies:
            return []

        skip = set(self.user_movies[user_id])
        skip.update(exclude or ())

        # One neighbour search for the whole call.
        neighbors = self.get_similar_users(user_id, n=self.k_neighbors)

        # Count how many neighbours rated each not-yet-seen movie.
        support = {}
        for other_id, _ in neighbors:
            for movie_id in self.user_movies[other_id]:
                if movie_id not in skip:
                    support[movie_id] = support.get(movie_id, 0) + 1

        ranked = sorted(support, key=lambda m: (-support[m], m))
        if max_candidates is not None:
            ranked = ranked[:max_candidates]

        predictions = []
        for movie_id in ranked:
            pred = self._predict_from_neighbors(user_id, movie_id, neighbors)
            predictions.append({
                'movieId': int(movie_id),
                'title': self.movie_id_to_title.get(movie_id, f'Movie {movie_id}'),
                'predictedRating': max(0.5, min(5.0, pred))  # Clip to valid range
            })

        predictions.sort(key=lambda x: x['predictedRating'], reverse=True)
        return predictions[:n]

print("‚úÖ UserBasedModel class defined")

In [None]:
# Fit the user-based recommender on the training ratings; fit() returns
# the model itself.
user_model = UserBasedModel(k_neighbors=50, min_common_items=5).fit(train_ratings, movies_df)

# Smoke test: count the positively correlated neighbours found for user 1.
print("\nüß™ Testing User-Based Model:")
_neighbour_count = len(user_model.get_similar_users(1, n=10))
print(f"User 1 similar users: {_neighbour_count} found")

## 8. Train Hybrid Model

In [None]:
class HybridModel:
    """Weighted blend of the content, item-based and user-based models.

    The three weights are normalised to sum to one. The sub-models are
    supplied already trained; this class only merges their outputs.
    """

    def __init__(self, content_weight=0.3, item_weight=0.35, user_weight=0.35):
        # Scale the weights so that they add up to 1.
        total = content_weight + item_weight + user_weight
        self.content_weight = content_weight / total
        self.item_weight = item_weight / total
        self.user_weight = user_weight / total

        self.content_model = None
        self.item_model = None
        self.user_model = None
        self.movies_df = None
        self.is_fitted = False

    def fit(self, movies_df, ratings_df,
            content_model=None, item_model=None, user_model=None):
        """Attach the movie frame and the pre-trained sub-models.

        ``ratings_df`` is accepted but not used here. Returns the model
        itself.
        """
        print("üîß Training Hybrid Model...")

        self.movies_df = movies_df
        self.content_model, self.item_model, self.user_model = (
            content_model, item_model, user_model
        )

        print(f"  ‚úÖ Weights - Content: {self.content_weight:.2f}, Item: {self.item_weight:.2f}, User: {self.user_weight:.2f}")

        self.is_fitted = True
        return self

    def get_similar_movies(self, movie_id, n=10):
        """Blend content and item-based neighbours of ``movie_id``.

        Each source is asked for ``2 * n`` neighbours. Item-based
        similarities carry the combined item and user weight.

        Returns:
            Up to ``n`` dicts with ``movieId``, ``title`` and ``score``,
            best first.
        """
        pool_size = n * 2
        merged = {}

        def _absorb(items, source):
            # File every neighbour under its movie id, tagged by source.
            for item in items:
                bucket = merged.setdefault(
                    item['movieId'], {'title': item['title'], 'scores': []}
                )
                bucket['scores'].append((source, item['similarity']))

        if self.content_model:
            _absorb(self.content_model.get_similar_movies(movie_id, n=pool_size), 'content')
        if self.item_model:
            _absorb(self.item_model.get_similar_items(movie_id, n=pool_size), 'item')

        weight_of = {
            'content': self.content_weight,
            'item': self.item_weight + self.user_weight,
        }

        results = []
        for mid, data in merged.items():
            score = 0
            for source, sim in data['scores']:
                score += weight_of[source] * sim
            results.append({'movieId': mid, 'title': data['title'], 'score': score})

        return sorted(results, key=lambda entry: entry['score'], reverse=True)[:n]

    def recommend(self, user_id, n=10, exclude=None, user_rated_movies=None):
        """Blend the recommendations of all available sub-models.

        Each sub-model is asked for ``3 * n`` items. Content scores are
        used as-is; predicted ratings from the collaborative models are
        divided by 5 before weighting. The content model is consulted
        only when ``user_rated_movies`` is non-empty.

        Returns:
            Up to ``n`` dicts with ``movieId``, ``title``, ``score`` and
            ``avgRating`` (0 unless the content model supplied one first),
            best first.
        """
        exclude = exclude or set()
        pool_size = n * 3
        merged = {}

        def _absorb(recs, source, value_of, avg_of):
            # File every recommendation under its movie id, tagged by source.
            for rec in recs:
                bucket = merged.setdefault(
                    rec['movieId'],
                    {'title': rec['title'], 'scores': [], 'avgRating': avg_of(rec)},
                )
                bucket['scores'].append((source, value_of(rec)))

        def _scaled_rating(rec):
            return rec['predictedRating'] / 5.0

        def _no_average(rec):
            return 0

        # Content-based recommendations
        if self.content_model and user_rated_movies:
            _absorb(
                self.content_model.recommend_for_user(
                    user_id, user_rated_movies, n=pool_size, exclude=exclude
                ),
                'content',
                lambda rec: rec['score'],
                lambda rec: rec.get('avgRating', 0),
            )

        # Item-based recommendations
        if self.item_model:
            _absorb(
                self.item_model.recommend(user_id, n=pool_size, exclude=exclude),
                'item', _scaled_rating, _no_average,
            )

        # User-based recommendations
        if self.user_model:
            _absorb(
                self.user_model.recommend(user_id, n=pool_size, exclude=exclude),
                'user', _scaled_rating, _no_average,
            )

        weight_of = {
            'content': self.content_weight,
            'item': self.item_weight,
            'user': self.user_weight,
        }

        results = []
        for mid, data in merged.items():
            score = 0
            for source, value in data['scores']:
                score += weight_of[source] * value
            results.append({
                'movieId': mid,
                'title': data['title'],
                'score': score,
                'avgRating': data['avgRating'],
            })

        return sorted(results, key=lambda entry: entry['score'], reverse=True)[:n]

print("‚úÖ HybridModel class defined")

In [None]:
# Wire the three trained sub-models into the hybrid; fit() returns the
# model itself.
hybrid_model = HybridModel(
    content_weight=0.3, item_weight=0.35, user_weight=0.35
).fit(
    movies_df,
    train_ratings,
    content_model=content_model,
    item_model=item_model,
    user_model=user_model,
)

# Smoke test: blended neighbours of movieId 1.
print("\nüß™ Testing Hybrid Model:")
similar = hybrid_model.get_similar_movies(1, n=5)  # Toy Story
print("Movies similar to Toy Story (1995) (Hybrid):")
for neighbour in similar:
    print(f"  - {neighbour['title']} (score: {neighbour['score']:.3f})")

## 9. Save Trained Models

In [None]:
# Serialise every trained model to its own pickle file in MODELS_DIR.
# NOTE(review): pickle stores classes by reference, so whatever loads these
# files presumably needs the same class definitions importable — confirm
# against the consumer.
print("üíæ Saving trained models...")

_artifacts = (
    ('content_based.pkl', content_model),
    ('item_based.pkl', item_model),
    ('user_based.pkl', user_model),
    ('hybrid.pkl', hybrid_model),
)

for _file_name, _model in _artifacts:
    with open(os.path.join(MODELS_DIR, _file_name), 'wb') as _handle:
        pickle.dump(_model, _handle)
    print(f"  ‚úÖ {_file_name} saved")

print(f"\nüìÅ All models saved to: {MODELS_DIR}")

## 10. Test Recommendations for Sample Users

In [None]:
# Test recommendations for a sample user
sample_user_id = 1

# Collect the user's training ratings. The two columns are zipped directly
# (instead of iterrows) so movie ids keep their native type rather than
# being upcast together with the float ratings.
user_ratings = train_ratings[train_ratings['userId'] == sample_user_id]
user_rated_movies = [
    {'movieId': movie_id, 'rating': rating}
    for movie_id, rating in zip(user_ratings['movieId'], user_ratings['rating'])
]
exclude_ids = set(user_ratings['movieId'].tolist())

print(f"üë§ User {sample_user_id} has rated {len(user_rated_movies)} movies")
print(f"\nüé¨ Top 5 rated movies by User {sample_user_id}:")

# Resolve titles through a dict with a fallback label: a rating whose movie
# is missing from movies_df then prints a placeholder instead of raising
# IndexError from an empty selection.
_title_by_id = dict(zip(movies_df['movieId'], movies_df['title']))
_top_rated = user_ratings.nlargest(5, 'rating')
for movie_id, rating in zip(_top_rated['movieId'], _top_rated['rating']):
    title = _title_by_id.get(movie_id, f'Movie {movie_id}')
    print(f"  - {title} (‚≠ê {rating})")

In [None]:
# Show the top-5 list produced by each model for the sample user.
def _print_ranked(recs, render):
    """Print ``recs`` as a 1-based numbered list using ``render`` per item."""
    for rank, rec in enumerate(recs, 1):
        print(f"  {rank}. {render(rec)}")


def _as_score(rec):
    return f"{rec['title']} (score: {rec['score']:.3f})"


def _as_prediction(rec):
    return f"{rec['title']} (predicted: {rec['predictedRating']:.2f})"


print(f"\nüìã Recommendations for User {sample_user_id}:")
print("=" * 60)

# Content-Based
print("\nüéØ Content-Based Recommendations:")
content_recs = content_model.recommend_for_user(sample_user_id, user_rated_movies, n=5, exclude=exclude_ids)
_print_ranked(content_recs, _as_score)

# Item-Based
print("\nüéØ Item-Based Recommendations:")
item_recs = item_model.recommend(sample_user_id, n=5, exclude=exclude_ids)
_print_ranked(item_recs, _as_prediction)

# User-Based
print("\nüéØ User-Based Recommendations:")
user_recs = user_model.recommend(sample_user_id, n=5, exclude=exclude_ids)
_print_ranked(user_recs, _as_prediction)

# Hybrid
print("\nüéØ Hybrid Recommendations:")
hybrid_recs = hybrid_model.recommend(sample_user_id, n=5, exclude=exclude_ids, user_rated_movies=user_rated_movies)
_print_ranked(hybrid_recs, _as_score)

## ✅ Summary

In [None]:
# Final report: banner, saved artefacts with their sizes, and a recap.
_rule = "=" * 60
print(_rule)
print("üéâ MODEL TRAINING COMPLETED!")
print(_rule)

# List every pickle file found in the models folder with its size in MB.
print(f"\nüìÅ Saved Models in {MODELS_DIR}:")
_bytes_per_mb = 1024 * 1024
for model_file in os.listdir(MODELS_DIR):
    if not model_file.endswith('.pkl'):
        continue
    size = os.path.getsize(os.path.join(MODELS_DIR, model_file)) / _bytes_per_mb
    print(f"  ‚úÖ {model_file} ({size:.2f} MB)")

print(f"""
üìä Training Summary:
  ‚îú‚îÄ‚îÄ Train set: {len(train_ratings):,} ratings
  ‚îú‚îÄ‚îÄ Test set:  {len(test_ratings):,} ratings
  ‚îî‚îÄ‚îÄ Models trained: 4

üîß Models:
  ‚îú‚îÄ‚îÄ Content-Based (TF-IDF on genres)
  ‚îú‚îÄ‚îÄ Item-Based Collaborative Filtering
  ‚îú‚îÄ‚îÄ User-Based Collaborative Filtering
  ‚îî‚îÄ‚îÄ Hybrid (weighted combination)

‚úÖ Next Steps:
  1. Run Notebook 04: Model Evaluation
  2. Compare model performance
  3. Save metrics to MongoDB
""")

# Release the MongoDB client opened earlier in the notebook.
client.close()
print("üîå MongoDB connection closed.")