In [5]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from math import sqrt
from tqdm import tqdm
import joblib
from typing import Dict, List, Tuple
from surprise import dump
import warnings
warnings.filterwarnings('ignore')

class EnhancedHybridRecommender:
    def __init__(self, train_path: str, test_path: str, products_path: str,
                 content_model_path: str, svd_model_path: str):
        """
        Hybrid Recommender: SVD + Content-based filtering with improvements
        """
        self.train_path = train_path
        self.test_path = test_path
        self.products_path = products_path
        self.content_model_path = content_model_path
        self.svd_model_path = svd_model_path
        
        # Initialize attributes
        self.prod_df = None
        self.prod_embeds = None
        self.svd_model = None
        self.global_avg = 3.0
        self.test_df = None
        self.train_df = None
        self.user_history_cache = {}
        self.product_popularity = {}
        self.product_features = {}
        
        # Load models and data
        self._load_models()
        self._preload_data()
        
        print("✅ Enhanced Hybrid Recommender initialized successfully!")

    def _load_models(self) -> None:
        """Load pre-trained models"""
        print("📦 Loading pre-trained models...")
        
        # Load content-based model
        if os.path.exists(self.content_model_path):
            self.prod_df, self.prod_embeds = joblib.load(self.content_model_path)
            print(f"✅ Content model loaded: {len(self.prod_df)} products")
        else:
            raise FileNotFoundError(f"Content model not found at {self.content_model_path}")
        
        # Load SVD model
        if os.path.exists(self.svd_model_path):
            _, self.svd_model = dump.load(self.svd_model_path)
            print("✅ SVD model loaded")
            
            # Get global average from SVD
            if hasattr(self.svd_model, 'trainset') and self.svd_model.trainset:
                self.global_avg = self.svd_model.trainset.global_mean
                print(f"📈 Global average rating from SVD: {self.global_avg:.3f}")
        else:
            raise FileNotFoundError(f"SVD model not found at {self.svd_model_path}")
        
        # Create index mappings
        self.product_id_to_idx = {str(pid): idx for idx, pid in enumerate(self.prod_df["product_id"])}
        print("✅ Index mappings created")
        
        # Precompute product features for faster similarity calculation
        self.precompute_product_features()

    def precompute_product_features(self):
        """Precompute product features for faster similarity calculation"""
        print("⚡ Precomputing product features for faster similarity...")
        
        self.product_features = {}
        for _, row in self.prod_df.iterrows():
            product_id = str(row["product_id"])
            self.product_features[product_id] = {
                'brand': row["brand_name"],
                'category': row["tertiary_category"],
                'price': row["price_usd"] if pd.notna(row["price_usd"]) else 0,
                'embedding': self.prod_embeds[self.product_id_to_idx[product_id]]
            }
        
        print(f"✅ Precomputed features for {len(self.product_features)} products")

    def _preload_data(self):
        print("📊 Preloading data...")

        # Load a small sample just to detect available columns
        test_sample = pd.read_csv(self.test_path, nrows=5)

        # Base columns we always need
        usecols = ["author_id", "product_id", "rating"]

        # ✅ Try to detect a timestamp column from the sample
        time_col = None
        for col in ["timestamp", "submission_time", "review_date"]:
            if col in test_sample.columns:
                time_col = col
                usecols.append(col)
                break

        # Load full test/train datasets with only needed columns
        self.test_df = pd.read_csv(self.test_path, usecols=usecols)
        self.train_df = pd.read_csv(self.train_path, usecols=["author_id", "product_id", "rating"])

        # Build user history cache (from train)
        self.user_histories = (
            self.train_df.groupby("author_id")["product_id"].apply(list).to_dict()
        )

        print(f"✅ Test data loaded: {len(self.test_df)} records")
        print(f"✅ Train data loaded: {len(self.train_df)} records")

        
        # Build user history cache - OPTIMIZED
        print("🔨 Building user history cache...")
        all_ratings = pd.concat([self.train_df, self.test_df])
        
        # Use groupby for faster processing
        user_groups = all_ratings.groupby("author_id")
        for user_id, group in tqdm(user_groups, desc="Caching user histories"):
            self.user_history_cache[str(user_id)] = {
                'rated_products': group["product_id"].astype(str).tolist(),
                'ratings': group["rating"].tolist(),
                'avg_rating': group["rating"].mean()
            }
        
        # Calculate product popularity
        print("📊 Calculating product popularity...")
        # FIXED: Changed ast(str) to astype(str)
        self.product_popularity = all_ratings['product_id'].astype(str).value_counts().to_dict()
        
        print("=== Dataset Overview ===")
        print(f"Train set: {len(self.train_df):,} rows | {self.train_df['author_id'].nunique():,} users | {self.train_df['product_id'].nunique():,} products")
        print(f"Test set: {len(self.test_df):,} rows | {self.test_df['author_id'].nunique():,} users | {self.test_df['product_id'].nunique():,} products")
        print(f"Products catalog: {len(self.prod_df):,} items")
        print("========================")

    def enhanced_content_similarity(self, target_product_id: str, user_rated_products: List[str]) -> float:
        """Enhanced content similarity with multiple factors - OPTIMIZED using precomputed features"""
        if target_product_id not in self.product_features or not user_rated_products:
            return 0.0
        
        target_features = self.product_features[target_product_id]
        target_embed = target_features['embedding']
        target_brand = target_features['brand']
        target_category = target_features['category']
        target_price = target_features['price']
        
        similarities = []
        
        # Pre-filter rated products that exist in our features
        valid_rated_products = [pid for pid in user_rated_products if pid in self.product_features]
        
        for rated_pid in valid_rated_products:
            rated_features = self.product_features[rated_pid]
            rated_embed = rated_features['embedding']
            rated_brand = rated_features['brand']
            rated_category = rated_features['category']
            rated_price = rated_features['price']
            
            # Multiple similarity measures
            cosine_sim = cosine_similarity([target_embed], [rated_embed])[0][0]
            
            # Brand similarity
            brand_sim = 0.3 if target_brand == rated_brand else 0
            
            # Category similarity
            category_sim = 0.2 if target_category == rated_category else 0
            
            # Price similarity (within 20% price range)
            if target_price > 0 and rated_price > 0:
                price_ratio = min(target_price, rated_price) / max(target_price, rated_price)
                price_sim = 0.2 if price_ratio > 0.8 else 0
            else:
                price_sim = 0
            
            total_sim = cosine_sim + brand_sim + category_sim + price_sim
            if total_sim > 0.3:  # Higher threshold for better quality
                similarities.append(total_sim)
        
        return np.mean(similarities) if similarities else 0.0
    
    def get_adaptive_weights(self, user_id):
        """Adaptively set weights based on user activity"""
        if user_id not in self.user_item_matrix.index:
            return 0.6, 0.4  # fallback for new user (more content-based)

        n_ratings = self.train_df[self.train_df["author_id"] == user_id].shape[0]

        # More ratings → trust collaborative filtering more
        if n_ratings < 5:
            return 0.6, 0.4   # content-heavy
        elif n_ratings < 20:
            return 0.4, 0.6   # balanced
        else:
            return 0.2, 0.8   # collab-heavy


    def hybrid_predict(self, user_id: str, product_id: str,
                   content_weight: float = 0.4, collab_weight: float = 0.6) -> Tuple[float, float]:
        """
        Enhanced hybrid prediction with smooth adaptive weighting
        Returns: (prediction, confidence)
        """
        user_id = str(user_id)
        product_id = str(product_id)
        
        # ========== SVD PREDICTION ==========
        svd_pred = np.nan
        svd_confidence = 0.0
        
        try:
            svd_prediction = self.svd_model.predict(user_id, product_id)
            svd_pred = max(1.0, min(5.0, svd_prediction.est))
            svd_confidence = 0.9 if not svd_prediction.details.get('was_impossible', False) else 0.4
        except:
            svd_pred = self.global_avg
            svd_confidence = 0.3
        
        # ========== CONTENT PREDICTION ==========
        content_pred = np.nan
        content_confidence = 0.0
        
        if user_id in self.user_history_cache:
            user_data = self.user_history_cache[user_id]
            rated_products = user_data['rated_products']
            
            if len(rated_products) >= 2 and product_id in self.product_id_to_idx:
                similarity_score = self.enhanced_content_similarity(product_id, rated_products)
                
                if similarity_score > 0.1:
                    # Map similarity to rating scale (1-5)
                    content_pred = 1.0 + similarity_score * 4.0
                    content_confidence = min(1.0, similarity_score * 1.8)
                    content_pred = max(1.0, min(5.0, content_pred))
        
        # ========== ADVANCED HYBRID COMBINATION ==========
        predictions = []
        confidences = []
        weights = []
        
        user_data = self.user_history_cache.get(user_id, {})
        user_rating_count = len(user_data.get('rated_products', []))
        
        # Smooth scaling: more ratings → more collaborative
        ratio = min(1.0, user_rating_count / 30)  # cap effect at 30 ratings
        effective_collab_weight = collab_weight * (0.4 + 0.6 * ratio)  # grows with activity
        effective_content_weight = content_weight * (1.0 - 0.6 * ratio)  # shrinks with activity
        
        if not np.isnan(svd_pred):
            predictions.append(svd_pred)
            confidences.append(svd_confidence)
            weights.append(effective_collab_weight)
        
        if not np.isnan(content_pred) and content_confidence > 0.2:
            predictions.append(content_pred)
            confidences.append(content_confidence)
            weights.append(effective_content_weight)
        
        if len(predictions) == 2:
            total_confidence = sum(c * w for c, w in zip(confidences, weights))
            weighted_pred = sum(p * c * w for p, c, w in zip(predictions, confidences, weights)) / total_confidence
            final_confidence = total_confidence / sum(weights)
        elif len(predictions) == 1:
            weighted_pred = predictions[0]
            final_confidence = confidences[0]
        else:
            # Fallback: use user's avg rating or global avg with small jitter
            weighted_pred = user_data.get('avg_rating', self.global_avg) + np.random.uniform(-0.2, 0.2)
            weighted_pred = max(1.0, min(5.0, weighted_pred))
            final_confidence = 0.2
        
        return max(1.0, min(5.0, weighted_pred)), final_confidence

    def calculate_match_percentage(self, score: float, user_id: str, product_id: str, 
                             all_recommendation_scores: List[float] = None) -> int:
        """Improved match percentage with relative scoring"""
        
        if all_recommendation_scores:
            # Use percentile ranking within current recommendations
            sorted_scores = sorted(all_recommendation_scores)
            position = sorted_scores.index(score)
            percentile = (position / len(sorted_scores)) * 100
            return int(percentile)
        else:
            # Fallback to original method
            user_data = self.user_history_cache.get(str(user_id), {})
            user_avg = user_data.get('avg_rating', self.global_avg)
            
            # Adjust based on user's rating behavior
            if user_avg >= 4.0:
                match_percent = min(100, max(0, (score - 2.8) / 2.2 * 100))
            elif user_avg <= 2.5:
                match_percent = min(100, max(0, (score - 1.8) / 3.2 * 100))
            else:
                if score >= 3.5:
                    match_percent = 70 + (score - 3.5) / 1.5 * 30
                elif score >= 2.5:
                    match_percent = 40 + (score - 2.5) / 1.0 * 30
                else:
                    match_percent = max(0, score / 2.5 * 40)
            
            return int(match_percent)

    def calculate_diversity_penalty(self, target_product_id: str, current_recommendations: List[Tuple]) -> float:
        """Penalize products too similar to already recommended ones"""
        if not current_recommendations or target_product_id not in self.product_id_to_idx:
            return 0.0
        
        target_idx = self.product_id_to_idx[target_product_id]
        target_embed = self.prod_embeds[target_idx]
        
        max_similarity = 0.0
        for recommendation in current_recommendations:
            # Handle different tuple formats
            if len(recommendation) >= 2:
                rec_product_id = recommendation[0]
            else:
                continue
                
            if rec_product_id in self.product_id_to_idx:
                rec_idx = self.product_id_to_idx[rec_product_id]
                rec_embed = self.prod_embeds[rec_idx]
                sim = cosine_similarity([target_embed], [rec_embed])[0][0]
                max_similarity = max(max_similarity, sim)
        
        # Penalize if too similar to existing recommendations
        return max_similarity * 0.4  # 40% penalty for high similarity

    def generate_recommendations(self, user_id: str, top_n: int = 10, 
                             content_weight: float = 0.4, collab_weight: float = 0.6,
                             min_confidence: float = 0.5) -> List[Tuple[str, float, int]]:
        """
        Generate enhanced recommendations with confidence filtering
        Returns: List of (product_id, predicted_rating, match_percentage)
        """
        user_id = str(user_id)
        user_rated = self.user_history_cache.get(user_id, {}).get('rated_products', [])
        
        all_products = self.prod_df["product_id"].astype(str).tolist()
        candidate_products = [pid for pid in all_products if pid not in user_rated]
        
        if not candidate_products:
            return self._get_popular_fallback(top_n)
        
        recommendations = []
        
        for product_id in tqdm(candidate_products, desc="Predicting ratings"):
            try:
                predicted_rating, confidence = self.hybrid_predict(user_id, product_id, content_weight, collab_weight)
                match_percent = self.calculate_match_percentage(predicted_rating, user_id, product_id)
                
                if confidence >= min_confidence and match_percent >= 40:
                    recommendations.append((product_id, predicted_rating, match_percent))
            except Exception:
                continue
        
        # ✅ Sort by hybrid predicted rating directly (no reranking)
        recommendations.sort(key=lambda x: x[1], reverse=True)
        
        return recommendations[:top_n]

    def _get_popular_fallback(self, top_n: int) -> List[Tuple[str, float, int]]:
        """Fallback to popular products"""
        popular_products = self.test_df.groupby('product_id')['rating'].agg(['count', 'mean']).reset_index()
        popular_products = popular_products[popular_products['count'] >= 10]  # Only reasonably popular
        popular_products = popular_products.sort_values(['mean', 'count'], ascending=False)
        
        result = []
        for _, row in popular_products.head(top_n).iterrows():
            product_id = str(row['product_id'])
            score = row['mean']
            match_percent = self.calculate_match_percentage(score, "average_user", product_id)
            result.append((product_id, score, match_percent))
        
        return result

    def enhanced_demo_recommendations(self, user_id: str, top_n: int = 5,
                                   content_weight: float = 0.4, collab_weight: float = 0.6):
        """Show enhanced recommendations with explanations"""
        recommendations = self.generate_recommendations(user_id, top_n * 2, content_weight, collab_weight)
        
        print(f"\n🎯 ENHANCED RECOMMENDATIONS FOR USER {user_id}:")
        print(f"   Weights: Content={content_weight}, SVD={collab_weight}")
        print("="*80)
        
        user_data = self.user_history_cache.get(str(user_id), {})
        user_avg = user_data.get('avg_rating', self.global_avg)
        
        displayed = 0
        for i, (product_id, score, match_percent) in enumerate(recommendations, 1):
            if displayed >= top_n:
                break
                
            product_info = self.prod_df[self.prod_df["product_id"].astype(str) == product_id]
            if product_info.empty:
                continue
                
            product_info = product_info.iloc[0]
            name = product_info["product_name"]
            brand = product_info["brand_name"]
            category = product_info["tertiary_category"]
            price = product_info["price_usd"]
            
            formatted_price = f"${price:.2f}" if isinstance(price, (int, float)) else f"${price}"
            
            print(f"{displayed + 1}. {name} ({brand})")
            print(f"   📍 {category} • 💰 {formatted_price}")
            print(f"   ⭐ {score:.4f}/5 • 🔍 {match_percent}% match")
            print(f"   🆔 {product_id}")
            
            # Add intelligent explanation
            if score >= 4.2:
                print("   💎 Excellent match! Based on your preferences and highly rated by similar users")
            elif score >= 3.8:
                print("   👍 Great match - combines your product preferences with crowd wisdom")
            elif score >= 3.2:
                print("   🔍 Good suggestion - users with similar tastes enjoyed this product")
            elif score >= 2.8:
                print("   💡 Recommended - similar to products you've liked, worth exploring")
            else:
                print("   🌟 New discovery - different from your usual preferences but highly rated")
            
            print()
            displayed += 1
        
        if displayed == 0:
            print("⚠️  No confident recommendations found. Try rating more products!")
            print("💡 Exploring new categories might help improve recommendations")
        
    def evaluate(self, top_n: int = 10,
                content_weight: float = 0.4, collab_weight: float = 0.6,
                min_confidence: float = 0.0) -> Dict[str, float]:
        """
        Evaluate the recommender on every row of the test dataset.

        Args:
            top_n: accepted for interface compatibility; not used.
            content_weight: base content weight passed to ``hybrid_predict``.
            collab_weight: base SVD weight passed to ``hybrid_predict``.
            min_confidence: predictions with a lower confidence are left out
                of all metrics. The default 0.0 keeps every prediction.

        Returns:
            Dict with RMSE, MAE, Accuracy, Precision, Recall, F1 and Coverage.
            Classification metrics treat a rating >= 3.5 as relevant, for
            both actual and predicted ratings. Coverage is the number of
            distinct products predicted relevant divided by the catalog
            size. Metrics are NaN when no prediction was scored.
        """
        print("\n📊 Evaluating recommender system...")

        relevance_threshold = 3.5
        y_true, y_pred = [], []
        recommended_products = set()
        failed = 0

        # Column-wise iteration keeps each value's own dtype and avoids
        # building a Series per row.
        rows = zip(self.test_df["author_id"], self.test_df["product_id"], self.test_df["rating"])
        for raw_user, raw_product, actual_rating in tqdm(rows, total=len(self.test_df), desc="Evaluating"):
            user_id, product_id = str(raw_user), str(raw_product)

            try:
                pred_rating, confidence = self.hybrid_predict(user_id, product_id,
                                                            content_weight, collab_weight)
            except Exception:
                # Best-effort: a failing row is skipped, but counted.
                failed += 1
                continue

            if confidence < min_confidence:
                continue

            y_true.append(actual_rating)
            y_pred.append(pred_rating)

            if pred_rating >= relevance_threshold:
                recommended_products.add(product_id)

        if failed:
            print(f"⚠️  Skipped {failed} test rows whose prediction failed")

        # --- Compute metrics ---
        if y_true:
            # Binary relevance labels are built once and shared by all
            # classification metrics.
            true_labels = [1 if r >= relevance_threshold else 0 for r in y_true]
            pred_labels = [1 if p >= relevance_threshold else 0 for p in y_pred]

            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            mae = mean_absolute_error(y_true, y_pred)
            accuracy = accuracy_score(true_labels, pred_labels)
            precision = precision_score(true_labels, pred_labels, zero_division=0)
            recall = recall_score(true_labels, pred_labels, zero_division=0)
            f1 = f1_score(true_labels, pred_labels, zero_division=0)
        else:
            rmse = mae = accuracy = precision = recall = f1 = float("nan")

        coverage = len(recommended_products) / len(self.prod_df) if len(self.prod_df) > 0 else 0

        results = {
            "RMSE": rmse,
            "MAE": mae,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1": f1,
            "Coverage": coverage
        }

        print("\n📈 Evaluation Results:")
        for metric, value in results.items():
            print(f"{metric}: {value:.4f}")

        return results

    def evaluate_by_user_group(self, top_n=10, relevance_threshold=4):
        """
        Evaluate recommender performance across user activity groups.

        Groups are defined by the number of ratings a user has in the
        training data:
        - Cold-start (<5 ratings)
        - Medium (5-20 ratings)
        - Heavy (>20 ratings)

        Args:
            top_n: accepted for interface compatibility; not used.
            relevance_threshold: ratings at or above this value count as
                relevant for Accuracy/Precision/Recall/F1. The default of 4
                differs from the 3.5 cut-off used by ``evaluate``.

        Returns:
            DataFrame with one row per non-empty group and the columns RMSE,
            MAE, Accuracy, Precision, Recall, F1 and Count.
        """
        # Count ratings per user from training data. Keys are stringified so
        # they match the stringified test ids used for the lookup below.
        train_user_ids = self.train_df["author_id"].astype(str)
        user_rating_counts = self.train_df.groupby(train_user_ids)["rating"].count().to_dict()

        groups = {
            "Cold-start (<5)": [],
            "Medium (5-20)": [],
            "Heavy (>20)": []
        }

        rows = zip(self.test_df["author_id"], self.test_df["product_id"], self.test_df["rating"])
        for raw_user, raw_item, true_rating in rows:
            user, item = str(raw_user), str(raw_item)

            pred_rating, _ = self.hybrid_predict(user, item)

            # Users absent from the training data count as cold-start.
            count = user_rating_counts.get(user, 0)
            if count < 5:
                group = "Cold-start (<5)"
            elif count > 20:
                group = "Heavy (>20)"
            else:
                group = "Medium (5-20)"

            groups[group].append((true_rating, pred_rating))

        results = {}
        for group, values in groups.items():
            if not values:
                continue
            y_true, y_pred = zip(*values)
            rmse = sqrt(mean_squared_error(y_true, y_pred))
            mae = mean_absolute_error(y_true, y_pred)

            y_true_bin = [1 if r >= relevance_threshold else 0 for r in y_true]
            y_pred_bin = [1 if p >= relevance_threshold else 0 for p in y_pred]

            results[group] = {
                "RMSE": rmse,
                "MAE": mae,
                "Accuracy": accuracy_score(y_true_bin, y_pred_bin),
                "Precision": precision_score(y_true_bin, y_pred_bin, zero_division=0),
                "Recall": recall_score(y_true_bin, y_pred_bin, zero_division=0),
                "F1": f1_score(y_true_bin, y_pred_bin, zero_division=0),
                "Count": len(values)
            }

        # Transpose so that each group is a row and each metric a column.
        results_df = pd.DataFrame(results).T
        print("\n📊 Evaluation by User Group:")
        print(results_df)
        return results_df
    
if __name__ == "__main__":
    # Demo run: build the recommender, show recommendations for one user,
    # then report overall and per-activity-group evaluation metrics.
    banner = "=" * 80

    print("🚀 INITIALIZING ENHANCED HYBRID RECOMMENDER")
    print(banner)

    # Input files, relative to the working directory.
    data_dir = "data/CleanedDataSet"
    recommender = EnhancedHybridRecommender(
        train_path=f"{data_dir}/train_skincare.csv",
        test_path=f"{data_dir}/test_skincare.csv",
        products_path=f"{data_dir}/filtered_skincare_products.csv",
        content_model_path="models/product_embeddings.pkl",
        svd_model_path="models/surprise_svd_model.pkl",
    )

    print("\n✅ ENHANCED INITIALIZATION COMPLETE!")
    print(banner)

    # Fixed weights used for both the demo and the evaluation.
    best_content_weight, best_collab_weight = 0.2, 0.8
    weights = {
        "content_weight": best_content_weight,
        "collab_weight": best_collab_weight,
    }

    # Recommendations for a concrete user; the id must exist in the dataset.
    real_user_id = 2128891661
    recommender.enhanced_demo_recommendations(user_id=real_user_id, top_n=5, **weights)

    # Overall metrics on the test set.
    eval_results = recommender.evaluate(top_n=10, **weights)

    # Metrics split by user activity level (uses the method's default weights).
    group_eval = recommender.evaluate_by_user_group()


🚀 INITIALIZING ENHANCED HYBRID RECOMMENDER
📦 Loading pre-trained models...
✅ Content model loaded: 1760 products
✅ SVD model loaded
📈 Global average rating from SVD: 3.934
✅ Index mappings created
⚡ Precomputing product features for faster similarity...
✅ Precomputed features for 1760 products
📊 Preloading data...
✅ Test data loaded: 160291 records
✅ Train data loaded: 641164 records
🔨 Building user history cache...


Caching user histories: 100%|██████████| 430159/430159 [00:43<00:00, 9939.51it/s] 


📊 Calculating product popularity...
=== Dataset Overview ===
Train set: 641,164 rows | 357,909 users | 1,760 products
Test set: 160,291 rows | 131,518 users | 1,705 products
Products catalog: 1,760 items
✅ Enhanced Hybrid Recommender initialized successfully!

✅ ENHANCED INITIALIZATION COMPLETE!


Predicting ratings: 100%|██████████| 1752/1752 [00:02<00:00, 600.50it/s]



🎯 ENHANCED RECOMMENDATIONS FOR USER 2128891661:
   Weights: Content=0.2, SVD=0.8
1. Vitamin Enriched Face Base Jumbo (Bobbi Brown)
   📍 Moisturizers • 💰 $99.00
   ⭐ 4.3350/5 • 🔍 86% match
   🆔 P468634
   💎 Excellent match! Based on your preferences and highly rated by similar users

2. Ultra Repair Firming Collagen Cream with Peptides and Niacinamide (First Aid Beauty)
   📍 Moisturizers • 💰 $44.00
   ⭐ 4.2842/5 • 🔍 85% match
   🆔 P468821
   💎 Excellent match! Based on your preferences and highly rated by similar users

3. Resveratrol Lift Retinol Alternative Firming Cashmere Moisturizer (Caudalie)
   📍 Moisturizers • 💰 $69.00
   ⭐ 4.2702/5 • 🔍 85% match
   🆔 P467750
   💎 Excellent match! Based on your preferences and highly rated by similar users

4. Absolue Rich Cream Moisturizer (Lancôme)
   📍 Moisturizers • 💰 $270.00
   ⭐ 4.2571/5 • 🔍 85% match
   🆔 P482025
   💎 Excellent match! Based on your preferences and highly rated by similar users

5. Hydra Vizor Invisible Moisturizer Broad 

Evaluating: 100%|██████████| 160291/160291 [04:09<00:00, 641.21it/s]



📈 Evaluation Results:
RMSE: 0.9711
MAE: 0.7986
Accuracy: 0.8682
Precision: 0.8679
Recall: 0.9914
F1: 0.9255
Coverage: 0.9631

📊 Evaluation by User Group:
                     RMSE       MAE  Accuracy  Precision    Recall        F1  \
Cold-start (<5)  1.051506  0.820514  0.747102   0.877615  0.800743  0.837418   
Medium (5-20)    0.890767  0.678058  0.874807   0.890803  0.972494  0.929858   
Heavy (>20)      0.686356  0.583615  0.903208   0.974260  0.920285  0.946504   

                    Count  
Cold-start (<5)  127419.0  
Medium (5-20)     23987.0  
Heavy (>20)        8885.0  
