In [None]:
# %%
"""
Sephora Recommendation System - Test Notebook
=============================================

This notebook tests the SephoraRecommendationSystem class with comprehensive examples.
Make sure you have the following files in your project:
- utils.py (contains the SephoraRecommendationSystem class, imported below via `from utils import ...`)
- ../data/reviews_df.csv
- ../data/product_info_df.csv
"""

# %%
# Import required libraries
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
from implicit.evaluation import train_test_split
from utils import SephoraRecommendationSystem

# Lift pandas' row-display cap so rendered frames/series are never truncated.
pd.options.display.max_rows = None

print("All imports successful!")

# %%
# Load the data
# Reads the review and product tables that every later cell depends on.
# Paths are relative to the notebook's working directory.
print("Loading data...")
try:
    reviews_df = pd.read_csv('../data/reviews_df.csv')
    products_df = pd.read_csv('../data/product_info_df.csv')
except Exception as e:
    print(f"Error loading data: {e}")
    print("Make sure the data files are in the correct path!")
    # Re-raise instead of continuing: without both frames the cells below
    # either fail with NameError or silently reuse frames left in the kernel
    # by an earlier run, which makes every later result misleading.
    raise

print("Data loaded successfully!")
print(f"Reviews shape: {reviews_df.shape}")
print(f"Products shape: {products_df.shape}")

# Display basic info
print("\nReviews columns:", list(reviews_df.columns))
print("Products columns:", list(products_df.columns))

# %%
# Build the recommender from the two loaded frames, then fit its models.
# Any failure is reported with a full traceback but does not stop the notebook.
banner = "=" * 60
print(banner)
print("INITIALIZING SEPHORA RECOMMENDATION SYSTEM")
print(banner)

try:
    # Construct the system
    rec_system = SephoraRecommendationSystem(reviews_df, products_df)
    print("\nSystem initialized successfully!")

    # Fit the models, holding out 20% as passed via test_size
    print("\nTraining models...")
    training_success = rec_system.train_models(test_size=0.2)

    status_message = (
        "Models trained successfully!"
        if training_success
        else "Model training had issues, but system can still provide recommendations"
    )
    print(status_message)

except Exception as e:
    import traceback
    print(f"Error initializing system: {e}")
    traceback.print_exc()

# %%
# Print the recommender's summary statistics, then its category inventory.
print("=" * 60, "SYSTEM STATISTICS", "=" * 60, sep="\n")

try:
    stats = rec_system.get_user_stats()
    for key, value in stats.items():
        # Floats are shown with two decimals; anything else with thousands separators.
        formatted = f"{value:.2f}" if isinstance(value, float) else f"{value:,}"
        print(f"{key}: {formatted}")

    print("\nAvailable Categories:")
    categories = rec_system.get_categories()
    for cat_type, cat_list in categories.items():
        print(f"  {cat_type}: {len(cat_list)} categories")
        if cat_list:
            # Only the first three entries are shown as a sample.
            sample = cat_list[:3]
            print(f"    Examples: {sample}")
        print()

except Exception as e:
    print(f"Error getting statistics: {e}")

# %%
# Test 1: Existing User Recommendations (Case 1)
# For a handful of known users, print a sample of their review history and then
# the recommendations returned for them.
print("="*60)
print("TEST 1: EXISTING USER RECOMMENDATIONS (Case 1)")
print("="*60)


def _or_placeholder(value, placeholder):
    """Return `placeholder` if `value` is missing (None/NaN), otherwise `value`."""
    return placeholder if pd.isna(value) else value


# Get some user IDs to test (later cells reuse `existing_users`)
existing_users = list(rec_system.user_to_idx.keys())[:5]
print(f"Testing with users: {existing_users}")

for i, user_id in enumerate(existing_users, 1):
    print(f"\n--- Test {i}: User {user_id} ---")

    # Show user's purchase history: the user's reviews joined to product metadata.
    user_history = rec_system.reviews_df[
        rec_system.reviews_df['author_id'] == user_id
    ][['product_id', 'rating']].merge(
        rec_system.products_df[['product_id', 'product_name', 'brand_name']],
        on='product_id', how='left'
    )

    print(f"Purchase History ({len(user_history)} items):")
    for _, item in user_history.head(3).iterrows():
        # A left merge yields NaN (not a missing key) for products absent from
        # products_df, so a .get() default would never apply; substitute the
        # placeholder explicitly instead of printing "nan by nan".
        rating = _or_placeholder(item['rating'], 'N/A')
        product_name = _or_placeholder(item['product_name'], 'Unknown')
        brand_name = _or_placeholder(item['brand_name'], 'Unknown')
        print(f"  {rating}/5 - {product_name} by {brand_name}")

    if len(user_history) > 3:
        print(f"  ... and {len(user_history) - 3} more items")

    # Get recommendations; an error for one user must not stop the others.
    try:
        recs = rec_system.get_user_recommendations(user_id=user_id, n_recommendations=5)
        print("\nRecommendations:")
        if recs:
            for j, rec in enumerate(recs, 1):
                print(f"  {j}. {rec['product_name']} by {rec['brand_name']}")
                print(f"     ${rec['price_usd']:.2f} | {rec['rating']:.1f} | {rec['primary_category']}")
        else:
            print("No recommendations found")
    except Exception as e:
        print(f"Error: {e}")

# %%
# Test 2: Cold Start User Recommendations (Case 2)
# Requests recommendations from attribute profiles only (no user_id).
print("=" * 60, "TEST 2: COLD START USER RECOMMENDATIONS (Case 2)", "=" * 60, sep="\n")

# Each persona is a label plus values for the four profile attributes, in
# the order given by `profile_fields`.
profile_fields = ('skin_tone', 'skin_type', 'eye_color', 'hair_color')
persona_rows = [
    ('Young Professional', ('light', 'combination', 'brown', 'blonde')),
    ('Mature Customer', ('medium', 'dry', 'blue', 'gray')),
    ('Dark Skin Customer', ('dark', 'oily', 'brown', 'black')),
]
test_profiles = [
    {'name': label, 'profile': dict(zip(profile_fields, values))}
    for label, values in persona_rows
]

for i, test_case in enumerate(test_profiles, 1):
    persona = test_case['profile']
    print(f"\n--- Test {i}: {test_case['name']} ---")
    print(f"Profile: {persona}")

    try:
        recs = rec_system.get_user_recommendations(
            user_profile=persona,
            n_recommendations=5
        )
        print("Recommendations:")
        if not recs:
            print("No recommendations found")
            continue
        for rank, rec in enumerate(recs, 1):
            print(f"  {rank}. {rec['product_name']} by {rec['brand_name']}")
            print(f"     ${rec['price_usd']:.2f} | {rec['rating']:.1f} | {rec['primary_category']}")
    except Exception as e:
        print(f"Error: {e}")

# %%
# Test 3: Category Filtering
# Requests recommendations for the first known user, restricted to each of the
# first three primary categories.
print("=" * 60, "TEST 3: CATEGORY FILTERING", "=" * 60, sep="\n")

categories = rec_system.get_categories()
existing_user = existing_users[0]

# Test primary categories
primary_categories = categories['primary_category']
if primary_categories:
    for i, category in enumerate(primary_categories[:3], 1):
        print(f"\n--- Test {i}: Category '{category}' ---")
        try:
            recs = rec_system.get_user_recommendations(
                user_id=existing_user,
                primary_category=category,
                n_recommendations=5
            )
            print(f"Recommendations for {existing_user} in {category}:")
            if not recs:
                print("No recommendations found for this category")
                continue
            for rank, rec in enumerate(recs, 1):
                print(f"  {rank}. {rec['product_name']} ({rec['primary_category']})")
                print(f"     by {rec['brand_name']} - ${rec['price_usd']:.2f}")
        except Exception as e:
            print(f"Error: {e}")

# %%
# Test 4: Unrated Categories (Fallback System)
# Measures, per primary category, what share of its products appear in the
# reviews, then requests recommendations in a low-coverage category (or in a
# made-up category when every category is at least half covered).
print("=" * 60, "TEST 4: UNRATED CATEGORIES (FALLBACK SYSTEM)", "=" * 60, sep="\n")

# Product ids that have at least one review
all_rated_products = set(rec_system.reviews_df['product_id'].unique())

print("Analyzing category coverage...")
category_analysis = []

catalog = rec_system.products_df
for category in categories['primary_category'][:10]:
    in_category = catalog['primary_category'] == category
    category_products = catalog.loc[in_category, 'product_id'].unique()

    total_in_category = len(category_products)
    rated_in_category = len(all_rated_products.intersection(category_products))
    # Guard the ratio against a category with no products.
    coverage = rated_in_category / total_in_category if total_in_category > 0 else 0

    category_analysis.append({
        'category': category,
        'rated': rated_in_category,
        'total': total_in_category,
        'coverage': coverage
    })

    print(f"  {category}: {rated_in_category}/{total_in_category} products have ratings ({coverage:.1%})")

# Categories where fewer than half of the products have ratings
low_coverage_categories = [
    entry for entry in category_analysis
    if entry['total'] > 0 and entry['coverage'] < 0.5
]

if not low_coverage_categories:
    print("All categories have good coverage - testing with fake category")
    try:
        recs = rec_system.get_user_recommendations(
            user_id=existing_user,
            primary_category="COMPLETELY_FAKE_CATEGORY",
            n_recommendations=3
        )

        if not recs:
            print("Even ultimate fallback failed")
        else:
            print(f"Ultimate fallback worked! Got {len(recs)} recommendations")
    except Exception as e:
        print(f"Error: {e}")
else:
    test_category = low_coverage_categories[0]['category']
    print(f"\n🧪 Testing fallback system with low-coverage category: '{test_category}'")

    try:
        recs = rec_system.get_user_recommendations(
            user_id=existing_user,
            primary_category=test_category,
            n_recommendations=5
        )

        if not recs:
            print("No recommendations returned even with fallback system")
        else:
            print(f"Fallback system worked! Got {len(recs)} recommendations:")
            for rank, rec in enumerate(recs, 1):
                print(f"  {rank}. {rec['product_name']} by {rec['brand_name']}")
                print(f"     {rec['primary_category']} | ${rec['price_usd']:.2f}")

    except Exception as e:
        print(f"Error: {e}")

# %%
# Test 5: Edge Cases and Error Handling
# Each case pairs a zero-argument call ('test') with a human-readable
# expectation ('expected') and a predicate ('check') that encodes that
# expectation, so "Test passed" is only reported when the result matches it.
print("="*60)
print("TEST 5: EDGE CASES AND ERROR HANDLING")
print("="*60)

edge_cases = [
    {
        'name': 'Non-existent User',
        'test': lambda: rec_system.get_user_recommendations(user_id="FAKE_USER_123", n_recommendations=5),
        'expected': 'Should fall back to popular products',
        # A fallback must produce something.
        'check': lambda recs: len(recs) > 0
    },
    {
        'name': 'Zero Recommendations',
        'test': lambda: rec_system.get_user_recommendations(n_recommendations=0),
        'expected': 'Should return empty list',
        'check': lambda recs: len(recs) == 0
    },
    {
        'name': 'Large Number of Recommendations',
        'test': lambda: rec_system.get_user_recommendations(n_recommendations=100),
        'expected': 'Should return available products',
        'check': lambda recs: len(recs) > 0
    },
    {
        'name': 'Empty User Profile',
        'test': lambda: rec_system.get_user_recommendations(user_profile={}, n_recommendations=3),
        'expected': 'Should fall back to popular products',
        'check': lambda recs: len(recs) > 0
    }
]

for i, test_case in enumerate(edge_cases, 1):
    print(f"\n--- Test {i}: {test_case['name']} ---")
    print(f"Expected: {test_case['expected']}")

    try:
        result = test_case['test']()
        print(f"Result: Got {len(result)} recommendations")
        if result:
            print(f"Sample: {result[0]['product_name']} by {result[0]['brand_name']}")
        # Report the verdict from the case's own predicate rather than
        # treating "did not raise" as success.
        if test_case['check'](result):
            print("Test passed")
        else:
            print("Test FAILED: result does not match the expectation")
    except Exception as e:
        print(f"Error: {e}")

# %%
# Test 6: Specific Users That Previously Failed
# Looks each ID up in the system and requests recommendations, reporting
# whether the user was found or handled by the fallback path.
print("="*60)
print("TEST 6: SPECIFIC PROBLEMATIC USERS")
print("="*60)

problematic_users = ['6747641337', '12175422344', '32344707475']


def _resolve_user_id(raw_id, known_ids):
    """Return `raw_id` in whichever form (as given, int, or str) is in `known_ids`.

    The IDs above are written as strings, while the Test 1 output shows the keys
    of `user_to_idx` as integers; a plain `in` test on the string would then
    never match. If no form is present, `raw_id` is returned unchanged.
    """
    candidates = [raw_id]
    try:
        candidates.append(int(raw_id))
    except (TypeError, ValueError):
        pass  # not a numeric ID; only the as-given and str forms apply
    candidates.append(str(raw_id))
    for candidate in candidates:
        if candidate in known_ids:
            return candidate
    return raw_id


for i, raw_user_id in enumerate(problematic_users, 1):
    user_id = _resolve_user_id(raw_user_id, rec_system.user_to_idx)
    print(f"\n--- Test {i}: User {user_id} ---")

    # Check if user exists
    if user_id in rec_system.user_to_idx:
        print("User found in system")

        # Get user's history
        user_history = rec_system.reviews_df[
            rec_system.reviews_df['author_id'] == user_id
        ]
        print(f"User has {len(user_history)} reviews")

        try:
            recs = rec_system.get_user_recommendations(user_id=user_id, n_recommendations=5)
            if recs:
                print(f"Got {len(recs)} recommendations:")
                for j, rec in enumerate(recs, 1):
                    print(f"  {j}. {rec['product_name']} by {rec['brand_name']}")
            else:
                print("No recommendations returned")
        except Exception as e:
            print(f"Error getting recommendations: {e}")
    else:
        print("User not found in system - testing cold start")
        try:
            recs = rec_system.get_user_recommendations(user_id=user_id, n_recommendations=5)
            if recs:
                print(f"Cold start fallback worked! Got {len(recs)} recommendations")
            else:
                print("No recommendations from fallback")
        except Exception as e:
            print(f"Error in cold start: {e}")

# %%
# Test 7: Performance and Quality Analysis
# Part 1 times recommendation requests for up to ten users; part 2 reports
# brand/category/price diversity of ten recommendations for one user.
print("="*60)
print("TEST 7: PERFORMANCE AND QUALITY ANALYSIS")
print("="*60)

import time

# Performance test
print("Performance Test:")

# `existing_users` holds only five IDs, so slicing it to ten would still give
# five; take the ten users straight from the index instead.
test_users = list(rec_system.user_to_idx.keys())[:10]
successful_recs = 0
total_recs = 0

# perf_counter is a monotonic, high-resolution clock intended for timing.
start_time = time.perf_counter()

for user_id in test_users:
    try:
        recs = rec_system.get_user_recommendations(user_id=user_id, n_recommendations=5)
        rec_count = len(recs)
    except Exception as e:
        # Report the failure instead of hiding it; the success count below
        # already reflects it, but the reason would otherwise be lost.
        print(f"  Request failed for user {user_id}: {e}")
        continue
    successful_recs += 1
    total_recs += rec_count

elapsed = time.perf_counter() - start_time

print("Performance Results:")
print(f"  Processing time: {elapsed:.2f} seconds")
print(f"  Successful requests: {successful_recs}/{len(test_users)}")
print(f"  Total recommendations: {total_recs}")
if test_users:
    print(f"  Average time per user: {elapsed / len(test_users):.3f} seconds")
else:
    # Avoid dividing by zero when the system has no users.
    print("  Average time per user: n/a (no users to test)")

# Quality analysis
print(f"\nQuality Analysis for User {existing_users[0]}:")
try:
    recs = rec_system.get_user_recommendations(user_id=existing_users[0], n_recommendations=10)

    if recs:
        brands = [rec['brand_name'] for rec in recs]
        # Named `rec_categories` so the `categories` dict used by the category
        # tests is not overwritten with a list (which breaks re-running them).
        rec_categories = [rec['primary_category'] for rec in recs]
        prices = [rec['price_usd'] for rec in recs]

        print("Diversity Metrics:")
        print(f"  Total recommendations: {len(recs)}")
        print(f"  Unique brands: {len(set(brands))}/{len(brands)}")
        print(f"  Unique categories: {len(set(rec_categories))}/{len(rec_categories)}")
        print(f"  Price range: ${min(prices):.2f} - ${max(prices):.2f}")
        print(f"  Average price: ${np.mean(prices):.2f}")

except Exception as e:
    print(f"Error in quality analysis: {e}")

# %%
# Test 8: Real-World Scenarios
# Each scenario is a set of keyword arguments for get_user_recommendations plus
# an optional price filter applied to the returned list afterwards.
print("=" * 60, "TEST 8: REAL-WORLD SCENARIOS", "=" * 60, sep="\n")


def _priced_at_most_30(recs):
    """Keep recommendations whose price_usd is 30 or less."""
    return [r for r in recs if r['price_usd'] <= 30]


def _priced_at_least_50(recs):
    """Keep recommendations whose price_usd is 50 or more."""
    return [r for r in recs if r['price_usd'] >= 50]


# Second known user when there is one, otherwise the first.
premium_user = existing_users[1] if len(existing_users) > 1 else existing_users[0]

scenarios = [
    {
        'name': 'Budget-conscious customer looking for skincare',
        'params': {
            'user_profile': {'skin_tone': 'medium', 'skin_type': 'combination'},
            'primary_category': 'Skincare',
            'n_recommendations': 10
        },
        'post_filter': _priced_at_most_30,
        'filter_name': 'Under $30'
    },
    {
        'name': 'Existing customer exploring makeup',
        'params': {
            'user_id': existing_users[0],
            'primary_category': 'Makeup',
            'n_recommendations': 8
        },
        'post_filter': None,
        'filter_name': None
    },
    {
        'name': 'Premium customer (high-end products)',
        'params': {
            'user_id': premium_user,
            'n_recommendations': 5
        },
        'post_filter': _priced_at_least_50,
        'filter_name': 'Over $50'
    }
]

for i, scenario in enumerate(scenarios, 1):
    print(f"\n--- Scenario {i}: {scenario['name']} ---")

    try:
        recs = rec_system.get_user_recommendations(**scenario['params'])

        # Apply the scenario's post-filter, if it has one
        post_filter = scenario['post_filter']
        if not post_filter:
            print(f"Results ({len(recs)} recommendations):")
        else:
            filtered_recs = post_filter(recs)
            print(f"Results ({len(recs)} total, {len(filtered_recs)} after {scenario['filter_name']} filter):")
            recs = filtered_recs

        # Show at most the first five
        for rank, rec in enumerate(recs[:5], 1):
            print(f"  {rank}. {rec['product_name']} by {rec['brand_name']}")
            print(f"${rec['price_usd']:.2f} | {rec.get('primary_category', 'N/A')}")

    except Exception as e:
        print(f"Error in scenario: {e}")

# %%
# Final Summary
# Prints the closing checklist and the size of the trained system.
# NOTE(review): the checklist is printed unconditionally; it does not reflect
# whether the earlier test cells actually succeeded.
print("=" * 60, "TESTING COMPLETE - SUMMARY", "=" * 60, sep="\n")

print("All tests completed!")
print("\nKey Features Verified:")
verified_features = (
    "Case 1: Existing user recommendations (Collaborative Filtering + Content-based)",
    "Case 2: Cold start recommendations (Similar user profiles)",
    "Category filtering with intelligent fallbacks",
    "Handling of unrated categories",
    "Error handling and edge cases",
    "Performance and quality metrics",
    "Real-world usage scenarios",
)
for feature in verified_features:
    print(f"  ✓ {feature}")

print("\nSystem Statistics:")
user_count = len(rec_system.user_to_idx)
print(f"  Users in system: {user_count:,}")
product_count = len(rec_system.product_to_idx)
print(f"  Products in system: {product_count:,}")
interaction_count = rec_system.user_item_matrix.nnz
print(f"  Total interactions: {interaction_count:,}")

print("\n🎉 Sephora Recommendation System is working correctly!")

# %%
# Interactive Testing Section
# Banner introducing the editable cells that follow.
print(
    "=" * 60,
    "INTERACTIVE TESTING",
    "=" * 60,
    "Use the cells below to test specific scenarios:",
    sep="\n",
)

# %%
# Interactive Test 1: Test any user ID
# User IDs are integers in this dataset (see the "Testing with users" output
# of Test 1), so the ID must be written without quotes: a string such as
# "5061282401" does not match the integer key and the request is treated as
# an unknown user.
user_id_to_test = 5061282401  # Change this to any user ID you want to test

print(f"Testing user: {user_id_to_test}")
try:
    recs = rec_system.get_user_recommendations(user_id=user_id_to_test, n_recommendations=10)

    if recs:
        print(f"\nGot {len(recs)} recommendations:")
        for i, rec in enumerate(recs, 1):
            print(f"{i:2d}. {rec['product_name']}")
            print(f"     Brand: {rec['brand_name']}")
            print(f"     Price: ${rec['price_usd']:.2f}")
            print(f"     Category: {rec['primary_category']}")
            print(f"     Rating: {rec['rating']:.1f}/5")
            print()
    else:
        print("No recommendations found")

except Exception as e:
    print(f"Error: {e}")

# %%
# Interactive Test 2: Test new user profile
# Edit the attribute values below to try a different profile.
new_user_profile = dict(
    skin_tone='light',
    skin_type='dry',
    eye_color='brown',
    hair_color='blonde',
)

print(f"Testing new user profile: {new_user_profile}")
try:
    recs = rec_system.get_user_recommendations(
        user_profile=new_user_profile,
        n_recommendations=8
    )

    if not recs:
        print("No recommendations found")
    else:
        print(f"\nGot {len(recs)} recommendations:")
        for position, rec in enumerate(recs, 1):
            print(f"{position}. {rec['product_name']} by {rec['brand_name']} (${rec['price_usd']:.2f})")

except Exception as e:
    print(f"Error: {e}")

# %%
# Interactive Test 3: Test category filtering
# Uses the first known user; edit `test_category` to try another category.
test_user = existing_users[0]
test_category = "Skincare"  # Change this to any category

print(f"Testing category filtering: User {test_user} in category '{test_category}'")
try:
    recs = rec_system.get_user_recommendations(
        user_id=test_user,
        primary_category=test_category,
        n_recommendations=6
    )

    if not recs:
        print("No recommendations found in this category")
    else:
        print(f"\nGot {len(recs)} recommendations in {test_category}:")
        for position, rec in enumerate(recs, 1):
            print(f"{position}. {rec['product_name']} ({rec['primary_category']})")
            print(f"   by {rec['brand_name']} - ${rec['price_usd']:.2f}")

except Exception as e:
    print(f"Error: {e}")

# %%
# Closing message for the scripted part of the notebook.
print(
    "Test notebook completed successfully!",
    "You can now modify the interactive test cells above to explore different scenarios.",
    sep="\n",
)

All imports successful!
Loading data...
Data loaded successfully!
Reviews shape: (843386, 16)
Products shape: (5621, 18)

Reviews columns: ['author_id', 'rating', 'is_recommended', 'total_feedback_count', 'total_neg_feedback_count', 'total_pos_feedback_count', 'submission_time', 'review_text', 'skin_tone', 'eye_color', 'skin_type', 'hair_color', 'product_id', 'product_name', 'brand_name', 'price_usd']
Products columns: ['product_id', 'product_name', 'brand_id', 'brand_name', 'loves_count', 'rating', 'reviews', 'ingredients', 'price_usd', 'limited_edition', 'new', 'online_only', 'out_of_stock', 'sephora_exclusive', 'highlights', 'primary_category', 'secondary_category', 'tertiary_category']
INITIALIZING SEPHORA RECOMMENDATION SYSTEM
Preparing data...
User-Item Matrix Shape: (437854, 2345)
Sparsity: 99.92%
Content Similarity Matrix Shape: (1863, 1863)
Content mapping created for 1863 products
Created 969 user profiles

System initialized successfully!

Training models...
Training with ma

100%|██████████| 20/20 [00:31<00:00,  1.56s/it]


ALS model trained successfully!
Models training completed!
Models trained successfully!
SYSTEM STATISTICS
total_users: 437,854
total_products: 2,345
total_interactions: 841,651
sparsity: 99.92
avg_ratings_per_user: 1.93
avg_ratings_per_product: 359.65

Available Categories:
  primary_category: 8 categories
    Examples: ['Bath & Body', 'Fragrance', 'Hair']

  secondary_category: 39 categories
    Examples: ['Accessories', 'Bath & Body', 'Bath & Shower']

  tertiary_category: 109 categories
    Examples: ['Accessories', 'Aftershave', 'Anti-Aging']

TEST 1: EXISTING USER RECOMMENDATIONS (Case 1)
Testing with users: [5061282401, 42802569154, 6941883808, 27926227988, 7656791726]

--- Test 1: User 5061282401 ---
Purchase History (1 items):
  5/5 - Lip Sleeping Mask Intense Hydration with Vitamin C by LANEIGE

Recommendations:
  1. Superberry Hydrate + Glow Dream Night Mask with Vitamin C by Youth To The People
     $52.00 | 4.3 | Skincare
  2. Mini Superberry Hydrate + Glow Dream Mask by Yo

In [11]:
# Display the first two rows of reviews_df.
reviews_df.head(2)

Unnamed: 0,author_id,rating,is_recommended,total_feedback_count,total_neg_feedback_count,total_pos_feedback_count,submission_time,review_text,skin_tone,eye_color,skin_type,hair_color,product_id,product_name,brand_name,price_usd
0,5061282401,5,1.0,0,0,0,2023-03-21,My review title says it all! I get so excited ...,light,brown,dry,blonde,P420652,lip sleeping mask intense hydration with vitam...,laneige,24.0
1,42802569154,4,1.0,1,0,1,2023-03-19,The scent isn’t my favourite but it works grea...,lightmedium,brown,normal,brown,P420652,lip sleeping mask intense hydration with vitam...,laneige,24.0


In [9]:
# Review counts per product name, most-reviewed first. Limited to the top 20:
# display.max_rows is set to None earlier in this notebook, so the unbounded
# series would print one line for every distinct product name.
reviews_df['product_name'].value_counts().head(20)

product_name
lip sleeping mask intense hydration with vitamin c                                                             14714
protini polypeptide firming refillable moisturizer                                                              5715
green clean makeup meltaway cleansing balm limited edition jumbo                                                5607
green clean makeup removing cleansing balm                                                                      5607
alpha beta extra strength daily peel pads                                                                       5485
the true cream aqua bomb                                                                                        5411
niacinamide 10% + zinc 1% oil control serum                                                                     5397
superfood antioxidant cleanser                                                                                  5281
mini superfood antioxidant cleanser                

In [3]:
# Display the first row of products_df.
products_df.head(1)

Unnamed: 0,product_id,product_name,brand_id,brand_name,loves_count,rating,reviews,ingredients,price_usd,limited_edition,new,online_only,out_of_stock,sephora_exclusive,highlights,primary_category,secondary_category,tertiary_category
0,P473671,Fragrance Discovery Set,6342,19-69,6320,3.6364,11,"['Capri Eau de Parfum:', 'Alcohol Denat. (SD A...",35.0,0,0,1,0,0,"['Unisex/ Genderless Scent', 'Warm &Spicy Scen...",Fragrance,Value & Gift Sets,Perfume Gift Sets
