In [1]:
# Imports
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler
import implicit
from implicit.evaluation import train_test_split
import warnings
warnings.filterwarnings('ignore')


pd.set_option('display.max_columns', None)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Data
reviews_df = pd.read_csv('../data/reviews_df.csv')
products_df = pd.read_csv('../data/product_info_df.csv')

In [3]:
class SephoraRecommendationSystem:
    def __init__(self, reviews_df, products_df):
        """
        Initialize the recommendation system with reviews and products data
        
        Args:
            reviews_df: DataFrame with review data
            products_df: DataFrame with product information
        """
        self.reviews_df = reviews_df.copy()
        self.products_df = products_df.copy()
        
        # Initialize models
        self.user_item_model = None
        self.item_similarity_model = None
        self.content_similarity_matrix = None
        
        # Mappings
        self.user_to_idx = {}
        self.idx_to_user = {}
        self.product_to_idx = {}
        self.idx_to_product = {}
        
        # User profiles for cold start
        self.user_profiles = {}
        
        # Initialize matrices
        self.user_item_matrix = None
        self.train_matrix = None
        self.test_matrix = None
        self.product_features_matrix = None
        self.user_features_matrix = None
        
        # Encoders
        self.user_profile_encoders = {}
        self.tfidf_ingredients = TfidfVectorizer(max_features=500, stop_words='english')
        self.tfidf_highlights = TfidfVectorizer(max_features=300, stop_words='english')
        
        # Clean and prepare data
        success = self._prepare_data()
        if not success:
            raise ValueError("Failed to prepare data. Please check your input DataFrames.")
        
    def _prepare_data(self):
        """Clean and prepare the data for modeling"""
        try:
            print("Preparing data...")
            
            # Check if required columns exist
            required_review_cols = ['author_id', 'product_id', 'rating']
            required_product_cols = ['product_id']
            
            missing_review_cols = [col for col in required_review_cols if col not in self.reviews_df.columns]
            missing_product_cols = [col for col in required_product_cols if col not in self.products_df.columns]
            
            if missing_review_cols:
                print(f"ERROR: Missing required columns in reviews_df: {missing_review_cols}")
                return False
            if missing_product_cols:
                print(f"ERROR: Missing required columns in products_df: {missing_product_cols}")
                return False
            
            # Clean data
            self._clean_data()
            
            # Create user-item interaction matrix
            success = self._create_user_item_matrix()
            if not success:
                return False
            
            # Create content-based features
            self._create_content_features()
            
            # Create user profiles for cold start
            self._create_user_profiles()
            
            return True
            
        except Exception as e:
            print(f"ERROR in _prepare_data: {e}")
            import traceback
            traceback.print_exc()
            return False
    
    def _clean_data(self):
        """Clean and preprocess the data"""
        # Convert string representations to actual strings for TF-IDF
        self.products_df['ingredients_text'] = self.products_df['ingredients'].astype(str)
        self.products_df['highlights_text'] = self.products_df['highlights'].astype(str)
    
    def _create_user_item_matrix(self):
        """Create sparse user-item interaction matrix"""
        try:
            # Create mappings
            unique_users = self.reviews_df['author_id'].unique()
            unique_products = self.reviews_df['product_id'].unique()
            
            if len(unique_users) == 0 or len(unique_products) == 0:
                print("ERROR: No users or products found")
                return False
            
            self.user_to_idx = {user: idx for idx, user in enumerate(unique_users)}
            self.idx_to_user = {idx: user for user, idx in self.user_to_idx.items()}
            self.product_to_idx = {product: idx for idx, product in enumerate(unique_products)}
            self.idx_to_product = {idx: product for product, idx in self.product_to_idx.items()}
            
            # Create interaction matrix (using rating as implicit feedback)
            rows = self.reviews_df['author_id'].map(self.user_to_idx)
            cols = self.reviews_df['product_id'].map(self.product_to_idx)
            data = self.reviews_df['rating'].values
            
            # Check for any mapping errors
            if rows.isna().any() or cols.isna().any():
                print("ERROR: Failed to map users or products to indices")
                return False
            
            self.user_item_matrix = csr_matrix(
                (data, (rows, cols)), 
                shape=(len(unique_users), len(unique_products))
            )
            
            print(f"User-Item Matrix Shape: {self.user_item_matrix.shape}")
            print(f"Sparsity: {(1 - self.user_item_matrix.nnz / (self.user_item_matrix.shape[0] * self.user_item_matrix.shape[1])) * 100:.2f}%")
            
            if self.user_item_matrix.nnz == 0:
                print("ERROR: User-item matrix has no interactions")
                return False
                
            return True
            
        except Exception as e:
            print(f"ERROR in _create_user_item_matrix: {e}")
            import traceback
            traceback.print_exc()
            return False
    
    def _create_content_features(self):
        """Create content-based similarity matrix using TF-IDF"""
        try:
            # Get products that are in the reviews (to match matrix dimensions)
            product_ids_in_reviews = set(self.reviews_df['product_id'].unique())
            products_filtered = self.products_df[
                self.products_df['product_id'].isin(product_ids_in_reviews)
            ].copy()
            
            if products_filtered.empty:
                print("Warning: No products found in reviews")
                self.content_similarity_matrix = None
                return
            
            # Sort by product_id to ensure consistent ordering with user_item_matrix
            products_filtered = products_filtered.sort_values('product_id').reset_index(drop=True)
            
            # Create TF-IDF features for ingredients and highlights
            if 'ingredients_text' in products_filtered.columns:
                ingredients_tfidf = self.tfidf_ingredients.fit_transform(
                    products_filtered['ingredients_text']
                )
            else:
                ingredients_tfidf = csr_matrix((len(products_filtered), 0))
            
            if 'highlights_text' in products_filtered.columns:
                highlights_tfidf = self.tfidf_highlights.fit_transform(
                    products_filtered['highlights_text']
                )
            else:
                highlights_tfidf = csr_matrix((len(products_filtered), 0))
            
            # Combine content features
            if ingredients_tfidf.shape[1] > 0 and highlights_tfidf.shape[1] > 0:
                combined_features = hstack([ingredients_tfidf, highlights_tfidf])
            elif ingredients_tfidf.shape[1] > 0:
                combined_features = ingredients_tfidf
            elif highlights_tfidf.shape[1] > 0:
                combined_features = highlights_tfidf
            else:
                print("Warning: No content features available")
                self.content_similarity_matrix = None
                return
            
            # Calculate cosine similarity
            self.content_similarity_matrix = cosine_similarity(combined_features)
            print(f"Content Similarity Matrix Shape: {self.content_similarity_matrix.shape}")
            
        except Exception as e:
            print(f"Warning: Could not create content features: {e}")
            self.content_similarity_matrix = None
    
    def _create_user_profiles(self):
        """Create user profiles based on their characteristics for cold start"""
        try:
            self.user_profiles = {}
            
            # Group users by their characteristics
            profile_features = ['skin_tone', 'eye_color', 'skin_type', 'hair_color']
            available_features = [f for f in profile_features if f in self.reviews_df.columns]
            
            if not available_features:
                print("Warning: No profile features available for user profiling")
                return
            
            # Create encoders for user profile features
            for feature in available_features:
                encoder = LabelEncoder()
                self.reviews_df[f'{feature}_encoded'] = encoder.fit_transform(
                    self.reviews_df[feature].astype(str)
                )
                self.user_profile_encoders[feature] = encoder
            
            # Group by profile features
            for features, group in self.reviews_df.groupby(available_features, dropna=False):
                if len(group) >= 5:  # Only consider profiles with enough data
                    # Get popular products for this profile
                    popular_products = (group.groupby('product_id')['rating']
                                      .agg(['mean', 'count'])
                                      .reset_index())
                    popular_products = popular_products[popular_products['count'] >= 2]
                    popular_products = popular_products.sort_values(['mean', 'count'], ascending=False)
                    
                    profile_key = tuple(features) if isinstance(features, tuple) else (features,)
                    self.user_profiles[profile_key] = popular_products['product_id'].head(20).tolist()
                    
            print(f"Created {len(self.user_profiles)} user profiles")
            
        except Exception as e:
            print(f"Warning: Could not create user profiles: {e}")
            self.user_profiles = {}
    
    def train_models(self, test_size=0.2):
        """Train both collaborative filtering and content-based models"""
        try:
            if self.user_item_matrix is None:
                raise ValueError("User-item matrix is None. Data preparation may have failed.")
            
            if self.user_item_matrix.nnz == 0:
                raise ValueError("User-item matrix is empty. No interactions found.")
            
            print(f"Training with matrix shape: {self.user_item_matrix.shape}")
            print(f"Non-zero entries: {self.user_item_matrix.nnz}")
            
            # Split data for evaluation
            try:
                train_matrix, test_matrix = train_test_split(self.user_item_matrix, train_percentage=1-test_size)
                self.train_matrix = train_matrix
                self.test_matrix = test_matrix
                print(f"Train matrix shape: {train_matrix.shape}, nnz: {train_matrix.nnz}")
                print(f"Test matrix shape: {test_matrix.shape}, nnz: {test_matrix.nnz}")
            except Exception as e:
                print(f"Warning: Error in train_test_split: {e}")
                self.train_matrix = self.user_item_matrix
                self.test_matrix = None
                print("Using full matrix for training (no test split)")
            
            print("Training Collaborative Filtering Models...")
            
            # Train ALS model (Matrix Factorization)
            try:
                self.user_item_model = implicit.als.AlternatingLeastSquares(
                    factors=50,
                    regularization=0.1,
                    iterations=20,
                    alpha=40,
                    random_state=42
                )
                self.user_item_model.fit(self.train_matrix)
                print("ALS model trained successfully!")
            except Exception as e:
                print(f"Warning: Error training ALS model: {e}")
                self.user_item_model = None
            
            print("Models training completed!")
            return True
            
        except Exception as e:
            print(f"ERROR in train_models: {e}")
            import traceback
            traceback.print_exc()
            return False
    
    def get_user_recommendations(self, user_id=None, user_profile=None, 
                               primary_category=None, secondary_category=None, 
                               tertiary_category=None, n_recommendations=10):
        """
        Get recommendations for a user
        
        Args:
            user_id: Known user ID
            user_profile: Dict with user characteristics if user_id is None
            primary_category: Filter by primary category
            secondary_category: Filter by secondary category  
            tertiary_category: Filter by tertiary category
            n_recommendations: Number of recommendations to return
        """
        try:
            if user_id is not None and user_id in self.user_to_idx:
                return self._get_existing_user_recommendations(
                    user_id, primary_category, secondary_category, 
                    tertiary_category, n_recommendations
                )
            else:
                return self._get_cold_start_recommendations(
                    user_profile, primary_category, secondary_category,
                    tertiary_category, n_recommendations
                )
        except Exception as e:
            print(f"ERROR in get_user_recommendations: {e}")
            return self._get_popular_products_formatted(n_recommendations)
    
    def _get_existing_user_recommendations(self, user_id, primary_category, 
                                        secondary_category, tertiary_category, n_recommendations):
        """Get recommendations for existing user using collaborative filtering + content-based"""
        user_idx = self.user_to_idx[user_id]
        
        # Get user's rated products for content-based filtering
        user_rated_products = set(self.reviews_df[
            self.reviews_df['author_id'] == user_id
        ]['product_id'].values)
        
        recommendations = []
        
        # Get collaborative filtering recommendations
        if self.user_item_model is not None:
            try:
                cf_recs, cf_scores = self.user_item_model.recommend(
                    user_idx, self.user_item_matrix[user_idx], 
                    N=n_recommendations*2, filter_already_liked_items=True
                )
                cf_recs = cf_recs.tolist() if hasattr(cf_recs, 'tolist') else list(cf_recs)
                cf_scores = cf_scores.tolist() if hasattr(cf_scores, 'tolist') else list(cf_scores)
                
                # Convert indices to product IDs
                cf_product_ids = [self.idx_to_product[idx] for idx in cf_recs if idx in self.idx_to_product]
                
            except Exception as e:
                print(f"Warning: Error getting CF recommendations: {e}")
                cf_product_ids = []
        else:
            cf_product_ids = []
        
        # Get content-based recommendations
        cb_product_ids = self._get_content_based_recommendations(user_rated_products, n_recommendations*2)
        
        # Combine recommendations (prioritize CF, then CB)
        combined_recs = []
        seen_products = set()
        
        # Add CF recommendations first
        for product_id in cf_product_ids:
            if product_id not in seen_products and len(combined_recs) < n_recommendations*2:
                combined_recs.append(product_id)
                seen_products.add(product_id)
        
        # Fill with CB recommendations
        for product_id in cb_product_ids:
            if product_id not in seen_products and len(combined_recs) < n_recommendations*2:
                combined_recs.append(product_id)
                seen_products.add(product_id)
        
        # Apply category filters
        filtered_recs = self._apply_category_filters(
            combined_recs, primary_category, secondary_category, tertiary_category
        )
        
        return self._format_recommendations(filtered_recs[:n_recommendations])
    
    def _get_content_based_recommendations(self, user_rated_products, n_recommendations):
        """Get content-based recommendations using product similarity"""
        if self.content_similarity_matrix is None or not user_rated_products:
            return []
        
        try:
            # Get indices of user's rated products
            rated_indices = []
            product_list = list(self.product_to_idx.keys())
            
            for product_id in user_rated_products:
                if product_id in self.product_to_idx:
                    rated_indices.append(self.product_to_idx[product_id])
            
            if not rated_indices:
                return []
            
            # Calculate average similarity scores
            similarity_scores = self.content_similarity_matrix[rated_indices].mean(axis=0)
            
            # Get top similar products (excluding already rated ones)
            top_indices = np.argsort(similarity_scores)[::-1]
            
            recommendations = []
            for idx in top_indices:
                product_id = self.idx_to_product.get(idx)
                if product_id and product_id not in user_rated_products:
                    recommendations.append(product_id)
                    if len(recommendations) >= n_recommendations:
                        break
            
            return recommendations
            
        except Exception as e:
            print(f"Warning: Error in content-based recommendations: {e}")
            return []
    
    def _get_cold_start_recommendations(self, user_profile, primary_category,
                                      secondary_category, tertiary_category, n_recommendations):
        """Get recommendations for new users (cold start)"""
        if user_profile and self.user_profiles:
            # Try to find similar user profile
            profile_features = ['skin_tone', 'eye_color', 'skin_type', 'hair_color']
            profile_key = tuple(user_profile.get(k, None) for k in profile_features)
            
            if profile_key in self.user_profiles:
                product_recs = self.user_profiles[profile_key]
            else:
                # Find most similar profile
                best_match = None
                best_score = 0
                for stored_profile, products in self.user_profiles.items():
                    score = sum(1 for i, val in enumerate(profile_key) 
                               if i < len(stored_profile) and val == stored_profile[i])
                    if score > best_score:
                        best_score = score
                        best_match = products
                
                product_recs = best_match if best_match else self._get_popular_products(n_recommendations*2)
        else:
            product_recs = self._get_popular_products(n_recommendations*2)
        
        # Apply category filters
        filtered_recs = self._apply_category_filters(
            product_recs, primary_category, secondary_category, tertiary_category
        )
        
        return self._format_recommendations(filtered_recs[:n_recommendations])
    
    def _get_popular_products(self, n_products):
        """Get popular products based on ratings and review count"""
        try:
            popular = (self.reviews_df.groupby('product_id')
                      .agg({'rating': ['mean', 'count']})
                      .reset_index())
            popular.columns = ['product_id', 'avg_rating', 'review_count']
            popular = popular[popular['review_count'] >= 3]
            popular['popularity_score'] = popular['avg_rating'] * np.log1p(popular['review_count'])
            popular = popular.sort_values('popularity_score', ascending=False)
            
            return popular['product_id'].head(n_products).tolist()
        except Exception as e:
            print(f"Error getting popular products: {e}")
            return list(self.products_df['product_id'].head(n_products))
    
    def _get_popular_products_formatted(self, n_products):
        """Get popular products formatted as recommendations"""
        product_ids = self._get_popular_products(n_products)
        return self._format_recommendations(product_ids)
    
    def _apply_category_filters(self, product_ids, primary_category, 
                              secondary_category, tertiary_category):
        """Apply category filters to recommendations"""
        if not any([primary_category, secondary_category, tertiary_category]):
            return product_ids
        
        filtered_products = []
        for product_id in product_ids:
            product_info = self.products_df[self.products_df['product_id'] == product_id]
            
            if product_info.empty:
                continue
                
            product_info = product_info.iloc[0]
            
            # Check category filters
            if primary_category and product_info.get('primary_category') != primary_category:
                continue
            if secondary_category and product_info.get('secondary_category') != secondary_category:
                continue
            if tertiary_category and product_info.get('tertiary_category') != tertiary_category:
                continue
                
            filtered_products.append(product_id)
        
        return filtered_products
    
    def _format_recommendations(self, product_ids):
        """Format recommendations with product details"""
        recommendations = []
        for product_id in product_ids:
            product_info = self.products_df[self.products_df['product_id'] == product_id]
            if not product_info.empty:
                product_info = product_info.iloc[0]
                recommendations.append({
                    'product_id': product_id,
                    'product_name': product_info.get('product_name', 'Unknown'),
                    'brand_name': product_info.get('brand_name', 'Unknown'),
                    'price_usd': product_info.get('price_usd', 0),
                    'rating': product_info.get('rating', 0),
                    'primary_category': product_info.get('primary_category', 'Unknown'),
                    'secondary_category': product_info.get('secondary_category', 'Unknown'),
                    'tertiary_category': product_info.get('tertiary_category', 'Unknown')
                })
        
        return recommendations
    
    def get_categories(self):
        """Get available categories for filtering"""
        categories = {}
        category_columns = ['primary_category', 'secondary_category', 'tertiary_category']
        
        for col in category_columns:
            if col in self.products_df.columns:
                categories[col] = sorted(self.products_df[col].dropna().unique())
            else:
                categories[col] = []
                
        return categories
    
    def get_user_stats(self):
        """Get statistics about users and interactions"""
        stats = {
            'total_users': len(self.user_to_idx),
            'total_products': len(self.product_to_idx),
            'total_interactions': self.user_item_matrix.nnz if self.user_item_matrix is not None else 0,
            'sparsity': (1 - self.user_item_matrix.nnz / (self.user_item_matrix.shape[0] * self.user_item_matrix.shape[1])) * 100 if self.user_item_matrix is not None else 0,
            'avg_ratings_per_user': self.reviews_df.groupby('author_id').size().mean(),
            'avg_ratings_per_product': self.reviews_df.groupby('product_id').size().mean()
        }
        return stats

In [4]:
def test_recommendation_system_fixed(reviews_df, products_df):
    """Fixed test function with better error handling"""
    print("="*50)
    print("TESTING SEPHORA RECOMMENDATION SYSTEM")
    print("="*50)
    
    try:
        # Initialize system
        rec_system = SephoraRecommendationSystem(reviews_df, products_df)
        
        # Train models
        training_success = rec_system.train_models(test_size=0.2)
        if not training_success:
            print("Warning: Model training had issues, but system can still provide recommendations")
        
        print("\n" + "="*50)
        print("SYSTEM STATISTICS")
        print("="*50)
        stats = rec_system.get_user_stats()
        for key, value in stats.items():
            print(f"{key}: {value:.2f}" if isinstance(value, float) else f"{key}: {value}")
        
        print("\n" + "="*50)
        print("AVAILABLE CATEGORIES")
        print("="*50)
        categories = rec_system.get_categories()
        for cat_type, cat_list in categories.items():
            print(f"{cat_type}: {len(cat_list)} categories")
            if cat_list:
                print(f"Examples: {cat_list[:5]}")
            print()
        
        print("="*50)
        print("TESTING RECOMMENDATIONS")
        print("="*50)
        
        # Test 1: Existing user recommendations
        existing_users = list(rec_system.user_to_idx.keys())[:3]
        print(f"\n1. Testing existing users:")
        for user_id in existing_users:
            print(f"\nUser {user_id}:")
            try:
                recs = rec_system.get_user_recommendations(user_id=user_id, n_recommendations=5)
                if recs:
                    for i, rec in enumerate(recs, 1):
                        print(f"  {i}. {rec['product_name']} by {rec['brand_name']} (${rec['price_usd']:.2f})")
                else:
                    print("  No recommendations found")
            except Exception as e:
                print(f"  Error getting recommendations for user {user_id}: {e}")
        
        # Test 2: Cold start recommendations
        print(f"\n2. Testing cold start (new user):")
        try:
            cold_start_profile = {
                'skin_tone': 'light',
                'skin_type': 'dry',
                'eye_color': 'brown',
                'hair_color': 'blonde'
            }
            recs = rec_system.get_user_recommendations(user_profile=cold_start_profile, n_recommendations=5)
            if recs:
                for i, rec in enumerate(recs, 1):
                    print(f"  {i}. {rec['product_name']} by {rec['brand_name']} (${rec['price_usd']:.2f})")
            else:
                print("  No cold start recommendations found")
        except Exception as e:
            print(f"  Error getting cold start recommendations: {e}")
        
        # Test 3: Category filtering
        print(f"\n3. Testing category filtering:")
        primary_cat = categories['primary_category'][0] if categories['primary_category'] else None
        if primary_cat:
            try:
                recs = rec_system.get_user_recommendations(
                    user_id=existing_users[0] if existing_users else None, 
                    primary_category=primary_cat,
                    n_recommendations=5
                )
                print(f"Recommendations for category '{primary_cat}':")
                if recs:
                    for i, rec in enumerate(recs, 1):
                        print(f"  {i}. {rec['product_name']} by {rec['brand_name']} ({rec['primary_category']})")
                else:
                    print("  No category-filtered recommendations found")
            except Exception as e:
                print(f"  Error getting category-filtered recommendations: {e}")
        else:
            print("  No primary categories available for testing")
        
        print("\n" + "="*50)
        print("TESTING COMPLETED SUCCESSFULLY!")
        print("="*50)
        
        return rec_system
        
    except Exception as e:
        print(f"CRITICAL ERROR during testing: {e}")
        import traceback
        traceback.print_exc()
        return None


In [5]:
def quick_test(reviews_df, products_df):
    """Quick test to verify the system works"""
    print("QUICK TEST - BASIC FUNCTIONALITY")
    print("="*40)
    
    try:
        rec_system = SephoraRecommendationSystem(reviews_df, products_df)
        rec_system.train_models()
        
        # Test basic recommendation
        recs = rec_system.get_user_recommendations(n_recommendations=3)
        print(f"Generated {len(recs)} recommendations:")
        for i, rec in enumerate(recs, 1):
            print(f"{i}. {rec['product_name']} by {rec['brand_name']}")
        
        print("✅ System working correctly!")
        return rec_system
        
    except Exception as e:
        print(f"❌ Error: {e}")
        return None



In [6]:
# Example usage:
if __name__ == "__main__":
    # Load your data
    reviews_df = pd.read_csv('../data/reviews_df.csv', dtype={0: str})
    products_df = pd.read_csv('../data/product_info_df.csv')
    
    # Use the comprehensive test function
    rec_system = test_recommendation_system_fixed(reviews_df, products_df)
    
    # Or use the quick test for basic functionality
    # rec_system = quick_test(reviews_df, products_df)
    
    # Then use the system normally
    if rec_system:
        recommendations = rec_system.get_user_recommendations(
            user_id="5061282401", 
            n_recommendations=10
        )
        print("\nFinal recommendations:")
        for i, rec in enumerate(recommendations, 1):
            print(f"{i}. {rec['product_name']} by {rec['brand_name']} (${rec['price_usd']:.2f})")

TESTING SEPHORA RECOMMENDATION SYSTEM
Preparing data...
User-Item Matrix Shape: (377261, 2345)
Sparsity: 99.90%
Content Similarity Matrix Shape: (1863, 1863)
Created 969 user profiles
Training with matrix shape: (377261, 2345)
Non-zero entries: 841646
Train matrix shape: (377261, 2345), nnz: 673898
Test matrix shape: (377261, 2345), nnz: 167748
Training Collaborative Filtering Models...


100%|██████████| 20/20 [00:14<00:00,  1.36it/s]


ALS model trained successfully!
Models training completed!

SYSTEM STATISTICS
total_users: 377261
total_products: 2345
total_interactions: 841646
sparsity: 99.90
avg_ratings_per_user: 2.24
avg_ratings_per_product: 359.65

AVAILABLE CATEGORIES
primary_category: 8 categories
Examples: ['Bath & Body', 'Fragrance', 'Hair', 'Makeup', 'Men']

secondary_category: 39 categories
Examples: ['Accessories', 'Bath & Body', 'Bath & Shower', 'Beauty Supplements', 'Beauty Tools']

tertiary_category: 109 categories
Examples: ['Accessories', 'Aftershave', 'Anti-Aging', 'BB & CC Cream', 'BB & CC Creams']

TESTING RECOMMENDATIONS

1. Testing existing users:

User 5061282401:
  1. Lip Glowy Balm by LANEIGE ($18.00)
  2. Volcanic AHA Pore Clearing Clay Mask by innisfree ($16.00)
  3. Cream Skin Toner & Moisturizer by LANEIGE ($33.00)
  4. Magic Cream Moisturizer with Hyaluronic Acid by Charlotte Tilbury ($100.00)
  5. Intense Therapy Lip Balm SPF 25 by Jack Black ($10.00)

User 42802569154:
  1. Fat Water H

In [14]:
# Uses user profile similarity
recs = rec_system.get_user_recommendations(
    user_profile={
        'skin_tone': 'light',
        'skin_type': 'dry', 
        'eye_color': 'brown',
        'hair_color': 'blonde'
    },
    n_recommendations=10
)
recs

[{'product_id': 'P309308',
  'product_name': 'Good Genes All-In-One AHA Lactic Acid Treatment',
  'brand_name': 'Sunday Riley',
  'price_usd': np.float64(122.0),
  'rating': np.float64(4.3375),
  'primary_category': 'Skincare',
  'secondary_category': 'Treatments',
  'tertiary_category': 'Face Serums'},
 {'product_id': 'P482535',
  'product_name': 'Strawberry Smooth BHA + AHA Salicylic Acid Serum',
  'brand_name': 'Glow Recipe',
  'price_usd': np.float64(42.0),
  'rating': np.float64(4.5439),
  'primary_category': 'Skincare',
  'secondary_category': 'Treatments',
  'tertiary_category': 'Face Serums'},
 {'product_id': 'P501254',
  'product_name': 'Watermelon Glow AHA Night Treatment',
  'brand_name': 'Glow Recipe',
  'price_usd': np.float64(40.0),
  'rating': np.float64(4.6795),
  'primary_category': 'Skincare',
  'secondary_category': 'Treatments',
  'tertiary_category': 'Facial Peels'},
 {'product_id': 'P453227',
  'product_name': 'Truth Juice Daily Cleanser',
  'brand_name': 'OLEHENR

In [16]:
new_user_recs = rec_system.get_user_recommendations(
    user_profile={
        'skin_tone': 'medium',
        'skin_type': 'oily',
        'eye_color': 'blue',
        'hair_color': 'blonde'
    },
    n_recommendations=10
)
new_user_recs

[{'product_id': 'P420652',
  'product_name': 'Lip Sleeping Mask Intense Hydration with Vitamin C',
  'brand_name': 'LANEIGE',
  'price_usd': np.float64(24.0),
  'rating': np.float64(4.3508),
  'primary_category': 'Skincare',
  'secondary_category': 'Lip Balms & Treatments',
  'tertiary_category': 'Lip Balms & Treatments'},
 {'product_id': 'P433520',
  'product_name': 'Magic Cream Moisturizer with Hyaluronic Acid',
  'brand_name': 'Charlotte Tilbury',
  'price_usd': np.float64(100.0),
  'rating': np.float64(4.4045),
  'primary_category': 'Skincare',
  'secondary_category': 'Moisturizers',
  'tertiary_category': 'Moisturizers'},
 {'product_id': 'P475908',
  'product_name': 'Aqua Bomb Overnight Lip Mask',
  'brand_name': 'belif',
  'price_usd': np.float64(22.0),
  'rating': np.float64(4.4068),
  'primary_category': 'Skincare',
  'secondary_category': 'Lip Balms & Treatments',
  'tertiary_category': 'Lip Balms & Treatments'},
 {'product_id': 'P410400',
  'product_name': 'U.F.O. Salicylic A

In [14]:
category_recs = rec_system.get_user_recommendations(
    user_id="6941883808",
    primary_category="Makeup",
    n_recommendations=5
)
category_recs

[]

In [None]:
category_recs2 = rec_system.get_user_recommendations(
    user_id="5061282401",
    primary_category="Skincare",
    secondary_category = "";
    n_recommendations=5
)
category_recs2



[]

In [None]:
products_df.secondary_category