In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import warnings

# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning)

class AdvancedSkincareRecommender:
    def __init__(self):
        # Expanded skin conditions and product dataset
        self.products = [
            {
                'name': 'Salicylic Acid Cleanser',
                'conditions': ['acne', 'blackheads', 'oily_skin', 'large_pores'],
                'ingredients': ['salicylic acid', 'niacinamide'],
                'brand': 'ClearSkin Pro'
            },
            {
                'name': 'Hydration Boost Serum',
                'conditions': ['dryness', 'sensitive_skin', 'uneven_tone'],
                'ingredients': ['hyaluronic acid', 'ceramides'],
                'brand': 'MoistureMax'
            },
            {
                'name': 'Anti-Aging Peptide Cream',
                'conditions': ['wrinkles', 'fine_lines', 'uneven_tone'],
                'ingredients': ['peptides', 'retinol'],
                'brand': 'AgeLess'
            },
            {
                'name': 'Redness Relief Moisturizer',
                'conditions': ['rosacea', 'sensitive_skin', 'uneven_tone'],
                'ingredients': ['centella', 'niacinamide'],
                'brand': 'CalmSkin'
            },
            {
                'name': 'Pore Refining Toner',
                'conditions': ['large_pores', 'oily_skin', 'acne'],
                'ingredients': ['glycolic acid', 'salicylic acid'],
                'brand': 'PoreControl'
            },
            {
                'name': 'Brightening Serum',
                'conditions': ['hyperpigmentation', 'uneven_tone'],
                'ingredients': ['vitamin c', 'alpha arbutin'],
                'brand': 'GlowUp'
            }
        ]
        
        # Collect all unique conditions
        self.all_conditions = list(set([
            condition 
            for product in self.products 
            for condition in product['conditions']
        ]))
        
        # Multilabel Binarizer
        self.mlb = MultiLabelBinarizer(classes=self.all_conditions)
    
    def prepare_training_data(self):
        # Prepare X (condition descriptions) and y (multi-label conditions)
        X = [' '.join(product['conditions']) for product in self.products]
        y = self.mlb.fit_transform([product['conditions'] for product in self.products])
        
        return X, y
    
    def create_recommendation_pipeline(self):
        # Create a complete ML pipeline
        pipeline = Pipeline([
            ('vectorizer', TfidfVectorizer()),
            ('classifier', MultiOutputClassifier(RandomForestClassifier(n_estimators=100)))
        ])
        
        return pipeline
    
    def train_and_evaluate_model(self):
        # Prepare data
        X, y = self.prepare_training_data()
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )
        
        # Create and train pipeline
        pipeline = self.create_recommendation_pipeline()
        pipeline.fit(X_train, y_train)
        
        # Predict and evaluate
        y_pred = pipeline.predict(X_test)
        print(classification_report(y_test, y_pred, target_names=self.all_conditions))
        
        return pipeline
    
    def recommend_products(self, user_conditions, top_n=3):
        # Train the model
        pipeline = self.train_and_evaluate_model()
        
        # Prepare user conditions
        user_conditions_text = ' '.join(user_conditions)

        # Predict product suitability
        predictions_list = pipeline.predict_proba([user_conditions_text])

        # Extract probability of class 1 (i.e., the likelihood that a condition is present)
        predictions = {
            condition: predictions_list[i][0][1] if len(predictions_list[i][0]) > 1 else 0
            for i, condition in enumerate(self.all_conditions)
        }

        # Rank products based on condition matches
        recommendations = []
        for product in self.products:
            match_score = sum(predictions.get(condition, 0) for condition in product['conditions'])

            recommendations.append({
                'product': product,
                'match_score': match_score
            })

        # Sort and return top recommendations
        return sorted(recommendations, key=lambda x: x['match_score'], reverse=True)[:top_n]

# Instantiate the recommender
recommender = AdvancedSkincareRecommender()

# Recommend for multiple conditions
print("\nRecommendations for hyperpigmentation dryness:")
results = recommender.recommend_products(['hyperpigmentation', 'dryness'])
for result in results:
    print(f"Product: {result['product']['name']}")
    print(f"Match Score: {result['match_score']}")
    print(f"Conditions: {result['product']['conditions']}\n")



Recommendations for hyperpigmentation dryness:
                   precision    recall  f1-score   support

          rosacea       0.00      0.00      0.00         0
         wrinkles       0.00      0.00      0.00         0
       blackheads       0.00      0.00      0.00         1
hyperpigmentation       0.00      0.00      0.00         0
        oily_skin       1.00      1.00      1.00         1
      uneven_tone       1.00      1.00      1.00         1
          dryness       0.00      0.00      0.00         1
       fine_lines       0.00      0.00      0.00         0
   sensitive_skin       0.00      0.00      0.00         1
      large_pores       1.00      1.00      1.00         1
             acne       1.00      1.00      1.00         1

        micro avg       1.00      0.57      0.73         7
        macro avg       0.36      0.36      0.36         7
     weighted avg       0.57      0.57      0.57         7
      samples avg       1.00      0.54      0.68         7

Produ