In [2]:
import pandas as pd
import numpy as np
import cv2
import random
import matplotlib.pyplot as plt
from collections import Counter
from PIL import Image
import os
import ipywidgets as widgets
from IPython.display import display
from rembg import remove
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

## Overview
This script extracts dominant colors from an image and recommends perfumes based on:
- **Color detection** → Matches colors with olfactive accords using improved histogram-based detection.
- **Face removal** → Detects and removes faces before color extraction.
- **Situation-based filtering** → Suggests fragrances based on context (e.g., "formal event").
- **Preference filtering** → Recommends perfumes containing a specific accord.
- **Exclusion filtering** → Avoids perfumes with unwanted accords.
- **Gender filtering** → Returns perfumes based on gender (`Male`, `Female`, `Unisex`).
- **Text-based recommendation (TF-IDF + KNN)** → Finds similar perfumes based on olfactive descriptions.

In [3]:
## Step 1: Load the Fragrance Database
display("Loading fragrance dataset...")
fragrance_df = pd.read_csv("../data/fragrance_ML_model.csv")


'Loading fragrance dataset...'

In [None]:
## Step 2: Text-Based Fragrance Recommendation System (TF-IDF + KNN)
display("Setting up text-based recommendation system...")
vectorizer = TfidfVectorizer(stop_words="english")
feature_vectors = vectorizer.fit_transform(fragrance_df["Olfactive Profile"])
knn_model = NearestNeighbors(n_neighbors=20, metric="cosine", algorithm="brute")
knn_model.fit(feature_vectors)

In [None]:
def recommend_by_text(query):
    display(f"Searching for perfumes similar to: {query}")
    query_vector = vectorizer.transform([query])
    distances, indices = knn_model.kneighbors(query_vector)
    recommended = fragrance_df.iloc[indices[0]]
    return recommended.sample(frac=1).reset_index(drop=True)  # Shuffle recommendations

In [None]:
## Step 3: Remove Background and Faces from Image
def remove_background(image_path):
    display("Removing background and detecting faces in image...")
    image = Image.open(image_path)
    output = remove(image)
    processed_image_path = "processed_image.png"
    output.save(processed_image_path)


    # Load the processed image with OpenCV
    image_cv = cv2.imread(processed_image_path)
    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    
    # Detect faces
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    
    # Replace detected faces with a neutral color (gray)
    for (x, y, w, h) in faces:
        image_cv[y:y+h, x:x+w] = (128, 128, 128)  # Gray out the face
    
    processed_face_path = "processed_face_image.png"
    cv2.imwrite(processed_face_path, image_cv)
    return processed_face_path


In [5]:
import cv2
import numpy as np
from PIL import Image
from rembg import remove

def remove_non_clothing(image_path):
    """
    - Elimina el fondo de la imagen (rembg)
    - Detecta y elimina la piel (cara, manos)
    - Mantiene solo la ropa
    
    Retorna:
    - processed_clothing_path (str): Ruta de la imagen final con solo la ropa.
    """
    print("👕 Eliminando fondo y manteniendo solo la ropa en la imagen...")

    # **PASO 1: Eliminar fondo con rembg**
    image = Image.open(image_path)
    output = remove(image)
    processed_image_path = "processed_no_bg.png"
    output.save(processed_image_path)

    # **PASO 2: Cargar imagen y convertir a HSV**
    image_cv = cv2.imread(processed_image_path)
    hsv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2HSV)

    # **PASO 3: Detectar piel y eliminarla**
    lower_skin = np.array([0, 20, 70], dtype=np.uint8)  # Tono piel bajo
    upper_skin = np.array([20, 255, 255], dtype=np.uint8)  # Tono piel alto
    skin_mask = cv2.inRange(hsv, lower_skin, upper_skin)
    
    # Aplicar la máscara para eliminar áreas de piel
    image_no_skin = cv2.bitwise_and(image_cv, image_cv, mask=cv2.bitwise_not(skin_mask))

    # **PASO 4: Filtrar objetos pequeños (mantener solo ropa)**
    gray = cv2.cvtColor(image_no_skin, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 5000:  # Eliminar objetos pequeños (caras, manos)
            cv2.drawContours(image_no_skin, [cnt], -1, (0, 0, 0), thickness=cv2.FILLED)  # Negro

    # **PASO 5: Guardar la imagen final**
    processed_clothing_path = "processed_clothing_only.png"
    cv2.imwrite(processed_clothing_path, image_no_skin)

    print(f"✅ Imagen procesada guardada en: {processed_clothing_path}")
    return processed_clothing_path


In [10]:
image_path = "test_image.jpg"  # Imagen original
processed_clothing_path = remove_non_clothing(image_path)

print(f"✅ Imagen final guardada en: {processed_clothing_path}")

👕 Eliminando fondo y manteniendo solo la ropa en la imagen...


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\celia\\Documents\\IronhackDataAnalysis\\FinalProject\\MachineLearning Model\\test_image.jpg'

In [6]:
## Step 4: Extract Dominant Colors using Histogram Analysis
def extract_dominant_colors(image_path, num_colors=3):
    display("Extracting dominant colors using histogram analysis...")
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError("Image not found in this path")
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    hist = cv2.calcHist([image], [0], None, [180], [0, 180])
    hist = hist.flatten()

    hist_percentage = (hist/ hist.sum()) * 100
    sorted_indices = np.argsort(hist_percentage)[::-1]  #Order by most frequent colors

    top_colors = [hue for hue in sorted_indices if hist_percentage[hue] >= min_percentage][:num_colors]
    
    top_colors = np.argsort(hist)[-num_colors:][::-1]  # Get top N most frequent colors
    
    color_labels = []
    for hue in top_colors:
        if 0 <= hue < 10 or hue >= 170:
            color_labels.append("red")
        elif 10 <= hue < 25:
            color_labels.append("orange")
        elif 25 <= hue < 35:
            color_labels.append("yellow")
        elif 35 <= hue < 85:
            color_labels.append("green")
        elif 85 <= hue < 130:
            color_labels.append("blue")
        elif 130 <= hue < 160:
            color_labels.append("purple")
        elif 160 <= hue < 170:
            color_labels.append("pink")
        elif 170 <= hue < 195:
            color_labels.append("black")
        elif 195 <= hue < 225:
            color_labels.append("white")
        elif 225 <= hue < 245:
            color_labels.append("beige")
        elif 245 <= hue < 265:
            color_labels.append("brown")
    
    return list(set(color_labels))

In [7]:
## Step 5: Map Colors to Olfactive Accords
COLOR_TO_ACCORD = {
    "yellow": ["citrus", "fruity", "sweet"],
    "blue": ["marine", "aquatic", "ozonic"],
    "beige": ["powdery", "soft spicy", "musky"],
    "white": ["clean", "aldehyde", "soft spicy"],
    "red": ["spicy", "woody", "amber"],
    "green": ["herbal", "fresh spicy", "aromatic"],
    "black": ["dark", "intense", "leathery"],
    "pink": ["floral", "sweet", "fruity"],
    "orange": ["warm spicy", "sweet", "gourmand"],
    "purple": ["powdery", "floral", "woody"],
    "brown": ["woody", "earthy", "leathery"]
}

def map_colors_to_accords(dominant_colors):
    detected_accords = []
    for color in dominant_colors:
        if color in COLOR_TO_ACCORD:
            accords.extend(COLOR_TO_ACCORD[color])
    return list(set(accords))

In [8]:
# Situation-based accords
SITUATION_TO_ACCORD = {
    "formal": ["woody", "spicy", "leather"],
    "casual": ["fresh", "citrus", "aquatic"],
    "romantic": ["floral", "sweet", "musky"],
    "sport": ["green", "aquatic", "ozonic"],
    "office": ["clean", "powdery", "aldehyde"]
}

In [None]:
## Step 6: Define the Recommendation Function
def recommend_fragrance(input_data, mode="image", situation=None, preference=None, exclude=None, gender=None, num_recommendations=5):
    display("Generating fragrance recommendation...")
    if mode == "text":
        return recommend_by_text(input_data)
    
    processed_image_path = remove_background(input_data)
    detected_colors = extract_dominant_colors(processed_image_path)
    accords_segmented = [COLOR_TO_ACCORD[color] for color in detected_colors if color in COLOR_TO_ACCORD]
    
    combined_accords = list(set(sum(accords_segmented, [])))
    if situation and situation in SITUATION_TO_ACCORD:
        combined_accords.extend(SITUATION_TO_ACCORD[situation])
    combined_accords = list(set(combined_accords))
    
    filtered_df = fragrance_df[fragrance_df["Olfactive Profile"].str.contains('|'.join(combined_accords), case=False, na=False)]
    
    if gender:
        filtered_df = filtered_df[filtered_df["Gender"].str.contains(gender, case=False, na=False)]
    if preference:
        filtered_df = filtered_df[filtered_df["Olfactive Profile"].str.contains(preference, case=False, na=False)]
    if exclude:
        filtered_df = filtered_df[~filtered_df["Olfactive Profile"].str.contains(exclude, case=False, na=False)]
    
    if filtered_df.empty:
        return "No perfumes found after applying filters. Try adjusting your criteria."
    
    return filtered_df.sample(n=min(num_recommendations, len(filtered_df))).reset_index(drop=True)


In [None]:
## Step 5: Run Sample Tests with Detailed Analysis

def run_sample_tests():
    # Image-based Recommendation Test
    display("Running detailed sample test for image-based recommendation...")
    test_image_path = "test_image.png"
    
    display(f"Using test image: {test_image_path}")
    processed_image = remove_background(test_image_path)
    
    # Display original and processed images
    original = Image.open(test_image_path)
    processed = Image.open(processed_image)
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(original)
    plt.title("Original Image")
    plt.axis("off")
    plt.subplot(1, 2, 2)
    plt.imshow(processed)
    plt.title("Processed Image (Face Removed)")
    plt.axis("off")
    plt.show()
    
    detected_colors = extract_dominant_colors(processed_image)
    display(f"Detected Colors: {detected_colors}")
    
    recommendations = recommend_fragrance(
        input_data=test_image_path,
        mode="image",
        situation="casual",
        preference="lemon",
        exclude="sweet",
        gender="unisex",
        num_recommendations=5
    )
    display("Recommended Fragrances:", recommendations)

     # Text-based Recommendation Test
    display("Running detailed sample test for text-based recommendation...")
    text_query = "fresh citrus"
    display(f"Input description: {text_query}")
    text_recommendations = recommend_by_text(text_query)
    display("Recommended Fragrances:", text_recommendations)


In [None]:
# Execute sample tests
run_sample_tests()

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Upload a test image interactively
uploader = widgets.FileUpload(accept='.png,.jpg,.jpeg', multiple=False)
display(uploader)

if uploader.value:
    uploaded_file = list(uploader.value.values())[0]
    test_image_path = "test_image.png"
    with open(test_image_path, "wb") as f:
        f.write(uploaded_file['content'])
    display(f"Image saved as {test_image_path}")
else:
    display("No file uploaded! Please upload an image first.")
