In [41]:
#Import required libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

### **1.Reading the Data**
##### Using the dataset we have in /data/fragrance_ML_model already prepared for creating a recommendation system


In [42]:
#Reading data from a .csv file in the same directory
fragrance_df = pd.read_csv("../data/fragrance_ML_model.csv")

In [43]:
# Text vectorization using TF-IDF
vectorizer = TfidfVectorizer(stop_words="english")
feature_vectors = vectorizer.fit_transform(fragrance_df["Olfactive Profile"])

In [44]:
# Creamos un modelo KNN para encontrar fragancias similares
knn_model = NearestNeighbors(n_neighbors=20, metric="cosine", algorithm="brute")
knn_model.fit(feature_vectors)

In [45]:
def filter_by_note(note, brand=None, gender=None, year=None):
    """
    Filtra perfumes que contengan una nota olfativa específica y permite aplicar filtros adicionales.
    
    Parámetros:
    - note: Nota olfativa que el usuario busca (ejemplo: "vainilla").
    - brand: (Opcional) Filtrar por marca.
    - gender: (Opcional) Filtrar por género ("men", "women", "unisex").
    - year: (Opcional) Filtrar por año de lanzamiento.

    Retorna:
    - DataFrame con los perfumes que cumplen los criterios, ordenados por calidad y actualidad.
    """

    # Filtrar perfumes que contengan la nota olfativa en alguna parte de su descripción
    filtered_df = fragrance_df[fragrance_df["Olfactive Profile"].str.contains(note, case=False, na=False)]

    # Aplicar filtros opcionales
    if brand:
        filtered_df = filtered_df[filtered_df["Brand"].str.contains(brand, case=False, na=False)]
    if gender:
        filtered_df = filtered_df[filtered_df["Gender"].str.lower() == gender.lower()]
    if year:
        filtered_df = filtered_df[filtered_df["Year"] == year]

    # Ordenar por mejor puntuación, luego por número de reviews y finalmente por el año de lanzamiento
    filtered_df = filtered_df.sort_values(by=["Rating Value", "Rating Count", "Year"], ascending=[False, False, False])

    return filtered_df

In [46]:
def recommend_perfumes_knn(perfume_name, num_recommendations=5):
    """
    Returns similar perfumes, prioritizing the highest rated and most reviewed.
    In case of a tie, selects the most recent ones.

    Parameters:
    - perfume_name: Name of the reference perfume.
    - num_recommendations: Number of recommendations.

    Returns:
    - List of recommended perfumes.
    """

    if perfume_name not in fragrance_df["Perfume"].values:
        return f"El perfume '{perfume_name}' no está en la base de datos."

    # Get the index of the perfume in the dataset
    idx = fragrance_df[fragrance_df["Perfume"] == perfume_name].index[0]

    # Ensure the input vector is in the correct format
    query_vector = feature_vectors[idx].toarray().reshape(1, -1)

    # Find similar perfumes using KNN
    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=20)

    # Create a DataFrame with the recommendations
    recommendations = fragrance_df.iloc[indices.flatten()[1:]].copy()  # Skip the first (same perfume)

    # Sort by rating value, then by number of reviews, and finally by year (most recent first)
    recommendations = recommendations.sort_values(by=["Rating Value", "Rating Count", "Year"], 
                                                  ascending=[False, False, False])

    return recommendations["Perfume"].head(num_recommendations).tolist()


In [48]:
# ➤ TEST 1: Find perfumes with "vanilla"
note_filtered_perfumes = filter_by_note(note="bergamot", gender="women")

# ➤ TEST 2: Recommend similar perfumes
example_perfume = fragrance_df["Perfume"].iloc[0]  # Select the first perfume as reference
recommended_perfumes = recommend_perfumes_knn(example_perfume)

# 🔹 8️⃣ DISPLAY RESULTS

# If using Jupyter Notebook, display the DataFrame nicely
try:
    from IPython.display import display
    print("\n🔸 Perfumes with Vanilla:")
    display(note_filtered_perfumes)  # This works in Jupyter Notebook
except ImportError:
    print("\n🔸 Perfumes with Vanilla:")
    print(note_filtered_perfumes.to_string())  # Print full DataFrame in a script

# Print recommended perfumes
print(f"\n🔸 Reference Perfume: {example_perfume}")
print("🔸 Recommended Perfumes:")
for i, perfume in enumerate(recommended_perfumes, 1):
    print(f"{i}. {perfume}")


🔸 Perfumes with Vanilla:


Unnamed: 0,Perfume,Brand,Country,Gender,Rating Value,Rating Count,Year,Top,Middle,Base,Perfumer1,mainaccord1,mainaccord2,mainaccord3,mainaccord4,mainaccord5,Family,Olfactive Profile
20880,quand vient la pluie,guerlain,france,women,4.63,128,2007,"rosemary, orange blossom, bergamot","violet, heliotrope, jasmine","praline, patchouli, musk, amber",sylvaine delacourte,powdery,white floral,sweet,violet,citrus,powdery,"rosemary, orange blossom, bergamot, violet, he..."
20841,classique collector s snow globe,jean paul gaultier,france,women,4.60,30,2019,"orange blossom, star anise, mandarin orange, b...","ginger, plum, orchid, iris, ylang ylang, tuberose","vanilla, cinnamon, sandalwood, amber, musk",jacques cavallier,citrus,warm spicy,powdery,white floral,sweet,citrus,"orange blossom, star anise, mandarin orange, b..."
20825,aromatherapy revitalise therapy,avon,usa,women,4.59,70,2002,"mandarin orange, melon, pineapple, grapefruit,...","jasmine, violet, freesia","sandalwood, musk",unknown,citrus,fruity,aromatic,sweet,fresh,citrus,"mandarin orange, melon, pineapple, grapefruit,..."
20804,dune esprit de parfum,dior,france,women,4.58,149,1994,"palisander rosewood, peony, mandarin orange, b...","rose, gorse, wallflower, ylang ylang, lily, ja...","amber, sandalwood, benzoin, moss, patchouli, v...",jean louis sieuzac,woody,amber,floral,sweet,warm spicy,woody,"palisander rosewood, peony, mandarin orange, b..."
20765,gucci envy eau de parfum,gucci,italy,women,4.56,126,1997,"bergamot, freesia, peach, magnolia, pineapple","hyacinth, lily of the valley, jasmine, rose, v...","cedar, musk, sandalwood",unknown,floral,woody,powdery,white floral,green,floral,"bergamot, freesia, peach, magnolia, pineapple,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,viva by fergie,avon,usa,women,3.25,537,2012,"green notes, bergamot","lavender, mint, narcissus","vetiver, woody notes, white musk",laurent le guernec,green,aromatic,lavender,woody,fresh spicy,green,"green notes, bergamot, lavender, mint, narciss..."
34,inspire,ellen tracy,usa,women,3.25,85,2001,"honeysuckle, cyclamen, jasmine, gardenia, peon...","lily of the valley, lemon, bergamot, melon","musk, sandalwood",carlos viñals,white floral,floral,fresh,citrus,powdery,white floral,"honeysuckle, cyclamen, jasmine, gardenia, peon..."
11,body,kkw fragrance,usa,women,3.24,223,2018,"peach, bergamot, pink pepper, green mandarin","jasmine sambac, ylang ylang, turkish rose","australian sandalwood, musk, haitian vetiver, ...",nicole mancini,woody,citrus,white floral,powdery,sweet,woody,"peach, bergamot, pink pepper, green mandarin, ..."
9,absolument femme,absolument parfumeur,france,women,3.24,41,2012,"grapefruit, tangerine, bergamot","wormwood, violet, magnolia, spices, cardamom, ...","white musk, sandalwood, amber",marc villaceque,citrus,floral,fresh spicy,warm spicy,aromatic,citrus,"grapefruit, tangerine, bergamot, wormwood, vio..."



🔸 Reference Perfume: full speed power
🔸 Recommended Perfumes:
1. dark door sport
2. limao siciliano
3. verveine agrumes 2015
4. heritage drops
5. citrus verbena summer


In [None]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from IPython.display import display  # Para visualizar DataFrames en Jupyter Notebook


# Función para limpiar y preparar notas olfativas para TF-IDF
def prepare_for_tfidf(text):
    if pd.isna(text):
        return ""
    text = re.sub(r"[^\w\s\-,]", "", text)  # Mantiene comas
    text = text.lower()
    text = text.replace("-", " ")
    return text.strip()

# Aplicar la limpieza solo a las columnas de texto
for column in fragrance_df.select_dtypes(include=["object"]).columns:
    fragrance_df[column] = fragrance_df[column].apply(prepare_for_tfidf)

# Función para recomendar perfumes según una nota olfativa
def recommend_perfumes_by_note(note, reference_perfume=None, num_recommendations=5):
    """
    Encuentra perfumes similares a un perfume de referencia, pero solo dentro del grupo que contiene la nota deseada.

    Parámetros:
    - note: Nota olfativa de interés.
    - reference_perfume: Perfume base para encontrar similitudes.
    - num_recommendations: Número de perfumes recomendados.

    Retorna:
    - Lista de perfumes recomendados.
    """
    # Filtrar perfumes que contengan la nota buscada
    filtered_df = fragrance_df[fragrance_df["Olfactive Profile"].str.contains(note, case=False, na=False)]
    
    if filtered_df.empty:
        return f"No hay perfumes con la nota '{note}' en la base de datos."

    # Vectorizar solo los perfumes filtrados
    vectorizer = TfidfVectorizer(stop_words="english")
    feature_vectors = vectorizer.fit_transform(filtered_df["Olfactive Profile"])

    # Entrenar modelo KNN solo con perfumes filtrados
    knn_model = NearestNeighbors(n_neighbors=min(20, len(filtered_df)), metric="cosine", algorithm="brute")
    knn_model.fit(feature_vectors)

    # Si no se proporciona un perfume de referencia, tomar el primero de la lista filtrada
    if reference_perfume is None:
        reference_perfume = filtered_df["Perfume"].iloc[0] # Dejar un head(5)  y darle el olfactive profile o la familia olfativa para que el usuario pueda elegir (Marca, Olfactive profile y Perfumista)
                                                            #Añadir variable random 
    # Verificar si el perfume de referencia está en la lista filtrada
    if reference_perfume not in filtered_df["Perfume"].values:
        return f"El perfume '{reference_perfume}' no tiene la nota '{note}' en su perfil olfativo."

    # Obtener el índice del perfume de referencia en el DataFrame filtrado
    idx = filtered_df[filtered_df["Perfume"] == reference_perfume].index[0]

    # Asegurar que la matriz de entrada sea 2D
    query_vector = feature_vectors[filtered_df.index.get_loc(idx)].toarray().reshape(1, -1)

    # Encontrar perfumes similares
    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=min(20, len(filtered_df)))

    # Crear un DataFrame con las recomendaciones
    recommendations = filtered_df.iloc[indices.flatten()[1:]].copy()  # Omitimos el primer resultado (mismo perfume)

    # Ordenar por rating, número de reviews y año
    recommendations = recommendations.sort_values(by=["Rating Value", "Rating Count", "Year"], ascending=[False, False, False])

    return recommendations["Perfume"].head(num_recommendations).tolist()

# ➤ PRUEBA: Encontrar perfumes similares con la nota "vanilla"
example_perfume = fragrance_df["Perfume"].iloc[0]  # Tomamos el primer perfume como referencia
recommended_perfumes = recommend_perfumes_by_note(note="vanilla", reference_perfume=example_perfume)

# 🔹 Mostrar Resultados
print(f"\n🔸 Perfume de referencia: {example_perfume}")
print("🔸 Perfumes recomendados:")
for i, perfume in enumerate(recommended_perfumes, 1):
    print(f"{i}. {perfume}")



🔸 Perfume de referencia: full speed power
🔸 Perfumes recomendados:
1. E
2. l
3.  
4. p
5. e
6. r
7. f
8. u
9. m
10. e
11.  
12. '
13. f
14. u
15. l
16. l
17.  
18. s
19. p
20. e
21. e
22. d
23.  
24. p
25. o
26. w
27. e
28. r
29. '
30.  
31. n
32. o
33.  
34. t
35. i
36. e
37. n
38. e
39.  
40. l
41. a
42.  
43. n
44. o
45. t
46. a
47.  
48. '
49. v
50. a
51. n
52. i
53. l
54. l
55. a
56. '
57.  
58. e
59. n
60.  
61. s
62. u
63.  
64. p
65. e
66. r
67. f
68. i
69. l
70.  
71. o
72. l
73. f
74. a
75. t
76. i
77. v
78. o
79. .
