# Skincare Recommendation System

## Import Libraries

In [66]:
import pandas as pd
import ast

## Load Data

In [67]:
# Load the integrated product features from the Excel workbook
df = pd.read_excel("../data/products_integrated_features.xlsx")

# Show the first few rows
df.head()

Unnamed: 0,url,product_name,brand,category,price,rating,skin_type,total_reviews,skin_concern,ingredients,skin_goal,age,rating_star
0,https://reviews.femaledaily.com/products/clean...,Air Mawar,Viva Cosmetics,Toner,4800,4.1,"['combination', 'oily']",741820,"['irritation', 'dryness', 'acne', 'sensitive',...","['aha', 'bha', 'hyaluronic acid', 'green tea',...","['calming', 'refreshing', 'glowing', 'fast-abs...",19 - 24,4.786207
1,https://reviews.femaledaily.com/products/clean...,Face Tonic,Viva Cosmetics,Toner,5000,4.0,"['dry', 'combination']",66944,"['pores', 'acne', 'irritation', 'sensitive', '...","['aloe vera', 'green tea']","['nourishing', 'refreshing', 'anti-aging', 'ca...",19 - 24,4.484375
2,https://reviews.femaledaily.com/products/clean...,Face Tonic,Viva Cosmetics,Toner,5000,3.7,"['combination', 'oily']",35724,"['acne', 'irritation', 'oiliness', 'pores', 'r...","['vitamin c', 'green tea']","['oil-control', 'refreshing', 'calming']",19 - 24,4.461538
3,https://reviews.femaledaily.com/products/treat...,Acne Lotion,Viva Cosmetics,Acne treatment,5150,3.7,"['combination', 'oily']",17776,"['acne', 'redness', 'oiliness', 'blackheads', ...","['zinc', 'tea tree', 'vitamin c']","['scar-fading', 'calming']",19 - 24,4.29703
4,https://reviews.femaledaily.com/products/clean...,Astringent,Viva Cosmetics,Toner,5300,3.8,"['oily', 'combination']",131544,"['acne', 'irritation', 'oiliness', 'pores', 'b...",['green tea'],"['pore-minimizing', 'refreshing', 'calming', '...",19 - 24,4.342593


In [68]:
# List the columns available in the loaded table
df.columns

Index(['url', 'product_name', 'brand', 'category', 'price', 'rating',
       'skin_type', 'total_reviews', 'skin_concern', 'ingredients',
       'skin_goal', 'age', 'rating_star'],
      dtype='object')

In [69]:
# Add review_score: rating multiplied by total_reviews
df["review_score"] = df["rating"].mul(df["total_reviews"])

# Keep only the columns the recommender needs
columns_used = [
    "product_name",
    "brand",
    "age",
    "review_score",
    "price",
    "category",
    "ingredients",
]
df_filtered = df.loc[:, columns_used].copy()

df_filtered.head()

Unnamed: 0,product_name,brand,age,review_score,price,category,ingredients
0,Air Mawar,Viva Cosmetics,19 - 24,3041462.0,4800,Toner,"['aha', 'bha', 'hyaluronic acid', 'green tea',..."
1,Face Tonic,Viva Cosmetics,19 - 24,267776.0,5000,Toner,"['aloe vera', 'green tea']"
2,Face Tonic,Viva Cosmetics,19 - 24,132178.8,5000,Toner,"['vitamin c', 'green tea']"
3,Acne Lotion,Viva Cosmetics,19 - 24,65771.2,5150,Acne treatment,"['zinc', 'tea tree', 'vitamin c']"
4,Astringent,Viva Cosmetics,19 - 24,499867.2,5300,Toner,['green tea']


In [70]:
# Check the number of rows and columns
print("Shape:", df_filtered.shape)

Shape: (918, 7)


In [71]:
# Show column dtypes and non-null counts
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   product_name  918 non-null    object 
 1   brand         918 non-null    object 
 2   age           918 non-null    object 
 3   review_score  918 non-null    float64
 4   price         918 non-null    int64  
 5   category      918 non-null    object 
 6   ingredients   918 non-null    object 
dtypes: float64(1), int64(1), object(5)
memory usage: 50.3+ KB


In [72]:
# Summary statistics for the numeric columns
df_filtered.describe()

Unnamed: 0,review_score,price
count,918.0,918.0
mean,344191.9,132216.2
std,744403.0,174008.7
min,56.0,4800.0
25%,40723.7,32575.0
50%,149214.3,85000.0
75%,351761.8,160000.0
max,10207150.0,1825000.0


In [73]:
# Count missing values per column
print("Missing values per column:")
print(df_filtered.isnull().sum())

Missing values per column:
product_name    0
brand           0
age             0
review_score    0
price           0
category        0
ingredients     0
dtype: int64


In [74]:
# Count how many products fall into each category
df_filtered["category"].value_counts()

category
Moisturizer Gel       97
Sun Protection        97
Facial Wash           96
Toner                 94
Serum & Essence       94
Peeling               93
Acne treatment        91
Exfoliator            91
Moisturizer Lotion    85
Moisturizer Cream     80
Name: count, dtype: int64

In [75]:
# Inspect how the ingredients values are stored (first five rows)
print("Ingredients format example:")
df_filtered["ingredients"].iloc[0:5]

Ingredients format example:


0    ['aha', 'bha', 'hyaluronic acid', 'green tea',...
1                           ['aloe vera', 'green tea']
2                           ['vitamin c', 'green tea']
3                    ['zinc', 'tea tree', 'vitamin c']
4                                        ['green tea']
Name: ingredients, dtype: object

## Data Preprocessing

In [76]:
# Check the Python type of the first value in the ingredients column
print("Ingredients column type:", type(df_filtered['ingredients'].iloc[0]))

Ingredients column type: <class 'str'>


In [77]:
# Convert the stringified ingredient lists into real Python lists.
# Non-string values pass through unchanged, so re-running this cell is harmless.
def _parse_ingredients(value):
    """Parse ``value`` with ``ast.literal_eval`` when it is a string; otherwise return it as-is."""
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


df_filtered['ingredients'] = df_filtered['ingredients'].apply(_parse_ingredients)

# Confirm the conversion worked
print("Contoh 5 baris hasil konversi:")
df_filtered[['product_name', 'ingredients']].head()

Contoh 5 baris hasil konversi:


Unnamed: 0,product_name,ingredients
0,Air Mawar,"[aha, bha, hyaluronic acid, green tea, vitamin e]"
1,Face Tonic,"[aloe vera, green tea]"
2,Face Tonic,"[vitamin c, green tea]"
3,Acne Lotion,"[zinc, tea tree, vitamin c]"
4,Astringent,[green tea]


In [78]:
# Python type of the first ingredients value at this point in the notebook
print("\nTipe data ingredients setelah konversi:", type(df_filtered['ingredients'].iloc[0]))


Tipe data ingredients setelah konversi: <class 'list'>


## Recommendation System

In [79]:
def recommend_products(df, user_age, user_price_min, user_price_max, user_category, user_ingredients, top_k=5):
    """
    Recommend the highest-scoring products that match a user's preferences.

    A product is kept only if it passes every filter below; the survivors are
    ranked by ``review_score`` (highest first) and the first ``top_k`` are
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Product table containing the columns ``age``, ``category``, ``price``,
        ``ingredients`` and ``review_score``.
    user_age
        Value compared for equality against the ``age`` column.
    user_price_min, user_price_max
        Inclusive lower and upper bounds applied to the ``price`` column.
    user_category
        Value compared for equality against the ``category`` column.
    user_ingredients : str or iterable of str
        Wanted ingredients. A product qualifies when it contains at least one
        of them (exact match). A single string is treated as one ingredient.
        An empty collection matches no product.
    top_k : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        At most ``top_k`` rows of ``df`` with all of its columns, sorted by
        ``review_score`` descending. When nothing matches, an empty frame that
        still carries the original columns is returned.
    """
    # A bare string would otherwise be iterated character by character and
    # never match a whole ingredient name.
    if isinstance(user_ingredients, str):
        wanted = [user_ingredients]
    else:
        wanted = list(user_ingredients)

    def has_matching_ingredient(product_ingredients):
        # Missing cells (None / NaN) do not support `in`; treat them as no match.
        if product_ingredients is None or isinstance(product_ingredients, float):
            return False
        return any(ing in product_ingredients for ing in wanted)

    # Age, category and inclusive price-range filters.
    candidates = df[
        (df["age"] == user_age)
        & (df["category"] == user_category)
        & (df["price"] >= user_price_min)
        & (df["price"] <= user_price_max)
    ]

    # Series.apply on an empty Series returns an empty object-dtype Series,
    # which pandas does not recognise as a boolean mask (it is read as an
    # empty list of column labels and drops every column). Casting to bool
    # keeps it a row mask, so an empty result still has all columns.
    ingredient_mask = candidates["ingredients"].apply(has_matching_ingredient).astype(bool)
    matches = candidates.loc[ingredient_mask]

    # Rank by review_score and keep the top k.
    return matches.sort_values(by="review_score", ascending=False).head(top_k)


In [80]:
# Contoh input user
# Example user preferences
user_input = {
    "age": "25 - 29",
    "price_min": 50000,
    "price_max": 200000,
    "category": "Toner",
    "ingredients": ["niacinamide", "aha"]
}

# Run the recommender; the preference values are passed positionally in signature order
top_products = recommend_products(
    df_filtered,
    user_input["age"],
    user_input["price_min"],
    user_input["price_max"],
    user_input["category"],
    user_input["ingredients"],
    top_k=5,
)

top_products


Unnamed: 0,product_name,brand,age,review_score,price,category,ingredients
571,Glycolic Bright Peeling Toner,L'Oreal Paris,25 - 29,252902.4,125000,Toner,"[aha, bha, glycerin, panthenol, salicylic acid..."
453,Marigold Clearings Petal Toner,NPURE,25 - 29,243485.9,85000,Toner,"[aha, allantoin, glycerin, niacinamide, panthe..."
592,Licorice pH Balancing Toner,ACWELL,25 - 29,226528.3,130000,Toner,"[licorice, aha, vitamin c, green tea, pha, ret..."
706,Probiome Skin Tonic,Studio Tropik,25 - 29,153260.6,180000,Toner,"[ceramide, hyaluronic acid, niacinamide, aha, ..."
730,Skin Ready Hydrating Booster,BHUMI,25 - 29,140400.0,197000,Toner,"[centella asiatica, ceramide, glycerin, hyalur..."
