Skin Care Product Recommendation Prediction

In [3]:
import random

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler


In [None]:
# Step 1: load the GlowGuide dataset (path is relative to the working directory).
df = pd.read_csv("GlowGuide_Enhanced_Dataset.csv")  # Change filename if needed

In [None]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

In [None]:
# ----------------------------
# Step 2: EDA Process
# ----------------------------
# Exploratory data analysis starts here: look at the raw rows first.

# View first 5 rows
print(df.head())

In [None]:
# View last 5 rows
print(df.tail())

In [None]:
# Check for missing values: prints the number of null entries per column
print("\nMissing values:\n", df.isnull().sum())

In [None]:
# Column names, non-null counts and dtypes.
# DataFrame.info() writes the summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line after the summary.
df.info()

In [None]:
# (number of rows, number of columns)
print(df.shape)

In [None]:
# Summary statistics; with default arguments describe() covers only the
# numeric columns
print("\nBasic statistics:\n", df.describe())


In [None]:
# Check data types of every column
print("\nData types:\n", df.dtypes)


In [None]:
# For every column: number of distinct values, then the frequency of each
# value, followed by a separator line.
for col in df.columns:
    print(f"{col} → Unique values: {df[col].nunique()}")
    print(df[col].value_counts())
    print('-'*40)

In [None]:
### Bar Charts
# Each section draws one count plot (rows per category) and displays it
# with plt.show().

# Skin Concern distribution; `order` lists the categories by descending
# frequency (value_counts() sorts that way by default)
sns.countplot(data=df, x='Skin_Concern', order=df['Skin_Concern'].value_counts().index)
plt.xticks(rotation=90)  # vertical tick labels
plt.title("Skin Concerns Distribution")
plt.tight_layout()
plt.show()

# Gender distribution
sns.countplot(data=df, x='Gender')
plt.title("Gender Distribution")
plt.show()

# Skin Type
sns.countplot(data=df, x='Skin_Type')
plt.title("Skin Type Distribution")
plt.show()

# Skin Tone
sns.countplot(data=df, x='Skin_Tone')
plt.title("Skin Tone Distribution")
plt.show()


In [None]:

#### Histogram
# Age distribution in 20 bins with a kernel-density curve overlaid (kde=True).
sns.histplot(data=df, x='Age', bins=20, kde=True, color='orange')
plt.title("Age Distribution")
plt.show()

In [None]:
#### Pie chart

# The 10 most frequent recommended products and their row counts.
top_products = df['Recommended_Product'].value_counts().head(10)

# Plot
# NOTE: the percentages are shares within these 10 products only, not
# shares of the whole dataset, because only `top_products` is plotted.
top_products.plot.pie(autopct='%1.1f%%', startangle=90)
plt.title("Top 10 Recommended Products")
plt.ylabel("")  # blank out the y-axis label
plt.show()


In [None]:

#### HEATMAP
# Crosstab: Skin Concern vs Recommended Product
# Rows are skin concerns, columns are products, cells are co-occurrence counts.
pivot = pd.crosstab(df['Skin_Concern'], df['Recommended_Product'])

# Plot heatmap with 'coolwarm' color
plt.figure(figsize=(15, 8))
sns.heatmap(
    pivot,
    cmap="coolwarm",      
    linewidths=0.5,
    annot=True,  # write the count inside every cell
    fmt='d',     # counts are integers
    cbar_kws={'label': 'Frequency'}
)

plt.title("Skin Concern vs Recommended Product", fontsize=14)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()



In [None]:
from sklearn.preprocessing import LabelEncoder

# Work on a copy so the original `df` keeps its string categories.
df_encoded = df.copy()

# One encoder object is refitted for each object-dtype column; the learned
# classes of earlier columns are therefore not kept. That is enough here
# because the integer codes are only used for the correlation analysis.
le = LabelEncoder()
for col in df_encoded.columns:
    if df_encoded[col].dtype != 'object':
        continue  # non-object columns are left untouched
    df_encoded[col] = le.fit_transform(df_encoded[col])


In [None]:
# Compute correlation matrix of the label-encoded copy.
# NOTE(review): the integer codes of nominal columns carry no natural order,
# so these coefficients are only a rough indication — interpret with care.
correlation_matrix = df_encoded.corr()

# Display the matrix
print("\nCorrelation Matrix:\n")
print(correlation_matrix)


In [None]:


# Plot heatmap of the correlation matrix, annotating every cell with its
# value rounded to two decimals
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5, fmt=".2f")
plt.title(" Correlation Heatmap of Skincare Dataset")
plt.show()

In [None]:
#### MODEL TRAINING

# ----------------------------
# Step 4: Prepare features and target
# ----------------------------

# Five input columns; the model learns to predict the recommended product.
X = df[["Skin_Concern", "Skin_Type", "Skin_Tone", "Age", "Gender"]]
y = df["Recommended_Product"]

In [None]:
# ----------------------------
# Step 5: Define Column Transformer
# ----------------------------

# The four text columns are ordinal-encoded; a category that was not seen
# during fit is mapped to -1 instead of raising.
categorical_features = ["Skin_Concern", "Skin_Type", "Skin_Tone", "Gender"]

preprocessor = ColumnTransformer(
    [
        (
            'cat',
            OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
            categorical_features,
        ),
    ],
    remainder='passthrough',  # every other column (Age) is forwarded unchanged
)

In [None]:
# ----------------------------
# Step 6: Build Pipeline
# ----------------------------

# Encoding and the classifier are chained so that raw rows can be passed
# straight to fit/predict/score.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    (
        'classifier',
        RandomForestClassifier(random_state=42, n_estimators=100),
    ),
])


In [None]:
# ----------------------------
# Step 7: Train-test split
# ----------------------------

# 80 % of the rows for training, 20 % held out; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# ----------------------------
# Step 8: Fit the Pipeline
# ----------------------------

# Fits the preprocessor and then the classifier on the training rows.
pipeline.fit(X_train, y_train)


In [None]:
# ----------------------------
# Step 9: Accuracy
# ----------------------------

# Pipeline.score() delegates to the classifier's score (accuracy for classifiers).
train_accuracy = pipeline.score(X_train, y_train)
test_accuracy = pipeline.score(X_test, y_test)

# Reported as percentages rounded to two decimals.
print("Training Accuracy:", round(train_accuracy * 100, 2), "%")
print("Testing Accuracy:", round(test_accuracy * 100, 2), "%")


In [None]:
def predict_product(concern, skin_type, skin_tone, age, gender):
    """Return the product predicted for a single user profile.

    The five values are placed in a one-row DataFrame whose column names
    match the training features and passed to the module-level ``pipeline``.

    NOTE: ``pipeline`` is looked up when the function is called. Later
    sections of this notebook rebind that name to other models, so call
    this function before running those cells.
    """
    feature_names = ["Skin_Concern", "Skin_Type", "Skin_Tone", "Age", "Gender"]
    profile = [concern, skin_type, skin_tone, age, gender]
    single_row = pd.DataFrame([profile], columns=feature_names)
    predictions = pipeline.predict(single_row)
    return predictions[0]

# Try it
print(predict_product("Pigmentation", "Dry", "Medium", 24, "Female"))


In [None]:

# Save the pipeline to a .pkl file
# The whole fitted pipeline (preprocessor + classifier) is serialised.
joblib.dump(pipeline, "recommended_product_model.pkl")
print("Model saved as 'recommended_product_model.pkl'")


HOME REMEDY PREDICTION 

In [None]:
# Step 2: Define Data Parameters

# 20 (skin concern, home remedy) pairs. The two lists are split out of the
# pairs so that position i of one list belongs to position i of the other.
skin_concerns, home_remedies = map(list, zip(
    ("Acne", "Honey and Cinnamon Mask"),
    ("Dryness", "Aloe Vera Gel"),
    ("Oily Skin", "Oatmeal and Yogurt Pack"),
    ("Wrinkles", "Cucumber Slices"),
    ("Dark Circles", "Cold Green Tea Bags"),
    ("Pigmentation", "Potato Juice"),
    ("Sunburn", "Tomato Pulp"),
    ("Blackheads", "Multani Mitti (Fuller’s Earth)"),
    ("Whiteheads", "Lemon and Honey Mix"),
    ("Dullness", "Coconut Oil"),
    ("Uneven Skin Tone", "Turmeric and Milk Paste"),
    ("Pores", "Papaya Mash"),
    ("Rosacea", "Apple Cider Vinegar Toner"),
    ("Redness", "Rose Water Spray"),
    ("Itchy Skin", "Neem Paste"),
    ("Sagging", "Curd and Besan Pack"),
    ("Dark Spots", "Baking Soda Scrub"),
    ("Tanning", "Coriander Leaf Juice"),
    ("Inflammation", "Sandalwood Powder Paste"),
    ("Eczema", "Mint Leaf Paste"),
))

# Lookup table: concern -> its single home remedy.
remedy_mapping = {
    concern_name: remedy_name
    for concern_name, remedy_name in zip(skin_concerns, home_remedies)
}

In [None]:
# Step 3: Generate Dataset of 15,000 Records
random.seed(42)  # fixed seed: the sampled concerns are reproducible

# One random.choice() draw per record, in record order; the remedy is a pure
# lookup, so every row is a (concern, mapped remedy) pair.
data = [
    [sampled, remedy_mapping[sampled]]
    for sampled in (random.choice(skin_concerns) for _ in range(15000))
]

# NOTE: this rebinds `df`; the GlowGuide DataFrame is no longer reachable
# through that name after this cell.
df = pd.DataFrame(data, columns=["Skin_Concern", "Home_Remedy"])



In [None]:

# Horizontal count plot of the generated skin concerns, most frequent first.
plt.figure(figsize=(14, 6))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.countplot(data=df, y="Skin_Concern", order=df["Skin_Concern"].value_counts().index, palette="Spectral")
plt.title("Frequency of Each Skin Concern in the Dataset")
plt.xlabel("Count")
plt.ylabel("Skin Concern")
plt.tight_layout()
plt.show()


In [None]:
# Concern x remedy count table (rebinds `pivot` from the earlier heatmap cell).
# Because each concern maps to exactly one remedy, every row has a single
# non-zero cell.
pivot = pd.crosstab(df["Skin_Concern"], df["Home_Remedy"])
plt.figure(figsize=(14,10))
sns.heatmap(pivot, annot=True, fmt='d', cmap='YlGnBu', linewidths=0.5)
plt.title("Skin Concern vs Home Remedy Mapping")
plt.tight_layout()
plt.show()


In [None]:
# Count plot of the home remedies, most frequent first.
plt.figure(figsize=(12,6))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.countplot(data=df, x="Home_Remedy", order=df["Home_Remedy"].value_counts().index, palette="cubehelix")
plt.title("Frequency of All Home Remedies")
plt.ylabel("Count")
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()



In [None]:
# Running total of each concern's share of the rows, most frequent first.
# NOTE: normalize=True yields fractions, so the plotted values run from
# 0 to 1 even though the axis label below says "%".
cumulative = df['Skin_Concern'].value_counts(normalize=True).cumsum()
plt.figure(figsize=(10,5))
sns.lineplot(x=cumulative.index, y=cumulative.values, marker='o')
plt.xticks(rotation=45)
plt.title("Cumulative Distribution of Skin Concerns")
plt.ylabel("Cumulative %")
plt.tight_layout()
plt.show()


In [None]:
# Step 4: Features and Target
# Double brackets keep X a one-column DataFrame rather than a Series.
X = df[["Skin_Concern"]]
y = df["Home_Remedy"]

In [None]:
# Step 5: Preprocessing Pipeline
# NOTE: this encoder uses default settings, unlike the product model's
# encoder which maps unknown categories to -1; with the defaults an unseen
# concern raises an error at predict time.
preprocessor = ColumnTransformer(transformers=[
    ('cat', OrdinalEncoder(), ["Skin_Concern"])
])

In [None]:

# Step 6: Full Pipeline with Model
# NOTE: rebinds `pipeline`; from here on the name refers to the home-remedy
# model, not the product model.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

In [None]:

# Step 7: Train-Test Split (80 % train / 20 % test, fixed shuffle)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Step 8: Train the Model on the training rows
pipeline.fit(X_train, y_train)

In [None]:
# Step 9: Accuracy Evaluation
# NOTE: the target is a deterministic function of the single feature, so
# the scores mainly confirm that the mapping was learned.
train_accuracy = pipeline.score(X_train, y_train)
test_accuracy = pipeline.score(X_test, y_test)

In [None]:
# Step 10: Print Accuracies as percentages rounded to two decimals
print("Training Accuracy:", round(train_accuracy * 100, 2), "%")
print("Testing Accuracy:", round(test_accuracy * 100, 2), "%")


In [None]:

# Step 11: Predict Home Remedy for a given skin concern
# The input is a one-row DataFrame with the same column name used in training.
example_input = pd.DataFrame({
    "Skin_Concern": ["Pigmentation"]  # 🔁 Change this to test other concerns
})
predicted_remedy = pipeline.predict(example_input)

print("🧴 Predicted Home Remedy for", example_input["Skin_Concern"][0], "→", predicted_remedy[0])

In [None]:
# Step 12: Save the Trained Model to a .joblib File


joblib.dump(pipeline, "home_remedy_model.joblib")
print("Model saved as 'home_remedy_model.joblib'")


In [None]:
# Step 2: Generate Dataset
def calculate_stress_level(sleep, screen_time):
    """Label a (sleep, screen_time) pair as "Low", "Moderate" or "High".

    Rules, checked in this order:
      1. "Low"      - sleep >= 7 and screen_time <= 4
      2. "Moderate" - 5 <= sleep < 7, or 4 < screen_time <= 6
      3. "High"     - everything else

    NOTE(review): rule 2 is an OR, so very short sleep combined with a
    screen time in (4, 6] is still "Moderate", while long sleep combined
    with a screen time above 6 is "High" - confirm this is intended.
    """
    well_rested = sleep >= 7
    light_screen_use = screen_time <= 4
    if well_rested and light_screen_use:
        return "Low"

    borderline_sleep = 5 <= sleep < 7
    borderline_screen = 4 < screen_time <= 6
    if borderline_sleep or borderline_screen:
        return "Moderate"

    return "High"

random.seed(42)  # fixed seed: the synthetic rows are reproducible

# 15,000 synthetic rows. For every row the sleep value is drawn first and the
# screen-time value second (both rounded to one decimal); the label is then
# derived from the rounded pair.
data = [
    [hours_slept, hours_on_screen, calculate_stress_level(hours_slept, hours_on_screen)]
    for hours_slept, hours_on_screen in (
        (round(random.uniform(3.5, 9.5), 1), round(random.uniform(2, 10), 1))
        for _ in range(15000)
    )
]

# NOTE: rebinds `df` to the stress dataset.
df = pd.DataFrame(data, columns=["Sleep_Hours", "Screen_Time_Hours", "Stress_Level"])

In [None]:
# Step 3: Features and Target
# Two numeric inputs; the label is the rule-derived stress level.
X = df[["Sleep_Hours", "Screen_Time_Hours"]]
y = df["Stress_Level"]


In [None]:
# Step 4: Train-Test Split (80 % train / 20 % test, fixed shuffle)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Step 5: Pipeline with Preprocessing + Model
# NOTE: rebinds `pipeline`; from here on the name refers to the stress model.
# NOTE(review): tree ensembles are generally insensitive to feature scaling,
# so the scaler is presumably optional here — verify before removing.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(random_state=42))
])

In [None]:
# Step 6: Train the Model on the training rows
pipeline.fit(X_train, y_train)

In [None]:
# Step 7: Accuracy on the training and the held-out rows
train_accuracy = pipeline.score(X_train, y_train)
test_accuracy = pipeline.score(X_test, y_test)

In [None]:
# Report both accuracies as percentages rounded to two decimals.
print("Training Accuracy:", round(train_accuracy * 100, 2), "%")
print("Testing Accuracy:", round(test_accuracy * 100, 2), "%")

In [None]:
# Step 8: Prediction Example
# The pipeline was fitted on a DataFrame with named columns, so the example
# is passed the same way. A bare nested list makes scikit-learn warn that X
# has no valid feature names, and gives no protection against swapping the
# two values.
example_input = pd.DataFrame(
    [[6.0, 7.0]],  # sleep = 6 hours, screen time = 7 hours
    columns=["Sleep_Hours", "Screen_Time_Hours"],
)
predicted = pipeline.predict(example_input)
print("📌 Predicted Stress Level:", predicted[0])


In [None]:
# Step 9: Save the trained model with joblib (the file name uses a .pkl extension)
import joblib  # already imported at the top of the notebook; repeated here

joblib.dump(pipeline, "stress_level_model.pkl")
print("Model saved as 'stress_level_model.pkl'")


In [None]:

# Set random seed so the exercise dataset generated below is reproducible
random.seed(42)

# Step 1: Define Stress Level Rules
def calculate_stress_level(sleep, screen_time):
    """Classify stress from hours of sleep and hours of screen time.

    Returns "Low" when sleep >= 7 and screen_time <= 4; otherwise "Moderate"
    when sleep is in [5, 7) or screen_time is in (4, 6]; otherwise "High".
    The checks are applied in that order.
    """
    if not (sleep < 7 or screen_time > 4):
        return "Low"
    sleep_is_moderate = 5 <= sleep < 7
    screen_is_moderate = 4 < screen_time <= 6
    return "Moderate" if (sleep_is_moderate or screen_is_moderate) else "High"

In [None]:
# Step 2: Exercise mappings
# One fixed recommendation per stress level: the exercise name, its
# Duration (the prediction cell prints it as minutes) and a description.
exercise_map = {
    "Low": dict(
        Exercise="Walking",
        Duration=20,
        Description="Light walking to maintain calm and health",
    ),
    "Moderate": dict(
        Exercise="Yoga",
        Duration=30,
        Description="Breathing and flexibility to reduce stress",
    ),
    "High": dict(
        Exercise="Meditation",
        Duration=40,
        Description="Deep meditation to calm high stress",
    ),
}

In [None]:
# Step 4: Generate 15,000 records
# Each record: two random inputs (sleep drawn first, then screen time, both
# rounded to one decimal), the rule-derived stress level, and the exercise
# plan looked up for that level.
data = []

for _ in range(15000):
    sleep = round(random.uniform(3.5, 9.5), 1)
    screen = round(random.uniform(2, 10), 1)
    stress = calculate_stress_level(sleep, screen)
    plan = exercise_map[stress]
    data.append([
        sleep,
        screen,
        stress,
        plan["Exercise"],
        plan["Duration"],
        plan["Description"],
    ])


In [None]:
# Step 5: Create DataFrame
# Column order matches the order of the values in each generated record.
columns = ["Sleep_Hours", "Screen_Time_Hours", "Stress_Level",
           "Recommended_Exercise", "Exercise_Duration", "Exercise_Description"]

# NOTE: rebinds `df` to the exercise dataset.
df = pd.DataFrame(data, columns=columns)


In [None]:
# Step 6: Save (optional)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv("stress_exercise_dataset.csv", index=False)

# Display few rows (last expression of the cell, rendered by the notebook)
df.head()


In [None]:
# Step 2: Load the Dataset
df = pd.read_csv("stress_exercise_dataset.csv")

# Keep only the two features and the three targets; of the six columns
# written above, this drops Stress_Level.
df = df[["Sleep_Hours", "Screen_Time_Hours", "Recommended_Exercise", "Exercise_Duration", "Exercise_Description"]]

df.to_csv("stress_exercise_dataset_cleaned.csv", index=False)
print("Cleaned dataset saved successfully as 'stress_exercise_dataset_cleaned.csv'.")



In [None]:
# Step 3: Define Features
X = df[["Sleep_Hours", "Screen_Time_Hours"]]

# Separate targets: one model is trained per target column.
# NOTE: in the generated data all three targets are determined by the same
# stress level, so they always change together.
y_exercise = df["Recommended_Exercise"]
y_duration = df["Exercise_Duration"]
y_description = df["Exercise_Description"]

In [None]:
# Step 4: Preprocessing Pipeline
# (Sleep and Screen Time are numeric, no encoding needed)
# No transformers are listed, so with remainder='passthrough' every input
# column is forwarded unchanged.
preprocessor = ColumnTransformer(transformers=[], remainder='passthrough')

In [None]:
# Step 5: Pipelines for each target
# Pipeline.fit() fits its steps in place, so handing the same `preprocessor`
# object to all three pipelines would make them share (and refit) a single
# transformer. clone() gives every pipeline its own unfitted copy with
# identical parameters.
pipeline_exercise = Pipeline([
    ('preprocessing', clone(preprocessor)),
    ('classifier', RandomForestClassifier(random_state=42))
])

pipeline_duration = Pipeline([
    ('preprocessing', clone(preprocessor)),
    ('classifier', RandomForestClassifier(random_state=42))
])

pipeline_description = Pipeline([
    ('preprocessing', clone(preprocessor)),
    ('classifier', RandomForestClassifier(random_state=42))
])

In [None]:
# Step 6: Train-Test Split
# One call splits the features and all three targets with the same shuffled
# row indices (80 % train / 20 % test, random_state=42).
(
    X_train, X_test,
    y1_train, y1_test,
    y2_train, y2_test,
    y3_train, y3_test,
) = train_test_split(
    X, y_exercise, y_duration, y_description,
    test_size=0.2, random_state=42,
)

In [None]:
# Step 7: Train All Models
# Same training features for every model; only the target differs.
pipeline_exercise.fit(X_train, y1_train)
pipeline_duration.fit(X_train, y2_train)
pipeline_description.fit(X_train, y3_train)


In [None]:
# Step 8: Accuracy Scores
# Suffix 1 = recommended exercise, 2 = duration, 3 = description.
train_acc1 = pipeline_exercise.score(X_train, y1_train)
test_acc1 = pipeline_exercise.score(X_test, y1_test)

train_acc2 = pipeline_duration.score(X_train, y2_train)
test_acc2 = pipeline_duration.score(X_test, y2_test)

train_acc3 = pipeline_description.score(X_train, y3_train)
test_acc3 = pipeline_description.score(X_test, y3_test)

# Reported as percentages rounded to two decimals.
print("Recommended Exercise - Train Accuracy:", round(train_acc1 * 100, 2), "% | Test Accuracy:", round(test_acc1 * 100, 2), "%")
print("Exercise Duration - Train Accuracy:", round(train_acc2 * 100, 2), "% | Test Accuracy:", round(test_acc2 * 100, 2), "%")
print("Exercise Description - Train Accuracy:", round(train_acc3 * 100, 2), "% | Test Accuracy:", round(test_acc3 * 100, 2), "%")


In [None]:
# Bare expression: shows the column labels of the cleaned exercise dataset.
df.columns

In [None]:
# Example input
sleep_hours = 5.0
screen_time = 7.5

# The pipelines were fitted on a DataFrame with named columns, so the input
# is built the same way. A bare nested list makes scikit-learn warn that X
# has no valid feature names, and gives no protection against swapping the
# two values.
user_input = pd.DataFrame(
    [[sleep_hours, screen_time]],
    columns=["Sleep_Hours", "Screen_Time_Hours"],
)

# Predict each output
pred_exercise = pipeline_exercise.predict(user_input)[0]
pred_duration = pipeline_duration.predict(user_input)[0]
pred_description = pipeline_description.predict(user_input)[0]

print("\n📌 Prediction Result")
print("Recommended Exercise:", pred_exercise)
print("Duration (minutes):", pred_duration)
print("Description:", pred_description)


In [None]:

# Save each model to a separate joblib file (one fitted pipeline per file)
joblib.dump(pipeline_exercise, 'exercise_recommendation_model.joblib')
joblib.dump(pipeline_duration, 'exercise_duration_model.joblib')
joblib.dump(pipeline_description, 'exercise_description_model.joblib')

print("All models saved as joblib files successfully!")


FOODS TO EAT AND FOODS TO AVOID RECOMMENDATION

In [None]:
# Step 1: Define Parameters
# NOTE: rebinds `skin_concerns` to a 10-item list for this section.
skin_concerns = [
    "Acne", "Dryness", "Oily Skin", "Wrinkles", "Dark Circles",
    "Pigmentation", "Sunburn", "Blackheads", "Eczema", "Redness"
]

diet_types = ["veg", "nonveg", "vegan", "combination"]

# Both tables are keyed by concern; the food lists below are written in the
# same order as `skin_concerns`, so zip() pairs each concern with its food.
foods_to_eat_mapping = dict(zip(skin_concerns, [
    "Carrot", "Avocado", "Cucumber", "Blueberries", "Tomato",
    "Papaya", "Watermelon", "Spinach", "Pumpkin", "Sweet Potato",
]))

foods_to_avoid_mapping = dict(zip(skin_concerns, [
    "Chocolates", "Chips", "Fried Food", "Sugar", "Caffeine",
    "Soft Drinks", "Red Meat", "Creamy Food", "Spicy Food", "Cheese",
]))

In [None]:
# Step 2: Generate 15,000 Records
random.seed(42)  # fixed seed: the sampled rows are reproducible

# For every record the concern is drawn first and the diet type second.
# NOTE: both food columns are looked up from the concern alone; the diet
# type is stored in the row but does not influence either food.
data = [
    [
        picked_concern,
        picked_diet,
        foods_to_eat_mapping[picked_concern],
        foods_to_avoid_mapping[picked_concern],
    ]
    for picked_concern, picked_diet in (
        (random.choice(skin_concerns), random.choice(diet_types))
        for _ in range(15000)
    )
]

# NOTE: rebinds `df` to the food dataset.
df = pd.DataFrame(data, columns=["Skin_Concern", "Diet_Type", "Foods_to_Eat", "Foods_to_Avoid"])


In [None]:
# Step 3: Features & Targets
# Two categorical inputs and two separate targets (one model per target).
X = df[["Skin_Concern", "Diet_Type"]]
y_eat = df["Foods_to_Eat"]
y_avoid = df["Foods_to_Avoid"]

In [None]:
# Step 4: Preprocessing & Pipelines
# Template transformer: both categorical inputs are ordinal-encoded.
preprocessor = ColumnTransformer([
    ('cat', OrdinalEncoder(), ["Skin_Concern", "Diet_Type"])
])

# Pipeline.fit() fits its steps in place, so passing the same `preprocessor`
# object to both pipelines would make them share one encoder: fitting the
# second pipeline would silently refit the encoder used by the first.
# clone() gives each pipeline its own unfitted copy with identical settings.
pipeline_eat = Pipeline([
    ('pre', clone(preprocessor)),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
])

pipeline_avoid = Pipeline([
    ('pre', clone(preprocessor)),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
])

In [None]:
# Step 5: Split & Train
# Splitting X and both targets in one call keeps their rows aligned in the
# train and test parts (80 % / 20 %, fixed shuffle).
X_train, X_test, y_eat_train, y_eat_test, y_avoid_train, y_avoid_test = train_test_split(
    X, y_eat, y_avoid, test_size=0.2, random_state=42
)

pipeline_eat.fit(X_train, y_eat_train)
pipeline_avoid.fit(X_train, y_avoid_train)


In [None]:
# Step 6: Accuracy
# Train and test accuracy for each model, as percentages rounded to two decimals.
print("Foods to Eat - Train:", round(pipeline_eat.score(X_train, y_eat_train)*100, 2), "% | Test:",
      round(pipeline_eat.score(X_test, y_eat_test)*100, 2), "%")
print("Foods to Avoid - Train:", round(pipeline_avoid.score(X_train, y_avoid_train)*100, 2), "% | Test:",
      round(pipeline_avoid.score(X_test, y_avoid_test)*100, 2), "%")


In [None]:
# Predict for new input
# One-row DataFrame with the same column names used for training.
new_input = pd.DataFrame({
    "Skin_Concern": ["Pigmentation"],
    "Diet_Type": ["nonveg"]
})

# predict() returns one label per input row; [0] takes the single result.
eat_pred = pipeline_eat.predict(new_input)[0]
avoid_pred = pipeline_avoid.predict(new_input)[0]

print("\n🎯 Recommendation:")
print("✅ Foods to Eat:", eat_pred)
print("❌ Foods to Avoid:", avoid_pred)


In [None]:


# Save both models to joblib files (one fitted pipeline per file)
joblib.dump(pipeline_eat, 'foods_to_eat_model.joblib')
joblib.dump(pipeline_avoid, 'foods_to_avoid_model.joblib')

print("Models saved as 'foods_to_eat_model.joblib' and 'foods_to_avoid_model.joblib'")


In [None]:

# Create a dictionary to store all models.
# The name `pipeline` was rebound by each of the first three sections
# (product -> home remedy -> stress level), so at this point it refers to the
# stress-level model only. Using it for all three keys would store the same
# model three times under misleading names. The product, home-remedy and
# stress models are therefore reloaded from the files the earlier cells
# saved; the remaining models still live in their own variables.
all_models = {
    "recommended_product_model": joblib.load("recommended_product_model.pkl"),
    "home_remedy_model": joblib.load("home_remedy_model.joblib"),
    "stress_level_model": joblib.load("stress_level_model.pkl"),
    "exercise_recommendation_model": pipeline_exercise,
    "exercise_duration_model": pipeline_duration,
    "exercise_description_model": pipeline_description,
    "foods_to_eat_model": pipeline_eat,
    "foods_to_avoid_model": pipeline_avoid
}

# Save all models in a single .joblib file
joblib.dump(all_models, "glowguide_all_models.joblib")
print("All models saved together as 'glowguide_all_models.joblib'")


In [None]:
# Round-trip check: read the bundle back and list the model names it holds.
# Only load joblib/pickle files you created yourself; loading executes
# code embedded in the file.
models = joblib.load('glowguide_all_models.joblib')
print("Model keys:", list(models))
