In [8]:
# --- 1. Setup ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
import joblib
from IPython.display import display, Image

In [16]:
# Load dataset
df = pd.read_csv("Fashion Dataset.csv")
print(df.head())

   Unnamed: 0        p_id                                               name  \
0           0  17048614.0  Khushal K Women Black Ethnic Motifs Printed Ku...   
1           1  16524740.0  InWeave Women Orange Solid Kurta with Palazzos...   
2           2  16331376.0  Anubhutee Women Navy Blue Ethnic Motifs Embroi...   
3           3  14709966.0  Nayo Women Red Floral Printed Kurta With Trous...   
4           4  11056154.0   AHIKA Women Black & Green Printed Straight Kurta   

    price     colour      brand  \
0  5099.0      Black  Khushal K   
1  5899.0     Orange    InWeave   
2  4899.0  Navy Blue  Anubhutee   
3  3699.0        Red       Nayo   
4  1350.0      Black      AHIKA   

                                                 img  ratingCount  avg_rating  \
0  http://assets.myntassets.com/assets/images/170...       4522.0    4.418399   
1  http://assets.myntassets.com/assets/images/165...       1081.0    4.119334   
2  http://assets.myntassets.com/assets/images/163...       1752.0

In [17]:
# --- 2. Categorize items ---
def categorize_item(name, attributes):
    text = f"{name} {attributes}".lower()
    if any(word in text for word in ["shirt", "t-shirt", "top", "blouse", "kurta"]):
        return "top"
    elif any(word in text for word in ["jeans", "trouser", "shorts", "pants", "skirt", "palazzo"]):
        return "bottom"
    elif any(word in text for word in ["shoe", "sneaker", "boot", "slipper", "heel", "sandal"]):
        return "shoes"
    else:
        return "other"

df["category"] = df.apply(lambda row: categorize_item(row["name"], row["p_attributes"]), axis=1)

# Keep only useful categories
df = df[df["category"].isin(["top", "bottom", "shoes"])]

In [11]:
# --- 3. Feature Engineering ---
X = df[["colour", "brand"]].fillna("Unknown")
y = df["category"]

# One-Hot Encode categorical vars
encoder = OneHotEncoder(handle_unknown="ignore")
X_encoded = encoder.fit_transform(X)

# --- 4. Train/Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# --- 5. Train Random Forest ---
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# --- 6. Evaluate ---
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      bottom       0.76      0.62      0.68       742
       shoes       0.00      0.00      0.00         5
         top       0.80      0.89      0.84      1295

    accuracy                           0.79      2042
   macro avg       0.52      0.50      0.51      2042
weighted avg       0.78      0.79      0.78      2042



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# --- 7. Save Model ---
joblib.dump((rf, encoder), "outfit_rf_model.joblib")
print("✅ Random Forest model saved as outfit_rf_model.joblib")

✅ Random Forest model saved as outfit_rf_model.joblib


In [13]:
# --- 8. Use Model for Outfit Suggestions ---
def suggest_outfit_with_ml(preferred_colour=None):
    rf, encoder = joblib.load("outfit_rf_model.joblib")

    outfit = {}
    for cat in ["top", "bottom", "shoes"]:
        candidates = df[df["category"] == cat]
        if preferred_colour:
            candidates = candidates[candidates["colour"].str.contains(preferred_colour, case=False, na=False)]
        if candidates.empty:
            candidates = df[df["category"] == cat]
        sample = candidates.sample(1)
        
        # Predict category (just for demonstration)
        X_new = encoder.transform(sample[["colour", "brand"]].fillna("Unknown"))
        predicted_cat = rf.predict(X_new)[0]
        
        item = sample.iloc[0].to_dict()
        item["predicted_category"] = predicted_cat
        outfit[cat] = item
    return outfit

In [14]:
# --- 9. Display Outfit with Images ---
def show_outfit(outfit):
    for cat, item in outfit.items():
        print(f"\n{cat.upper()} (Predicted: {item['predicted_category']}):")
        print(f"Name: {item['name']}")
        print(f"Brand: {item['brand']}")
        print(f"Colour: {item['colour']}")
        print(f"Price: {item['price']}")
        print(f"Rating: {item['avg_rating']} ({item['ratingCount']} reviews)")
        display(Image(url=item['img'], width=200))

# Example usage
outfit = suggest_outfit_with_ml(preferred_colour="Black")
show_outfit(outfit)


TOP (Predicted: top):
Name: Bitiya by Bhama Black Ready to Wear Lehenga with Blouse
Brand: Bitiya by Bhama
Colour: Black
Price: 3749.0
Rating: 4.166666666666667 (30.0 reviews)



BOTTOM (Predicted: bottom):
Name: Miss Chase Women Black Skinny Fit High-Rise Clean Look Jeans
Brand: Miss Chase
Colour: Black
Price: 1899.0
Rating: 4.430769230769231 (65.0 reviews)



SHOES (Predicted: top):
Name: Ahalyaa Women Stunning Black Printed Desi Girl Jumpsuit
Brand: Ahalyaa
Colour: Black
Price: 6100.0
Rating: nan (nan reviews)
