In [15]:
# ==========================
# 🔮 DOSHA PREDICTION SCRIPT (Feature Names + Auto-Fix)
# ==========================

import pickle
import pandas as pd
from difflib import get_close_matches

# --- Load model, encoders, scaler, and feature names ---
# SECURITY: unpickling executes arbitrary code embedded in the file, so only
# load artifacts that were produced by a trusted training run.
model_filename = "random_forest_model.pkl"   # Change if needed


def _load_pickle(path):
    """Return the object pickled at *path*, closing the file handle afterwards."""
    with open(path, "rb") as fh:
        return pickle.load(fh)


model = _load_pickle(model_filename)
# Used below as a mapping {column name: fitted encoder}.
label_encoders = _load_pickle("label_encoders.pkl")
scaler = _load_pickle("scaler.pkl")
# Column names the input frame is aligned to before scaling.
trained_features = _load_pickle("feature_names.pkl")

# --- Example input data ---
# A single questionnaire response, keyed by feature name. Every answer is a
# category label given as text; replace the values to score another person.
input_data = {
    "Body Frame": "Thin and Lean",
    "Type of Hair": "Normal",
    "Color of Hair": "Brown",
    "Skin": "Moist,Greasy",
    "Complexion": "Dark",
    "Body Weight": "Overweight",
    "Nails": "Blackish",
    "Size and Color of the Teeth": "Irregular,Blackish",
    "Pace of Performing Work": "Medium",
    "Mental Activity": "Aggressive",
    "Memory": "Long Term",
    "Sleep Pattern": "Sleepy",
    "Weather Conditions": "Dislike Heat",
    "Reaction under Adverse Situations": "Calm",
    "Mood": "Changes Quickly",
    "Eating Habit": "Improper Chewing",
    "Hunger": "Sudden and Sharp",
    "Body Temperature": "Normal",
    "Joints": "Heavy",
    "Nature": "Jealous,Fearful",
    "Body Energy": "Medium",
    "Quality of Voice": "Deep",
    "Body Odor": "Strong",
}

# Wrap the response in a list so it becomes one row with one column per answer.
input_df = pd.DataFrame(data=[input_data])

# --- Auto-match similar columns if names differ ---
# Trained feature names that are absent from the input are paired, by string
# similarity, with input columns the model does not know, and the input frame
# is renamed accordingly.
input_cols = input_df.columns.tolist()
missing_from_input = [f for f in trained_features if f not in input_cols]
unseen_in_input = [f for f in input_cols if f not in trained_features]

if missing_from_input or unseen_in_input:
    print("\n⚠️ Column name mismatch detected. Attempting automatic correction...")
    rename_map = {}
    # Each input column may be claimed only once. Without shrinking the pool,
    # two missing features could match the same column: the later entry would
    # overwrite the earlier one in rename_map while both were reported as
    # renamed.
    candidates = list(unseen_in_input)
    for missing in missing_from_input:
        # n=1 keeps only the best candidate; cutoff=0.6 is the minimum
        # similarity ratio accepted as "the same column, spelled differently".
        match = get_close_matches(missing, candidates, n=1, cutoff=0.6)
        if match:
            rename_map[match[0]] = missing
            candidates.remove(match[0])
            print(f"🔄 Renamed '{match[0]}' → '{missing}'")
        else:
            # Surface the gap instead of skipping it silently.
            print(f"⚠️ No close match found for '{missing}'")

    input_df.rename(columns=rename_map, inplace=True)

# --- Encode categorical columns ---
# Replace each text answer with the integer code from the encoder saved for
# that column. Columns without a saved encoder are left untouched.
for col in input_df.columns:
    if col not in label_encoders:
        continue
    encoder = label_encoders[col]
    try:
        input_df[col] = encoder.transform(input_df[col])
    except ValueError as err:
        # NOTE(review): presumably these are scikit-learn LabelEncoders, whose
        # transform() raises ValueError for a label not seen during fitting.
        # Re-raise with the column name and accepted values so the offending
        # answer can be found.
        known = getattr(encoder, "classes_", None)
        known_text = "" if known is None else f" Known values: {list(known)!r}."
        raise ValueError(
            f"Could not encode column '{col}' with value(s) "
            f"{input_df[col].tolist()!r}.{known_text}"
        ) from err

# --- Ensure column order matches training ---
# reindex() fills any requested column that the frame lacks with NaN, so a
# feature that is still missing here would otherwise flow into the scaler and
# model unnoticed. Refuse to continue instead of predicting from absent data.
absent_features = [f for f in trained_features if f not in input_df.columns]
if absent_features:
    raise ValueError(
        f"Input is missing features required by the model: {absent_features}"
    )
# Also drops any extra input columns that are not trained features.
input_df = input_df.reindex(columns=trained_features)

# --- Scale features ---
input_scaled = scaler.transform(input_df)

# --- Predict ---
# predict() yields one result per input row; this script builds a single-row
# input, so only the first result is taken.
prediction = model.predict(input_scaled)[0]

# --- Decode Dosha label ---
# When an encoder was saved under the "Dosha" key, map the predicted code back
# to its original label; otherwise report the model's output unchanged.
if "Dosha" in label_encoders:
    predicted_dosha = label_encoders["Dosha"].inverse_transform([prediction])[0]
else:
    predicted_dosha = prediction

# The leading glyph was mis-encoded (UTF-8 bytes decoded as Mac Roman); restored.
print(f"\n🔹 Predicted Dosha: {predicted_dosha}")



🔹 Predicted Dosha: Kapha
