In [9]:
import pandas as pd
import numpy as np
import streamlit as st
import joblib
import shap
import matplotlib.pyplot as plt
import warnings
from pathlib import Path
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.inspection import permutation_importance

warnings.filterwarnings("ignore")

# ============================================================
# üìÅ PATH CONFIG
# ============================================================
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / "processed_data"
MODELS_DIR = BASE_DIR / "models"
DATA_DIR.mkdir(exist_ok=True)
MODELS_DIR.mkdir(exist_ok=True)

DATA_FILE = DATA_DIR / "personalized_nsga2_results.csv"
MODEL_FILE = MODELS_DIR / "personalized_rf_model.joblib"
SCALER_FILE = MODELS_DIR / "personalized_scaler.joblib"

# ============================================================
# üß© Streamlit Setup
# ============================================================
st.set_page_config(page_title="ü•ó Personalized NSGA-II Optimizer + XAI", layout="wide")
st.title("ü•ó Personalized Recipe Optimization Dashboard")
st.markdown("Uses **NSGA-II** + **Explainable AI (XAI)** to generate personalized, sustainable meal recommendations.")

# ============================================================
# üì• Load Dataset (with fallback)
# ============================================================
@st.cache_data
def load_data():
    if not DATA_FILE.exists():
        st.error(f"‚ùå File not found: {DATA_FILE}")
        return pd.DataFrame()

    df = pd.read_csv(DATA_FILE)
    df = df.loc[:, ~df.columns.duplicated()].copy()

    # Ensure numeric columns exist
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if len(num_cols) == 0:
        st.error("‚ùå No numeric features found in dataset!")
        return pd.DataFrame()

    # Create preference score if missing
    if "_preference_score" not in df.columns:
        df["_preference_score"] = df[num_cols].mean(axis=1).rank(pct=True)
        st.warning("‚ö†Ô∏è '_preference_score' not found. Created placeholder values.")

    return df

df = load_data()
if df.empty:
    st.stop()

st.success(f"‚úÖ Loaded {df.shape[0]} optimized recipes")

# ============================================================
# ‚öôÔ∏è Load / Train Model and Scaler
# ============================================================
@st.cache_resource
def get_model_and_scaler(df):
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

    if MODEL_FILE.exists() and SCALER_FILE.exists():
        model = joblib.load(MODEL_FILE)
        scaler = joblib.load(SCALER_FILE)
        return model, scaler

    st.warning("‚ö†Ô∏è No saved model found ‚Äî training new Random Forest model...")

    X = df[numeric_cols].fillna(df[numeric_cols].mean())
    y = df["_preference_score"]

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    model = RandomForestRegressor(n_estimators=200, random_state=42)
    model.fit(X_scaled, y)

    joblib.dump(model, MODEL_FILE)
    joblib.dump(scaler, SCALER_FILE)
    st.success("‚úÖ Model trained and saved successfully.")

    return model, scaler

model, scaler = get_model_and_scaler(df)

# ============================================================
# üß† Feature Importance (XAI)
# ============================================================
@st.cache_data
def compute_feature_importance(model, df):
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    X_scaled = MinMaxScaler().fit_transform(df[numeric_cols])
    result = permutation_importance(model, X_scaled, df["_preference_score"], n_repeats=10, random_state=42)
    importance_df = pd.DataFrame({
        "Feature": numeric_cols,
        "Importance": result.importances_mean
    }).sort_values("Importance", ascending=False)
    return importance_df

importance_df = compute_feature_importance(model, df)
st.markdown("### üîç Model Feature Importance")
st.bar_chart(importance_df.set_index("Feature")["Importance"])

# ============================================================
# üéØ User Preferences
# ============================================================
st.sidebar.header("üßç Personalized Preferences")
energy_pref = st.sidebar.slider("Energy (kcal)", 1200, 3000, 1800)
protein_pref = st.sidebar.slider("Protein (g)", 10, 200, 60)
carbs_pref = st.sidebar.slider("Carbs (g)", 20, 300, 150)
fat_pref = st.sidebar.slider("Fat (g)", 10, 150, 50)
price_pref = st.sidebar.slider("Price ($)", 1.0, 20.0, 8.0)
emission_pref = st.sidebar.slider("Emission (kgCO‚ÇÇe)", 0.1, 10.0, 3.0)

# ============================================================
# üéØ Personalized Scoring
# ============================================================
def personalized_score(row):
    score = (
        0.25 * (1 - abs(row.get("energy_kcal_mean", 0) - energy_pref) / max(energy_pref, 1))
        + 0.25 * (1 - abs(row.get("protein_g_mean", 0) - protein_pref) / max(protein_pref, 1))
        + 0.15 * (1 - abs(row.get("fat_g_mean", 0) - fat_pref) / max(fat_pref, 1))
        + 0.15 * (1 - abs(row.get("carbs_g_mean", 0) - carbs_pref) / max(carbs_pref, 1))
        + 0.1  * (1 - abs(row.get("price_mean", 0) - price_pref) / max(price_pref, 1))
        + 0.1  * (1 - abs(row.get("Total_emissions_mean", 0) - emission_pref) / max(emission_pref, 1))
    )
    return score

df["_personalized_score"] = df.apply(personalized_score, axis=1)
filtered = df.sort_values("_personalized_score", ascending=False).head(10)

# ============================================================
# üß© Display Top Recipe & SHAP Explanation
# ============================================================
if not filtered.empty:
    st.subheader("üçΩÔ∏è Top Personalized Recipe")

    sel_row = filtered.iloc[[0]]
    features = [c for c in df.select_dtypes(include=[np.number]).columns if "mean" in c]
    sel_row = sel_row.loc[:, ~sel_row.columns.duplicated()].copy()

    cols_to_show = [c for c in ["price_mean", "Total_emissions_mean", "_preference_score", "_personalized_score"] if c in sel_row.columns]
    final_cols = features + cols_to_show

    try:
        st.table(pd.DataFrame(sel_row[final_cols]).T)
    except Exception as e:
        st.warning(f"‚ö†Ô∏è Could not render table properly: {e}")
        st.dataframe(sel_row.T)

    # Natural explanation
    st.markdown("**üß† Natural Language Explanation:**")
    st.markdown(
        f"This recipe offers {sel_row['protein_g_mean'].values[0]:.1f}g protein, "
        f"{sel_row['carbs_g_mean'].values[0]:.1f}g carbs, and a satisfaction score of "
        f"{sel_row['_personalized_score'].values[0]:.2f}. "
        "It fits well with your current dietary preferences."
    )

    # ============================================================
    # üí¨ SHAP Explanation
    # ============================================================
    st.markdown("### ü©ª SHAP Explainability (Feature Impact)")
    try:
        X = df[features].fillna(df[features].mean())
        X_scaled = scaler.transform(X)
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_scaled)
        fig, ax = plt.subplots(figsize=(10, 4))
        shap.summary_plot(shap_values, X, plot_type="bar", show=False)
        st.pyplot(fig)
    except Exception as e:
        st.warning(f"‚ö†Ô∏è SHAP visualization failed: {e}")

else:
    st.warning("‚ö†Ô∏è No recipes matched your preferences. Adjust the sliders and retry.")


2025-11-07 19:21:15.903 No runtime found, using MemoryCacheStorageManager
2025-11-07 19:21:15.905 No runtime found, using MemoryCacheStorageManager


KeyError: '_preference_score'