# Diabetes Risk Prediction with Explainable AI
This notebook demonstrates an end-to-end pipeline for diabetes risk classification using:
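- An MLP (multilayer perceptron) neural network classifier
- SHAP (SHapley Additive exPlanations) and LIME (Local Interpretable Model-agnostic Explanations) for model explainability
- DiCE (Diverse Counterfactual Explanations) for counterfactual generation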

Datasets: BRFSS 2015 Health Indicators, Diabetes_2


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load and Preprocess Data

In [None]:
from src.preprocess import load_and_merge_data, prepare_data

# Build the raw frame with load_and_merge_data(), then unpack the six values
# returned by prepare_data(); every name below is consumed by later cells.
df = load_and_merge_data()
(
    X_train_scaled,
    X_test_scaled,
    y_train_bal,
    y_test,
    feature_names,
    X_test_df,
) = prepare_data(df)


## Train MLP Classifier

In [None]:
from src.model import train_mlp, evaluate_model

# Fit the classifier on the training arrays, then evaluate it on the test arrays.
clf = train_mlp(X_train_scaled, y_train_bal)
results = evaluate_model(clf, X_test_scaled, y_test)

# `results` is iterated as a mapping: each entry is printed under its own
# label on a new line, followed by a blank separator line.
print('Model Evaluation Results:')
for metric_name, metric_value in results.items():
    print(f"{metric_name}:\n{metric_value}\n")

## SHAP Explainability

In [None]:
from src.explainability import run_shap

# Run the SHAP analysis for the fitted classifier. The helper is handed the
# train/test arrays, the test DataFrame and the feature names.
run_shap(
    clf,
    X_train_scaled,
    X_test_scaled,
    X_test_df,
    feature_names,
)

## LIME Explainability

In [None]:
from src.explainability import run_lime

# `pd` is provided by the notebook's import cell, so it is not re-imported here.

# Replace the existing index with a fresh 0..n-1 RangeIndex so test rows can be
# addressed by position.
X_test_df_only = X_test_df.reset_index(drop=True)

# NOTE(review): run_shap is given both X_test_scaled and X_test_df, whereas
# run_lime only receives X_test_df. Confirm that X_test_df is in the same
# feature space as X_train_scaled (the data the classifier was fitted on).
run_lime(
    clf,
    pd.DataFrame(X_train_scaled, columns=feature_names),  # training data with named columns
    X_test_df_only,
    feature_names,
)

## Counterfactuals with DiCE

In [None]:
from src.explainability import run_dice

# Wrap X_train_scaled in a DataFrame so its columns carry the feature names,
# then pass it to run_dice together with the training labels.
cf_df = run_dice(
    clf,
    pd.DataFrame(data=X_train_scaled, columns=feature_names),
    y_train_bal,
    feature_names,
)
cf_df  # bare last expression: rendered as the cell's output