# Interpretabilità del modello con SHAP

In [None]:
# If you are running this notebook on Colab, install SHAP first:
# !pip install shap


In [None]:
import pandas as pd
import shap
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Load the dataset and select as features every column whose name
# contains the substring "Symptom_".
df = pd.read_csv("dataset_weighted.csv")
symptom_cols = list(filter(lambda name: "Symptom_" in name, df.columns))

# Feature matrix and raw target labels.
X = df[symptom_cols]
y = df["Disease"]

# Encode the disease labels as integers; `le` is kept so the integer
# codes can be mapped back to the original labels later.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Train a random forest with 100 trees on the training split.
# The fixed seed keeps the fitted model reproducible; fit() returns the
# estimator itself, so `model` is the trained classifier.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)


In [None]:
# Wrap the trained forest in SHAP's tree-model explainer, then compute
# SHAP values for every row of the held-out test set.
explainer = shap.TreeExplainer(model=model)
shap_values = explainer.shap_values(X=X_test)


In [None]:
# Global SHAP summary plot (bar chart of feature importance).
#
# For multi-class models, explainer.shap_values() returns a list of
# per-class 2-D arrays in older SHAP releases and a single 3-D array
# (samples, features, classes) from SHAP 0.45 on. summary_plot() takes its
# multi-class bar path only for the list form, so a 3-D array is split
# into one 2-D array per class first. `shap_values` itself is not modified.
if not isinstance(shap_values, list) and getattr(shap_values, "ndim", 0) == 3:
    per_class_values = [
        shap_values[:, :, class_idx]
        for class_idx in range(shap_values.shape[2])
    ]
else:
    per_class_values = shap_values

shap.summary_plot(per_class_values, X_test, plot_type="bar")
