In [4]:
import shap
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier

# Load breast cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data)
y = data.target

# Fit random forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Explain random forest predictions using SHAP values
explainer = shap.Explainer(model)
X_train = X.sample(n=500, random_state=42)
shap_values = explainer(X_train)

# Get feature names
feature_names = X_train.columns

# Create dataframe of SHAP values
shap_df = pd.DataFrame(shap_values, columns=X_train.columns.tolist())

# Add feature names to dataframe
shap_df['feature_names'] = feature_names

# Select random observations from X_train and create dataframe
obs = X_train.sample(n=5, random_state=42)
obs_df = pd.DataFrame(obs.values, columns=feature_names)

# Add predicted probabilities to dataframe
proba = model.predict_proba(obs)
proba_df = pd.DataFrame(proba, columns=['Class 0', 'Class 1'])

# Identify top 5 features for each class
shap_abs = np.abs(shap_values)
shap_mean = np.mean(shap_abs, axis=0)
sort_ind = np.argsort(shap_mean)
top_inds_1 = sort_ind[-5:]
top_inds_0 = sort_ind[:5]

# Create summary plot of SHAP values
shap.summary_plot(shap_values, obs, feature_names=feature_names)



ValueError: DataFrame constructor not properly called!