In [1]:
# !pip install interpret

In [2]:
# !pip install explainerdashboard

In [6]:
import gc
import types
from joblib import dump, load

import pandas as pd
import numpy as np

from explainerdashboard import ClassifierExplainer, ExplainerDashboard, ExplainerHub

from sklearn.metrics import accuracy_score, balanced_accuracy_score,  f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay

In [7]:
gc.collect()

0

In [8]:
from glob import glob

models = glob("*.joblib")

In [9]:
def create_explainer_dashboard(model, X, y, title, name, desc):
  explainer = ClassifierExplainer(model, X, y)
  return ExplainerDashboard(explainer, title=title, name=name, desc=desc)

In [10]:
def generate_dfs_by_amusements(df_feat):
  # Merged Amusement
  df_feat_merged_amusement = df_feat.copy()
  df_feat_merged_amusement["label"] = df_feat_merged_amusement["label"].replace([1], 0)
  df_feat_merged_amusement["label"] = df_feat_merged_amusement["label"].replace([2], 1)

  # Removed Amusement
  df_feat_no_amusement = df_feat[df_feat["label"] != 0]
  df_feat_no_amusement["label"] = df_feat_no_amusement["label"].replace([1], 0) 
  df_feat_no_amusement["label"] = df_feat_no_amusement["label"].replace([2], 1)

  return df_feat_merged_amusement, df_feat_no_amusement

In [11]:
def get_train_test_data(df_feat):
  y = df_feat.pop('label')
  X = df_feat
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) 
  return {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}

In [12]:
df = pd.read_csv("combined_subjects.csv")
features_eda = ["EDA_smna_mean", "EDA_phasic_min", "EDA_tonic_mean", "EDA_tonic_min", "EDA_tonic_max", "label"]
features_top_5 = ["net_acc_std", "net_acc_max", "EDA_tonic_mean", "EDA_tonic_min", "EDA_tonic_max", "label"]
features_top_10 = [
    "net_acc_std",
    "net_acc_max",
    "EDA_tonic_mean",
    "EDA_tonic_min",
    "EDA_tonic_max",
    "EDA_smna_mean",
    "EDA_smna_std",
    "EDA_smna_min",
    "EDA_smna_max",
    "EDA_phasic_min",
    "label"
]
df_feat_eda = df[features_eda]
df_feat_top_5 = df[features_top_5]
df_feat_top_10 = df[features_top_10]
datasets = [[df_feat_eda, "eda"], [df_feat_top_5, "top_5"], [df_feat_top_10, "top_10"]]

In [13]:
for df in datasets:
  df_feat_merged_amusement, df_feat_no_amusement = generate_dfs_by_amusements(df[0])
  df_feat_merged_amusement.to_csv(f"data_merged_amusement_{df[1]}.csv", index=False)
  df_feat_no_amusement.to_csv(f"data_no_amusement_{df[1]}.csv", index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
dataframes = glob("data_*.csv")

In [15]:
merged_amu_eda_dict_test_train = {}
merged_amu_top_5_dict_test_train = {}
merged_amu_top_10_dict_test_train = {}

no_amu_eda_dict_test_train = {}
no_amu_top_5_dict_test_train = {}
no_amu_top_10_dict_test_train = {}

for df in dataframes:
  if "eda" in df:
    if "merged_amu" in df:
      merged_amu_eda_dict_test_train = get_train_test_data(pd.read_csv(df))
    elif "no_amu" in df:
      no_amu_eda_dict_test_train = get_train_test_data(pd.read_csv(df))
  elif "top_5" in df:
    if "merged_amu" in df:
      merged_amu_top_5_dict_test_train = get_train_test_data(pd.read_csv(df))
    elif "no_amu" in df:
      no_amu_top_5_dict_test_train = get_train_test_data(pd.read_csv(df))
  elif "top_10" in df:
    if "merged_amu" in df:
      merged_amu_top_10_dict_test_train = get_train_test_data(pd.read_csv(df))
    elif "no_amu" in df:
      no_amu_top_10_dict_test_train = get_train_test_data(pd.read_csv(df))

In [16]:
merged_amu_eda_dict_test_train["X_test"]

Unnamed: 0,EDA_smna_mean,EDA_phasic_min,EDA_tonic_mean,EDA_tonic_min,EDA_tonic_max
1939,0.005784,2.500509e-05,-0.459484,-0.482311,-0.442313
29,0.003803,3.480511e-06,-0.624620,-0.675497,-0.572050
210,0.016297,3.070307e-04,-0.437586,-0.504524,-0.298327
952,0.003520,4.497885e-05,-0.665862,-0.700151,-0.653427
583,0.003179,2.608313e-07,-1.127714,-1.136460,-1.118911
...,...,...,...,...,...
1067,0.039253,1.199074e-02,1.837834,1.801206,1.868372
743,0.019420,1.365990e-03,-0.849890,-0.869320,-0.828785
940,0.005990,1.329783e-04,-0.716321,-0.736411,-0.699557
529,0.007149,1.618367e-04,-0.177635,-0.233177,-0.114506


In [17]:
dashboards = []
for model_path in models:
  # add code to find models that do not have `.predict_proba()`
  X = None
  y = None
  model = load(model_path)

  if "svc" in model_path:

    def predict_proba(self, X):
        pred = self.predict(X)
        return np.array([1-pred, pred]).T
        
    model.predict_proba = types.MethodType(predict_proba, model)

  if "eda" in model_path:
    if "merged_amu" in model_path:
      X = merged_amu_eda_dict_test_train["X_test"]
      y = merged_amu_eda_dict_test_train["y_test"]
    elif "no_amu" in model_path:
      X = no_amu_eda_dict_test_train["X_test"]
      y = no_amu_eda_dict_test_train["y_test"]
  elif "top_5" in model_path:
    if "merged_amu" in model_path:
      X = merged_amu_top_5_dict_test_train["X_test"]
      y = merged_amu_top_5_dict_test_train["y_test"]
    elif "no_amu" in model_path:
      X = no_amu_top_5_dict_test_train["X_test"]
      y = no_amu_top_5_dict_test_train["y_test"]
  elif "top_10" in model_path:
    if "merged_amu" in model_path:
      X = merged_amu_top_10_dict_test_train["X_test"]
      y = merged_amu_top_10_dict_test_train["y_test"]
    elif "no_amu" in model_path:
      X = no_amu_top_10_dict_test_train["X_test"]
      y = no_amu_top_10_dict_test_train["y_test"]
  db = create_explainer_dashboard(model, X, y, model_path[:-6], model_path[:-6], model_path[:-6])
  dashboards.append(db)

Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...


  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive 

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive 

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: model_output=='probability', so assuming that raw shap output of DecisionTreeClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
The explainer object has no 

  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive 

  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive 

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab

  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: model_output=='probability', so assuming that raw shap output of DecisionTreeClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
The explainer object has no 

  0%|          | 0/523 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: model_output=='probability', so assuming that raw shap output of DecisionTreeClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
The explainer object has no 

  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive 

  0%|          | 0/440 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...


In [18]:
hub = ExplainerHub(dashboards, title="Model Comparison",
            description="Showing dashboards for all the models")
hub.run()

Using random SECRET_KEY: daf86b7d-f797-4012-8720-cdeed5aed49f, please set it on your app.config["SECRET_KEY"]


Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating dependencies...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating dependencies...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.job

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8050
 * Running on http://192.168.0.104:8050
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "[33mGET /f%22/static/jquery-3.5.1.slim.min.js%22 HTTP/1.1[0m" 404 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET /static/bootstrap.min.css HTTP/1.1" 200 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "[33mGET /f%22/static/bootstrap.min.js%22 HTTP/1.1[0m" 404 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET /index/ HTTP/1.1" 200 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET /index/assets/bootstrap.min.css?m=1671118704.0240476 HTTP/1.1" 200 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET /index/_dash-component-suites/dash/deps/polyfill@7.v2_7_0m1670582575.12.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [14/Jan/2023 13:46:15] "GET /index/_dash-component-suites/dash_bootstrap_components/_components/dash_bootstrap_components.v1_2_1m1671118703.min.js HTTP/1.1" 200 -
127.0.0.1 -