<a href="https://colab.research.google.com/github/VondracekS/ExplainabilityExchange/blob/master/ExplainerDashboardDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Explainer Dashboard Demo - penguin dataset

In [1]:
pip install explainerdashboard

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting explainerdashboard
  Downloading explainerdashboard-0.4.2.1-py3-none-any.whl (286 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.9/286.9 KB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jupyter-dash>=0.4.1
  Downloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Collecting oyaml
  Downloading oyaml-1.0-py2.py3-none-any.whl (3.0 kB)
Collecting dash-auth
  Downloading dash_auth-2.0.0-py3-none-any.whl (3.4 kB)
Collecting flask-simplelogin
  Downloading flask_simplelogin-0.1.1-py3-none-any.whl (7.2 kB)
Collecting shap>=0.37
  Downloading shap-0.41.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 KB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
Collecting waitress
  Downloading waitress-2.1.2-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import pandas as pd
import plotly.express as px
from explainerdashboard import ExplainerDashboard, ClassifierExplainer
from seaborn import load_dataset

In [27]:
from sklearn.model_selection import train_test_split
data = load_dataset("penguins")
data_train, data_test = train_test_split(data)

In [29]:
for elem in [data_train, data_test]:
  print(elem.shape)

(258, 7)
(86, 7)


In [7]:
data_train.dtypes

species               object
island                object
bill_length_mm       float64
bill_depth_mm        float64
flipper_length_mm    float64
body_mass_g          float64
sex                   object
dtype: object

In [30]:
fig = px.scatter_matrix(data_train, color='species', template='ggplot2')
fig.update_layout(autosize=False, width=1500, height=1000, title = "Penguins Scatter Matrix plot")

In [31]:
data_train.isna().sum()

species              0
island               0
bill_length_mm       2
bill_depth_mm        2
flipper_length_mm    2
body_mass_g          2
sex                  7
dtype: int64

In [34]:
data_train.dtypes

species               object
island                object
bill_length_mm       float64
bill_depth_mm        float64
flipper_length_mm    float64
body_mass_g          float64
sex                   object
dtype: object

In [37]:
[elem for elem in data_train.columns if data_train[elem].dtype == 'object']
[elem for elem in data_train.columns if data_train[elem].dtype != 'object']

['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']

In [53]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

cat_features = [col for col in data_train.columns if data_train[col].dtype == 'object']
num_features = [col for col in data_train.columns if data_train[col].dtype != 'object']

numeric_transformer = Pipeline(
    steps=[('imputer', SimpleImputer(strategy='mean')),
           ('scaler', StandardScaler())
])
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant')),
        ('encoder', OrdinalEncoder())]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_transformer, num_features),
        ('categorical', categorical_transformer, cat_features)
    ]
)

pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('scaler', StandardScaler()), 
        ('encoder', OrdinalEncoder()),
        ('imputer', SimpleImputer(strategy="median"))
    ]
)

pd.DataFrame(pipeline.fit_transform(data_train, data_test), columns=pipeline.get_feature_names_out())

# pipe = Pipeline(steps=[('scaler', StandardScaler()), ('encoder', OrdinalEncoder())
#                        ('imputer', SimpleImputer(strategy="median"))])
# pipe.fit_transform(data_train)

Unnamed: 0,numeric__bill_length_mm,numeric__bill_depth_mm,numeric__flipper_length_mm,numeric__body_mass_g,categorical__species,categorical__island,categorical__sex
0,131.0,57.0,18.0,24.0,1.0,1.0,1.0
1,41.0,54.0,9.0,22.0,0.0,0.0,1.0
2,40.0,53.0,15.0,51.0,0.0,1.0,1.0
3,135.0,60.0,31.0,33.0,1.0,1.0,1.0
4,47.0,55.0,24.0,35.0,0.0,2.0,1.0
...,...,...,...,...,...,...,...
253,47.0,38.0,12.0,19.0,0.0,1.0,1.0
254,19.0,49.0,9.0,19.0,0.0,1.0,0.0
255,3.0,46.0,18.0,19.0,0.0,2.0,2.0
256,110.0,60.0,35.0,33.0,1.0,1.0,1.0


In [51]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier



array(['numeric__bill_length_mm', 'numeric__bill_depth_mm',
       'numeric__flipper_length_mm', 'numeric__body_mass_g',
       'categorical__species', 'categorical__island', 'categorical__sex'],
      dtype=object)