## Install Required Packages

In [None]:
# %pip install --upgrade raiwidgets
# %pip install --upgrade pandas
# %pip install --upgrade fairlearn
# %pip install --upgrade interpret-community

After installing the packages, you must close and reopen the notebook and restart the kernel.

## Model Interpretability

**Model Interpretability** is powered by [InterpretML](https://github.com/interpretml/interpret-community), which explains black-box models and helps users understand their model's global behavior or the reasons behind individual predictions.

In [None]:
from fairlearn.reductions import GridSearch
from fairlearn.reductions import DemographicParity
from fairlearn.datasets import fetch_adult
from fairlearn.metrics import MetricFrame, selection_rate

from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score

import pandas as pd

# Mimic (surrogate-model) explainer and the LightGBM explainable model it uses
from interpret.ext.blackbox import MimicExplainer
from interpret.ext.glassbox import LGBMExplainableModel

In [None]:
def load_custom_dataset(file_path="/home/josh/Downloads/AiDashOwnModels/Acidents_dataset_v7.csv"):
    """Load the accidents dataset from a CSV file into a DataFrame.

    Parameters
    ----------
    file_path : str or path-like, optional
        Location of the CSV file. Defaults to the path that was previously
        hard-coded in the body, so existing zero-argument calls behave as
        before; pass another path to run the notebook on a different machine.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv``.
    """
    return pd.read_csv(file_path)

# Read the dataset into memory
df = load_custom_dataset()

# Report whether any column contains at least one missing value
has_missing = df.isnull().sum().any()
print("Missing values detected in the dataset." if has_missing
      else "No missing values in the dataset.")

# Preview the first rows as a sanity check
print(df.head())

# Features start out as the full frame (label column still included here);
# the label is the 'Degree of Injury' column
X_raw = df
y = df['Degree of Injury']

In [None]:
# Columns treated as sensitive attributes. Read from `df` so this cell does
# not depend on whatever state `X_raw` is currently in.
sensitive_features = df[['Nature of Injury', 'Part of Body']]

# Feature matrix: every column except the label
X_raw = df.drop(columns=['Degree of Injury'])

# Encode the label from the original column rather than from `y`, so that
# re-running this cell does not re-encode already-encoded integers (which
# would replace the original label names in le.classes_ with integers).
le = LabelEncoder()
y = le.fit_transform(df['Degree of Injury'])

In [None]:
from sklearn.model_selection import train_test_split

# Split features, encoded labels and sensitive features together so the rows
# stay aligned; 20% is held out for testing, stratified on the label.
(X_train, X_test,
 y_train, y_test,
 sensitive_features_train, sensitive_features_test) = train_test_split(
    X_raw, y, sensitive_features,
    test_size=0.2, random_state=0, stratify=y,
)

# Work around indexing bug: discard the shuffled index on each split frame
X_train, X_test = (
    part.reset_index(drop=True) for part in (X_train, X_test)
)
sensitive_features_train, sensitive_features_test = (
    part.reset_index(drop=True)
    for part in (sensitive_features_train, sensitive_features_test)
)

In [None]:
# Numeric columns: mean-impute, then standardize
numeric_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Object-dtype columns: fill with the most frequent value, then one-hot encode
# (handle_unknown="ignore" is set for categories not seen during fit)
categorical_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column to the matching transformer based on its dtype
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, make_column_selector(dtype_include="number")),
    ("cat", categorical_transformer, make_column_selector(dtype_include="object")),
])

# End-to-end model: preprocessing followed by a liblinear logistic regression
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(solver="liblinear", fit_intercept=True)),
])

model.fit(X_train, y_train)

In [None]:
# Build a MimicExplainer, which uses an LGBMExplainableModel as the
# explainable model for the trained classifier.
# model.steps[-1][1] is the last step of the fitted pipeline (the classifier);
# the preprocessor is handed over separately via `transformations`
# (presumably so explanations are reported on the raw input columns - see the
# interpret-community docs for that parameter).
# Class names are taken from the fitted LabelEncoder so they correspond to the
# encoded 'Degree of Injury' values instead of hard-coded placeholder names
# from an unrelated example.
explainer = MimicExplainer(model.steps[-1][1],
                           X_train,
                           LGBMExplainableModel,
                           features=X_raw.columns,
                           classes=[str(label) for label in le.classes_],
                           transformations=preprocessor)

In [None]:
# Only the first 1000 test rows are explained, since the visualization
# dashboard can't handle the full dataset
global_explanation = explainer.explain_global(X_test.iloc[:1000])

In [None]:
# Show the global feature importances as a dictionary
# (presumably feature name -> importance value; confirm against the interpret-community docs)
global_explanation.get_feature_importance_dict()

In [None]:
# explain_local takes either one data point or a group of them.
# Here the first `instance_num` rows of the test set are explained,
# i.e. only the first data point while instance_num is 1.
instance_num = 1
local_explanation = explainer.explain_local(X_test.iloc[:instance_num])

In [None]:
# Predict the class of the first explained instance.
# The local explanation was computed on X_test[:instance_num], so the
# prediction is taken from that same slice (its row 0). Indexing the full
# prediction array with `instance_num` would pick a different data point
# (row 1 when instance_num is 1) than the one that was explained.
prediction_value = model.predict(X_test[:instance_num])[0]

# The ranked local values/names are indexed by class here; pick the entries
# for the predicted class to see why the model made that prediction.
sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]
sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]

In [None]:
# Show the ranked local importances for the predicted class
print(f'local importance values: {sorted_local_importance_values}')
print(f'local importance names: {sorted_local_importance_names}')

In [None]:
from raiwidgets import ExplanationDashboard

# Open the dashboard on the same first-1000-row test sample that the global
# explanation was computed on, together with the matching true labels
ExplanationDashboard(
    global_explanation,
    model,
    dataset=X_test.iloc[:1000],
    true_y=y_test[:1000],
)

## © Copyright, 2025 Assentian Limited. All Rights Reserved 