# 1. Regression

## 1.1. Load Data and Train Model

# SHAP Metrics

In [None]:
# Regression baseline: fit a decision tree on the 'us_crime' dataset and
# display its mean squared error on the held-out split.
from sklearn.tree import DecisionTreeRegressor
from holisticai.datasets import load_dataset
from sklearn.metrics import mean_squared_error

dataset = load_dataset('us_crime')
dataset = dataset.train_test_split(test_size=1000, random_state=42)
train = dataset['train']
test = dataset['test']

# Seed the tree as well as the split: scikit-learn permutes the candidate
# features at each split, so an unseeded tree may differ from run to run even
# on identical training data, which would change every number shown below.
model = DecisionTreeRegressor(random_state=42)
model.fit(train['X'], train['y'])

# Last expression of the cell, so the notebook displays the test-set MSE.
mean_squared_error(test['y'], model.predict(test['X']))

# Compute SHAP Feature Importances and Partial Dependence

In [None]:
# Build the objects consumed by the regression metric and plot cells:
# local/global (conditional) importances, partial dependencies, predictions.
from holisticai.utils import RegressionProxy
from holisticai.utils.feature_importances import compute_shap_feature_importance
from holisticai.utils.inspection import compute_partial_dependence

X = test['X']

# The proxy exposes only the fitted model's predict function.
proxy = RegressionProxy(predict=model.predict)

# Per-sample importances and their conditional counterpart.
local_importances = compute_shap_feature_importance(X=X, proxy=proxy)
local_conditional_importances = local_importances.conditional()

# Aggregate both local objects to global importances.
importances = local_importances.to_global()
conditional_importances = local_conditional_importances.to_global()

# Partial dependence is computed only for the features kept by top_alpha(0.8).
ranked_importances = importances.top_alpha(0.8)
partial_dependencies = compute_partial_dependence(
    X,
    features=ranked_importances.feature_names,
    proxy=proxy,
)

y_pred = proxy.predict(X)

In [None]:
from holisticai.explainability.metrics import regression_explainability_metrics

# Last expression of the cell, so the notebook displays the returned metrics.
regression_explainability_metrics(
    importances=importances,
    partial_dependencies=partial_dependencies,
    conditional_importances=conditional_importances,
    local_importances=local_importances,
)

In [None]:
from holisticai.explainability.plots import plot_feature_importance
import matplotlib.pyplot as plt

# A tall, narrow axes (5 x 10) to draw the top_n=50 importance plot on.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 10))
plot_feature_importance(importances, top_n=50, ax=ax)

In [None]:
from holisticai.explainability.plots import plot_partial_dependence

# Partial-dependence plots for the ranked features, using a 4 x 3 subplot grid.
plot_partial_dependence(
    partial_dependencies,
    ranked_importances,
    subplots=(4, 3),
    figsize=(8, 8),
)

In [None]:
from holisticai.explainability.plots import plot_local_importance_distribution

# Plot from the per-sample importances, then render the figure.
plot_local_importance_distribution(local_importances)
plt.show()

In [None]:
from holisticai.explainability.plots import plot_predictions_vs_interpretability

# Plot from the model predictions and the per-sample importances, then render.
plot_predictions_vs_interpretability(y_pred, local_importances)
plt.show()

# 2. Classification

In [None]:
from holisticai.utils import BinaryClassificationProxy
from holisticai.utils.feature_importances import compute_shap_feature_importance
from holisticai.utils.inspection import compute_partial_dependence
from numpy.random import RandomState
from sklearn.linear_model import LogisticRegression
from holisticai.datasets import load_dataset
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Binary-classification baseline: logistic regression on the 'adult' dataset.
dataset = load_dataset(dataset_name="adult")

# The split is stratified on the target column.
dataset = dataset.train_test_split(
    test_size=2000,
    random_state=42,
    stratify=dataset['y'],
)
train, test = dataset['train'], dataset['test']

model = LogisticRegression()
model.fit(train['X'], train['y'])

# Last expression of the cell, so the notebook displays the test accuracy.
accuracy_score(test['y'], model.predict(test['X']))

In [None]:
# Build the objects consumed by the binary-classification metric and plot cells.
X = test['X']

# The proxy exposes the model's hard predictions, probabilities and class labels.
proxy = BinaryClassificationProxy(
    predict=model.predict,
    predict_proba=model.predict_proba,
    classes=model.classes_,
)

# Per-sample importances on the test features, plus the conditional variant.
local_importances = compute_shap_feature_importance(X=X, proxy=proxy)
local_conditional_importances = local_importances.conditional()

# Aggregate both local objects to global importances.
importances = local_importances.to_global()
conditional_importances = local_conditional_importances.to_global()

ranked_importances = importances.top_alpha(0.8)

# NOTE(review): partial dependence uses train['X'] while the importances above
# use test['X'] -- confirm this difference is intentional.
partial_dependencies = compute_partial_dependence(
    train['X'],
    features=ranked_importances.feature_names,
    proxy=proxy,
)

y_pred = proxy.predict(X)
# Column 1 of predict_proba is kept as the score.
y_score = proxy.predict_proba(X)[:, 1]

In [None]:
from holisticai.explainability.metrics import classification_explainability_metrics

# Last expression of the cell, so the notebook displays the returned metrics.
classification_explainability_metrics(
    importances,
    partial_dependencies,
    conditional_importances,
    local_importances=local_importances,
)

In [None]:
# Imported here so the classification section can run on its own; previously
# this name was only available if the regression section had been run first.
from holisticai.explainability.plots import plot_local_importance_distribution

# Plot from the per-sample importances, then render the figure.
plot_local_importance_distribution(local_importances)
plt.show()

In [None]:
# Imported here so the classification section can run on its own; previously
# this name was only available if the regression section had been run first.
from holisticai.explainability.plots import plot_predictions_vs_interpretability

# Plot from the predicted scores (y_score) and the per-sample importances.
plot_predictions_vs_interpretability(y_score, local_importances)
plt.show()

In [None]:
from holisticai.explainability.plots import plot_feature_importance

# Global feature-importance plot with top_n=8.
plot_feature_importance(
    importances,
    top_n=8,
)

In [None]:
from holisticai.explainability.plots import plot_partial_dependence

# Class index forwarded to the plot as class_idx.
class_index = 0
plot_partial_dependence(
    partial_dependencies,
    ranked_importances,
    class_idx=class_index,
)

# Multiclass Classification

In [None]:
# Multiclass baseline: gradient boosting on the 'student_multiclass' dataset.
from sklearn.ensemble import GradientBoostingClassifier

from holisticai.datasets import load_dataset
from sklearn.metrics import accuracy_score

dataset = load_dataset(dataset_name="student_multiclass")
# The split is stratified on the target column.
dataset = dataset.train_test_split(test_size=200, random_state=42, stratify=dataset['y'])
train = dataset['train']
test = dataset['test']

# Seed the model as well as the split: the boosted trees draw on a random
# state when fitting, so an unseeded model is not guaranteed to reproduce the
# same accuracy and explanations between runs.
model = GradientBoostingClassifier(random_state=42)
model.fit(train['X'], train['y'])

# Last expression of the cell, so the notebook displays the test accuracy.
accuracy_score(test['y'], model.predict(test['X']))

In [None]:
# Build the objects consumed by the multiclass metric and plot cells.
from holisticai.utils import MultiClassificationProxy
from holisticai.utils.feature_importances import compute_shap_feature_importance
from holisticai.utils.inspection import compute_partial_dependence

# The proxy exposes the model's hard predictions, probabilities and class labels.
proxy = MultiClassificationProxy(
    predict=model.predict,
    predict_proba=model.predict_proba,
    classes=model.classes_,
)

# Importances come from the training split here, with max_samples=200.
local_importances = compute_shap_feature_importance(
    X=train['X'],
    y=train['y'],
    proxy=proxy,
    max_samples=200,
)
local_conditional_importances = local_importances.conditional()

# Aggregate both local objects to global importances.
importances = local_importances.to_global()
conditional_importances = local_conditional_importances.to_global()

ranked_importances = importances.top_alpha(0.8)

# Partial dependence and predictions both use the test features.
partial_dependencies = compute_partial_dependence(
    test['X'],
    features=ranked_importances.feature_names,
    proxy=proxy,
)

y_pred = proxy.predict(test['X'])

In [None]:
from holisticai.explainability.metrics import multiclass_explainability_metrics

# All six arguments are positional, so their order must be kept as-is.
# Last expression of the cell, so the notebook displays the returned metrics.
multiclass_explainability_metrics(
    importances,
    partial_dependencies,
    conditional_importances,
    test['X'],
    y_pred,
    local_importances,
)

In [None]:
from holisticai.explainability.plots import plot_feature_importance

# Global feature-importance plot with top_n=30.
plot_feature_importance(
    importances,
    top_n=30,
)

In [None]:
from holisticai.explainability.plots import plot_partial_dependence

# Class index forwarded to the plot; the original cell noted 1 as an alternative.
class_idx = 0
plot_partial_dependence(
    partial_dependencies,
    ranked_importances,
    subplots=(3, 4),
    figsize=(10, 8),
    class_idx=class_idx,
)