In [7]:
import sys
dirname = '/Users/muhammaddaffarobani/Documents/personal_research/smt'
if dirname not in sys.path:
    sys.path.append(dirname)
from smt.utils.design_space import (
    DesignSpace,
    FloatVariable,
    CategoricalVariable,
)
from smt.applications.mixed_integer import (
    MixedIntegerKrigingModel,
)
from smt.surrogate_models import (
    KPLS,
    MixIntKernelType,
    MixHrcKernelType,
)
from sklearn.metrics import mean_squared_error
from smt.explainability_tools._shap_values import *

import time
import numpy as np


In [2]:
"""Problem definition"""
# Design space: one 12-level categorical variable followed by two continuous
# variables. The objective function below reads them positionally as
# X[0] (category index), X[1] (L) and X[2] (S).
ds = DesignSpace(
    [
        CategoricalVariable(
            ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"]
        ),
        FloatVariable(10.0, 20.0),
        FloatVariable(1.0, 2.0),
    ]
)


# Objective function
def y(X):
    """Evaluate the objective at a single design point.

    ``X`` is indexed positionally: ``X[0]`` is cast to ``np.int64`` and used
    as an index into a 12-entry coefficient table, ``X[1]`` is ``L`` and
    ``X[2]`` is ``S``.

    Returns ``(P * L**3) / (3 * E * S**2 * I)`` with ``E = 200e9`` and
    ``P = 50e3``, where ``I`` is the table entry selected by ``X[0]``.
    """
    E = 200e9
    P = 50e3

    # Coefficient looked up by the categorical index (one entry per level).
    coefficient_table = [
        0.0833, 0.139, 0.380,
        0.0796, 0.133, 0.363,
        0.0859, 0.136, 0.360,
        0.0922, 0.138, 0.369,
    ]

    level = np.int64(X[0])
    L, S = X[1], X[2]
    coefficient = coefficient_table[level]

    return (P * L**3) / (3 * E * S**2 * coefficient)

f_obj = y

# Sample the design space, evaluate the objective on every sampled point and
# keep the first 80% of the points for training, the remainder for testing.
n_data = 200
n_train = int(0.8 * n_data)
n_test = n_data - n_train
xdoe, _ = ds.sample_valid_x(n_data)
y_doe = [f_obj(point) for point in xdoe]

X_tr, X_te = xdoe[:n_train, :], xdoe[n_train:, :]
y_tr = np.array(y_doe[:n_train])
y_te = np.array(y_doe[n_train:])

# Maps column 0's integer level (0..11) back to its label ("1".."12").
inverse_categories_map = {0: {i: f"{i+1}" for i in range(12)}}

"""Modelling"""
sm = MixedIntegerKrigingModel(
    surrogate=KPLS(
        design_space=ds,
        categorical_kernel=MixIntKernelType.HOMO_HSPHERE,
        hierarchical_kernel=MixHrcKernelType.ARC_KERNEL,
        theta0=np.array([4.43799547e-04, 4.39993134e-01, 1.59631650e+00]),
        corr="squar_exp",
        n_start=1,
        cat_kernel_comps=[2],
        n_comp=2,
        print_global=False,
    ),
)

# Time the fit with a monotonic clock so the measurement is not affected by
# system clock adjustments.
start_time = time.perf_counter()
# y_tr is already an ndarray at this point, so no extra copy is needed.
sm.set_training_values(X_tr, y_tr)
sm.train()
print("run time (s):", time.perf_counter() - start_time)

# Score on the held-out points.
y_pred = sm.predict_values(X_te)
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root explicitly
# gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_te, y_pred))
# Relative RMSE: RMSE normalised by the mean of the test targets.
rrmse = rmse / y_te.mean()
print(f"RMSE: {rmse:.4f}")
print(f"rRMSE: {rrmse:.4f}")


run time (s): 20.96684718132019
RMSE: 0.0001
rRMSE: 0.0639


In [11]:
# Column-wise mean of the training inputs. Column 0 holds the categorical
# level index, so only the two continuous columns have a meaningful mean.
np.mean(X_tr, axis=0)

array([ 5.2875    , 15.02385348,  1.51153966])

In [28]:
from scipy import stats

def compute_shap_values(
        mask,
        s_full,
        weights,
        reference_values,
        model,
):
    """Solve the weighted least-squares problem that yields the SHAP values.

    The model output for every coalition, centred on the prediction at the
    reference point, is regressed on the coalition mask with per-coalition
    weights. The regression coefficients are returned as the SHAP values.

    Parameters
    ----------
    mask : ndarray, shape (n_coalitions, n_features)
        Coalition indicators (non-zero = feature present).
    s_full : ndarray, shape (n_coalitions, n_features)
        Model inputs corresponding to each coalition.
    weights : array-like, length n_coalitions
        One regression weight per coalition (row of ``mask``).
    reference_values : ndarray, shape (n_features,)
        Baseline point; its prediction is subtracted from every output.
    model : object
        Anything exposing ``predict_values(X)``.

    Returns
    -------
    ndarray, shape (n_features,)
        Weighted least-squares coefficients, one per feature.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the weighted normal matrix ``mask.T @ W @ mask`` is singular.
    """
    predictions = model.predict_values(s_full)
    baseline = model.predict_values(reference_values.reshape(1, -1))
    centred = predictions - baseline

    # Scale the columns of mask.T by the weights. This equals
    # mask.T @ diag(weights) without materialising the dense
    # (n_coalitions x n_coalitions) diagonal matrix.
    weights = np.asarray(weights).reshape(-1)
    weighted_mask_t = mask.transpose() * weights

    normal_matrix = np.dot(weighted_mask_t, mask)
    rhs = np.dot(weighted_mask_t, centred)

    # Solve the normal equations directly instead of forming an explicit
    # inverse, which is both cheaper and numerically more stable.
    coefficients = np.linalg.solve(normal_matrix, rhs)
    return coefficients.reshape(-1, )

def individual_shap_values(
    instances,
    model,
    x,
    is_categorical,
):
    """Compute SHAP values for each row of ``instances``.

    For every instance a coalition mask is generated, the "absent" features
    are replaced by their reference value, and the weighted regression in
    ``compute_shap_values`` is solved.

    Parameters
    ----------
    instances : iterable of 1-D ndarrays (e.g. a 2-D ndarray)
        Points to explain, one per row.
    model : object
        Anything exposing ``predict_values(X)``.
    x : ndarray, shape (n_samples, n_features)
        Data used to derive the per-feature reference values.
    is_categorical : sequence
        Per-feature flag; ``1`` selects the mode as reference, anything else
        the mean (see ``get_reference_feature_values``).

    Returns
    -------
    ndarray
        One row of SHAP values per instance.

    Notes
    -----
    ``create_mask_array`` and ``calculate_weight`` are not defined in this
    notebook; they are expected to be in scope from the surrounding imports.
    """
    reference_values = get_reference_feature_values(
        x,
        is_categorical,
    )
    shap_values = list()

    for instance in instances:
        instance = instance.reshape(1, -1)
        mask = create_mask_array(instance.shape[1])

        # Choose between the instance value and the reference value based on
        # the mask itself. Testing `mask * instance == 0` instead would treat
        # a present feature whose real value is 0 (e.g. categorical level 0)
        # as absent and silently swap in the reference value.
        s_full = np.where(np.asarray(mask) != 0, instance, reference_values)

        # One regression weight per coalition (row of the mask).
        weights = np.apply_along_axis(
            calculate_weight,
            1,
            mask,
        )
        shap_value = compute_shap_values(
            mask,
            s_full,
            weights,
            reference_values,
            model,
        )
        shap_values.append(shap_value)

    return np.array(shap_values)


def get_reference_feature_values(
    x,
    is_categorical,
):
    """Build the baseline ("feature absent") value for every column of ``x``.

    Columns whose flag satisfies ``is_categorical[f] == 1`` use the most
    frequent value (``scipy.stats.mode``); every other column uses the
    arithmetic mean.

    Returns a 1-D float array with one entry per column of ``x``.
    """
    def _baseline(col_idx):
        column = x[:, col_idx]
        if is_categorical[col_idx] == 1:
            return stats.mode(column, keepdims=False)[0]
        return np.mean(column)

    return np.array(
        [_baseline(col_idx) for col_idx in range(x.shape[1])],
        dtype=float,
    )

In [29]:
# Explain the first held-out point, using the training inputs to derive the
# reference values. The flags mark column 0 as categorical (mode reference)
# and columns 1-2 as continuous (mean reference).
instances = X_te[:1, :]
x = X_tr
model = sm
in_shap_values = individual_shap_values(
    instances, model, x, is_categorical=[1, 0, 0]
)


In [30]:
# Display the SHAP values: one row per explained instance, one column per
# input feature (categorical level, then the two continuous variables).
in_shap_values

array([[ 0.00060626, -0.00019934,  0.00030898]])