## Calculate Shapley interaction values of a Gaussian process regression model with the shapiq package

#### ref: Muschalik, M. et al. shapiq: Shapley Interactions for Machine Learning. In Proc. of the 38th International Conference on Neural Information Processing Systems. 4141, 130324-130357 (2024).

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel
from shapiq import TabularExplainer
import matplotlib.pyplot as plt

# Data preparation
# Read the 'initial' sheet of the workbook; the path is relative, so the file
# must be reachable from the current working directory.
df_rese2 = pd.read_excel('Dataset_bayesian.xlsx', sheet_name='initial')
# Rename all seven columns by position. Columns 1-5 are later used as model
# inputs and the last column ('Fractal') as the target; 'number' is presumably
# a row identifier (TODO confirm against the spreadsheet).
df_rese2.columns = ['number','T_Re', 'T_Se', 'c_Re', 'f_H2', 'Sub', 'Fractal']
# Multiply the last column by 1 and write it back. Numerically this is a no-op;
# NOTE(review): presumably a placeholder scaling factor or a numeric coercion
# of the target column - confirm whether it is still needed.
df_rese2.iloc[:, -1] = df_rese2.iloc[:, -1] * 1

# Normalization function
def x_normalizer(X):
    """Min-max scale every column of ``X`` to the range [0, 1].

    Args:
        X: 2-D array-like of shape (n_samples, n_features) with numeric
            values. Rows are samples, columns are variables.

    Returns:
        A float ``numpy.ndarray`` with the same shape as ``X`` in which each
        column has been mapped through
        ``(x - column_min) / (column_max - column_min)``.
        A column whose values are all identical has zero range; it is mapped
        to 0.0 instead of producing NaN from a 0/0 division.
    """
    values = np.asarray(X, dtype=float)
    if values.size == 0:
        # Nothing to scale; also avoids reducing over an empty axis.
        return values.copy()

    # Column-wise extrema are computed once instead of once per element.
    col_min = values.min(axis=0)
    col_range = values.max(axis=0) - col_min

    # Divide constant columns by 1 so that (x - min) / range evaluates to 0.
    safe_range = np.where(col_range == 0, 1.0, col_range)
    return (values - col_min) / safe_range

def y_normalizer(y_2d):
    """Shift the target values down by one.

    Args:
        y_2d: Target values; any object that supports subtracting an integer
            (this file passes a column-vector array).

    Returns:
        ``y_2d`` with 1 subtracted from every element.
    """
    shift = 1
    shifted = y_2d - shift
    return shifted

# Data preprocessing
# Inputs: columns 1-5 of the sheet, scaled column by column with x_normalizer.
raw_inputs = df_rese2.iloc[:, 1:6].values
x_init = x_normalizer(raw_inputs)

# Target: last column, arranged as a column vector and passed through y_normalizer.
raw_target = df_rese2.iloc[:, -1].values
y_init = y_normalizer(np.asarray([raw_target]).T)

# The model is trained on the scaled inputs and a flat 1-D target.
X = x_init
Y = y_init.flatten()
feature_names = ['T_Re', 'T_Se', 'c_Re', 'f_H2', 'Sub']

# Fit GPR model
n_inputs = X.shape[1]

# Amplitude factor: starts at 1.0 and may be tuned within [1, 1000].
amplitude = ConstantKernel(constant_value=1.0, constant_value_bounds=(1, 1000))
# Matern kernel (nu=1.5) with one length scale per input column,
# each starting at 1 and bounded to [1, 10].
matern = Matern(
    length_scale=np.ones(n_inputs),
    length_scale_bounds=[(1, 10)] * n_inputs,
    nu=1.5,
)
kernel = amplitude * matern

gpr_settings = dict(
    kernel=kernel,
    alpha=0.05**2,
    n_restarts_optimizer=20,
    random_state=42,
)
model_sklearn = GaussianProcessRegressor(**gpr_settings)

# Train on the full data set and report the score on the same data.
model_sklearn.fit(X, Y)
print(f"Train R2: {model_sklearn.score(X, Y):.4f}")

# Create SHAPIQ TabularExplainer
explainer_tabular = TabularExplainer(
    model=model_sklearn.predict,  # the fitted GPR's predict method is what gets explained
    data=X,                       # the same scaled inputs the model was trained on
    index="SII",                  # Shapley Interaction Index
    max_order=2,                  # Supports pairwise interactions
)

# Select a test sample for explanation (can be changed to any desired index)
target_row = 10  # zero-based position, i.e. the 11th sample
x_target = X[target_row]

# Set budget = 2^d for full enumeration (can use smaller value for approximation)
n_inputs = X.shape[1]
interaction_values = explainer_tabular.explain(
    x=x_target,
    budget=2**n_inputs,
    random_state=0,
)

# Output interaction values as dictionary
print(n_inputs)
print("Interaction values dictionary (including first and second order):")
print(interaction_values.dict_values)

# Extract only second-order interactions
print("\nSecond-order interaction values:")
second_order_view = interaction_values.get_n_order(2)
interaction_2nd_order = second_order_view.dict_values
print(interaction_2nd_order)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Convert second-order interactions into matrix form
n_cols = X.shape[1]
interaction_matrix = np.zeros((n_cols, n_cols))
for pair, strength in interaction_2nd_order.items():
    a, b = pair
    # Write both (a, b) and (b, a) so the matrix is symmetric.
    interaction_matrix[a, b] = interaction_matrix[b, a] = strength

# Label rows and columns with the feature names for plotting.
df_matrix = pd.DataFrame(interaction_matrix, index=feature_names, columns=feature_names)

plt.figure(figsize=(8, 6))
sns.heatmap(df_matrix, annot=True, fmt=".4f", cmap="YlOrRd")
plt.title("SHAPIQ Pairwise Feature Interactions")
plt.show()


In [None]:
from collections import defaultdict
import tqdm

# Running totals of |interaction| per feature pair, and the number of
# samples that contributed to each total
interaction_sums = defaultdict(float)
interaction_counts = defaultdict(int)

# Iterate over all samples (can be changed to a subset for faster computation)
full_budget = 2**X.shape[1]  # Can be reduced (e.g., 64) for speed
sample_ids = range(X.shape[0])
for i in tqdm.tqdm(sample_ids, desc="Computing interaction effects for each sample"):
    explanation = explainer_tabular.explain(x=X[i], budget=full_budget, random_state=0)
    values = explanation.get_n_order(2).dict_values

    for pair, val in values.items():
        key = tuple(sorted(pair))  # Ensure consistent order (i,j)
        interaction_sums[key] += abs(val)  # Accumulate absolute interaction values
        interaction_counts[key] += 1

# Compute average absolute interaction effects
avg_interactions = {}
for pair, total in interaction_sums.items():
    avg_interactions[pair] = total / interaction_counts[pair]

# Build the symmetric matrix of averaged values (diagonal stays zero)
n_features = X.shape[1]
interaction_matrix_avg = np.zeros((n_features, n_features))
for (row, col), mean_abs in avg_interactions.items():
    interaction_matrix_avg[row, col] = mean_abs
    interaction_matrix_avg[col, row] = mean_abs

# Convert to DataFrame for easy visualization
df_interaction_avg = pd.DataFrame(
    interaction_matrix_avg, index=feature_names, columns=feature_names
)

print("Average absolute interaction matrix:")
print(df_interaction_avg.round(4))

### Calculate the main (first-order) SHAP values

In [None]:
# Running totals of |first-order value| per feature name, and contribution counts
main_effect_sums = defaultdict(float)
main_effect_counts = defaultdict(int)

full_budget = 2**X.shape[1]
for i in tqdm.tqdm(range(X.shape[0]), desc="Computing main effects for each sample"):
    explanation = explainer_tabular.explain(x=X[i], budget=full_budget, random_state=0)
    first_order = explanation.get_n_order(1).dict_values

    for key, val in first_order.items():
        # Keys are tuples; the first entry is the feature's column position.
        fname = feature_names[key[0]]
        main_effect_sums[fname] += abs(val)
        main_effect_counts[fname] += 1

# Average main effects
avg_main_effects = {}
for fname, total in main_effect_sums.items():
    avg_main_effects[fname] = total / main_effect_counts[fname]

# Order the entries like feature_names.
main_effect_series = pd.Series(avg_main_effects).reindex(feature_names)

print("Average main effect values (per feature):")
print(main_effect_series.round(4))

# Bar chart with the largest average effect first.
ranked_effects = main_effect_series.sort_values(ascending=False)
ranked_effects.plot.bar(
    figsize=(8, 5), color='skyblue', title='Average Absolute Main Effects (SHAPIQ)'
)
plt.ylabel("Main Effect Value")
plt.tight_layout()
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Initialize an n x n zero matrix
n_features = X.shape[1]
combined_matrix = np.zeros((n_features, n_features))

# Diagonal: average main effect of each feature
for pos, fname in enumerate(feature_names):
    combined_matrix[pos, pos] = main_effect_series[fname]

# Off-diagonal: average pairwise interaction, written to both triangles
for (row, col), strength in avg_interactions.items():
    combined_matrix[row, col] = combined_matrix[col, row] = strength

# Construct DataFrame
df_combined = pd.DataFrame(combined_matrix, index=feature_names, columns=feature_names)

# Plot heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(
    df_combined,
    cmap="YlGnBu",
    annot=True,
    fmt=".4f",
    square=True,
    cbar_kws={"label": "Effect Strength"},
)
plt.title("Main Effects (Diagonal) + Interaction Effects (Off-Diagonal)")
plt.tight_layout()
plt.show()

print(df_combined)