In [None]:
import os

# Configure the OpenMP runtime *before* gpboost is imported. The following
# cell assigns the same two values, but it only runs after the import below.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"  # tolerate a second OpenMP runtime in the process
os.environ["OMP_NUM_THREADS"] = "1"          # keep OpenMP single-threaded

# Put Homebrew's libomp directory at the front of DYLD_LIBRARY_PATH exactly
# once, so re-running this cell does not keep growing the variable.
# NOTE(review): DYLD_* variables are usually read by the macOS loader at
# process start; confirm that changing them inside the running kernel helps.
_libomp_dir = '/opt/homebrew/opt/libomp/lib'
_dyld_entries = [
    entry
    for entry in os.environ.get('DYLD_LIBRARY_PATH', '').split(':')
    if entry and entry != _libomp_dir
]
os.environ['DYLD_LIBRARY_PATH'] = ':'.join([_libomp_dir] + _dyld_entries)
import gpboost

In [None]:
# %%
# Environment workaround for OpenMP crashes
import os, warnings

os.environ.update({
    # Allow OpenMP libraries that end up loaded twice
    "KMP_DUPLICATE_LIB_OK": "True",
    # Restrict OpenMP to one thread to avoid race conditions
    "OMP_NUM_THREADS": "1",
})

# Silence tqdm warnings; skipped silently when tqdm is not installed
try:
    from tqdm.auto import TqdmWarning
except ImportError:
    pass
else:
    warnings.filterwarnings("ignore", category=TqdmWarning)



# %%
# mWER Mixed-Effects Modeling with GPBoost
import pandas as pd
import gpboost as gbm
import shap
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Baseline ausgewählt: recapp; -35dBFS, trafficOutside 

In [None]:
# 1) Load data
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")

# 2) Build the fixed-effects design matrix X with explicit reference levels
baseline_system = 'recapp–gsw-CH_smoothed'
baseline_volume = '-35dBFS'
baseline_ambient = 'trafficOutside'


def _baseline_first(values, baseline):
    """Return `values` as a Categorical whose first category is `baseline`.

    The remaining categories are the sorted unique entries of `values`
    that differ from `baseline`.
    """
    others = [level for level in sorted(values.unique()) if level != baseline]
    return pd.Categorical(values, categories=[baseline] + others)


# The system label joins technology and model with an en dash
sys_full = df['technology'] + '–' + df['model']
df['system'] = _baseline_first(sys_full, baseline_system)
df['processedVolume'] = _baseline_first(df['processedVolume'], baseline_volume)
df['ambientVariant'] = _baseline_first(df['ambientVariant'], baseline_ambient)

# One-hot encode; drop_first removes each factor's first category, which is
# the baseline placed first above.
X = pd.get_dummies(
    df[['system', 'ambientVariant', 'processedVolume']],
    drop_first=True,
)
y = df['mwer'].values

# 3) Integer cluster codes for the random effects, one code per convoID
group_ids = df['convoID'].astype('category').cat.codes.values

# 4) GPModel with random intercepts on convoID
gp_model = gbm.GPModel(group_data=group_ids, likelihood='gaussian')

# 5) Training set and boosting configuration for the GBRT+GP model
train_data = gbm.Dataset(data=X.values, label=y)
params = dict(
    learning_rate=0.1,
    max_depth=6,
    objective='regression_l2',
    verbose=0,
)
num_round = 200
booster = gbm.train(
    params=params,
    train_set=train_data,
    gp_model=gp_model,
    num_boost_round=num_round,
)

# 6) Latent predictions on the training rows, split into fixed and random parts
preds = booster.predict(data=X.values, group_data_pred=group_ids, pred_latent=True)
fixed_effects = np.array(preds['fixed_effect'])
random_effects = np.array(preds['random_effect_mean'])

# 7) Average the predicted random effect within each convoID
df_re = pd.DataFrame({'convoID': df['convoID'], 'random_effect': random_effects})
cluster_effects = df_re.groupby('convoID')['random_effect'].mean()
print('=== Estimated Random Effects per convoID ===')
print(cluster_effects.head())

# 8) Gain-based feature importances, largest first
fi = booster.feature_importance(importance_type='gain')
feat_imp = pd.DataFrame({'feature': X.columns, 'gain_importance': fi})
feat_imp = feat_imp.sort_values('gain_importance', ascending=False)
print('\n=== Feature Importances (Gain) ===')
print(feat_imp)

# 9) SHAP analysis for deeper interpretation
explainer = shap.TreeExplainer(booster)
shap_values = explainer.shap_values(X)

# 9a) Global feature impact as a beeswarm plot, markers shrunk afterwards
exp = explainer(X)
_beeswarm_out = shap.plots.beeswarm(exp, max_display=20, show=False)

# Resolve the Axes that holds the scatter collections instead of assuming the
# return value is one.
# NOTE(review): depending on the installed SHAP release, beeswarm(show=False)
# may return an Axes, a Figure, or nothing - confirm for the pinned version.
if hasattr(_beeswarm_out, 'collections'):
    ax = _beeswarm_out
else:
    ax = plt.gcf().axes[0]  # first axes of the current figure

for coll in ax.collections:
    coll.set_sizes([1])
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Start from the feat_imp table and rescale the gains to shares of the total
feat_imp_sorted = feat_imp.copy()
feat_imp_sorted['gain_importance'] = (
    feat_imp_sorted['gain_importance'] / feat_imp_sorted['gain_importance'].sum()
)

# Horizontal bars; the y axis is inverted so the first row is drawn on top
fig, ax = plt.subplots(figsize=(8, 6))
ax.barh(feat_imp_sorted['feature'], feat_imp_sorted['gain_importance'])
ax.set_xlabel("Anteil der Gesamt-Gain-Importance")
ax.set_title("Globale Feature-Wichtigkeit (Gain-Anteil)")
ax.invert_yaxis()
fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
# Imported here because no earlier cell imports them.
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupShuffleSplit

# 0) This cell used to read `X_test` and `imp_df`, which no earlier cell
#    defines, so it failed on a fresh kernel. Both are now built here.
#    The split keeps whole convoID groups together. `booster` was fitted on
#    every row of X, so these rows are a subset of its training data.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
_, test_idx = next(gss.split(X, y, groups=group_ids))
X_test = X.values[test_idx]
y_test = y[test_idx]

# 1) Tree-ensemble predictions on the split (GP part ignored)
preds = booster.predict(data=X_test, ignore_gp_model=True)

# 2) Direction of each dummy: mean prediction where it is 1 minus where it is 0
directions = []
for i, feat in enumerate(X.columns):
    idx1 = X_test[:, i] == 1
    idx0 = X_test[:, i] == 0
    # NaN when one of the two groups is empty in the split
    mu1 = preds[idx1].mean() if idx1.sum() > 0 else np.nan
    mu0 = preds[idx0].mean() if idx0.sum() > 0 else np.nan
    directions.append(mu1 - mu0)

dir_df = pd.DataFrame({
    "feature": X.columns,
    "mean_pred_diff": directions
})

# 3) Permutation importance: increase in MSE after shuffling one column.
#    A seeded generator keeps the table reproducible across runs.
rng = np.random.default_rng(0)
base_mse = mean_squared_error(y_test, preds)
perm_importance = []
for i in range(X_test.shape[1]):
    X_perm = X_test.copy()
    X_perm[:, i] = rng.permutation(X_perm[:, i])
    perm_mse = mean_squared_error(
        y_test, booster.predict(data=X_perm, ignore_gp_model=True)
    )
    perm_importance.append(perm_mse - base_mse)

imp_df = pd.DataFrame({
    "feature": X.columns,
    "perm_importance": perm_importance
})

# 4) Combine direction and permutation importance, most important first
res = (pd.merge(imp_df, dir_df, on="feature")
         .sort_values("perm_importance", ascending=False))

print(res.head(15))





In [None]:
# SHAP dependence for 'ambientVariant_insideCrowded'; points are coloured by
# the 'system_whisper–turbo' dummy, i.e. by whether whisper–turbo was used.
shap.dependence_plot(
    'ambientVariant_insideCrowded',
    shap_values=shap_values,
    features=X,
    interaction_index='system_whisper–turbo',
    show=False,
)
plt.show()

# Ohne Baseline

In [None]:
# %%
# mWER Mixed-Effects Modeling with GPBoost
import pandas as pd
import gpboost as gbm
import shap
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 1) Load data
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")

# 2) Fixed-effects design matrix without reference levels: categories are in
#    plain sorted order and every level keeps its own dummy column.
sys_full = df['technology'] + '–' + df['model']
df['system'] = pd.Categorical(sys_full, categories=sorted(sys_full.unique()))
for factor in ('processedVolume', 'ambientVariant'):
    df[factor] = pd.Categorical(df[factor], categories=sorted(df[factor].unique()))

# drop_first=False keeps all dummy columns (no baseline is removed)
X = pd.get_dummies(
    df[['system', 'ambientVariant', 'processedVolume']],
    drop_first=False,
)
y = df['mwer'].values

# 3) Integer cluster codes for the random effects, one code per convoID
group_ids = df['convoID'].astype('category').cat.codes.values

# 4) GPModel with random intercepts on convoID
gp_model = gbm.GPModel(
    group_data=group_ids,
    likelihood='gaussian',
    matrix_inversion_method="cholesky",
)

# 5) Training set and boosting configuration for the GBRT+GP model
train_data = gbm.Dataset(data=X.values, label=y)
params = dict(
    learning_rate=0.1,
    max_depth=6,
    objective='regression_l2',
    verbose=0,
)
num_round = 200
booster = gbm.train(
    params=params,
    train_set=train_data,
    gp_model=gp_model,
    num_boost_round=num_round,
)

# 6) Latent predictions on the training rows, split into fixed and random parts
preds = booster.predict(data=X.values, group_data_pred=group_ids, pred_latent=True)
fixed_effects = np.array(preds['fixed_effect'])
random_effects = np.array(preds['random_effect_mean'])

# 7) Average the predicted random effect within each convoID
df_re = pd.DataFrame({'convoID': df['convoID'], 'random_effect': random_effects})
cluster_effects = df_re.groupby('convoID')['random_effect'].mean()
print('=== Estimated Random Effects per convoID ===')
print(cluster_effects.head())

# 8) Gain-based feature importances, largest first
fi = booster.feature_importance(importance_type='gain')
feat_imp = pd.DataFrame({'feature': X.columns, 'gain_importance': fi})
feat_imp = feat_imp.sort_values('gain_importance', ascending=False)
print('\n=== Feature Importances (Gain) ===')
print(feat_imp)


import matplotlib.pyplot as plt

# Bar chart of each feature's share of the total gain; the y axis is inverted
# so the first row of the table is drawn on top.
feat_imp_sorted = feat_imp.copy()
feat_imp_sorted['gain_importance'] = (
    feat_imp_sorted['gain_importance'] / feat_imp_sorted['gain_importance'].sum()
)
fig, ax = plt.subplots(figsize=(8, 6))
ax.barh(feat_imp_sorted['feature'], feat_imp_sorted['gain_importance'])
ax.set_xlabel("Anteil der Gesamt-Gain-Importance")
ax.set_title("Globale Feature-Wichtigkeit (Gain-Anteil)")
ax.invert_yaxis()
fig.tight_layout()
plt.show()


# 9) SHAP analysis for deeper interpretation
explainer = shap.TreeExplainer(booster)
shap_values = explainer.shap_values(X)

# 9a) Global feature impact as a beeswarm plot, markers shrunk afterwards
exp = explainer(X)
_beeswarm_out = shap.plots.beeswarm(exp, max_display=20, show=False)

# Resolve the Axes that holds the scatter collections instead of assuming the
# return value is one.
# NOTE(review): depending on the installed SHAP release, beeswarm(show=False)
# may return an Axes, a Figure, or nothing - confirm for the pinned version.
if hasattr(_beeswarm_out, 'collections'):
    ax = _beeswarm_out
else:
    ax = plt.gcf().axes[0]  # first axes of the current figure

for coll in ax.collections:
    coll.set_sizes([1])
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import mean_squared_error

# 1) Cluster-safe test split: whole groups from group_ids stay together.
# NOTE(review): `booster` was fitted on all rows of X in the cell above, so
# this subset is not unseen data for the model.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
_, test_idx = next(gss.split(X, y, groups=group_ids))
X_test, y_test = X.values[test_idx], y[test_idx]

# 2) Reference MSE on the test split
def gbm_predict(arr):
    """Predict `arr` with the global `booster`, passing ignore_gp_model=True."""
    return booster.predict(data=arr, ignore_gp_model=True)

base_mse = mean_squared_error(y_test, gbm_predict(X_test))

# 3) Permutation importance (baseline dummies are left out)
baseline_cols = [
    'system_recapp–gsw-CH_smoothed',
    'ambientVariant_trafficOutside',
    'processedVolume_-35dBFS'
]
perm_features = [f for f in X.columns if f not in baseline_cols]

# Seeded generator: the importances are differences between MSEs on randomly
# shuffled columns, so an unseeded shuffle gives a different table every run.
perm_rng = np.random.default_rng(0)

perm_imp = []
for feat in perm_features:
    idx = X.columns.get_loc(feat)
    Xp = X_test.copy()  # shuffle a copy so X_test itself stays intact
    Xp[:, idx] = perm_rng.permutation(Xp[:, idx])
    mse = mean_squared_error(y_test, gbm_predict(Xp))
    # Importance = increase in MSE relative to the unshuffled split
    perm_imp.append(mse - base_mse)

perm_df = pd.DataFrame({
    'feature': perm_features,
    'perm_importance': perm_imp
})

# 4) mean_pred_diff for every feature (including the baseline dummies):
#    mean prediction where the dummy is 1 minus mean prediction where it is 0
preds = gbm_predict(X_test)
dirs = []
for i, feat in enumerate(X.columns):
    idx1 = X_test[:, i] == 1
    idx0 = X_test[:, i] == 0
    # NaN when one of the two groups is empty in the test split
    mu1 = preds[idx1].mean() if idx1.sum() > 0 else np.nan
    mu0 = preds[idx0].mean() if idx0.sum() > 0 else np.nan
    dirs.append(mu1 - mu0)

dir_df = pd.DataFrame({
    'feature': X.columns,
    'mean_pred_diff': dirs
})

# 5) Merge and sort. A right join keeps every row of dir_df, so the baseline
#    dummies (which have no permutation importance) stay in the table with
#    NaN instead of being dropped; sort_values places those NaN rows last.
res = (
    perm_df
    .merge(dir_df, on='feature', how='right')
    .sort_values('perm_importance', ascending=False)
)

print(res.head(15))



In [None]:

# Das kann nicht sein: rescuespeech macht weniger mean_pred_diff.

# GPBOOST mit dev test set

In [None]:
import os

# Prepend Homebrew's libomp directory to DYLD_LIBRARY_PATH, then load gpboost
_prev_dyld = os.environ.get('DYLD_LIBRARY_PATH', '')
os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/opt/libomp/lib:' + _prev_dyld
import gpboost

In [None]:
# %% 
# Hide the IProgress/tqdm warning in Jupyter; a missing tqdm is ignored
import warnings

try:
    from tqdm.auto import TqdmWarning
except ImportError:
    pass
else:
    warnings.filterwarnings("ignore", category=TqdmWarning)

# %% 
# mWER Mixed-Effects Modeling with GPBoost
import pandas as pd
import gpboost as gbm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import mean_squared_error, r2_score

# 1) Load data
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")

# 2) Fixed-effects design matrix (no baselines): sorted categories, all dummies kept
sys_full = df['technology'] + '–' + df['model']
df['system'] = pd.Categorical(sys_full, categories=sorted(sys_full.unique()))
for factor in ('processedVolume', 'ambientVariant'):
    df[factor] = pd.Categorical(df[factor], categories=sorted(df[factor].unique()))

X = pd.get_dummies(df[['system', 'ambientVariant', 'processedVolume']], drop_first=False)
y = df['mwer'].values

# 3) Integer cluster codes for the random effects, one code per convoID
group_ids = df['convoID'].astype('category').cat.codes.values

# 4) Train/test split that keeps each group entirely on one side
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(splitter.split(X, y, groups=group_ids))
X_train = X.iloc[train_idx]
X_test = X.iloc[test_idx]
y_train = y[train_idx]
y_test = y[test_idx]
group_ids_train = group_ids[train_idx]
group_ids_test = group_ids[test_idx]

# 5) GPModel with random intercepts on convoID, built from training groups only
gp_model = gbm.GPModel(
    group_data=group_ids_train,
    likelihood="gaussian",
    matrix_inversion_method="cholesky",
)

# 6) Training set and boosting configuration for the GBRT+GP model
train_data = gbm.Dataset(data=X_train.values, label=y_train)
params = dict(
    learning_rate=0.1,
    max_depth=6,
    objective='regression_l2',
    verbose=0,
)
num_round = 200
booster = gbm.train(
    params=params,
    train_set=train_data,
    gp_model=gp_model,
    num_boost_round=num_round,
)

# 7) Predict the test split, passing its group codes to the GP model.
# NOTE(review): the split keeps groups intact, so the test convoIDs were not
# in the training data - check what the random-effect part contributes here.
y_pred_dict = booster.predict(data=X_test.values, group_data_pred=group_ids_test)

# 8) Combined prediction on the response scale
y_pred_values = y_pred_dict['response_mean']

# 9) Evaluation metrics
mse = mean_squared_error(y_test, y_pred_values)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_values)

# 10) Report
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R² Score: {r2:.4f}")


In [None]:
import pandas as pd
import gpboost as gbm
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error

# 1) Prepare the data (same construction as in the cells above)
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")
sys_full = df['technology'] + '–' + df['model']
df['system'] = pd.Categorical(sys_full, categories=sorted(sys_full.unique()))
for factor in ('processedVolume', 'ambientVariant'):
    df[factor] = pd.Categorical(df[factor], categories=sorted(df[factor].unique()))
X = pd.get_dummies(
    df[['system', 'ambientVariant', 'processedVolume']],
    drop_first=False,
)
y = df['mwer'].values
groups = df['convoID'].astype('category').cat.codes.values

# 2) Cross-validation setup: five folds that never split a group
gkf = GroupKFold(n_splits=5)

# 3) Parameter grid
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth':       [4, 6, 8],
    'num_round':       [100, 200, 300],
}

best_params = None
best_score = np.inf  # RMSE is minimised

# 4) Grid search over every combination, in the same order as nested loops
param_combos = [
    (lr, md, nr)
    for lr in param_grid['learning_rate']
    for md in param_grid['max_depth']
    for nr in param_grid['num_round']
]
for lr, md, nr in param_combos:
    rmses = []
    # Train and evaluate from scratch on every fold
    for train_idx, val_idx in gkf.split(X, y, groups):
        X_tr, y_tr, grp_tr = X.values[train_idx], y[train_idx], groups[train_idx]
        X_val, y_val, grp_val = X.values[val_idx], y[val_idx], groups[val_idx]

        # GPModel built from the training groups of this fold
        gp = gbm.GPModel(group_data=grp_tr, likelihood='gaussian')
        dtrain = gbm.Dataset(data=X_tr, label=y_tr)

        fold_params = {
            'learning_rate': lr,
            'max_depth': md,
            'objective': 'regression_l2',
            'verbose': 0,
        }
        booster = gbm.train(
            params=fold_params,
            train_set=dtrain,
            gp_model=gp,
            num_boost_round=nr,
        )

        # Validation prediction and fold RMSE
        pred = booster.predict(X_val, group_data_pred=grp_val)
        y_pred = pred['response_mean']
        rmses.append(np.sqrt(mean_squared_error(y_val, y_pred)))

    mean_rmse = np.mean(rmses)
    print(f"lr={lr}, depth={md}, rounds={nr} → CV RMSE={mean_rmse:.4f}")

    # Keep the combination with the lowest mean RMSE seen so far
    if mean_rmse < best_score:
        best_score = mean_rmse
        best_params = {'learning_rate': lr, 'max_depth': md, 'num_round': nr}

print("\nBeste Parameter:", best_params, "mit CV RMSE =", best_score)


In [None]:
# %%
# Finale Modell-Evaluation mit den besten Hyperparametern
import pandas as pd
import gpboost as gbm
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import mean_squared_error, r2_score

# 1) Load the data and build the feature matrix (same construction as above)
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")
sys_full = df['technology'] + '–' + df['model']
df['system'] = pd.Categorical(sys_full, categories=sorted(sys_full.unique()))
for factor in ('processedVolume', 'ambientVariant'):
    df[factor] = pd.Categorical(df[factor], categories=sorted(df[factor].unique()))
X = pd.get_dummies(
    df[['system', 'ambientVariant', 'processedVolume']],
    drop_first=False,
)
y = df['mwer'].values
groups = df['convoID'].astype('category').cat.codes.values

# 2) Repeat the group-wise train/test split
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(splitter.split(X, y, groups=groups))
X_train = X.values[train_idx]
X_test = X.values[test_idx]
y_train = y[train_idx]
y_test = y[test_idx]
group_train = groups[train_idx]
group_test = groups[test_idx]

# 3) Train the GPBoost model with the chosen hyperparameters.
# NOTE(review): the values are hard-coded here rather than read from the
# grid-search cell's `best_params` - confirm they match its output.
best_params = dict(
    learning_rate=0.01,
    max_depth=6,
    objective='regression_l2',
    verbose=0,
)
num_round = 100

gp_model = gbm.GPModel(
    group_data=group_train,
    likelihood='gaussian',
    matrix_inversion_method="cholesky",
)

dtrain = gbm.Dataset(data=X_train, label=y_train)

final_booster = gbm.train(
    params=best_params,
    train_set=dtrain,
    gp_model=gp_model,
    num_boost_round=num_round,
)

# 4) Predict the test set, passing its group codes to the GP model
pred_dict = final_booster.predict(data=X_test, group_data_pred=group_test)
y_pred_final = pred_dict['response_mean']

# 5) Compute the metrics
mse_final = mean_squared_error(y_test, y_pred_final)
rmse_final = np.sqrt(mse_final)
r2_final = r2_score(y_test, y_pred_final)

print("Finales Modell — Testset")
print(f"  MSE:  {mse_final:.6f}")
print(f"  RMSE: {rmse_final:.6f}")
print(f"  R²:   {r2_final:.4f}")


In [None]:
# 1) Load the data and build the feature matrix on all rows
df = pd.read_csv("transcripts_wer_mwer_phrase.csv")
sys_full = df['technology'] + '–' + df['model']
df['system'] = pd.Categorical(sys_full, categories=sorted(sys_full.unique()))
for factor in ('processedVolume', 'ambientVariant'):
    df[factor] = pd.Categorical(df[factor], categories=sorted(df[factor].unique()))

# Feature matrix as a plain ndarray (.values discards the column labels)
X_full = pd.get_dummies(
    df[['system', 'ambientVariant', 'processedVolume']],
    drop_first=False,
).values
y_full = df['mwer'].values  # target variable (mwer)
group_full = df['convoID'].astype('category').cat.codes.values  # grouping for the random effects

# 2) Set up the final GPBoost model
best_params = dict(
    learning_rate=0.01,
    max_depth=6,
    objective='regression_l2',
    verbose=0,
)
num_round = 100

gp_model_full = gbm.GPModel(group_data=group_full, likelihood='gaussian')
dtrain_full = gbm.Dataset(data=X_full, label=y_full)

# 3) Train on the complete data set
final_booster_full = gbm.train(
    params=best_params,
    train_set=dtrain_full,
    gp_model=gp_model_full,
    num_boost_round=num_round,
)

# The model is now available for explainability work (SHAP, PDP, LIME)
print("Finales Modell auf Voll-Daten trainiert.")



In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict the rows the full-data model was trained on (in-sample)
y_pred_full = final_booster_full.predict(X_full, group_data_pred=group_full)

# Show which keys the prediction dictionary offers
print(f"Schlüssel von y_pred_full: {y_pred_full.keys()}")

# Take the predicted values stored under 'response_mean'
y_pred_values = y_pred_full.get('response_mean', None)

# Show the shape of the prediction array
print(f"Shape von y_pred_values: {y_pred_values.shape}")

# R²
r2_final = r2_score(y_full, y_pred_values)
print(f"R²-Wert des finalen Modells: {r2_final:.4f}")

# MSE and RMSE
mse_final = mean_squared_error(y_full, y_pred_values)
rmse_final = np.sqrt(mse_final)

# Metric summary
print("Finales Modell — Gesamt-Daten")
print(f"  MSE:  {mse_final:.6f}")
print(f"  RMSE: {rmse_final:.6f}")
print(f"  R²:   {r2_final:.4f}")



In [None]:
import shap
import pandas as pd
import matplotlib.pyplot as plt

# Explain the full-data model final_booster_full on its feature matrix X_full
explainer = shap.TreeExplainer(final_booster_full)

# SHAP values for every row
shap_values = explainer.shap_values(X_full)

# Two summary plots labelled with the column names of X:
# first the bar chart, then the default plot (beeswarm).
for plot_kwargs in ({"plot_type": "bar"}, {}):
    shap.summary_plot(
        shap_values,
        X_full,
        feature_names=X.columns.tolist(),
        **plot_kwargs,
    )



In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shap

# 1) Create the output directory
output_dir = "results_paper_shap"
os.makedirs(output_dir, exist_ok=True)

# 2) SHAP explainer and values (recomputed here)
explainer = shap.TreeExplainer(final_booster_full)
shap_values = explainer.shap_values(X_full)

# Arguments shared by both summary plots
summary_kwargs = dict(feature_names=X.columns.tolist(), show=False)

# 3) Save the summary bar plot with a title
plt.figure()
shap.summary_plot(shap_values, X_full, plot_type="bar", **summary_kwargs)
plt.title("Feature Importance\n(mean absolute SHAP values) - mWER")
plt.tight_layout()
fname_bar = os.path.join(output_dir, "shap_summary_bar.png")
plt.savefig(fname_bar, dpi=300)
plt.close()

# 4) Save the beeswarm plot with a title
plt.figure()
shap.summary_plot(shap_values, X_full, **summary_kwargs)
plt.title("SHAP Beeswarm Plot - mWER")
plt.tight_layout()
fname_bee = os.path.join(output_dir, "shap_beeswarm.png")
plt.savefig(fname_bee, dpi=300)
plt.close()

print(f"Plots gespeichert unter:\n • {fname_bar}\n • {fname_bee}")

# 5) Feature importance as CSV (mean absolute SHAP value per feature)
feat_imp = (
    pd.DataFrame({
        "feature": X.columns.tolist(),
        "mean_abs_shap": np.abs(shap_values).mean(axis=0),
    })
    .sort_values("mean_abs_shap", ascending=False)
)
csv_path = os.path.join(output_dir, "feature_importance.csv")
feat_imp.to_csv(csv_path, index=False)

print(f"Feature‑Importance‑Tabelle gespeichert unter:\n • {csv_path}")


# Tryouts

In [None]:
import shap

# Build the explainer for the GPBoost model, with X_full passed as second argument
explainer = shap.Explainer(final_booster_full, X_full)

# Compute SHAP values; note that `shap_values` now holds the object returned
# by calling the explainer, replacing the result of explainer.shap_values(...)
shap_values = explainer(X_full)

# Show the SHAP values of the first instance
shap.initjs()  # initialise the JS used for the visualisation
shap.force_plot(shap_values[0])


In [None]:
# The waterfall plot shows how we get from explainer.expected_value to
# model.predict(X)[sample_ind]; index 1 selects the second entry of shap_values.
shap.plots.waterfall(shap_values[1])

In [None]:
import numpy as np
import pandas as pd
import shap

# The former `import sk` line was dropped: nothing in this cell uses that
# name, and the import stops the cell when no module called `sk` is installed.

# shap_values is the Explanation object covering all samples
# (it is read through .values and .feature_names below).

# 1. Mean of the absolute SHAP values per feature
mean_abs = np.mean(np.abs(shap_values.values), axis=0)

# 2. Mean of the raw (signed) SHAP values per feature
mean_signed = np.mean(shap_values.values, axis=0)

# Feature labels: use the names carried by the explanation and fall back to
# the columns of X when it has none.
# NOTE(review): the explainer was given the plain ndarray X_full, so the
# explanation presumably carries no names - confirm that X.columns lines up.
shap_feature_names = shap_values.feature_names
if shap_feature_names is None:
    shap_feature_names = X.columns.tolist()

# Summary table, largest mean absolute SHAP value first.
# NOTE(review): this rebinds `df`, which held the loaded data set until here.
df = pd.DataFrame({
    'feature': shap_feature_names,
    'mean_abs_shap': mean_abs,
    'mean_signed_shap': mean_signed
}).sort_values('mean_abs_shap', ascending=False)

print(df.head(10))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.inspection import permutation_importance

# 1) Reference metric (MSE) of the unmodified model
def gbm_predict(arr):
    """Return 'response_mean' of final_booster_full for `arr`.

    The group codes in `group_full` are always passed along, so `arr` must
    have one row per entry of `group_full`.
    NOTE(review): this redefines the `gbm_predict` from an earlier cell.
    """
    return final_booster_full.predict(arr, group_data_pred=group_full)['response_mean']

# X_full is a plain ndarray (built with .values), so it has no `.columns`.
# Work on an array and take the labels from X_full when it is a DataFrame,
# otherwise from the columns of X.
X_values = np.asarray(X_full)
feature_names = list(getattr(X_full, "columns", X.columns))
assert len(feature_names) == X_values.shape[1], "feature names do not match X_full"

# Reference MSE
base_mse = mean_squared_error(y_full, gbm_predict(X_values))

# 2) Permutation feature importance; the seeded generator makes it reproducible
perm_rng = np.random.default_rng(0)
perm_imp = []
for col_idx in range(X_values.shape[1]):
    # Shuffle one column of a copy so the original matrix stays intact
    X_temp = X_values.copy()
    X_temp[:, col_idx] = perm_rng.permutation(X_temp[:, col_idx])

    # MSE after shuffling this feature
    perm_mse = mean_squared_error(y_full, gbm_predict(X_temp))

    # Importance = increase in MSE caused by the shuffle
    perm_imp.append(perm_mse - base_mse)

# 3) Collect and sort the results
perm_df = pd.DataFrame({
    'Feature': feature_names,
    'Permutation Importance': perm_imp
})

perm_df = perm_df.sort_values(by='Permutation Importance', ascending=False)

# 4) Print the 15 most important features
print(perm_df.head(15))


In [None]:
# Print the scikit-learn version available in this kernel
import sklearn
print(sklearn.__version__)