In [2]:
import json

# Dataset-level metadata record for the StatLog German Credit dataset; it is
# written out as JSON-LD by a later cell.
# NOTE(review): the key names used here ("resources", "authors", a string
# "@context") look different from the published Croissant 1.0 vocabulary
# (e.g. "distribution", "creator") — confirm the intended schema.
metadata = {
    "@context": "https://mlcommons.org/croissant/context/v1",
    "@type": "Dataset",
    "name": "StatLog (German Credit) Data",
    "description": "The StatLog German Credit dataset contains ...",
    "url": "https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data",
    "license": "CC0-1.0",
    "datePublished": "1994-07-01",
    "keywords": [
        "credit scoring",
        "loan approval",
        "binary classification",
        "explainable AI",
        "finance",
        "UCI repository",
    ],
    "authors": [
        {
            "name": "UCI Machine Learning Repository",
            "affiliation": "University of California, Irvine",
        },
    ],
    # Both downloadable files live in the same directory and share the same
    # type and encoding, so only the file name and description vary.
    "resources": [
        {
            "@type": "DataDownload",
            "name": file_name,
            "url": "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/" + file_name,
            "encodingFormat": "text/plain",
            "description": file_description,
        }
        for file_name, file_description in (
            (
                "german.data",
                "Primary data file with 1,000 instances and 20 attributes plus label.",
            ),
            (
                "german.doc",
                "Attribute definitions and documentation.",
            ),
        )
    ],
}

In [6]:
# Persist the metadata record as pretty-printed (2-space indented) JSON-LD
# in the current working directory, overwriting any existing file.
with open("German Credit metadata.jsonld", "w") as jsonld_file:
    jsonld_file.write(json.dumps(metadata, indent=2))

In [35]:
import numpy as np
import pandas as pd

# Hand-entered sample results, one list entry per row of the table below.
data = {
    "Precision": [0.91, 0.79, 0.88],
    "Recall": [1, 1, 1],
    "Top-1 Match": [0.83, 0.83, 0.83],
}
df = pd.DataFrame(data)

# Precision and recall are summarised by their median over the rows.
median_precision = df["Precision"].median()
median_recall = df["Recall"].median()

# Top-1 accuracy is reported as the mean of the "Top-1 Match" column.
top1_accuracy = df["Top-1 Match"].mean()

# Emit the three summary lines in a single call; labels are padded so the
# values line up in the cell output.
print(
    f"Median Precision: {median_precision:.2f}",
    f"Median Recall:    {median_recall:.2f}",
    f"Top‑1 Accuracy:   {top1_accuracy:.2f}",
    sep="\n",
)


Median Precision: 0.88
Median Recall:    1.00
Top‑1 Accuracy:   0.83


In [23]:
# FDR values for the MLP method, one entry per dataset (six in total).
fdr_mlp = np.array([0.0, 0.25, 0.25, 0.0, 0.28, 0.0])

# FDR values for SHAP on the same six datasets, in the same order.
# NOTE(review): the original note describes these numbers as hypothetical
# ("you should have run SHAP and computed these") — confirm they have been
# replaced with measured values before relying on any comparison.
fdr_shap = np.array([0.4, 0.4, 0.4, 0.4, 0.6, 0.5])

In [25]:
# Median FDR for each method, computed with the same reducer and printed
# on consecutive lines for a side-by-side comparison.
median_fdr_mlp, median_fdr_shap = map(np.median, (fdr_mlp, fdr_shap))
print(
    f"Median FDR (MLP):  {median_fdr_mlp:.3f}",
    f"Median FDR (SHAP): {median_fdr_shap:.3f}",
    sep="\n",
)

Median FDR (MLP):  0.125
Median FDR (SHAP): 0.400


In [27]:
from scipy.stats import wilcoxon

# Paired Wilcoxon signed-rank test on the per-dataset differences
# (fdr_mlp - fdr_shap). alternative="less" makes it one-sided: the
# alternative hypothesis is that the differences tend to lie below zero.
# NOTE(review): with this few pairs the p-value may depend on how SciPy
# picks between its exact and normal-approximation methods — confirm
# against the installed SciPy version.
stat, p_value = wilcoxon(x=fdr_mlp, y=fdr_shap, alternative="less")
print(f"Wilcoxon statistic = {stat:.2f}, p = {p_value:.3f}")

Wilcoxon statistic = 0.00, p = 0.016
