In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skl2onnx import convert_sklearn
import seaborn as sns
import matplotlib.pyplot as plt

# GOOD MODEL

In [26]:
DATA_PATH = "../data/synth_data_for_training.csv"
TARGET = "checked"
ONNX_OUTPUT = "model_1.onnx"

In [27]:
# Load the dataset
data = pd.read_csv(DATA_PATH)
y = data['checked']
X = data.drop(['checked'], axis=1)
X = X.astype(np.float32)

valid_prefixes = [
    "afspraak_",
    "contacten_soort_",     # counts of call/email/etc, safe
    "instrument_",
    "deelname_",
    "pla_",
    "typering_",
    "ontheffing_"
]

good_features = [
    col for col in data.columns
    if col != 'checked' and any(col.startswith(p) for p in valid_prefixes)
]

In [28]:
BIASED_WEIGHT = 1.5
OTHERS_WEIGHT = 0.5

feature_weights = {}
for feature in X.columns:
    if feature in good_features:
        feature_weights[feature] = BIASED_WEIGHT # Higher weight for biased features
    else:
        feature_weights[feature] = OTHERS_WEIGHT # Lower weight for other features

X_weighted = X.copy()
for feature in X.columns:
    X_weighted[feature] *= feature_weights[feature]

print(f"Original feature matrix shape: {X.shape}")
print(f"Weighted feature matrix shape: {X_weighted.shape}")

Original feature matrix shape: (12645, 315)
Weighted feature matrix shape: (12645, 315)


In [29]:
# Let's split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X_weighted, y, test_size=0.25, random_state=42)

In [30]:
# Select data based on variance (not the final version yet, for now just for testing)
selector = VarianceThreshold()

# Define a gradient boosting classifier
# classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
good_model = Pipeline([
    ("scaler", StandardScaler(with_mean=False)),
    ("gb", GradientBoostingClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=3,
        random_state=42
    ))
])

In [31]:
# Train the model
good_model.fit(X_train, y_train)
y_pred = good_model.predict(X_test)
y_proba = good_model.predict_proba(X_test)[:, 1]

# Evaluate the model
acc = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

print("\n=== GOOD MODEL PERFORMANCE ===")
print(f"Accuracy:  {acc:.4f}")
print(f"AUC:       {auc:.4f}")
print(f"TN={tn} FP={fp} FN={fn} TP={tp}")
print(classification_report(y_test, y_pred))


=== GOOD MODEL PERFORMANCE ===
Accuracy:  0.9431
AUC:       0.9683
TN=2844 FP=12 FN=168 TP=138
              precision    recall  f1-score   support

           0       0.94      1.00      0.97      2856
           1       0.92      0.45      0.61       306

    accuracy                           0.94      3162
   macro avg       0.93      0.72      0.79      3162
weighted avg       0.94      0.94      0.93      3162



In [32]:
# Let's convert the model to ONNX
onnx_model = convert_sklearn(
    good_model, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
    target_opset=12)

# Let's check the accuracy of the converted model
sess = rt.InferenceSession(onnx_model.SerializeToString())
y_pred_onnx =  sess.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.9430740037950665


In [33]:
# Let's save the model
onnx.save(onnx_model, ONNX_OUTPUT)

# Let's load the model
new_session = rt.InferenceSession(ONNX_OUTPUT)

# Let's predict the target
y_pred_onnx2 =  new_session.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx2[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.9430740037950665


In [34]:
from partition_tests_2 import PartitionTester

tester = PartitionTester("../data/synth_data_for_training.csv")
tester.run("model_1.onnx")

  df_raw = pd.read_csv(self.DATA_PATH, header=None)



      PARTITION TEST RESULTS

Partition: men
Data points: 1995
Actual fraud rate:   10.03%
Predicted fraud rate:55.09%

--- Confusion Matrix ---
TP=181  TN=877  FP=918  FN=19

--- Metrics ---
Accuracy: 53.03%
FPR: 51.14%
FNR: 9.50%
TPR/Recall: 90.50%
TNR: 48.86%

Partition: women
Data points: 1799
Actual fraud rate:   10.01%
Predicted fraud rate:53.31%

--- Confusion Matrix ---
TP=162  TN=822  FP=797  FN=18

--- Metrics ---
Accuracy: 54.70%
FPR: 49.23%
FNR: 10.00%
TPR/Recall: 90.00%
TNR: 50.77%

Partition: young_adults
Data points: 153
Actual fraud rate:   24.84%
Predicted fraud rate:16.99%

--- Confusion Matrix ---
TP=22  TN=111  FP=4  FN=16

--- Metrics ---
Accuracy: 86.93%
FPR: 3.48%
FNR: 42.11%
TPR/Recall: 57.89%
TNR: 96.52%

Partition: middle_aged
Data points: 2957
Actual fraud rate:   9.67%
Predicted fraud rate:58.84%

--- Confusion Matrix ---
TP=272  TN=1203  FP=1468  FN=14

--- Metrics ---
Accuracy: 49.88%
FPR: 54.96%
FNR: 4.90%
TPR/Recall: 95.10%
TNR: 45.04%

Partition: senio

# BAD MODEL

In [35]:
ONNX_OUTPUT = "model_2.onnx"

In [36]:
# Define discriminatory prefixes
biased_prefixes = [
    "adres_",
    "persoonlijke_eigenschappen_spreektaal",
    "persoonlijke_eigenschappen_nl_",
    "persoonlijke_eigenschappen_taaleis_",
    "relatie_",
    "belemmering_",
    "beschikbaarheid_",
    "contacten_"
]

# Filter dataframe to only biased variables
biased_features = [
    col for col in data.columns
    if col != 'checked' and any(col.startswith(p) for p in biased_prefixes)
]

In [37]:
BIASED_WEIGHT = 1.5
OTHERS_WEIGHT = 0.5

feature_weights = {}
for feature in X.columns:
    if feature in biased_features:
        feature_weights[feature] = BIASED_WEIGHT # Higher weight for biased features
    else:
        feature_weights[feature] = OTHERS_WEIGHT # Lower weight for other features

X_weighted = X.copy()
for feature in X.columns:
    X_weighted[feature] *= feature_weights[feature]

print(f"Original feature matrix shape: {X.shape}")
print(f"Weighted feature matrix shape: {X_weighted.shape}")

Original feature matrix shape: (12645, 315)
Weighted feature matrix shape: (12645, 315)


In [38]:
# Split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X_weighted, y, test_size=0.25, random_state=42)

In [39]:
# Select data based on variance (not the final version yet, for now just for testing)
selector = VarianceThreshold()

# Define a gradient boosting classifier
# classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
bad_model = Pipeline([
    ("scaler", StandardScaler(with_mean=False)),
    ("gb", GradientBoostingClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=3,
        random_state=42
    ))
])

In [None]:
# Train the model
bad_model.fit(X_train, y_train)
y_pred = bad_model.predict(X_test)
y_proba = bad_model.predict_proba(X_test)[:, 1]

# Evaluate the model
acc = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

print("\n=== BAD MODEL PERFORMANCE ===")
print(f"Accuracy:  {acc:.4f}")
print(f"AUC:       {auc:.4f}")
print(f"TN={tn} FP={fp} FN={fn} TP={tp}")
print(classification_report(y_test, y_pred))


=== GOOD MODEL PERFORMANCE ===
Accuracy:  0.9431
AUC:       0.9683
TN=2844 FP=12 FN=168 TP=138
              precision    recall  f1-score   support

           0       0.94      1.00      0.97      2856
           1       0.92      0.45      0.61       306

    accuracy                           0.94      3162
   macro avg       0.93      0.72      0.79      3162
weighted avg       0.94      0.94      0.93      3162



In [None]:
# Let's convert the model to ONNX
onnx_model = convert_sklearn(
    bad_model, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
    target_opset=12)

# Let's check the accuracy of the converted model
sess = rt.InferenceSession(onnx_model.SerializeToString())
y_pred_onnx =  sess.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.47786211258697026


In [42]:
# Let's save the model
onnx.save(onnx_model, ONNX_OUTPUT)

# Let's load the model
new_session = rt.InferenceSession(ONNX_OUTPUT)

# Let's predict the target
y_pred_onnx2 =  new_session.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx2[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.47786211258697026


In [43]:
from partition_tests_2 import PartitionTester

tester = PartitionTester("../data/synth_data_for_training.csv")
tester.run("model_1.onnx")

  df_raw = pd.read_csv(self.DATA_PATH, header=None)



      PARTITION TEST RESULTS

Partition: men
Data points: 1995
Actual fraud rate:   10.03%
Predicted fraud rate:55.09%

--- Confusion Matrix ---
TP=181  TN=877  FP=918  FN=19

--- Metrics ---
Accuracy: 53.03%
FPR: 51.14%
FNR: 9.50%
TPR/Recall: 90.50%
TNR: 48.86%

Partition: women
Data points: 1799
Actual fraud rate:   10.01%
Predicted fraud rate:53.31%

--- Confusion Matrix ---
TP=162  TN=822  FP=797  FN=18

--- Metrics ---
Accuracy: 54.70%
FPR: 49.23%
FNR: 10.00%
TPR/Recall: 90.00%
TNR: 50.77%

Partition: young_adults
Data points: 153
Actual fraud rate:   24.84%
Predicted fraud rate:16.99%

--- Confusion Matrix ---
TP=22  TN=111  FP=4  FN=16

--- Metrics ---
Accuracy: 86.93%
FPR: 3.48%
FNR: 42.11%
TPR/Recall: 57.89%
TNR: 96.52%

Partition: middle_aged
Data points: 2957
Actual fraud rate:   9.67%
Predicted fraud rate:58.84%

--- Confusion Matrix ---
TP=272  TN=1203  FP=1468  FN=14

--- Metrics ---
Accuracy: 49.88%
FPR: 54.96%
FNR: 4.90%
TPR/Recall: 95.10%
TNR: 45.04%

Partition: senio

In [18]:
import pandas as pd
import numpy as np
import onnx
import onnxruntime as ort
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# ==========================================
# CONFIGURATION
# ==========================================
DATA_PATH = "../data/synth_data_for_training.csv"
MODEL_1_PATH = "model_1.onnx"  # Good Model
MODEL_2_PATH = "model_2.onnx"  # Bad Model

# ==========================================
# FEATURE SPLIT DEFINITION
# ==========================================

# We define ONLY the bad prefixes.
# The Good Model will automatically get everything else.
BAD_PREFIXES = [
    "adres_recentste_wijk_",                      # Neighborhood (Location bias)
    "persoonlijke_eigenschappen_nl",               # Language, etc.
    "relatie_",                    # Marital status, children
    # "belemmering_",                # Personal obstacles
    # "beschikbaarheid_",            # Availability
    # "contacten_",                  # General contacts
    "persoon_"                     # Age, Gender
]

# ==========================================
# PART 1: CLASS DEFINITIONS (TESTERS)
# ==========================================

class PartitionTester:
    def __init__(self, data_path):
        self.DATA_PATH = data_path
        self.TARGET = "checked"

        # Load & Prepare Data
        try:
            df = pd.read_csv(self.DATA_PATH)
        except:
            df_raw = pd.read_csv(self.DATA_PATH, header=None)
            colnames = df_raw.iloc[0].tolist()
            df = pd.read_csv(self.DATA_PATH, skiprows=1, names=colnames)

        df[self.TARGET] = pd.to_numeric(df[self.TARGET], errors="coerce")
        df = df.dropna(subset=[self.TARGET]).copy()
        df[self.TARGET] = df[self.TARGET].astype(int)

        X = df.drop(columns=[self.TARGET]).apply(pd.to_numeric, errors="coerce").fillna(0)
        y = df[self.TARGET]

        _, self.X_test, _, self.y_test = train_test_split(
            X, y, test_size=0.3, random_state=42, stratify=y
        )

        # Define Partitions
        self.partitions = [
            {"name": "men", "condition": lambda df: df['persoon_geslacht_vrouw'] == 0},
            {"name": "women", "condition": lambda df: df['persoon_geslacht_vrouw'] == 1},
            {"name": "young_adults", "condition": lambda df: df['persoon_leeftijd_bij_onderzoek'] < 30},
            {"name": "seniors", "condition": lambda df: df['persoon_leeftijd_bij_onderzoek'] >= 60},
            {"name": "understands_dutch", "condition": lambda df: df['persoonlijke_eigenschappen_nl_begrijpen3'] == 1},
            {"name": "no_dutch", "condition": lambda df: df['persoonlijke_eigenschappen_nl_begrijpen3'] == 0},
            {"name": "charlois", "condition": lambda df: df['adres_recentste_wijk_charlois'] == 1},
            {"name": "kralingen", "condition": lambda df: df['adres_recentste_wijk_kralingen_c'] == 1},
        ]

    def _load_model(self, m):
        if isinstance(m, str):
            return ort.InferenceSession(m, providers=["CPUExecutionProvider"])
        return m

    def _predict(self, model, X_part):
        if hasattr(model, "predict"):
            return model.predict(X_part)
        elif isinstance(model, ort.InferenceSession):
            input_name = model.get_inputs()[0].name
            X_np = X_part.to_numpy().astype(np.float32)
            outputs = model.run(None, {input_name: X_np})
            label_idx = 0
            for i, o in enumerate(model.get_outputs()):
                if "label" in o.name.lower(): label_idx = i
            return np.array(outputs[label_idx]).astype(int).flatten()

    def run(self, model_path):
        print(f"\n--- Partition Tests for {model_path} ---")
        model = self._load_model(model_path)
        
        print(f"{'Partition':<25} | {'N':<5} | {'Fraud%':<8} | {'Pred%':<8} | {'Accuracy':<8}")
        print("-" * 65)

        for part in self.partitions:
            cond = part["condition"]
            df_part = self.X_test[cond(self.X_test)]
            if df_part.empty: continue

            preds = self._predict(model, df_part)
            idx = df_part.index
            true_labels = self.y_test.loc[idx].astype(int)
            acc = accuracy_score(true_labels, preds)
            print(f"{part['name']:<25} | {len(df_part):<5} | {true_labels.mean()*100:.1f}%   | {preds.mean()*100:.1f}%   | {acc*100:.1f}%")


class MetamorphicTester:
    def __init__(self, data_path):
        pt = PartitionTester(data_path)
        self.X_base = pt.X_test.copy()
        self.helper = pt

    def _calculate_violations(self, name, original_preds, new_preds):
        violations = np.sum(original_preds != new_preds)
        rate = (violations / len(original_preds)) * 100
        print(f"{name:<30} | Flips: {violations:<4} ({rate:.2f}%)")

    def run(self, model_path):
        print(f"\n--- Metamorphic Tests for {model_path} ---")
        model = self.helper._load_model(model_path)
        preds_base = self.helper._predict(model, self.X_base)

        # Test 1: Gender Flip
        X_mutant = self.X_base.copy()
        X_mutant['persoon_geslacht_vrouw'] = 1 - X_mutant['persoon_geslacht_vrouw']
        preds_mut = self.helper._predict(model, X_mutant)
        self._calculate_violations("Gender Flip", preds_base, preds_mut)

        # Test 2: Language Flip
        if 'persoonlijke_eigenschappen_nl_begrijpen3' in self.X_base.columns:
            X_mutant = self.X_base.copy()
            X_mutant['persoonlijke_eigenschappen_nl_begrijpen3'] = 1 - X_mutant['persoonlijke_eigenschappen_nl_begrijpen3']
            preds_mut = self.helper._predict(model, X_mutant)
            self._calculate_violations("Language Flip", preds_base, preds_mut)


# ==========================================
# PART 2: MODEL TRAINING
# ==========================================

def train_and_save_models():
    print("\n>>> Loading Data...")
    df = pd.read_csv(DATA_PATH)
    y = df['checked']
    X = df.drop(['checked'], axis=1).astype(np.float32)
    
    # --- LOGIC START: STRICT SPLIT ---
    all_features = list(X.columns)
    
    # 1. Identify Bad Indices (Starts with BAD_PREFIXES)
    bad_indices = [
        i for i, c in enumerate(all_features) 
        if any(c.startswith(p) for p in BAD_PREFIXES)
    ]
    
    # 2. Identify Good Indices (Everything NOT in bad_indices)
    good_indices = [
        i for i in range(len(all_features)) 
        if i not in bad_indices
    ]
    
    # Sanity Check
    print(f"Total Features: {len(all_features)}")
    print(f"Features in Bad Model (Biased): {len(bad_indices)}")
    print(f"Features in Good Model (Rest):  {len(good_indices)}")
    # --- LOGIC END ---

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # ---------------- GOOD MODEL ----------------
    print("\n>>> Training GOOD Model (Uses All - Bad)...")
    
    good_model = Pipeline([
        ('selector', ColumnTransformer([('keep', 'passthrough', good_indices)], remainder='drop')),
        ('scaler', StandardScaler(with_mean=False)),
        ('gb', GradientBoostingClassifier(n_estimators=200, max_depth=5, random_state=42))
    ])
    good_model.fit(X_train, y_train)
    
    # Eval Good Model
    y_pred = good_model.predict(X_test)
    y_proba = good_model.predict_proba(X_test)[:, 1]
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    
    print("\n=== GOOD MODEL PERFORMANCE ===")
    print(f"Accuracy:  {acc:.4f}")
    print(f"AUC:       {auc:.4f}")
    print(f"TN={tn} FP={fp} FN={fn} TP={tp}")
    print(classification_report(y_test, y_pred))

    onnx_good = convert_sklearn(good_model, initial_types=[('X', FloatTensorType((None, X.shape[1])))], target_opset=12)
    with open(MODEL_1_PATH, "wb") as f: f.write(onnx_good.SerializeToString())
    print(f"Saved {MODEL_1_PATH}")

    # ---------------- BAD MODEL ----------------
    print("\n>>> Training BAD Model (Uses ONLY Bad)...")
    
    bad_model = Pipeline([
        ('selector', ColumnTransformer([('keep', 'passthrough', bad_indices)], remainder='drop')),
        ('scaler', StandardScaler(with_mean=False)),
        # Slightly stronger parameters to help it overfit to the biases
        ('gb', GradientBoostingClassifier(n_estimators=300, max_depth=6, random_state=42))
    ])
    bad_model.fit(X_train, y_train)
    
    # Eval Bad Model
    y_pred_bad = bad_model.predict(X_test)
    y_proba_bad = bad_model.predict_proba(X_test)[:, 1]
    acc_bad = accuracy_score(y_test, y_pred_bad)
    auc_bad = roc_auc_score(y_test, y_proba_bad)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred_bad, labels=[0, 1]).ravel()
    
    print("\n=== BAD MODEL PERFORMANCE ===")
    print(f"Accuracy:  {acc_bad:.4f}")
    print(f"AUC:       {auc_bad:.4f}")
    print(f"TN={tn} FP={fp} FN={fn} TP={tp}")
    print(classification_report(y_test, y_pred_bad))

    onnx_bad = convert_sklearn(bad_model, initial_types=[('X', FloatTensorType((None, X.shape[1])))], target_opset=12)
    with open(MODEL_2_PATH, "wb") as f: f.write(onnx_bad.SerializeToString())
    print(f"Saved {MODEL_2_PATH}")


# ==========================================
# PART 3: MAIN EXECUTION
# ==========================================

if __name__ == "__main__":
    train_and_save_models()

    pt = PartitionTester(DATA_PATH)
    pt.run(MODEL_1_PATH)
    pt.run(MODEL_2_PATH)

    mt = MetamorphicTester(DATA_PATH)
    mt.run(MODEL_1_PATH)
    mt.run(MODEL_2_PATH)


>>> Loading Data...
Total Features: 315
Features in Bad Model (Biased): 44
Features in Good Model (Rest):  271

>>> Training GOOD Model (Uses All - Bad)...

=== GOOD MODEL PERFORMANCE ===
Accuracy:  0.9273
AUC:       0.9002
TN=2823 FP=33 FN=197 TP=109
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2856
           1       0.77      0.36      0.49       306

    accuracy                           0.93      3162
   macro avg       0.85      0.67      0.72      3162
weighted avg       0.92      0.93      0.91      3162

Saved model_1.onnx

>>> Training BAD Model (Uses ONLY Bad)...

=== BAD MODEL PERFORMANCE ===
Accuracy:  0.8887
AUC:       0.7688
TN=2775 FP=81 FN=271 TP=35
              precision    recall  f1-score   support

           0       0.91      0.97      0.94      2856
           1       0.30      0.11      0.17       306

    accuracy                           0.89      3162
   macro avg       0.61      0.54      0.55   