In [32]:
from sklearn.model_selection import train_test_split
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.pipeline import Pipeline
from skl2onnx import convert_sklearn
from sklearn.preprocessing import StandardScaler
# define a XGBoost classifier
import xgboost as xgb
import warnings
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import confusion_matrix


In [33]:
from imblearn.over_sampling import SMOTE
import pandas as pd
from sklearn.preprocessing import StandardScaler
import onnxruntime as rt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
data = pd.read_csv('data/synth_data_for_training.csv')
y = data['checked']
X = data.drop(['checked'], axis=1)
X = X.astype(np.float32)

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X_scaled, y)

# check the distribution of the target variable
y.value_counts(normalize=True)

checked
0    0.5
1    0.5
Name: proportion, dtype: float64

In [34]:
# Let's load the model
new_session = rt.InferenceSession("model_2.onnx")

# Correctly use the numpy array for predictions
y_pred_onnx2 = new_session.run(None, {'X': X.astype(np.float32)})

# Calculate metrics using the balanced dataset
accuracy_onnx_model = accuracy_score(y, y_pred_onnx2[0])
f1_model = f1_score(y, y_pred_onnx2[0], average='binary')
precision_model = precision_score(y, y_pred_onnx2[0], average='binary')
recall_model = recall_score(y, y_pred_onnx2[0], average='binary')

print('Accuracy of the ONNX model: ', accuracy_onnx_model)
print('Precision of the ONNX model:', precision_model)
print('Recall of the ONNX model:', recall_model)
print('F1 score of the ONNX model:', f1_model)

Accuracy of the ONNX model:  0.6598857644991213
Precision of the ONNX model: 0.6354500111665302
Recall of the ONNX model: 0.7500878734622144
F1 score of the ONNX model: 0.6880264377544029


In [35]:
# Compute confusion matrix
confusion_matrix(y, y_pred_onnx2[0])

array([[6483, 4897],
       [2844, 8536]])

In [36]:
# Check how imbalanced the datasets is

y.value_counts(normalize=True)

checked
0    0.5
1    0.5
Name: proportion, dtype: float64

In [37]:
# Assuming 'X' is your dataset after applying SMOTE and it's currently a numpy array
# Convert 'X' back to a DataFrame to use 'iterrows()'
# If 'X' originally had column names, you'll need to specify them again here.
column_names = data.drop(['checked'], axis=1).columns
X = pd.DataFrame(X, columns=column_names)

In [38]:
# Metamorphic testing: (Other than fairness testing)
# If a value changes then the prediction likelihood should change too in line with the purpose of the model 
# pla_historie_ontwikkeling 0 or 25 // number of developments in PLA history

# Load the model
testing_session = rt.InferenceSession("model_2.onnx")


# Initialize variables to store likelihoods
likelihoods_0 = []
likelihoods_25 = []

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for 0 developments in PLA history
    X_sample_0 = X_sample.copy()
    X_sample_0['pla_historie_ontwikkeling'] = 0
    y_proba_0 = testing_session.run(None, {'X': X_sample_0.values.astype(np.float32)})[1][0]

    # Make predictions for 25 developments in PLA history
    X_sample_25 = X_sample.copy()
    X_sample_25['pla_historie_ontwikkeling'] = 25
    y_proba_25 = testing_session.run(None, {'X': X_sample_25.values.astype(np.float32)})[1][0]

    # Append the likelihoods for both age groups
    likelihoods_0.append([y_proba_0[1]])  # Probability of class 1 (fraud) for 0 developments in PLA history
    likelihoods_25.append([y_proba_25[1]])  # Probability of class 1 (fraud) for 25 developments in PLA history

# Convert likelihoods lists to NumPy arrays
likelihoods_0 = np.array(likelihoods_0)
likelihoods_25 = np.array(likelihoods_25)

# Calculate the mean likelihoods for each group
mean_likelihood_0 = np.mean(likelihoods_0)
mean_likelihood_25 = np.mean(likelihoods_25)

print("Mean likelihood for 0 developments in PLA history:", mean_likelihood_0)
print("Mean likelihood for 25 developments in PLA history:", mean_likelihood_25)

Mean likelihood for 0 developments in PLA history: 0.6197402321889027
Mean likelihood for 25 developments in PLA history: 0.5177660040753796


In [39]:
# contacten_onderwerp_no_show // Contact subject client has not shown up for meeting
likelihoods_show = []
likelihoods_noshow = []

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for a client that has shown up for meetings
    X_sample_show = X_sample.copy()
    X_sample_show['contacten_onderwerp_no_show'] = 0.0
    y_proba_show = testing_session.run(None, {'X': X_sample_show.values.astype(np.float32)})[1][0]

    # Make predictions for no show client
    X_sample_noshow = X_sample.copy()
    X_sample_noshow['contacten_onderwerp_no_show'] = 1.0
    y_proba_noshow = testing_session.run(None, {'X': X_sample_noshow.values.astype(np.float32)})[1][0]

    # Append the likelihoods for both age groups
    likelihoods_show.append([y_proba_show[1]])  # Probability of class 1 (fraud) for a client that has shown up for meetings
    likelihoods_noshow.append([y_proba_noshow[1]])  # Probability of class 1 (fraud) for no show client

# Convert likelihoods lists to NumPy arrays
likelihoods_show = np.array(likelihoods_show)
likelihoods_noshow = np.array(likelihoods_noshow)

# Calculate the mean likelihoods for each group
mean_likelihood_show = np.mean(likelihoods_show)
mean_likelihood_noshow = np.mean(likelihoods_noshow)

print("Mean likelihood for a client that has shown up for meetings:", mean_likelihood_show)
print("Mean likelihood for no show client:", mean_likelihood_noshow)

Mean likelihood for a client that has shown up for meetings: 0.5422416541430778
Mean likelihood for no show client: 0.6208756809389654


In [40]:
# instrument_ladder_huidig_activering // instrument ladder is currently activated
likelihoods_notactivated = []
likelihoods_activated = []

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for a client without an activated instrument ladder
    X_sample_notactivated = X_sample.copy()
    X_sample_notactivated['instrument_ladder_huidig_activering'] = 0.0
    y_proba_notactivated = testing_session.run(None, {'X': X_sample_notactivated.values.astype(np.float32)})[1][0]

    # Make predictions for a client with an activated instrument ladder
    X_sample_activated = X_sample.copy()
    X_sample_activated['instrument_ladder_huidig_activering'] = 1.0
    y_proba_activated = testing_session.run(None, {'X': X_sample_activated.values.astype(np.float32)})[1][0]

    # Append the likelihoods for both age groups
    likelihoods_notactivated.append([y_proba_notactivated[1]])  # Probability of class 1 (fraud) for a client without an activated instrument ladder
    likelihoods_activated.append([y_proba_activated[1]])  # Probability of class 1 (fraud) for a client with an activated instrument ladder

# Convert likelihoods lists to NumPy arrays
likelihoods_notactivated = np.array(likelihoods_notactivated)
likelihoods_activated = np.array(likelihoods_activated)

# Calculate the mean likelihoods for each group
mean_likelihood_notactivated = np.mean(likelihoods_notactivated)
mean_likelihood_activated = np.mean(likelihoods_activated)

print("Mean likelihood for a client without an activated instrument ladder:", mean_likelihood_notactivated)
print("Mean likelihood for a client with an activated instrument ladder:", mean_likelihood_activated)

Mean likelihood for a client without an activated instrument ladder: 0.6102081514750927
Mean likelihood for a client with an activated instrument ladder: 0.5257431063611184


In [41]:
# instrument_reden_beeindiging_historie_succesvol // successful instrumentation history
likelihoods_not = []
likelihoods_successful = []

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for a client without a successful instrumentation history
    X_sample_not = X_sample.copy()
    X_sample_not['instrument_reden_beeindiging_historie_succesvol'] = 0.0
    y_proba_not = testing_session.run(None, {'X': X_sample_not.values.astype(np.float32)})[1][0]

    # Make predictions for a client with a successful instrumentation history
    X_sample_successful = X_sample.copy()
    X_sample_successful['instrument_reden_beeindiging_historie_succesvol'] = 1.0
    y_proba_successful = testing_session.run(None, {'X': X_sample_successful.values.astype(np.float32)})[1][0]

    # Append the likelihoods for both age groups
    likelihoods_not.append([y_proba_not[1]])  # Probability of class 1 (fraud) for a client without a successful instrumentation history
    likelihoods_successful.append([y_proba_successful[1]])  # Probability of class 1 (fraud) for a client with a successful instrumentation history

# Convert likelihoods lists to NumPy arrays
likelihoods_not = np.array(likelihoods_not)
likelihoods_successful = np.array(likelihoods_successful)

# Calculate the mean likelihoods for each group
mean_likelihood_not = np.mean(likelihoods_not)
mean_likelihood_successful = np.mean(likelihoods_successful)

print("Mean likelihood for a client without a successful instrumentation history:", mean_likelihood_not)
print("Mean likelihood for a client with a successful instrumentation history:", mean_likelihood_successful)

Mean likelihood for a client without a successful instrumentation history: 0.6026182460109253
Mean likelihood for a client with a successful instrumentation history: 0.5279079782810907


In [42]:
# Age test
# Initialize variables to store counts
same_predictions_count = 0
total_samples = len(X)

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for age 25
    X_sample_25 = X_sample.copy()
    X_sample_25['persoon_leeftijd_bij_onderzoek'] = 25
    y_pred_25 = testing_session.run(None, {'X': X_sample_25.values.astype(np.float32)})

    # Make predictions for age 65
    X_sample_65 = X_sample.copy()
    X_sample_65['persoon_leeftijd_bij_onderzoek'] = 65
    y_pred_65 = testing_session.run(None, {'X': X_sample_65.values.astype(np.float32)})

    # Check if predictions are the same
    if y_pred_25 == y_pred_65:
        same_predictions_count += 1

# Calculate the fraction of cases where the predictions are the same
fraction_same_predictions = same_predictions_count / total_samples

print("Fraction of cases where predictions are the same for age 25 and 65:", fraction_same_predictions)

Fraction of cases where predictions are the same for age 25 and 65: 0.0


In [43]:
# Gender test
# Initialize variables to store counts
same_predictions_count = 0
total_samples = len(X_sample_successful)

likelihoods_men = []
likelihoods_women = []

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for men
    X_sample_men = X_sample.copy()
    X_sample_men['persoon_geslacht_vrouw'] = 0.0
    y_pred_men = testing_session.run(None, {'X': X_sample_men.values.astype(np.float32)})

    # Make predictions for women
    X_sample_women = X_sample.copy()
    X_sample_women['persoon_geslacht_vrouw'] = 1.0
    y_pred_women = testing_session.run(None, {'X': X_sample_women.values.astype(np.float32)})
    
    
    # Append the likelihoods for both groups
    likelihoods_men.append([y_pred_men[1][0][1]])
    likelihoods_women.append([y_pred_women[1][0][1]])

    # Check if predictions are the same
    if y_pred_men == y_pred_women:
        same_predictions_count += 1

# Calculate the fraction of cases where the predictions are the same
fraction_same_predictions = same_predictions_count / total_samples

# Convert likelihoods lists to NumPy arrays
likelihoods_men = np.array(likelihoods_men)
likelihoods_women = np.array(likelihoods_women)

# Calculate the mean likelihoods for each group
mean_likelihood_men = np.mean(likelihoods_men)
mean_likelihood_women = np.mean(likelihoods_women)

print("Fraction of cases where predictions are the same for men and women:", fraction_same_predictions)
print("Mean likelihood for a man:", mean_likelihood_men)
print("Mean likelihood for a woman:", mean_likelihood_women)


Fraction of cases where predictions are the same for men and women: 0.0
Mean likelihood for a man: 0.36349982588121466
Mean likelihood for a woman: 0.861941528171039


In [44]:
# Language test
# Initialize variables to store counts
same_predictions_count = 0
total_samples = len(X)

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for not
    X_sample_not = X_sample.copy()
    X_sample_not['persoonlijke_eigenschappen_spreektaal_anders'] = 0.0
    y_pred_not = testing_session.run(None, {'X': X_sample_not.values.astype(np.float32)})

    # Make predictions for other
    X_sample_other = X_sample.copy()
    X_sample_other['persoonlijke_eigenschappen_spreektaal_anders'] = 1.0
    y_pred_other = testing_session.run(None, {'X': X_sample_other.values.astype(np.float32)})

    # Check if predictions are the same
    if y_pred_not == y_pred_other:
        same_predictions_count += 1

# Calculate the fraction of cases where the predictions are the same
fraction_same_predictions = same_predictions_count / total_samples

print("Fraction of cases where predictions are the same for dutch speakers and non-dutch speakers:", fraction_same_predictions)

Fraction of cases where predictions are the same for dutch speakers and non-dutch speakers: 1.0


In [45]:
# Children test
# Initialize variables to store counts
same_predictions_count = 0
total_samples = len(X)

# Iterate through each sample in the test set
for index, row in X.iterrows():
    # Convert the row to a DataFrame to ensure it's a DataFrame object
    X_sample = pd.DataFrame(row).transpose()

    # Make predictions for not
    X_sample_not = X_sample.copy()
    X_sample_not['relatie_kind_heeft_kinderen'] = 0.0
    y_pred_not = testing_session.run(None, {'X': X_sample_not.values.astype(np.float32)})

    # Make predictions for other
    X_sample_children = X_sample.copy()
    X_sample_children['relatie_kind_heeft_kinderen'] = 1.0
    y_pred_children = testing_session.run(None, {'X': X_sample_children.values.astype(np.float32)})

    # Check if predictions are the same
    if y_pred_not == y_pred_children:
        same_predictions_count += 1

# Calculate the fraction of cases where the predictions are the same
fraction_same_predictions = same_predictions_count / total_samples

print("Fraction of cases where predictions are the same for people with or without children:", fraction_same_predictions)

Fraction of cases where predictions are the same for people with or without children: 1.0
