The code to process German Credit data, edit/train models, and perform adversarial debiasing. 

Necessary libraries for the notebook.

In [2]:
import os
import tensorflow as tf
import tf2onnx
from tensorflow.keras.models import load_model, Model
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score
from keras.utils import to_categorical
from scipy.io import savemat
import numpy as np
import pandas as pd
import warnings
import csv

2024-07-11 13:59:18.770131: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Data Preprocessing

In [3]:
# Custom preprocessing function for the German dataset
def german_custom_preprocessing(df):
    def group_credit_hist(x):
        if x in ['A30', 'A31', 'A32']:
            return 'None/Paid'
        elif x == 'A33':
            return 'Delay'
        elif x == 'A34':
            return 'Other'
        else:
            return 'NA'

    def group_employ(x):
        if x == 'A71':
            return 'Unemployed'
        elif x in ['A72', 'A73']:
            return '1-4 years'
        elif x in ['A74', 'A75']:
            return '4+ years'
        else:
            return 'NA'

    def group_savings(x):
        if x in ['A61', 'A62']:
            return '<500'
        elif x in ['A63', 'A64']:
            return '500+'
        elif x == 'A65':
            return 'Unknown/None'
        else:
            return 'NA'

    def group_status(x):
        if x in ['A11', 'A12']:
            return '<200'
        elif x in ['A13']:
            return '200+'
        elif x == 'A14':
            return 'None'
        else:
            return 'NA'

    status_map = {'A91': 1, 'A93': 1, 'A94': 1, 'A92': 0, 'A95': 0}  # 1: 'male'
    df['sex'] = df['personal_status'].replace(status_map)

    df['credit_history'] = df['credit_history'].apply(lambda x: group_credit_hist(x))
    df['savings'] = df['savings'].apply(lambda x: group_savings(x))
    df['employment'] = df['employment'].apply(lambda x: group_employ(x))
    df['status'] = df['status'].apply(lambda x: group_status(x))

    df.credit.replace([1, 2], [1, 0], inplace=True)

    return df

def load_german():
    filepath = '../data/german/german.data'
    column_names = ['status', 'month', 'credit_history', 'purpose', 'credit_amount', 'savings', 'employment',
                    'investment_as_income_percentage', 'personal_status', 'other_debtors', 'residence_since', 
                    'property', 'age', 'installment_plans', 'housing', 'number_of_credits', 'skill_level', 
                    'people_liable_for', 'telephone', 'foreign_worker', 'credit']
    na_values = []
    df = pd.read_csv(filepath, sep=' ', header=None, names=column_names, na_values=na_values)
    
    df = german_custom_preprocessing(df)
    feat_to_drop = ['personal_status']
    df = df.drop(feat_to_drop, axis=1)
    
    # Encode categorical features
    cat_feat = ['status', 'credit_history', 'purpose', 'savings', 'employment', 'other_debtors', 'property', 
                'installment_plans', 'housing', 'skill_level', 'telephone', 'foreign_worker']
    for col in cat_feat:
        df[col] = LabelEncoder().fit_transform(df[col])
    
    # Encode the target variable
    label_name = 'credit'
    
    X = df.drop(labels=[label_name], axis=1, inplace=False)
    y = df[label_name]
    
    # Extract the protected attribute ('sex')
    protected_attribute = X['sex'].values
    
    # Split the data into training and testing sets
    seed = 42
    X_train, X_test, y_train, y_test, protected_train, protected_test = train_test_split(
        X, y, protected_attribute, test_size=0.15, random_state=seed
    )
    
    # One-hot encode the labels
    y_train = to_categorical(y_train, num_classes=2)
    y_test = to_categorical(y_test, num_classes=2)
    
    return X_train, X_test, y_train, y_test, protected_train, protected_test

# Saves data for use in verification
def load_and_save_german_data():
    X_train, X_test, y_train, y_test, _, _ = load_german()
    
    # Scaling numerical features with MinMaxScaler
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Prepare data dictionary to save as .mat file
    data_dict = {
        'X': X_test, 
        'y': y_test   
    }
    
    # Save to .mat file for use in MATLAB
    savemat("./processed_data/german_data.mat", data_dict)
    print("Data saved to german_data.mat")

    return X_train, X_test, y_train, y_test

### Model Editing

Method to save the models as onnx files for verification. 

In [4]:
# Function to save the model as ONNX format
def save_model_onnx(model, input_shape, onnx_file_path):
    # Create a dummy input tensor with the correct input shape (batch_size, input_shape)
    dummy_input = tf.random.normal([1] + list(input_shape))

    # Convert the model to ONNX
    model_proto, external_tensor_storage = tf2onnx.convert.from_keras(model, 
                                                                      input_signature=(tf.TensorSpec(shape=[None] + list(input_shape), dtype=tf.float32),),
                                                                      opset=13)
    
    # Save the ONNX model to the specified path
    with open(onnx_file_path, "wb") as f:
        f.write(model_proto.SerializeToString())
    
    print(f"Model has been saved in ONNX format at {onnx_file_path}")

Change the models so they are able to be used in FairNNV. FairNNV cannot handle sigmoid so shift to softmax and adjust final layers. 

In [5]:
# Function to modify a model for multiclass classification
def modify_model_for_multiclass(model_path, num_classes):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        model = load_model(model_path)

    # Create a new input layer with the correct shape
    new_input = tf.keras.layers.Input(shape=(20,))
    x = new_input

    # Transfer the layers except the last one
    for layer in model.layers[:-1]:
        x = layer(x)

    # Create a new output layer
    output = tf.keras.layers.Dense(num_classes, activation='softmax', name='new_output')(x)
    
    # Create a new model
    new_model = tf.keras.models.Model(inputs=new_input, outputs=output)
    
    return new_model

# Ensure the save directories exist
model_dir = './german/german_h5'
save_dir = './german/german_keras'
onnx_save_dir = './german/german_onnx'
num_classes = 2

if not os.path.exists(save_dir):
    os.makedirs(save_dir)
if not os.path.exists(onnx_save_dir):
    os.makedirs(onnx_save_dir)

# Modify each model in the directory to remove sigmoid
for model_file in os.listdir(model_dir):
    if model_file.endswith('.h5'):
        model_path = os.path.join(model_dir, model_file)
        new_model = modify_model_for_multiclass(model_path, num_classes)
        
        # Update the model's loss function
        new_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        
        # Save the modified model
        save_path = os.path.join(save_dir, model_file.replace('.h5', '.keras'))
        new_model.save(save_path)




Re-train models. 

In [6]:
# Load and preprocess the German dataset
X_train, X_test, y_train, y_test = load_and_save_german_data()

for model_file in os.listdir(save_dir):
    if model_file.endswith('.keras'):
        model_path = os.path.join(save_dir, model_file)
        
        try:
            # Load the modified model
            print(f"Loading model {model_file}")
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=UserWarning)
                model = load_model(model_path)

            # Reinitialize the optimizer
            model.compile(
                optimizer=Adam(),
                loss='categorical_crossentropy', 
                metrics=['accuracy']
            )

            # Fit the model
            print(f"Training model {model_file}")
            history = model.fit(X_train, y_train, epochs=50, validation_split=0.2)

            # Evaluate the model
            y_pred = model.predict(X_test)
            y_pred_classes = np.argmax(y_pred, axis=1)
            accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred_classes)

            print(f"Model {model_file} - Accuracy: {accuracy}")

            # Save the retrained model
            model.save(model_path)
            print(f"Model {model_file} retrained and saved successfully.")

            # Save the model as ONNX
            onnx_save_path = os.path.join(onnx_save_dir, model_file.replace('.keras', '.onnx'))
            save_model_onnx(model, (20,), onnx_save_path)

        except Exception as e:
            print(f"Failed to process {model_file}. Error: {e}")

Data saved to german_data.mat
Loading model GC-1.keras
Training model GC-1.keras
Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7007 - loss: 0.6246 - val_accuracy: 0.6941 - val_loss: 0.6084
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7253 - loss: 0.5697 - val_accuracy: 0.7059 - val_loss: 0.5946
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7239 - loss: 0.5620 - val_accuracy: 0.7059 - val_loss: 0.5841
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6937 - loss: 0.5689 - val_accuracy: 0.6941 - val_loss: 0.5763
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7141 - loss: 0.5514 - val_accuracy: 0.6824 - val_loss: 0.5696
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7286 - loss: 0.5438 -

2024-07-11 14:00:13.771781: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:13.771873: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-07-11 14:00:13.790884: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:13.791012: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6996 - loss: 0.5965 - val_accuracy: 0.7000 - val_loss: 0.5900
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7115 - loss: 0.5607 - val_accuracy: 0.6882 - val_loss: 0.5736
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7291 - loss: 0.5471 - val_accuracy: 0.6941 - val_loss: 0.5666
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7305 - loss: 0.5354 - val_accuracy: 0.6941 - val_loss: 0.5623
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7278 - loss: 0.5379 - val_accuracy: 0.6882 - val_loss: 0.5606
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7211 - loss: 0.5437 - val_accuracy: 0.7000 - val_loss: 0.5577
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

2024-07-11 14:00:17.397107: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:17.397195: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-07-11 14:00:17.419735: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:17.419857: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3110 - loss: 5.7657 - val_accuracy: 0.3059 - val_loss: 5.2232
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3000 - loss: 5.2054 - val_accuracy: 0.3059 - val_loss: 4.5671
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3022 - loss: 4.5166 - val_accuracy: 0.3059 - val_loss: 3.9421
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2780 - loss: 4.0788 - val_accuracy: 0.3059 - val_loss: 3.3576
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3061 - loss: 3.3241 - val_accuracy: 0.3118 - val_loss: 2.8127
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3114 - loss: 2.6889 - val_accuracy: 0.3000 - val_loss: 2.3056
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━







[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Model GC-3.keras - Accuracy: 0.7066666666666667
Model GC-3.keras retrained and saved successfully.
Model has been saved in ONNX format at ./german/german_onnx/GC-3.onnx
Loading model GC-4.keras
Training model GC-4.keras
Epoch 1/50


2024-07-11 14:00:21.273456: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:21.273539: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-07-11 14:00:21.290779: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:21.290867: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6395 - loss: 0.6911 - val_accuracy: 0.6941 - val_loss: 0.6855
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7205 - loss: 0.6826 - val_accuracy: 0.6941 - val_loss: 0.6786
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6572 - loss: 0.6800 - val_accuracy: 0.6941 - val_loss: 0.6724
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7057 - loss: 0.6692 - val_accuracy: 0.6941 - val_loss: 0.6660
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7050 - loss: 0.6630 - val_accuracy: 0.6941 - val_loss: 0.6608
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7050 - loss: 0.6572 - val_accuracy: 0.6941 - val_loss: 0.6556
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

2024-07-11 14:00:24.859838: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:24.859921: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-07-11 14:00:24.881069: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:24.881197: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6471 - loss: 0.6861 - val_accuracy: 0.6941 - val_loss: 0.6525
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7209 - loss: 0.6336 - val_accuracy: 0.6941 - val_loss: 0.6159
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6803 - loss: 0.6177 - val_accuracy: 0.6941 - val_loss: 0.6136
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7208 - loss: 0.5849 - val_accuracy: 0.6941 - val_loss: 0.6088
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6845 - loss: 0.6147 - val_accuracy: 0.6941 - val_loss: 0.6067
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7212 - loss: 0.5748 - val_accuracy: 0.6941 - val_loss: 0.6017
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

2024-07-11 14:00:29.906391: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:29.906478: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-07-11 14:00:29.940395: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-11 14:00:29.940483: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


### Adversairal Debiasing 

In [48]:
def save_metrics_to_csv(filename, model_file, model_name, classification_accuracy, balanced_accuracy, disparate_impact, equal_opportunity_difference, average_odds_difference,precision,recall,f1):
    # Check if the file exists to write the header only once
    file_exists = os.path.isfile(filename)

    with open(filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        if not file_exists:
            # Write the header if the file does not exist
            writer.writerow(['Model File', 'Model', 'Classification Accuracy', 'Balanced Accuracy', 'Disparate Impact', 'Equal Opportunity Difference', 'Average Odds Difference', 'Precision', 'Recall','F1'])
        
        # Write the metrics
        writer.writerow([model_file, model_name, classification_accuracy, balanced_accuracy, disparate_impact, equal_opportunity_difference, average_odds_difference, precision, recall, f1])

Various metrics for evaluation including accuracy and fairness.

In [52]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Metrics calculation functions
def precision(y_true, y_pred, average='binary'):
    return precision_score(y_true, y_pred, average=average)

def recall(y_true, y_pred, average='binary'):
    return recall_score(y_true, y_pred, average=average)

def f1(y_true, y_pred, average='binary'):
    return f1_score(y_true, y_pred, average=average)

def classification_accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

def balanced_accuracy(y_true, y_pred):
    classes = np.unique(y_true)
    recall_scores = []
    for cls in classes:
        true_positives = np.sum((y_true == cls) & (y_pred == cls))
        possible_positives = np.sum(y_true == cls)
        recall_scores.append(true_positives / possible_positives)
    return np.mean(recall_scores)

def disparate_impact(y_true, y_pred, protected_attribute):
    privileged = protected_attribute == 1
    unprivileged = protected_attribute == 0
    if np.sum(privileged) == 0 or np.sum(unprivileged) == 0:
        return np.nan
    privileged_outcome = np.mean(y_pred[privileged]) if np.sum(privileged) > 0 else np.nan
    unprivileged_outcome = np.mean(y_pred[unprivileged]) if np.sum(unprivileged) > 0 else np.nan
    if privileged_outcome == 0:
        return np.nan  
    return unprivileged_outcome / privileged_outcome

def equal_opportunity_difference(y_true, y_pred, protected_attribute):
    privileged = protected_attribute == 1
    unprivileged = protected_attribute == 0
    true_positive_rate_privileged = np.sum((y_true[privileged] == 1) & (y_pred[privileged] == 1)) / np.sum(y_true[privileged] == 1)
    true_positive_rate_unprivileged = np.sum((y_true[unprivileged] == 1) & (y_pred[unprivileged] == 1)) / np.sum(y_true[unprivileged] == 1)
    return true_positive_rate_unprivileged - true_positive_rate_privileged

def average_odds_difference(y_true, y_pred, protected_attribute):
    privileged = protected_attribute == 1
    unprivileged = protected_attribute == 0
    tpr_privileged = np.sum((y_true[privileged] == 1) & (y_pred[privileged] == 1)) / np.sum(y_true[privileged] == 1)
    tpr_unprivileged = np.sum((y_true[unprivileged] == 1) & (y_pred[unprivileged] == 1)) / np.sum(y_true[unprivileged] == 1)
    fpr_privileged = np.sum((y_true[privileged] == 0) & (y_pred[privileged] == 1)) / np.sum(y_true[privileged] == 0)
    fpr_unprivileged = np.sum((y_true[unprivileged] == 0) & (y_pred[unprivileged] == 1)) / np.sum(y_true[unprivileged] == 0)
    average_odds_privileged = (tpr_privileged + fpr_privileged) / 2
    average_odds_unprivileged = (tpr_unprivileged + fpr_unprivileged) / 2
    return average_odds_unprivileged - average_odds_privileged

In [55]:
# Adversary model definition
def build_adversary_model(input_shape):
    adversary_input = layers.Input(shape=input_shape)
    x = layers.Dense(64, activation='relu')(adversary_input)
    x = layers.Dense(32, activation='relu')(x)
    adversary_output = layers.Dense(1, activation='sigmoid')(x)
    adversary_model = models.Model(inputs=adversary_input, outputs=adversary_output)
    adversary_model.compile(optimizer='adam', loss='binary_crossentropy')
    return adversary_model

In [None]:
# Load and preprocess the data
X_train, X_test, y_train, y_test, protected_train, protected_test = load_german()

# Standardize the features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Directory paths
input_directory = './german/german_keras'
output_directory = './german/german_debiased_onnx'

# Ensure the output directory exists
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

metrics_filename = './model_metrics/german_model_metrics.csv'

# Iterate over all .keras files in the input directory to convert to ONNX file
for file in os.listdir(input_directory):
    if file.endswith('.keras'):
        # Full path to the current model file
        input_path = os.path.join(input_directory, file)
        output_path = os.path.join(output_directory, file.replace('.keras', '.onnx'))

        try:
            # Load the model
            print(f"Loading model from {input_path}")
            classifier_model = load_model(input_path)

            # Ensure the model is compiled with the correct optimizer and metrics
            classifier_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

            # Print metrics for plain model
            y_test_pred_plain = classifier_model.predict(X_test).argmax(axis=1)
            y_test_true = y_test.argmax(axis=1)

            plain_classification_accuracy = classification_accuracy(y_test_true, y_test_pred_plain)
            plain_balanced_accuracy = balanced_accuracy(y_test_true, y_test_pred_plain)
            plain_disparate_impact = disparate_impact(y_test_true, y_test_pred_plain, protected_test)
            plain_equal_opportunity_difference = equal_opportunity_difference(y_test_true, y_test_pred_plain, protected_test)
            plain_average_odds_difference = average_odds_difference(y_test_true, y_test_pred_plain, protected_test)
            plain_precision = precision(y_test_true, y_test_pred_plain, average='macro')  # Use 'macro' for multi-class
            plain_recall = recall(y_test_true, y_test_pred_plain, average='macro')        # Use 'macro' for multi-class
            plain_f1 = f1(y_test_true, y_test_pred_plain, average='macro')                # Use 'macro' for multi-class

            save_metrics_to_csv(metrics_filename, file, 'Plain Model', plain_classification_accuracy, plain_balanced_accuracy, plain_disparate_impact, plain_equal_opportunity_difference, plain_average_odds_difference, plain_precision, plain_recall, plain_f1)
            
            # Build and compile the adversary model
            adversary_model = build_adversary_model(classifier_model.output_shape[1:])

            # Training parameters
            num_epochs = 50
            batch_size = 128
            learning_rate = 0.001
            adversary_loss_weight = 0.7

            # Optimizers
            classifier_optimizer = tf.keras.optimizers.Adam(learning_rate)
            adversary_optimizer = tf.keras.optimizers.Adam(learning_rate)

            # Loss functions
            classification_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
            adversary_loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

           # Training loop
            for epoch in range(num_epochs):
                # Shuffle the training data
                indices = np.arange(X_train.shape[0])
                np.random.shuffle(indices)
                
                # Mini-batch training
                for start in range(0, X_train.shape[0], batch_size):
                    end = min(start + batch_size, X_train.shape[0])
                    batch_indices = indices[start:end]
                    
                    X_batch = X_train[batch_indices]
                    y_batch = y_train[batch_indices]
                    protected_batch = protected_train[batch_indices].reshape(-1, 1)
                    
                    with tf.GradientTape() as classifier_tape:
                        # Forward pass through the classifier
                        classifier_predictions = classifier_model(X_batch, training=True)
                        
                        # Forward pass through the adversary with no gradient accumulation
                        adversary_predictions = adversary_model(classifier_predictions, training=False)
                        
                        # Compute losses
                        classification_loss = classification_loss_fn(y_batch, classifier_predictions)
                        adversary_loss = adversary_loss_fn(protected_batch, adversary_predictions)
                        total_loss = classification_loss - adversary_loss_weight * adversary_loss
                    
                    # Compute gradients and update classifier weights
                    classifier_gradients = classifier_tape.gradient(total_loss, classifier_model.trainable_variables)
                    classifier_optimizer.apply_gradients(zip(classifier_gradients, classifier_model.trainable_variables))
                    
                    with tf.GradientTape() as adversary_tape:
                        # Forward pass through the classifier
                        classifier_predictions = classifier_model(X_batch, training=True)
                        
                        # Forward pass through the adversary
                        adversary_predictions = adversary_model(classifier_predictions, training=True)
                        
                        # Compute adversary loss
                        adversary_loss = adversary_loss_fn(protected_batch, adversary_predictions)
                    
                    # Compute gradients and update adversary weights
                    adversary_gradients = adversary_tape.gradient(adversary_loss, adversary_model.trainable_variables)
                    adversary_optimizer.apply_gradients(zip(adversary_gradients, adversary_model.trainable_variables))
    
                print(f"Epoch {epoch + 1}/{num_epochs}, Classification Loss: {classification_loss.numpy()}, Adversary Loss: {adversary_loss.numpy()}")

            
            # Predictions for debiased model
            y_test_pred_debiased = classifier_model.predict(X_test).argmax(axis=1)

            debiased_classification_accuracy = classification_accuracy(y_test_true, y_test_pred_debiased)
            debiased_balanced_accuracy = balanced_accuracy(y_test_true, y_test_pred_debiased)
            debiased_disparate_impact = disparate_impact(y_test_true, y_test_pred_debiased, protected_test)
            debiased_equal_opportunity_difference = equal_opportunity_difference(y_test_true, y_test_pred_debiased, protected_test)
            debiased_average_odds_difference = average_odds_difference(y_test_true, y_test_pred_debiased, protected_test)
            debiased_precision = precision(y_test_true, y_test_pred_plain, average='macro')  # Use 'macro' for multi-class
            debiased_recall = recall(y_test_true, y_test_pred_plain, average='macro')        # Use 'macro' for multi-class
            debiased_f1 = f1(y_test_true, y_test_pred_plain, average='macro')                # Use 'macro' for multi-class

            save_metrics_to_csv(metrics_filename, file, 'Debiased Model', debiased_classification_accuracy, debiased_balanced_accuracy, debiased_disparate_impact, debiased_equal_opportunity_difference, debiased_average_odds_difference, debiased_precision, debiased_recall, debiased_f1)
            
            # Save the debiased model as ONNX
            input_shape = (20,)  # Adjust the input shape based on your model's expected input
            save_model_onnx(classifier_model, input_shape, output_path)

        except Exception as e:
            print(f"Failed to convert {file}. Error: {e}")

## Adversarial Debiasing Process For Multiple Runs

In [None]:
import csv

def save_metrics_to_csv(filename, model_name, model_type, means, stds):
    headers = [
        'model_name', 'model_type', 'metric', 'mean', 'std_dev'
    ]
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if os.path.getsize(filename) == 0:
            writer.writerow(headers)
        for metric, mean_value in means.items():
            writer.writerow([model_name, model_type, metric, mean_value, stds[metric]])

# Load and preprocess the data
X_train, X_test, y_train, y_test, protected_train, protected_test = load_german()

# Standardize the features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Directory paths
input_directory = './german/german_keras'
output_directory = './german/german_debiased_onnx'

# Ensure the output directory exists
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

metrics_filename = './model_metrics/german_model_metrics.csv'

# Number of runs
num_runs = 5

# Iterate over all .keras files in the input directory to convert to ONNX file
for file in os.listdir(input_directory):
    if file.endswith('.keras'):
        # Full path to the current model file
        input_path = os.path.join(input_directory, file)
        output_path = os.path.join(output_directory, file.replace('.keras', '.onnx'))

        try:
            plain_metrics = {
                'classification_accuracy': [],
                'balanced_accuracy': [],
                'disparate_impact': [],
                'equal_opportunity_difference': [],
                'average_odds_difference': [],
                'precision': [],
                'recall': [],
                'f1': []
            }

            debiased_metrics = {
                'classification_accuracy': [],
                'balanced_accuracy': [],
                'disparate_impact': [],
                'equal_opportunity_difference': [],
                'average_odds_difference': [],
                'precision': [],
                'recall': [],
                'f1': []
            }
           
            # Inside the loop for each .keras file
            for run in range(num_runs):
                print(f"Run {run + 1}/{num_runs}")

                # Random seed for variability
                np.random.seed(run)
                tf.random.set_seed(run)

                # Shuffle the training data
                indices = np.arange(X_train.shape[0])
                np.random.shuffle(indices)
                X_train_shuffled = X_train[indices]
                y_train_shuffled = y_train[indices]
                protected_train_shuffled = protected_train[indices]

                # Load the model
                classifier_model = load_model(input_path)

                # Compile the model
                classifier_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

                # Train the model with shuffled data
                classifier_model.fit(X_train_shuffled, y_train_shuffled, epochs=10, batch_size=128, verbose=0)

                # Train and evaluate the plain model
                y_test_pred_plain = classifier_model.predict(X_test).argmax(axis=1)
                y_test_true = y_test.argmax(axis=1)

                plain_metrics['classification_accuracy'].append(classification_accuracy(y_test_true, y_test_pred_plain))
                plain_metrics['balanced_accuracy'].append(balanced_accuracy(y_test_true, y_test_pred_plain))
                plain_metrics['disparate_impact'].append(disparate_impact(y_test_true, y_test_pred_plain, protected_test))
                plain_metrics['equal_opportunity_difference'].append(equal_opportunity_difference(y_test_true, y_test_pred_plain, protected_test))
                plain_metrics['average_odds_difference'].append(average_odds_difference(y_test_true, y_test_pred_plain, protected_test))
                plain_metrics['precision'].append(precision_score(y_test_true, y_test_pred_plain, average='macro', zero_division=1))
                plain_metrics['recall'].append(recall_score(y_test_true, y_test_pred_plain, average='macro'))
                plain_metrics['f1'].append(f1_score(y_test_true, y_test_pred_plain, average='macro'))

                # Build and compile the adversary model
                adversary_model = build_adversary_model(classifier_model.output_shape[1:])

                # Training parameters
                num_epochs = 50
                batch_size = 128
                learning_rate = 0.001
                adversary_loss_weight = 0.7

                # Optimizers
                classifier_optimizer = tf.keras.optimizers.Adam(learning_rate)
                adversary_optimizer = tf.keras.optimizers.Adam(learning_rate)

                # Loss functions
                classification_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
                adversary_loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

                # Training loop
                for epoch in range(num_epochs):
                    # Shuffle the training data
                    indices = np.arange(X_train_shuffled.shape[0])
                    np.random.shuffle(indices)
                    
                    # Mini-batch training
                    for start in range(0, X_train_shuffled.shape[0], batch_size):
                        end = min(start + batch_size, X_train_shuffled.shape[0])
                        batch_indices = indices[start:end]
                        
                        X_batch = X_train_shuffled[batch_indices]
                        y_batch = y_train_shuffled[batch_indices]
                        protected_batch = protected_train_shuffled[batch_indices].reshape(-1, 1)
                        
                        with tf.GradientTape() as classifier_tape:
                            # Forward pass through the classifier
                            classifier_predictions = classifier_model(X_batch, training=True)
                            
                            # Forward pass through the adversary with no gradient accumulation
                            adversary_predictions = adversary_model(classifier_predictions, training=False)
                            
                            # Compute losses
                            classification_loss = classification_loss_fn(y_batch, classifier_predictions)
                            adversary_loss = adversary_loss_fn(protected_batch, adversary_predictions)
                            total_loss = classification_loss - adversary_loss_weight * adversary_loss
                        
                        # Compute gradients and update classifier weights
                        classifier_gradients = classifier_tape.gradient(total_loss, classifier_model.trainable_variables)
                        classifier_optimizer.apply_gradients(zip(classifier_gradients, classifier_model.trainable_variables))
                        
                        with tf.GradientTape() as adversary_tape:
                            # Forward pass through the classifier
                            classifier_predictions = classifier_model(X_batch, training=True)
                            
                            # Forward pass through the adversary
                            adversary_predictions = adversary_model(classifier_predictions, training=True)
                            
                            # Compute adversary loss
                            adversary_loss = adversary_loss_fn(protected_batch, adversary_predictions)
                        
                        # Compute gradients and update adversary weights
                        adversary_gradients = adversary_tape.gradient(adversary_loss, adversary_model.trainable_variables)
                        adversary_optimizer.apply_gradients(zip(adversary_gradients, adversary_model.trainable_variables))

                    print(f"Epoch {epoch + 1}/{num_epochs}, Classification Loss: {classification_loss.numpy()}, Adversary Loss: {adversary_loss.numpy()}")

                y_test_pred_debiased = classifier_model.predict(X_test).argmax(axis=1)

                debiased_metrics['classification_accuracy'].append(classification_accuracy(y_test_true, y_test_pred_debiased))
                debiased_metrics['balanced_accuracy'].append(balanced_accuracy(y_test_true, y_test_pred_debiased))
                debiased_metrics['disparate_impact'].append(disparate_impact(y_test_true, y_test_pred_debiased, protected_test))
                debiased_metrics['equal_opportunity_difference'].append(equal_opportunity_difference(y_test_true, y_test_pred_debiased, protected_test))
                debiased_metrics['average_odds_difference'].append(average_odds_difference(y_test_true, y_test_pred_debiased, protected_test))
                debiased_metrics['precision'].append(precision_score(y_test_true, y_test_pred_debiased, average='macro', zero_division=1))
                debiased_metrics['recall'].append(recall_score(y_test_true, y_test_pred_debiased, average='macro'))
                debiased_metrics['f1'].append(f1_score(y_test_true, y_test_pred_debiased, average='macro'))

            # Calculate mean and std for plain metrics
            plain_means = {key: np.mean(values) for key, values in plain_metrics.items()}
            plain_stds = {key: np.std(values) for key, values in plain_metrics.items()}

            # Calculate mean and std for debiased metrics
            debiased_means = {key: np.mean(values) for key, values in debiased_metrics.items()}
            debiased_stds = {key: np.std(values) for key, values in debiased_metrics.items()}

            # Save metrics to CSV
            save_metrics_to_csv(metrics_filename, file, 'Plain Model', plain_means, plain_stds)
            save_metrics_to_csv(metrics_filename, file, 'Debiased Model', debiased_means, debiased_stds)

            # Save the debiased model as ONNX
            input_shape = (20,)  # Adjust the input shape based on your model's expected input
            save_model_onnx(classifier_model, input_shape, output_path)

        except Exception as e:
            print(f"Failed to convert {file}. Error: {e}")

### Workshopping Adversarial Debiasing Code

In [27]:
# # Adversary model definition
# def build_adversary_model(input_shape):
#     adversary_input = layers.Input(shape=input_shape)
#     x = layers.Dense(64, activation='relu')(adversary_input)
#     x = layers.Dense(32, activation='relu')(x)
#     adversary_output = layers.Dense(1, activation='sigmoid')(x)
#     adversary_model = models.Model(inputs=adversary_input, outputs=adversary_output)
#     adversary_model.compile(optimizer='adam', loss='binary_crossentropy')
#     return adversary_model
from tensorflow.keras.models import load_model, Model, Sequential
from tensorflow.keras.layers import Dense, Input

# Function to build the adversary model
def build_adversary_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1)  # No activation here, we will use `from_logits=True` in the loss function
    ])
    return model

# Load and preprocess the data
X_train, X_test, y_train, y_test, protected_train, protected_test = load_german()

# Standardize the features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Directory paths
input_directory = './german/german_keras'
output_directory = './german/german_debiased_onnx'

# Ensure the output directory exists
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

metrics_filename = './model_metrics/german_model_metrics.csv'

In [43]:
import os
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense, Input
import numpy as np

def build_adversary_model_from_classifier(classifier_model):
    input_shape = classifier_model.output_shape[1:]  # Get the shape of the classifier's output
    adversary_input = Input(shape=input_shape)
    
    x = adversary_input
    for layer in classifier_model.layers[1:-1]:  # Skip the first and last layers
        if isinstance(layer, Dense):
            x = Dense(layer.units, activation='relu')(x)  # Use relu for hidden layers
    
    adversary_output = Dense(1)(x)  # Final adversary output layer without activation
    adversary_model = Model(inputs=adversary_input, outputs=adversary_output)
    
    return adversary_model


for file in os.listdir(input_directory):
    if file.endswith('.keras'):
        input_path = os.path.join(input_directory, file)
        output_path = os.path.join(output_directory, file.replace('.keras', '.onnx'))

        try:
            # Load the classifier model
            classifier_model = load_model(input_path)
            # classifier_model.summary()

            # Modify the classifier model
            input_layer = classifier_model.input
            hidden_layers = classifier_model.layers[:-1]
            new_output_layer = Dense(units=2, name="new_output_layer")(hidden_layers[-1].output)
            modified_classifier_model = Model(inputs=input_layer, outputs=new_output_layer)
            # modified_classifier_model.summary()

            # Print metrics for plain model
            y_test_pred_plain = classifier_model.predict(X_test).argmax(axis=1)
            y_test_true = y_test.argmax(axis=1)
            plain_classification_accuracy = classification_accuracy(y_test_true, y_test_pred_plain)
            plain_balanced_accuracy = balanced_accuracy(y_test_true, y_test_pred_plain)
            plain_disparate_impact = disparate_impact(y_test_true, y_test_pred_plain, protected_test)
            plain_equal_opportunity_difference = equal_opportunity_difference(y_test_true, y_test_pred_plain, protected_test)
            plain_average_odds_difference = average_odds_difference(y_test_true, y_test_pred_plain, protected_test)
            save_metrics_to_csv(metrics_filename, file, 'Plain Model', plain_classification_accuracy, plain_balanced_accuracy, plain_disparate_impact, plain_equal_opportunity_difference, plain_average_odds_difference)

            # Build the adversary model
            adversary_model = build_adversary_model_from_classifier(modified_classifier_model)
            # adversary_model.summary()

            # Training parameters
            num_epochs = 50
            batch_size = 128

            # Loss functions
            classification_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
            adversary_loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

            # Parameters
            adversary_loss_weight = 0.9
            classifier_learning_rate = 0.0005
            adversary_learning_rate = 0.001

            # Optimizers
            classifier_optimizer = tf.keras.optimizers.Adam(classifier_learning_rate)
            adversary_optimizer = tf.keras.optimizers.Adam(adversary_learning_rate)

            # Training loop
            for epoch in range(num_epochs):
                indices = np.arange(X_train.shape[0])
                np.random.shuffle(indices)
                
                for start in range(0, X_train.shape[0], batch_size):
                    end = min(start + batch_size, X_train.shape[0])
                    batch_indices = indices[start:end]
                    
                    X_batch = X_train[batch_indices]
                    y_batch = y_train[batch_indices]
                    protected_batch = protected_train[batch_indices].reshape(-1, 1)
                    
                    with tf.GradientTape() as classifier_tape, tf.GradientTape() as adversary_tape:
                        logits = modified_classifier_model(X_batch, training=True)
                        adversary_predictions = adversary_model(logits, training=True)
                        
                        classification_loss = classification_loss_fn(y_batch, logits)
                        adversary_loss = adversary_loss_fn(protected_batch, adversary_predictions)
                        total_loss = classification_loss - adversary_loss_weight * adversary_loss
                    
                    classifier_gradients = classifier_tape.gradient(total_loss, modified_classifier_model.trainable_variables)
                    adversary_gradients = adversary_tape.gradient(adversary_loss, adversary_model.trainable_variables)
                    
                    combined_gradients = []
                    for c_grad, a_grad in zip(classifier_gradients, adversary_gradients):
                        if c_grad is not None and a_grad is not None:
                            proj = tf.reduce_sum(c_grad * a_grad) / (tf.reduce_sum(a_grad * a_grad) + 1e-8)
                            combined_grad = c_grad - proj * a_grad - adversary_loss_weight * a_grad
                            combined_gradients.append(combined_grad)
                        else:
                            combined_gradients.append(c_grad)

                    classifier_optimizer.apply_gradients(zip(combined_gradients, modified_classifier_model.trainable_variables))
                    adversary_optimizer.apply_gradients(zip(adversary_gradients, adversary_model.trainable_variables))
                
                print(f"Epoch {epoch + 1}/{num_epochs}, Classification Loss: {classification_loss.numpy()}, Adversary Loss: {adversary_loss.numpy()}")

                classifier_predictions_test = modified_classifier_model.predict(X_test)
                adversary_predictions_test = adversary_model.predict(classifier_predictions_test).round()
                adversary_accuracy = np.mean(adversary_predictions_test == protected_test.reshape(-1, 1))
                print(f"Adversary accuracy on test set: {adversary_accuracy}")

                y_test_pred_debiased = modified_classifier_model.predict(X_test).argmax(axis=1)
                debiased_classification_accuracy = classification_accuracy(y_test_true, y_test_pred_debiased)
                debiased_balanced_accuracy = balanced_accuracy(y_test_true, y_test_pred_debiased)
                debiased_disparate_impact = disparate_impact(y_test_true, y_test_pred_debiased, protected_test)
                debiased_equal_opportunity_difference = equal_opportunity_difference(y_test_true, y_test_pred_debiased, protected_test)
                debiased_average_odds_difference = average_odds_difference(y_test_true, y_test_pred_debiased, protected_test)
                save_metrics_to_csv(metrics_filename, file, 'Debiased Model', debiased_classification_accuracy, debiased_balanced_accuracy, debiased_disparate_impact, debiased_equal_opportunity_difference, debiased_average_odds_difference)
                break

        except Exception as e:
            print(f"Failed to convert {file}. Error: {e}")
        break


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


Failed to convert GC-1.keras. Error: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [20,50] vs. [2,50] [Op:Mul] name: 


2024-07-12 16:03:52.135386: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Incompatible shapes: [20,50] vs. [2,50]
