In [1]:
import pandas as pd
import numpy as np

from fit_adversarial import fit_adversarial_regressor, test_model
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("all_vals_race1_interestrate1.csv")
from sklearn.model_selection import train_test_split

df.drop(
    columns=[
        "IsTestData",
        "IsCalibrateData",
        "v1",
        "istestdata2",
        "iscalibratedata2",
    ],
    inplace=True,
)


df["minority"] = (
    (df["race_dum_Black"] == 1)
    | (df["race_dum_Native Am, Alaska, Hawaii"] == 1)
    | (df["race_dum_White hisp"] == 1))

import tensorflow as tf
print(tf.sysconfig.get_build_info())
print("GPUs Available:", tf.config.list_physical_devices("GPU"))

race_columns = [
    "race_dum_Unknown",
    "race_dum_Asian",
    "race_dum_Black",
    "race_dum_Native Am, Alaska, Hawaii",
    "race_dum_White hisp",
    "race_dum_White_non_hisp",
]

ethnicity_column = "ethnicity_dum_hisp"

features = df.drop(
    columns=["default", "cur_int_rate", "sato", "default_age"] + race_columns + [ethnicity_column],
    axis=1,
)

target = df["cur_int_rate"]


sensitive_features = df[["minority"]].values

import math

features = features.applymap(
    lambda x: x if isinstance(x, (int, float)) and not math.isnan(x) else 0
)

X_train, X_val, y_train, y_val, sensitive_train, sensitive_val = train_test_split(
    features, target, sensitive_features, test_size=0.3, random_state=42
)

  df = pd.read_csv("all_vals_race1_interestrate1.csv")


OrderedDict([('cpu_compiler', 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe'), ('cuda_compute_capabilities', ['sm_35', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'compute_80']), ('cuda_version', '64_112'), ('cudart_dll_name', 'cudart64_112.dll'), ('cudnn_dll_name', 'cudnn64_8.dll'), ('cudnn_version', '64_8'), ('is_cuda_build', True), ('is_rocm_build', False), ('is_tensorrt_build', False), ('msvcp_dll_names', 'msvcp140.dll,msvcp140_1.dll'), ('nvcuda_dll_name', 'nvcuda.dll')])
GPUs Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


  features = features.applymap(


In [2]:
# EarlyStopping-style callback factory compatible with AdversarialFairnessRegressor callbacks API
def make_early_stopping(monitor="val_predictor_loss", min_delta=0.0, patience=5, mode="min", restore_best_weights=True):
    """
    Create a callback that inspects estimator._history at epoch end and stops when
    the monitored metric plateaus (no improvement for `patience` epochs).
    
    Parameters
    ----------
    monitor: str
        One of keys recorded in estimator._history (e.g., 'val_predictor_loss').
    min_delta: float
        Minimum change to qualify as an improvement.
    patience: int
        Number of epochs with no improvement after which training will be stopped.
    mode: str
        'min' to minimize the monitored value, 'max' to maximize.
    restore_best_weights: bool
        If True, restore predictor/adversary weights from the best epoch upon stopping.
    """
    if mode not in ("min", "max"):
        raise ValueError("mode must be 'min' or 'max'")
    state = {
        "best": None,
        "wait": 0,
        "best_weights": None,
        "best_step": 0,
    }
    def cb(self, step, X, y, z, pos_label):
        hist = getattr(self, "_history", None)
        if not hist or monitor not in hist or len(hist[monitor]) == 0:
            # Nothing to monitor yet (likely before first epoch end)
            return False
        current = hist[monitor][-1]
        best = state["best"]
        improved = (current < (best - min_delta)) if (mode == "min" and best is not None) else (current > (best + min_delta)) if (mode == "max" and best is not None) else True
        if improved:
            state["best"] = current
            state["wait"] = 0
            # Save best weights for TF backend (predictor & adversary)
            if restore_best_weights:
                try:
                    p_w = self.backendEngine_.predictor_model.get_weights()
                    a_w = self.backendEngine_.adversary_model.get_weights()
                    state["best_weights"] = (p_w, a_w)
                    state["best_step"] = step
                except Exception:
                    # If backend doesn't support get_weights, ignore
                    pass
        else:
            state["wait"] += 1
            if state["wait"] >= patience:
                if restore_best_weights and state["best_weights"] is not None:
                    try:
                        pw, aw = state["best_weights"]
                        self.backendEngine_.predictor_model.set_weights(pw)
                        self.backendEngine_.adversary_model.set_weights(aw)
                    except Exception:
                        pass
                print(f"EarlyStopping: stopping (monitor={monitor}, best={state['best']}, step={state['best_step']})")
                return True
        return False
    return cb

In [3]:
alphas = [0, 1, 5, 10, 50]
lr_decays = [1, 0.95, 0.9, 0.85, 0.75]
alpha_decays = [1, 0.95, 0.9, 0.85, 0.75]

In [5]:
import os, json, itertools

os.makedirs("models", exist_ok=True)
metrics_path = "models/metrics_results.jsonl"

if not os.path.exists(metrics_path):
    with open(metrics_path, "w") as f:
        pass

for alpha, lr_decay, a_decay in itertools.product(alphas, lr_decays, alpha_decays):
    cb = make_early_stopping(monitor="val_predictor_loss", min_delta=1e-4, patience=4, mode="min", restore_best_weights=True)
    try:
        mitigator, scaler = fit_adversarial_regressor(
            alpha=alpha,
            X_train=X_train,
            X_val=X_val,
            y_train=y_train,
            y_val=y_val,
            sensitive_train=sensitive_train,
            sensitive_val=sensitive_val,
            epochs=10,
            use_skip_connections=False,
            alpha_decay=a_decay,
            learning_rate_decay=lr_decay,
            # callbacks=[cb],
        )
        metrics = test_model(mitigator, scaler, features, target, sensitive_features)
        metrics_record = {
            "alpha": alpha,
            "learning_rate_decay": lr_decay,
            "alpha_decay": a_decay,
            **metrics,
        }
        with open(metrics_path, "a") as f:
            f.write(json.dumps(metrics_record) + "\n")
        print(f"Done: {metrics_record}")
    except Exception as e:
        err_record = {
            "alpha": alpha,
            "learning_rate_decay": lr_decay,
            "alpha_decay": a_decay,
            "error": str(e),
        }
        with open(metrics_path, "a") as f:
            f.write(json.dumps(err_record) + "\n")
        print(f"Failed: {err_record}")

Epoch 1/10, Average adversary loss: 0.501989. Average predictor loss: 0.000867
Current alpha 0, learning_rate 0.001
Validation - Epoch 1/10, Adversary Loss: 5.119798, Predictor Loss: 0.000074
Validation - Epoch 1/10, Adversary Loss: 5.119798, Predictor Loss: 0.000074
Epoch 2/10, Average adversary loss: 0.501339. Average predictor loss: 0.000100
Current alpha 0, learning_rate 0.001
Epoch 2/10, Average adversary loss: 0.501339. Average predictor loss: 0.000100
Current alpha 0, learning_rate 0.001
Validation - Epoch 2/10, Adversary Loss: 5.132249, Predictor Loss: 0.000059
Validation - Epoch 2/10, Adversary Loss: 5.132249, Predictor Loss: 0.000059
Epoch 3/10, Average adversary loss: 0.501357. Average predictor loss: 0.000079
Current alpha 0, learning_rate 0.001
Epoch 3/10, Average adversary loss: 0.501357. Average predictor loss: 0.000079
Current alpha 0, learning_rate 0.001
Validation - Epoch 3/10, Adversary Loss: 5.079553, Predictor Loss: 0.000056
Validation - Epoch 3/10, Adversary Loss:

In [1]:
import json

path = r"C:\Projects\fairlearn-adversarial\models\metrics_results.jsonl"

data = []
with open(path, "r") as f:
    for line in f:
        data.append(json.loads(line))

print(data)

[{'alpha': 0, 'learning_rate_decay': 1, 'alpha_decay': 1, 'mse': 5.4688255356159316e-05, 'rmse': 0.00739515080009592, 'mae': 0.005658382666967845, 'r2': 0.7390588040848548, 'fairness_metrics': {'mean_metrics': {'independence': 0.0031765287921848725, 'separation': 9.807805880815603e-06, 'sufficiency': 0.0013216665718638787}, 'std_metrics': {'independence': 3.651324731483567e-05, 'separation': 2.4656923442328885e-06, 'sufficiency': 2.46969736900718e-05}}}, {'alpha': 0, 'learning_rate_decay': 1, 'alpha_decay': 0.95, 'mse': 5.213397722093475e-05, 'rmse': 0.007220386223806504, 'mae': 0.005393061180550133, 'r2': 0.751246364045667, 'fairness_metrics': {'mean_metrics': {'independence': 0.003600521547647521, 'separation': 1.5137529717443692e-05, 'sufficiency': 0.0009020291195538978}, 'std_metrics': {'independence': 3.952675871360835e-05, 'separation': 2.731046200956052e-06, 'sufficiency': 1.8677739540665e-05}}}, {'alpha': 0, 'learning_rate_decay': 1, 'alpha_decay': 0.9, 'mse': 5.484349568587604

In [4]:
from collections import defaultdict

best_by_alpha = {}

for record in data:
    if "alpha" not in record or "r2" not in record:
        continue  # skip malformed or non-metric records

    a = record["alpha"]
    if a not in best_by_alpha or record["r2"] > best_by_alpha[a]["r2"]:
        best_by_alpha[a] = record

print(best_by_alpha)

{0: {'alpha': 0, 'learning_rate_decay': 0.85, 'alpha_decay': 1, 'mse': 5.072738701088386e-05, 'rmse': 0.007122316126856759, 'mae': 0.005302134637868838, 'r2': 0.7579578111229759, 'fairness_metrics': {'mean_metrics': {'independence': 0.0031981592628612513, 'separation': 1.3542702753040756e-05, 'sufficiency': 0.0013037254526604915}, 'std_metrics': {'independence': 3.667735872724549e-05, 'separation': 2.8744757320123665e-06, 'sufficiency': 2.477864309438256e-05}}}, 1: {'alpha': 1, 'learning_rate_decay': 1, 'alpha_decay': 0.75, 'mse': 5.363942724908498e-05, 'rmse': 0.007323894268016502, 'mae': 0.005419218711344919, 'r2': 0.7440632142417893, 'fairness_metrics': {'mean_metrics': {'independence': 9.549986511460863e-05, 'separation': 0.00936885913795044, 'sufficiency': 0.013716890868230507}, 'std_metrics': {'independence': 6.7320121488598855e-06, 'separation': 8.167177225567291e-05, 'sufficiency': 8.581046152869767e-05}}}, 5: {'alpha': 5, 'learning_rate_decay': 1, 'alpha_decay': 0.9, 'mse': 5.

In [5]:
best_list = list(best_by_alpha.values())

In [6]:
for best in best_list:
    print(best)

{'alpha': 0, 'learning_rate_decay': 0.85, 'alpha_decay': 1, 'mse': 5.072738701088386e-05, 'rmse': 0.007122316126856759, 'mae': 0.005302134637868838, 'r2': 0.7579578111229759, 'fairness_metrics': {'mean_metrics': {'independence': 0.0031981592628612513, 'separation': 1.3542702753040756e-05, 'sufficiency': 0.0013037254526604915}, 'std_metrics': {'independence': 3.667735872724549e-05, 'separation': 2.8744757320123665e-06, 'sufficiency': 2.477864309438256e-05}}}
{'alpha': 1, 'learning_rate_decay': 1, 'alpha_decay': 0.75, 'mse': 5.363942724908498e-05, 'rmse': 0.007323894268016502, 'mae': 0.005419218711344919, 'r2': 0.7440632142417893, 'fairness_metrics': {'mean_metrics': {'independence': 9.549986511460863e-05, 'separation': 0.00936885913795044, 'sufficiency': 0.013716890868230507}, 'std_metrics': {'independence': 6.7320121488598855e-06, 'separation': 8.167177225567291e-05, 'sufficiency': 8.581046152869767e-05}}}
{'alpha': 5, 'learning_rate_decay': 1, 'alpha_decay': 0.9, 'mse': 5.408224006645

In [13]:
from collections import defaultdict
import numbers


def extract_numeric_metrics(record):
    metrics = {}

    # top-level numeric metrics
    for k, v in record.items():
        if isinstance(v, numbers.Number) and k != "alpha":
            metrics[k] = v

    # fairness metrics (nested)
    fm = record.get("fairness_metrics", {})
    mm = fm.get("mean_metrics", {})

    for k, v in mm.items():
        if isinstance(v, numbers.Number):
            metrics[f"fairness_{k}"] = v

    return metrics


# 1. Filter records by r2
filtered = [rec for rec in data if isinstance(rec.get("r2"), numbers.Number) and rec["r2"] > 0.65]


# 2. Group by alpha
groups = defaultdict(list)
for rec in filtered:
    if "alpha" in rec:
        groups[rec["alpha"]].append(rec)


# 3. Compute averages
averages = {}

for alpha, records in groups.items():
    sums = defaultdict(float)
    counts = defaultdict(int)

    for rec in records:
        flat = extract_numeric_metrics(rec)
        for k, v in flat.items():
            sums[k] += v
            counts[k] += 1

    averages[alpha] = {k: sums[k] / counts[k] for k in sums}


# 4. Print sorted output
for alpha in sorted(averages.keys()):
    print(f"alpha={alpha}")
    print(averages[alpha])
    print()

alpha=0
{'learning_rate_decay': 0.8900000000000001, 'alpha_decay': 0.89, 'mse': 5.3692503273633075e-05, 'rmse': 0.007325885374696927, 'mae': 0.0054752259396723645, 'r2': 0.7438099656927214, 'fairness_independence': 0.004233565153942782, 'fairness_separation': 0.0013136306353729035, 'fairness_sufficiency': 0.0015615796408697713}

alpha=1
{'learning_rate_decay': 0.9076923076923078, 'alpha_decay': 0.8538461538461538, 'mse': 5.75881185164457e-05, 'rmse': 0.007585187359774986, 'mae': 0.005676449616069283, 'r2': 0.7252223092815846, 'fairness_independence': 0.011890532491393404, 'fairness_separation': 0.050008932285231365, 'fairness_sufficiency': 0.04353303019881484}

alpha=5
{'learning_rate_decay': 0.8950000000000001, 'alpha_decay': 0.8599999999999998, 'mse': 6.050312065317791e-05, 'rmse': 0.0077681446888222955, 'mae': 0.005782017090694967, 'r2': 0.7113135799081503, 'fairness_independence': 0.024107526662311866, 'fairness_separation': 0.07392973626381882, 'fairness_sufficiency': 0.0578763649