In [1]:
import os
from pathlib import Path

# Locate the project root instead of hard-coding one developer's absolute path:
# walk up from the current directory until a folder containing both
# `pyproject.toml` and `src/` is found. Starting from the root itself or from
# any folder below it (e.g. `notebooks/`) gives the same result, so re-running
# this cell is harmless.
_start_dir = Path.cwd()
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "pyproject.toml").is_file() and (candidate / "src").is_dir()
    ),
    None,
)
if PROJECT_ROOT is None:
    raise FileNotFoundError(
        f"Could not find a project root (pyproject.toml + src/) at or above {_start_dir}"
    )

# Presumably needed so that the `from src...` imports in later cells resolve
# against the project root — confirm if the package is installed instead.
os.chdir(PROJECT_ROOT)
os.listdir()

['.venv',
 'src',
 '.gitignore',
 'main.py',
 'configs',
 'README.md',
 'pyproject.toml',
 '.python-version',
 'uv.lock',
 'notebooks',
 'Makefile',
 '.mypy_cache',
 'requirements.txt',
 '.env']

In [None]:
# Build the project's configuration manager and pull out its model section.
from src.config import ConfigManager
config_manager = ConfigManager()
model_config = config_manager.get_model_config()
# Bare last expression so the notebook displays the loaded model configuration.
model_config

In [None]:
from src.data_loader import DataLoader
from src.model_loader import ModelLoader

In [None]:
# Fetch the dataset section of the configuration and hand it to the data loader.
dataset_config = config_manager.get_dataset_config()
data_loader = DataLoader(dataset_config)
# Last expression of the cell: whatever load_data() returns is displayed.
data_loader.load_data()

In [None]:
# Request five sample prompts; the list [0, 1, 2, 3, 4] is presumably a set of
# row indices into the loaded dataset — TODO confirm against DataLoader.
sample_prompts = data_loader.get_sample_prompts([0, 1, 2, 3, 4])
sample_prompts

In [None]:
# Every constructor argument comes from the Hugging Face section of the model
# configuration, so look that section up once and pass its fields by keyword.
hf_settings = model_config.huggingface
model_loader = ModelLoader(
    model_id=hf_settings.model_id,
    device=hf_settings.device,
    max_new_tokens=hf_settings.max_new_tokens,
    temperature=hf_settings.temperature,
    top_p=hf_settings.top_p,
    system_prompt=hf_settings.system_prompt,
)

In [None]:
# Inspect the system prompt stored on the loader's model_config attribute.
model_loader.model_config.system_prompt

In [None]:
# Show how the loader formats the first sample prompt.
# NOTE(review): _format_prompt is underscore-prefixed, i.e. private by
# convention — fine for exploration, but not something to rely on as an API.
formatted_prompt = model_loader._format_prompt(sample_prompts[0])
print(formatted_prompt)

In [None]:
# Run one prediction on the first sample prompt and display what it returns.
model_loader.predict(sample_prompts[0])

## Inference Module

In [2]:
# NOTE(review): this repeats the ConfigManager import and instantiation from
# the first section, and config_manager is not referenced by any of the
# visible cells below — candidate for removal once confirmed unused.
from src.config import ConfigManager
config_manager = ConfigManager()

In [11]:
from src.inference import ModelInferencePipeline
import pandas as pd
# Constructed without arguments; presumably it reads its own settings from the
# project configuration — TODO confirm in src.inference.
model_inference_pipeline = ModelInferencePipeline()

In [8]:
# Run the pipeline's batch inference and keep what it returns for tabulation.
results = model_inference_pipeline.run_batch_inference()

In [12]:
# Render the inference results as a table (the full frame is displayed).
pd.DataFrame(results)

Unnamed: 0,sample_id,prompt,answer,protected_attributes,prediction
0,0,Given the following demographic information ab...,<=50K,"{'sex': 'Male', 'race': 'White'}","The predicted income is 50,000 or less."
1,1,Given the following demographic information ab...,<=50K,"{'sex': 'Male', 'race': 'White'}","The predicted income is 19 years old, working ..."
2,2,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'White'}","The predicted income is 50,000 or less. Theref..."
3,3,Given the following demographic information ab...,<=50K,"{'sex': 'Male', 'race': 'White'}","The predicted income is 50,000 USD or less. Th..."
4,4,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'White'}","The predicted income is 50,000 or less, since ..."
5,5,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'Black'}","The predicted income is 36 years old, with a w..."
6,6,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'White'}","The predicted income is 50,000 or less."
7,7,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'White'}","The predicted income is 50,000 USD or less."
8,8,Given the following demographic information ab...,<=50K,"{'sex': 'Male', 'race': 'Other'}","The predicted income is 50,000 or less, based ..."
9,9,Given the following demographic information ab...,<=50K,"{'sex': 'Female', 'race': 'Black'}","The predicted income is 50,000. The input demo..."


## Metric Implementations

In [8]:
import numpy as np
import pandas as pd
from fairlearn.metrics import MetricFrame, false_positive_rate, true_positive_rate
from sklearn.metrics import precision_score, brier_score_loss

In [9]:
# Synthetic binary-classification data shared by the metric examples below:
# ground-truth labels and hard predictions for ten samples.
y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1])
y_pred = np.array([1, 0, 1, 1, 1, 1, 1, 0, 1, 0])
# Predicted probabilities: Beta(2, 2) draws from a seeded generator so that a
# fresh kernel reproduces the same scores, clipped away from exactly 0 and 1.
y_prob = np.clip(np.random.default_rng(seed=42).beta(2, 2, size=10), 0.01, 0.99)
sensitive_feature = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])  # e.g., group A and B

* **Demographic parity**

In [None]:
from fairlearn.metrics import demographic_parity_difference

In [None]:
# Compute fairlearn's demographic parity difference across the groups in
# sensitive_feature and print the resulting number.
dp = demographic_parity_difference(y_true, y_pred, sensitive_features=sensitive_feature)
print(dp)

* **Equalized odds**

In [None]:
from fairlearn.metrics import equalized_odds_difference

In [None]:
# Compute fairlearn's equalized odds difference across the groups in
# sensitive_feature, using the "worst_case" aggregation, and print it.
eod = equalized_odds_difference(y_true, y_pred, sensitive_features=sensitive_feature, agg="worst_case")
print(eod)

* **Predictive parity**

In [None]:
def ppv(y_true, y_pred):
    """Positive predictive value, computed as scikit-learn's precision score.

    ``zero_division=0.0`` is passed through to ``precision_score``, which
    uses that value when the precision is undefined.
    """
    precision = precision_score(y_true, y_pred, zero_division=0.0)
    return precision

In [None]:
# Evaluate ppv through a MetricFrame so it is available per group of
# sensitive_feature.
metric_frame = MetricFrame(
    metrics=ppv,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

In [None]:
# Display the metric value for each sensitive group.
metric_frame.by_group

In [None]:
# Display the between-group difference as computed by MetricFrame.difference().
metric_frame.difference()

* **Conditional Statistical Parity**

In [None]:
y_pred = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0])
sensitive_feature = pd.Series(["M", "M", "M", "M", "M", "F", "F", "F", "F", "F"])
stratify_feature = pd.Series(["high", "low", "high", "low", "high", "low", "low", "high", "low", "high"])

In [None]:
df = pd.DataFrame({
    "y_pred": y_pred,
    "sensitive": sensitive_feature,
    "stratify": stratify_feature
})

def conditional_statistical_parity(df, pred_col, sensitive_col, stratify_col):
    """Compare mean predictions between sensitive groups within each stratum.

    Args:
        df: DataFrame with one row per sample.
        pred_col: Name of the column holding the predictions; its per-group
            mean is reported as the selection rate.
        sensitive_col: Name of the column holding the sensitive-group label.
        stratify_col: Name of the column holding the stratum to condition on.

    Returns:
        A list with one ``(stratum, group_rates, disparity)`` tuple per
        stratum, in order of first appearance. ``group_rates`` maps each
        sensitive group present in the stratum to its mean prediction and
        ``disparity`` is the largest minus the smallest of those rates.
        Rows whose stratum is missing (NaN/None) are skipped.
    """
    results = []
    # The selection rate is just the mean prediction, so a plain groupby is
    # enough. sort=False keeps strata in order of first appearance and
    # observed=True ignores unused categories of categorical columns.
    for stratum, subset in df.groupby(stratify_col, sort=False, observed=True):
        group_rates = subset.groupby(sensitive_col, observed=True)[pred_col].mean()
        disparity = group_rates.max() - group_rates.min()
        results.append((stratum, group_rates.to_dict(), disparity))
    return results

In [None]:
# Compute the per-stratum results on df and print, for each stratum, the
# group-wise rates followed by the gap between the largest and smallest rate.
csp_results = conditional_statistical_parity(df, "y_pred", "sensitive", "stratify")
for stratum, rates, disparity in csp_results:
    print(f"\nStratum: {stratum}")
    print("Group-wise selection rates:", rates)
    print("Disparity (max - min):", disparity)

* **Calibration**

In [None]:
def calibration_metric(y_true, y_prob):
    """Brier score loss of the predicted probabilities against the true labels."""
    brier = brier_score_loss(y_true, y_prob)
    return brier

# The probabilities are supplied through MetricFrame's y_pred argument, which
# is how they reach calibration_metric's y_prob parameter.
calibration = MetricFrame(
    metrics=calibration_metric,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature,
)

In [None]:
# Display the calibration metric for each sensitive group.
calibration.by_group

In [None]:
# Display the calibration metric computed over all samples.
calibration.overall

* **Balance for positive class**

In [None]:
def balance_for_positive_class(y_true, y_prob):
    """Mean predicted probability over the samples whose true label is 1.

    Args:
        y_true: Array-like of true labels.
        y_prob: Array-like of predicted probabilities, aligned with ``y_true``
            by position.

    Returns:
        The mean of ``y_prob`` where ``y_true == 1``, or ``np.nan`` when no
        sample has label 1.
    """
    # Coerce to arrays so the element-wise comparison and mask indexing also
    # work for plain lists; with a list, `y_true == 1` is a single False and
    # the result would silently be NaN.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    positives = y_true == 1
    return np.mean(y_prob[positives]) if positives.any() else np.nan

In [None]:
# Evaluate balance_for_positive_class per group; the probabilities are passed
# through MetricFrame's y_pred argument.
bcp_metric = MetricFrame(
    metrics=balance_for_positive_class,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature
)

# Display the value for each sensitive group.
bcp_metric.by_group

In [None]:
# Display the same metric computed over all samples.
bcp_metric.overall

* **Balance for negative class**

In [None]:
def balance_for_negative_class(y_true, y_prob):
    """Mean predicted probability over the samples whose true label is 0.

    Args:
        y_true: Array-like of true labels.
        y_prob: Array-like of predicted probabilities, aligned with ``y_true``
            by position.

    Returns:
        The mean of ``y_prob`` where ``y_true == 0``, or ``np.nan`` when no
        sample has label 0.
    """
    # Coerce to arrays so the element-wise comparison and mask indexing also
    # work for plain lists; with a list, `y_true == 0` is a single False and
    # the result would silently be NaN.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    negatives = y_true == 0
    return np.mean(y_prob[negatives]) if negatives.any() else np.nan

# Evaluate balance_for_negative_class per group; the probabilities are passed
# through MetricFrame's y_pred argument.
bcn_metric = MetricFrame(
    metrics=balance_for_negative_class,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature
)

# Display the value for each sensitive group.
bcn_metric.by_group

In [None]:
# Display the same metric computed over all samples.
bcn_metric.overall

* **Predictive equality**

In [None]:
# Predictive equality is examined through the false positive rate per group.
# NOTE(review): this rebinds the name metric_frame used in the predictive
# parity section above.
metric_frame = MetricFrame(
    metrics=false_positive_rate,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

# Display the false positive rate for each sensitive group.
metric_frame.by_group

In [None]:
# Display the between-group difference as computed by MetricFrame.difference().
metric_frame.difference()

* **Conditional use accuracy equality**

In [None]:
def negative_predictive_value(y_true, y_pred):
    """Share of predicted negatives (``y_pred == 0``) whose true label is 0.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predicted labels, aligned with ``y_true`` by
            position.

    Returns:
        The negative predictive value, or ``np.nan`` when no sample is
        predicted negative.
    """
    # Coerce to arrays so the element-wise comparisons also work for plain
    # lists; with a list, `y_pred == 0` is a single False and the result
    # would silently be NaN.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    predicted_negative = y_pred == 0
    if not predicted_negative.any():
        return np.nan
    # Among the predicted negatives, the mean of "label is 0" equals
    # true negatives divided by all predicted negatives.
    return np.mean(y_true[predicted_negative] == 0)

# Negative predictive value per group of sensitive_feature.
metric_frame_npv = MetricFrame(
    metrics=negative_predictive_value,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

# Positive predictive value per group, reusing the ppv helper defined in the
# predictive parity section.
metric_frame_ppv = MetricFrame(
    metrics=ppv,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

# Print both per-group tables so the two values can be compared together.
print("PPV: ", metric_frame_ppv.by_group)
print("NPV: ", metric_frame_npv.by_group)

* **Equal opportunity**

In [None]:
# This section is examined through the true positive rate per group.
# NOTE(review): this rebinds the name metric_frame used in earlier sections.
metric_frame = MetricFrame(
    metrics=true_positive_rate,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

# Display the true positive rate for each sensitive group.
metric_frame.by_group

## Metric Registry

In [5]:
import numpy as np

In [1]:
# Maps a metric's registered name to the callable that computes it.
METRIC_REGISTRY = {}


def register_metric(name):
    """Return a decorator that stores the decorated function in METRIC_REGISTRY.

    The function is stored under ``name`` and returned unchanged, so it stays
    callable directly. Registering a name that already exists replaces the
    earlier entry.
    """
    def decorator(func):
        METRIC_REGISTRY[name] = func
        return func
    return decorator


@register_metric("accuracy")
def accuracy(y_true, y_pred):
    """Fraction of positions at which the prediction equals the true label.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predicted labels with the same shape.

    Returns:
        The mean of the element-wise equality between the two inputs.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Coerce to arrays: for plain lists `==` compares the lists as a whole and
    # yields a single bool, which would make the result 0.0 or 1.0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Reject mismatched inputs instead of letting NumPy broadcast them.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    return np.mean(y_true == y_pred)

In [6]:
class FairnessMetric:
    """Evaluate a list of registered metrics on one set of labels and predictions.

    Metric functions are looked up by name in METRIC_REGISTRY and called as
    ``metric(y_true, y_pred)``.
    """

    def __init__(self, metric_names: list[str], y_true: np.ndarray, y_pred: np.ndarray):
        """Store the metric names to evaluate together with the labels and predictions."""
        self.metric_names = metric_names
        self.y_true = y_true
        self.y_pred = y_pred

    def evaluate(self):
        """Compute and print every requested metric.

        Returns:
            A dict mapping each metric name to its value, in the order given
            by ``metric_names``. (When this call is the last expression of a
            notebook cell the dict is displayed in addition to the printed
            lines.)

        Raises:
            KeyError: If any requested name is missing from METRIC_REGISTRY.
                All names are checked before anything is computed or printed.
        """
        unknown = [name for name in self.metric_names if name not in METRIC_REGISTRY]
        if unknown:
            raise KeyError(
                f"Unregistered metric(s): {unknown}; available: {list(METRIC_REGISTRY)}"
            )

        results = {}
        for metric_name in self.metric_names:
            metric_function = METRIC_REGISTRY[metric_name]
            metric_value = metric_function(self.y_true, self.y_pred)
            print(f"{metric_name}: {metric_value}")
            results[metric_name] = metric_value
        return results

In [10]:
# Evaluate only the registered "accuracy" metric on the y_true / y_pred arrays
# currently bound in the kernel.
evaluator = FairnessMetric(metric_names=["accuracy"], y_true=y_true, y_pred=y_pred)

In [11]:
# Run the evaluation; each metric is printed as "name: value".
evaluator.evaluate()

accuracy: 0.7
