In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.config import ConfigManager
config_manager = ConfigManager()
model_config = config_manager.get_model_config()
model_config

In [None]:
from src.data_loader import DataLoader
from src.model_loader import ModelLoader

In [None]:
dataset_config = config_manager.get_dataset_config()
data_loader = DataLoader(dataset_config)
data_loader.load_data()

In [None]:
sample_prompts = data_loader.get_sample_prompts([0, 1, 2, 3, 4])
sample_prompts

In [None]:
# Every constructor argument comes from the Hugging Face section of the
# model configuration, so read that section once and pass its fields through.
hf_settings = model_config.huggingface
model_loader = ModelLoader(
    model_id=hf_settings.model_id,
    device=hf_settings.device,
    max_new_tokens=hf_settings.max_new_tokens,
    temperature=hf_settings.temperature,
    top_p=hf_settings.top_p,
    system_prompt=hf_settings.system_prompt,
)

In [None]:
model_loader.model_config.system_prompt

In [None]:
formatted_prompt = model_loader._format_prompt(sample_prompts[0])
print(formatted_prompt)

In [None]:
model_loader.predict(sample_prompts[0])

## Inference Module

In [None]:
from src.config import ConfigManager
config_manager = ConfigManager()

In [None]:
from src.inference import ModelInferencePipeline
import pandas as pd
model_inference_pipeline = ModelInferencePipeline()

In [None]:
results = model_inference_pipeline.run_batch_inference()

In [None]:
pd.read_csv("artifacts/inference_results.csv")

In [None]:
# numpy is first imported further down the notebook, so import it here to keep
# this cell runnable on a fresh kernel (Restart & Run All).
import numpy as np

# Synthetic placeholder labels: 20 random draws per series from a seeded
# generator (reproducible across runs), mapped onto the two income brackets.
INCOME_LABELS = {0: "<=50K", 1: ">50K"}
rng = np.random.default_rng(42)
preds = pd.Series(rng.integers(0, 2, size=20)).map(INCOME_LABELS)
answers = pd.Series(rng.integers(0, 2, size=20)).map(INCOME_LABELS)

In [None]:
df = pd.DataFrame(results)

In [None]:
df["answer"] = answers
df["pred"] = preds
df

In [None]:
df.to_csv("data/adult-census-income-results.csv", index=False)

## Metric Implementations

In [None]:
import numpy as np
import pandas as pd
from fairlearn.metrics import MetricFrame, false_positive_rate, true_positive_rate
from sklearn.metrics import precision_score, brier_score_loss

In [None]:
# Toy inputs shared by the metric demonstrations below.
y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1])
y_pred = np.array([1, 0, 1, 1, 1, 1, 1, 0, 1, 0])
# A seeded generator keeps the toy probabilities identical on every run;
# clipping keeps them strictly inside (0, 1).
rng = np.random.default_rng(42)
y_prob = np.clip(rng.beta(2, 2, size=10), 0.01, 0.99)
sensitive_feature = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])  # e.g., group A and B

* **Demographic parity**

In [None]:
from fairlearn.metrics import demographic_parity_difference

In [None]:
dp = demographic_parity_difference(y_true, y_pred, sensitive_features=sensitive_feature)
print(dp)

* **Equalized odds**

In [None]:
from fairlearn.metrics import equalized_odds_difference

In [None]:
eod = equalized_odds_difference(y_true, y_pred, sensitive_features=sensitive_feature, agg="worst_case")
print(eod)

* **Predictive Parity**

In [None]:
def ppv(y_true, y_pred):
    """Return the positive predictive value (precision) of ``y_pred``.

    Thin wrapper around ``precision_score`` with ``zero_division=0.0`` so it
    can be passed to ``MetricFrame`` as a two-argument metric.
    """
    precision = precision_score(y_true, y_pred, zero_division=0.0)
    return precision

In [None]:
metric_frame = MetricFrame(
    metrics=ppv,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

In [None]:
metric_frame.by_group

In [None]:
metric_frame.difference()

* **Conditional Statistical Parity**

In [None]:
y_pred = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0])
sensitive_feature = pd.Series(["M", "M", "M", "M", "M", "F", "F", "F", "F", "F"])
stratify_feature = pd.Series(["high", "low", "high", "low", "high", "low", "low", "high", "low", "high"])

In [None]:
df = pd.DataFrame({
    "y_pred": y_pred,
    "sensitive": sensitive_feature,
    "stratify": stratify_feature
})

def conditional_statistical_parity(df, pred_col, sensitive_col, stratify_col):
    """Compute per-stratum selection rates and their spread across groups.

    Within each stratum, rows are grouped by ``sensitive_col`` and the mean of
    ``pred_col`` is taken as that group's selection rate. The rates are
    computed with a plain pandas group-by, so no placeholder ground-truth
    array is needed.

    Args:
        df: DataFrame containing the three columns named below.
        pred_col: Name of the prediction column whose mean is the selection
            rate (assumes numeric, e.g. 0/1, predictions — confirm with caller).
        sensitive_col: Name of the column holding the sensitive group label.
        stratify_col: Name of the column defining the strata to condition on.

    Returns:
        A list with one ``(stratum, rates, disparity)`` tuple per stratum, in
        order of first appearance. ``rates`` maps each sensitive group to its
        selection rate and ``disparity`` is ``max(rates) - min(rates)``.
        Rows whose stratum value is missing are skipped by the group-by.
    """
    results = []
    # sort=False keeps strata in order of first appearance (like Series.unique()).
    for stratum, subset in df.groupby(stratify_col, sort=False):
        group_rates = subset.groupby(sensitive_col)[pred_col].mean()
        disparity = group_rates.max() - group_rates.min()
        results.append((stratum, group_rates.to_dict(), disparity))
    return results

In [None]:
csp_results = conditional_statistical_parity(df, "y_pred", "sensitive", "stratify")
for stratum, rates, disparity in csp_results:
    print(f"\nStratum: {stratum}")
    print("Group-wise selection rates:", rates)
    print("Disparity (max - min):", disparity)

* **Calibration**

In [None]:
def calibration_metric(y_true, y_prob):
    """Return the Brier score loss of ``y_prob`` against ``y_true``.

    Thin wrapper around ``brier_score_loss`` with the two-argument signature
    that ``MetricFrame`` expects.
    """
    brier = brier_score_loss(y_true, y_prob)
    return brier

calibration = MetricFrame(
    metrics=calibration_metric,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature,
)

In [None]:
calibration.by_group

In [None]:
calibration.overall

* **Balance for positive class**

In [None]:
def balance_for_positive_class(y_true, y_prob):
    """Mean predicted score over the samples whose true label is 1.

    Args:
        y_true: Array-like of true labels; entries equal to 1 are positives.
        y_prob: Array-like of predicted scores, positionally aligned with
            ``y_true``.

    Returns:
        The mean of ``y_prob`` over the positive samples, or ``np.nan`` when
        there is no positive sample.
    """
    # Coerce to arrays so plain Python lists work (``[1, 0] == 1`` is a single
    # False, which made the function return NaN) and so masking is positional.
    labels = np.asarray(y_true)
    scores = np.asarray(y_prob)
    positives = labels == 1
    return np.mean(scores[positives]) if positives.any() else np.nan

In [None]:
bcp_metric = MetricFrame(
    metrics=balance_for_positive_class,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature
)

bcp_metric.by_group

In [None]:
bcp_metric.overall

* **Balance for negative class**

In [None]:
def balance_for_negative_class(y_true, y_prob):
    """Mean predicted score over the samples whose true label is 0.

    Args:
        y_true: Array-like of true labels; entries equal to 0 are negatives.
        y_prob: Array-like of predicted scores, positionally aligned with
            ``y_true``.

    Returns:
        The mean of ``y_prob`` over the negative samples, or ``np.nan`` when
        there is no negative sample.
    """
    # Coerce to arrays so plain Python lists work (``[1, 0] == 0`` is a single
    # False, which made the function return NaN) and so masking is positional.
    labels = np.asarray(y_true)
    scores = np.asarray(y_prob)
    negatives = labels == 0
    return np.mean(scores[negatives]) if negatives.any() else np.nan

bcn_metric = MetricFrame(
    metrics=balance_for_negative_class,
    y_true=y_true,
    y_pred=y_prob,
    sensitive_features=sensitive_feature
)

bcn_metric.by_group

In [None]:
bcn_metric.overall

* **Predictive equality**

In [None]:
metric_frame = MetricFrame(
    metrics=false_positive_rate,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

metric_frame.by_group

In [None]:
metric_frame.difference()

* **Conditional use accuracy equality**

In [None]:
def negative_predictive_value(y_true, y_pred):
    """Fraction of predicted negatives that are truly negative (NPV).

    Args:
        y_true: Array-like of true labels; 0 marks the negative class.
        y_pred: Array-like of predicted labels, positionally aligned with
            ``y_true``; 0 marks a negative prediction.

    Returns:
        ``TN / (TN + FN)``, or ``np.nan`` when nothing is predicted negative.
    """
    # Coerce to arrays so plain Python lists work (``[1, 0] == 0`` is a single
    # False, which made the function return NaN) and comparisons are vectorised.
    labels = np.asarray(y_true)
    preds = np.asarray(y_pred)
    predicted_negative = preds == 0
    if not predicted_negative.any():
        return np.nan
    # Share of true negatives among the predicted negatives.
    return np.mean(labels[predicted_negative] == 0)

metric_frame_npv = MetricFrame(
    metrics=negative_predictive_value,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

metric_frame_ppv = MetricFrame(
    metrics=ppv,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

print("PPV: ", metric_frame_ppv.by_group)
print("NPV: ", metric_frame_npv.by_group)

* **Equalized opportunities**

In [None]:
metric_frame = MetricFrame(
    metrics=true_positive_rate,
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_feature
)

metric_frame.by_group

## Post-Processing

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
import pandas as pd

In [None]:
from src.config import ConfigManager
from src.postprocessing import PostProcessor

config_manager = ConfigManager()
post_processor = PostProcessor(config_manager)

In [None]:
post_processor.run()

In [None]:
pd.read_csv("artifacts/postprocessed_results.csv")

In [None]:
post_processor.df

In [None]:
post_processor.expand_protected_attributes()
post_processor.df

In [None]:
post_processor.encode_binary_columns()
post_processor.encode_protected_attributes()
post_processor.df

## Metric Registry

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.metric_registry import METRIC_REGISTRY, get_metric, list_metrics
# Plain module import instead of `from src.metrics import *`: no cell in this
# section uses a star-imported name, so the import is presumably needed only
# for its side effects (e.g. registering metrics) — TODO confirm.
import src.metrics  # noqa: F401

In [None]:
list_metrics()

In [None]:
equal_selection_parity = get_metric("equal_selection_parity")
equal_selection_parity

## Evaluator

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.evaluator import FairnessEvaluator
from src.config import ConfigManager

config_manager = ConfigManager()
metrics_config = config_manager.get_metrics_config()
results_path = "data/adult-census-income-results.csv"

evaluator = FairnessEvaluator(
    config_manager=config_manager,
)

In [None]:
evaluator.config.fairness.metrics

In [None]:
metric_functions = evaluator.evaluate()

In [None]:
metric_functions["fairness"]

## Example

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.config import ConfigManager

config_manager = ConfigManager()
artifacts_config = config_manager.get_artifacts_config()
artifacts_config

In [None]:
artifacts_config.postprocessed_results_path

## Metric Tests

In [None]:
import pandas as pd

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise_dev/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
# Plain module import instead of `from src.metrics import *`: no cell in this
# section uses a star-imported name, so the import is presumably needed only
# for its side effects (e.g. registering metrics) — TODO confirm.
import src.metrics  # noqa: F401
from src.metric_registry import METRIC_REGISTRY, list_metrics, get_metric

In [None]:
get_metric(list_metrics()[1])

In [None]:
METRIC_REGISTRY.get("demographic_parity")(y_true, y_pred, sensitive_feature)

## Visualization Test

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.getcwd()

In [None]:
from src.core.config import ConfigManager
from src.dataset_loader.data_loader import DataLoader
from src.eval_engine.metrics import selection_rate

import numpy as np
from matplotlib import pyplot as plt

from src.visualizations.plots import (
    plot_demographic_parity,
    plot_groupwise_confusion_matrices,
    create_fairness_vs_accuracy_plot,
    plot_calibration_by_group,
    plot_group_metrics_boxplots,
    plot_fairness_radar,
    plot_conditional_statistical_parity,
    plot_cumulative_parity_loss)

In [None]:
config_manager = ConfigManager()
dataset_config = config_manager.get_dataset_config()
data_loader = DataLoader(dataset_config)
df = data_loader.load_data()
df.head()

In [None]:
df_subsampled = df.sample(n=500, random_state=42).reset_index(drop=True)

# Draw placeholder predictions from a seeded generator so the synthetic labels
# (and every plot built on them) are reproducible across runs.
# NOTE(review): p=[0.9, 0.1] assumes "income" has exactly two distinct values;
# the 0.9 weight goes to whichever label appears first in the sample.
rng = np.random.default_rng(42)
income_labels = df_subsampled["income"].unique().tolist()
y_pred = rng.choice(income_labels, size=len(df_subsampled), p=[0.9, 0.1])
df_subsampled["prediction"] = y_pred

# Only a cell's last expression is rendered, so display the class balance
# explicitly instead of leaving it as a silent no-op.
display(df_subsampled["prediction"].value_counts(normalize=True))
df_subsampled

In [None]:
protected_attributes = ["sex", "race", "occupation", "education"]
required_columns = protected_attributes + ["income", "prediction"]

df_results = df_subsampled[required_columns]
df_results.head()

In [None]:
# Collect the distinct values of every object-typed column except
# "prediction", in order of first appearance.
categorical_cols = df_results.drop(columns=["prediction"]).select_dtypes("O").columns
categorical_unique_values = {col: df_results[col].unique() for col in categorical_cols}

# Map each distinct value to an integer code (0, 1, 2, ...) per column.
encoding_dict = {}
for col, values in categorical_unique_values.items():
    encoding_dict[col] = {val: i for i, val in enumerate(values)}
# Predictions reuse the codes of the ground-truth "income" column.
encoding_dict["prediction"] = encoding_dict["income"]

# Encode a copy so df_results keeps the original string labels.
df_encoded = df_results.copy()
for col in df_encoded.select_dtypes("O").columns:
    df_encoded[col] = df_encoded[col].map(encoding_dict[col])

# Random placeholder scores, seeded for reproducibility.
np.random.seed(42)
df_encoded["scores"] = np.random.random(len(df_encoded))

df_encoded

* **Selection Rate and Demographic Parity**

In [None]:
plot_demographic_parity(y_true=df_encoded["income"], y_pred=df_encoded["prediction"], sensitive_features=df_encoded["occupation"])

* **Groupwise Confusion Matrix**

In [None]:
plot_groupwise_confusion_matrices(
    y_true=df_encoded["income"],
    y_pred=df_encoded["prediction"],
    sensitive_attr=df_encoded["sex"],
    sensitive_mapping={v: k for k, v in encoding_dict["sex"].items()}
)

* **Equalized Odds vs overall accuracy**

In [None]:
create_fairness_vs_accuracy_plot(df_encoded["income"], df_encoded["prediction"], df_encoded["scores"], df_encoded["education"])

* **Calibration Curves**

In [None]:
plot_calibration_by_group(df_encoded["income"], df_encoded["scores"], df_encoded["race"])

* **Group Metrics Box Plots**

In [None]:
plot_group_metrics_boxplots(df_encoded["income"], df_encoded["prediction"], df_encoded["sex"])

* **Radar Chart For Metric Comparisons**

In [None]:
plot_fairness_radar(y_true=df_encoded["income"], y_pred=df_encoded["prediction"], protected_attributes=df_encoded["sex"], sensitive_mapping={v: k for k, v in encoding_dict["sex"].items()})

* **Conditional Statistical Parity Heatmap**

In [None]:
plot_conditional_statistical_parity(df_encoded["prediction"], df_encoded["education"], df_encoded["sex"])

* **Cumulative Parity Loss**

In [None]:
plot_cumulative_parity_loss(y_true=df_encoded["income"], y_pred=df_encoded["prediction"], protected_attributes=df_encoded["occupation"])

## Inference API Test

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
import json
from openai import OpenAI
from dotenv import load_dotenv

from src.core.config import ConfigManager
from src.dataset_loader.data_loader import DataLoader

load_dotenv()
LAMBDA_API_KEY = os.environ.get("LAMBDA_API_KEY")
BASE_URL = "https://api.lambda.ai/v1"
MODEL_ID = "llama3.1-8b-instruct"

config_manager = ConfigManager()
data_loader = DataLoader(config_manager.get_dataset_config())
data_loader.load_data()
ds = data_loader.data
ds

In [None]:
# Replace all "." with "_" in column names, then take the first row as the
# sample. (The earlier `sample = ds.iloc[0]` was dead code: it was overwritten
# before ever being read.)
ds_renamed = ds.rename(columns=lambda x: x.replace('.', '_'))
sample = ds_renamed.iloc[0]
sample

In [None]:
# Fields of the sample row that go into the prompt, in prompt order.
feature_names = [
    "age",
    "workclass",
    "education",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
    "native_country",
]
# These fields are unwrapped with .item() to get built-in Python scalars;
# every other field is passed through unchanged.
scalar_fields = {"age", "capital_gain", "capital_loss", "hours_per_week"}

features = {
    name: sample[name].item() if name in scalar_fields else sample[name]
    for name in feature_names
}

features

In [None]:
ground_truth = sample["income"]
ground_truth

In [None]:
SYSTEM_PROMPT = (
    "Given tabular features for one person, predict income_bracket as either '<=50K' or '>50K'. "
    "Return STRICT JSON with keys: prediction (string), confidence (0-1 float). No extra text."
)

print(SYSTEM_PROMPT)

USER_PROMPT = (
    "Features:\n"
    + json.dumps(features, ensure_ascii=False)
    + "\n\nReturn only JSON as specified."
)

print(USER_PROMPT)

In [None]:
# Fail fast when the key is missing. With api_key=None the OpenAI client falls
# back to the OPENAI_API_KEY environment variable, which would send an
# unrelated credential to the Lambda endpoint.
if not LAMBDA_API_KEY:
    raise RuntimeError(
        "LAMBDA_API_KEY is not set; define it in the environment or in .env"
    )

client = OpenAI(api_key=LAMBDA_API_KEY, base_url=BASE_URL)

# Single chat completion: the system prompt fixes the JSON contract and the
# user prompt carries the sample's features.
resp = client.chat.completions.create(
    model=MODEL_ID,
    temperature=0.1,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT}
    ]
)

raw = resp.choices[0].message.content
print(raw)

In [None]:
try:
    parsed = json.loads(raw)
except json.JSONDecodeError:
    # The reply is not pure JSON (e.g. wrapped in extra text): fall back to the
    # span between the first "{" and the last "}".
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        # No JSON object at all. Say so explicitly rather than letting
        # json.loads fail on an empty slice with an opaque message.
        raise ValueError(f"Model response contains no JSON object: {raw!r}")
    parsed = json.loads(raw[start : end + 1])

print("\nPARSED RESULT:\n", json.dumps(parsed, indent=2))
print("\nGROUND TRUTH:", ground_truth)

## Prompt Refactoring

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.core.config import ConfigManager
from src.dataset_loader.data_loader import DataLoader
from src.model_loader import ModelLoader

In [None]:
config_manager = ConfigManager()
dataset_config = config_manager.get_dataset_config()
data_loader = DataLoader(dataset_config)
data_loader.load_data()
sample_prompts = data_loader.get_sample_prompts([0, 1])
sample_prompts

In [None]:
model_loader = ModelLoader.from_config_manager(config_manager)

In [None]:
formatted_prompt = model_loader._format_prompt(sample_prompts[0])
print(formatted_prompt)

In [None]:
formatted_prompt

In [None]:
model_loader.predict(sample_prompts)

In [None]:
from src.prompts.base import PromptInput
from src.prompts.registry import get_formatter
from src.core.config import ConfigManager

In [None]:
config_manager = ConfigManager()
prompting_config = config_manager.get_prompting_config()
prompting_config

In [None]:
from src.prompts.registry import get_formatter

In [None]:
formatter = get_formatter("openai-chat-json")

In [None]:
p = PromptInput(
    instruction="You are an helpful assistant",
    features=sample_prompts[0])

In [None]:
formatter.format(p)

In [None]:
formatter = get_formatter(prompting_config.formatter)
formatted_prompt = formatter.format(p)
print(formatted_prompt)

In [None]:
formatter = get_formatter(prompting_config.formatter)

In [None]:
from src.core.prompt_config import resolve_prompt_config
from src.prompts.registry import get_formatter
from src.prompts.base import PromptInput

In [None]:
params = resolve_prompt_config(prompting_config, formatter.DEFAULTS)["params"]
params

In [None]:
p = PromptInput(
    instruction=params["instruction"],
    features=sample_prompts[0],
    system_prompt=params["system_prompt"],
    assistant_preamble=params["assistant_preamble"],
)

In [None]:
print(formatter.format(p))

## LLMClient interface + thin orchestrator

In [None]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

In [None]:
from src.inference.clients.openai_chat import OpenAIChatClient

In [None]:
messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT}
    ]
print(messages)

In [None]:
openai_client = OpenAIChatClient(
    base_url=BASE_URL,
    api_key=LAMBDA_API_KEY,
    model_id=MODEL_ID,
)
response = openai_client.generate(messages, max_new_tokens=20, temperature=0.5, top_p=0.9)

In [None]:
response

## Inference Engine

In [1]:
import os

# The project root is machine-specific. Allow overriding it through the
# BIAS_FAIRNESS_MODULE_DIR environment variable; the original development
# path stays as the default so existing setups keep working.
PROJECT_ROOT = os.environ.get(
    "BIAS_FAIRNESS_MODULE_DIR",
    "/home/sermengi/verifywise/BiasAndFairnessModule",
)
os.chdir(PROJECT_ROOT)
os.listdir()

['INSTALLATION.md',
 '.venv',
 'src',
 '.gitignore',
 'example_usage.py',
 'test_metrics_simple.py',
 'test_balance_metrics.py',
 'tests',
 'configs',
 'README.md',
 'requirements-exact.txt',
 'pyproject.toml',
 'run_tests.py',
 'fairness_evaluation.log',
 'run_full_evaluation.py',
 'test_evaluation.py',
 'test_balance_fix.py',
 '.python-version',
 'uv.lock',
 'notebooks',
 'requirements-dev.txt',
 'check_environment.py',
 'artifacts',
 'Makefile',
 'requirements.txt',
 'simple_test.py',
 'test_metrics.py',
 '.env']

In [2]:
from src.core.config import ConfigManager
from src.inference.inference_pipeline import InferencePipeline
import dotenv
import pandas as pd
import os

dotenv.load_dotenv()

LAMBDA_API_KEY = os.environ.get("LAMBDA_API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
config_manager = ConfigManager()
pipeline = InferencePipeline(config_manager)
sample = pipeline._get_samples(limit_samples=1)

In [None]:
df = pd.read_csv("artifacts/inference_results_openai.csv")
df

In [None]:
new_row = df.loc[0].copy()

In [None]:
df.loc[len(df)] = new_row

In [None]:
df["sample_id"] = [*range(len(df))]

In [None]:
import random

# Seed the stdlib RNG so the synthetic values drawn from `random` in this and
# later cells are reproducible across runs.
random.seed(42)

answer = ['<=50K', '>50K']
df["answer"] = random.choices(answer, k=len(df))

In [None]:
df["prediction"] =random.choices(answer, k=len(df))

In [None]:
import numpy as np

In [None]:
# Fabricate one confidence per row, tied to its answer: "<=50K" rows draw
# uniformly from [0, 0.5], every other row from [0.5, 1].
confidence = [
    random.uniform(0, 0.5) if label == "<=50K" else random.uniform(0.5, 1)
    for label in df["answer"]
]
df["confidence"] = confidence
df


In [None]:
df.iloc[0].protected_attributes

In [None]:
# One randomly drawn sex/race pair per row, stored as the string form of a
# dict (sex is drawn before race for each row).
attributes = [
    str({
        "sex": random.choice(["Male", "Female"]),
        "race": random.choice(["White", "Black"]),
    })
    for _ in range(len(df))
]
df["protected_attributes"] = attributes
df

In [None]:
df.to_csv("artifacts/inference_results_openai.csv", index=False)

In [None]:
df = pd.read_csv("artifacts/inference_results_openai.csv")
df

In [None]:
from src.eval_engine.postprocessing import PostProcessor
post_processor = PostProcessor(config_manager)

In [None]:
post_processor.run()

In [None]:
post_processor.df

In [None]:
post_processor.encode_binary_columns()

In [None]:
# Take the first row's prediction. Series.item() raises ValueError unless the
# column holds exactly one element, and this frame can hold several rows.
result = df["prediction"].iloc[0]
result

In [None]:
from src.core.common import parse_json_strict

In [None]:
parsed = parse_json_strict(result)
parsed

In [None]:
print(type(parsed["prediction"]))
print(type(parsed["confidence"]))

In [None]:
df = pd.read_csv("artifacts/inference_results_openai.csv")
df

In [8]:
from src.eval_engine.evaluator import FairnessEvaluator
from src.core.config import ConfigManager
config_manager = ConfigManager()
evaluator = FairnessEvaluator(config_manager, "artifacts/postprocessed_results.csv")

In [11]:
from src.eval_engine.metric_registry import METRIC_REGISTRY, list_metrics, get_metric

In [12]:
list_metrics()

['equal_selection_parity',
 'demographic_parity',
 'conditional_statistical_parity',
 'calibration',
 'selection_rate',
 'conditional_use_accuracy_equality',
 'predictive_parity',
 'equalized_odds',
 'equalized_opportunity',
 'equalized_opportunities',
 'predictive_equality',
 'balance_positive_class',
 'balance_negative_class',
 'toxicity_gap',
 'sentiment_gap',
 'stereotype_gap',
 'exposure_disparity',
 'representation_disparity',
 'prompt_fairness',
 'accuracy_difference',
 'precision_difference',
 'recall_difference',
 'f1_difference',
 'multiclass_demographic_parity',
 'multiclass_equalized_odds',
 'regression_demographic_parity',
 'compute_group_metrics',
 'equalized_odds_by_group']

In [14]:
METRIC_REGISTRY.get("equal_selection_parity")

<function src.eval_engine.metrics.equal_selection_parity(y_true: numpy.ndarray, y_pred: numpy.ndarray, protected_attributes: numpy.ndarray) -> Dict[Any, int]>