In [0]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score
from scipy.stats import norm

In [0]:
# Lift the column cap so wide frames are shown without truncation.
pd.options.display.max_columns = None

In [0]:
# Create sample data with three columns: y_pred, y_true and age
import pandas as pd
import numpy as np

# Generate synthetic labels, ages and predictions with an age-dependent error rate.
n = 100000  # number of rows
np.random.seed(42)
y_true = np.random.randint(0, 2, size=n)
age = np.random.randint(0, 100, n)

# Inject bias: rows with age < 17 keep the true label only when a uniform draw
# exceeds 0.4 (about 60% of the time); every other row is predicted correctly.
# One draw is taken per young row, in row order, so the global RNG stream is
# consumed exactly as a row-by-row loop calling np.random.rand() would.
# NOTE(review): the cutoff here is 17 (exclusive) while the age bucketing later
# in this notebook uses 18 as the first bin edge — confirm which is intended.
is_young = age < 17
keeps_true_label = np.ones(n, dtype=bool)
keeps_true_label[is_young] = np.random.rand(int(is_young.sum())) > 0.4
predicted = np.where(keeps_true_label, y_true, 1 - y_true)

df = pd.DataFrame({
    'y_pred': predicted,
    'y_true': y_true,
    'age': age
})

print(df.head())

In [0]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score
from scipy.stats import norm

def bootstrap_ci(data, metric_fn, n_bootstrap=5000, alpha=0.05, random_state=None):
    """Estimate a metric and its percentile confidence interval by bootstrapping.

    Parameters
    ----------
    data : object exposing ``sample(frac=, replace=, random_state=)``
        The rows to resample; a pandas DataFrame in this notebook.
    metric_fn : callable
        Called once per resample; must return a number.
    n_bootstrap : int, default 5000
        Number of resamples to draw (each the same size as ``data``).
    alpha : float, default 0.05
        Two-sided significance level; the interval spans the ``alpha/2`` and
        ``1 - alpha/2`` percentiles of the bootstrap statistics.
    random_state : None, int, numpy RandomState or Generator, default None
        ``None`` keeps the previous behaviour (``data.sample`` picks its own
        source of randomness). Anything else makes the whole run reproducible.

    Returns
    -------
    tuple
        ``(mean_of_stats, (ci_low, ci_high), stats)`` where ``stats`` is the
        list of per-resample metric values.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is below 1 or ``alpha`` is not strictly between 0 and 1.
    """
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    # A seed must be turned into ONE generator shared by every draw; handing
    # the same integer to each sample() call would repeat the identical resample.
    if random_state is None or isinstance(
        random_state, (np.random.RandomState, np.random.Generator)
    ):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    stats = [
        metric_fn(data.sample(frac=1, replace=True, random_state=rng))
        for _ in range(n_bootstrap)
    ]
    ci_low, ci_high = np.percentile(stats, [100 * (alpha / 2), 100 * (1 - alpha / 2)])
    return np.mean(stats), (ci_low, ci_high), stats

def cohen_d(a, b):
    """Standardised mean difference (Cohen's d) between samples ``a`` and ``b``.

    The difference of means is divided by the pooled sample standard deviation
    (``ddof=1``). Returns ``0.0`` whenever that pooled value is not strictly
    positive.
    """
    size_a, size_b = len(a), len(b)
    mean_gap = np.mean(a) - np.mean(b)
    sd_a = np.std(a, ddof=1)
    sd_b = np.std(b, ddof=1)
    weighted_var = (size_a - 1) * sd_a**2 + (size_b - 1) * sd_b**2
    pooled_sd = np.sqrt(weighted_var / (size_a + size_b - 2))
    if pooled_sd > 0:
        return mean_gap / pooled_sd
    return 0.0

# Example: Assume df has columns 'y_true', 'y_pred', 'age_group'
def precision_metric(group_df):
    """Score the ``y_pred`` column against the ``y_true`` column with ``precision_score``."""
    actual = group_df['y_true']
    predicted = group_df['y_pred']
    return precision_score(actual, predicted)

def fairness_analysis(df, sensitive_col='age_group', metric_fn=None,
                      n_bootstrap=5000, alpha=0.05, effect_size_threshold=0.5):
    """Flag subgroups whose bootstrapped metric departs from the global one.

    A subgroup is flagged when both hold:
      1. the global bootstrap mean lies outside the subgroup's confidence
         interval, and
      2. the absolute Cohen's d between the global and subgroup bootstrap
         distributions is at least ``effect_size_threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``sensitive_col`` plus whatever columns ``metric_fn`` reads.
    sensitive_col : str, default 'age_group'
        Column whose distinct values define the subgroups.
    metric_fn : callable or None, default None
        Metric applied to each resample; ``None`` means ``precision_metric``.
    n_bootstrap : int, default 5000
        Resamples per bootstrap run (one global run plus one per subgroup).
    alpha : float, default 0.05
        Significance level forwarded to ``bootstrap_ci``.
    effect_size_threshold : float, default 0.5
        Minimum absolute Cohen's d required to raise the flag.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty subgroup with columns ``sensitive_col``,
        'subgroup_precision', 'subgroup_ci', 'bias_flag', 'effect_size'
        (``None`` when the CI check passed) and 'global_precision'.
    """
    if metric_fn is None:
        metric_fn = precision_metric

    # Global metric & its bootstrap distribution (the global CI itself is unused).
    global_mean, _, global_stats = bootstrap_ci(
        df, metric_fn, n_bootstrap=n_bootstrap, alpha=alpha
    )
    global_stats = np.asarray(global_stats)  # converted once, reused for every subgroup

    results = []
    # observed=True: with a categorical grouper, only categories that actually
    # occur are visited, so unused categories never reach the metric.
    for group, group_df in df.groupby(sensitive_col, observed=True):
        if group_df.empty:
            continue
        sub_mean, sub_ci, sub_stats = bootstrap_ci(
            group_df, metric_fn, n_bootstrap=n_bootstrap, alpha=alpha
        )
        bias_flag = False
        effect_size = None
        # Step 1: CI check — is the global mean outside the subgroup interval?
        if not (sub_ci[0] <= global_mean <= sub_ci[1]):
            # Step 2: effect size between the two bootstrap distributions.
            # NOTE(review): the spread of bootstrap statistics is a standard
            # error, so this d grows with sample size; confirm that comparing
            # bootstrap distributions (rather than raw outcomes) is intended.
            effect_size = abs(cohen_d(global_stats, np.asarray(sub_stats)))
            if effect_size >= effect_size_threshold:
                bias_flag = True
        results.append({
            sensitive_col: group,
            'subgroup_precision': sub_mean,
            'subgroup_ci': sub_ci,
            'bias_flag': bias_flag,
            'effect_size': effect_size,
            'global_precision': global_mean
        })
    return pd.DataFrame(results)

# Bucket ages into left-closed brackets (right=False), then run the
# per-bracket fairness check and show the resulting table.
age_bins = [0, 18, 30, 45, 60, 75, 100]
age_labels = ['0-17', '18-29', '30-44', '45-59', '60-74', '75+']
df['age_group'] = pd.cut(
    df['age'],
    bins=age_bins,
    labels=age_labels,
    right=False,
)
result_df = fairness_analysis(df, 'age_group')
print(result_df)