# Bias Detection Tutorial

This notebook demonstrates how to use the AI Safety Toolkit's bias detection module to identify and analyze bias in machine learning models.

## 1. Setup and Imports

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from modules.bias_detection import BiasDetector
from modules.data_loader import DataLoader
from modules.utils import load_sample_model, generate_synthetic_biased_data

## 2. Load and Explore Data

In [None]:
# Load the Adult Income dataset through the project's DataLoader.
data_loader = DataLoader()
data = data_loader.load_adult_dataset()

# Quick structural summary: dimensions first, then the column names.
print("Dataset shape:", data.shape)
print("\nColumns:", data.columns.tolist())
print("\nFirst few rows:")

# Keep this as the cell's final expression so the notebook renders the preview.
data.head()

In [None]:
# Explore the protected attribute distribution
protected_dist = data['protected_race'].value_counts()
print("Protected attribute distribution:")
print(protected_dist)

# Count the rows whose protected attribute equals 1 directly on the column
# instead of indexing the value_counts() result with `[1]`: that lookup raises
# KeyError when no row carries the value 1, and on a non-integer index an
# integer key is treated as a position rather than a label.
# NOTE(review): assumes the minority group is encoded as 1 - confirm against
# DataLoader.load_adult_dataset().
minority_count = (data['protected_race'] == 1).sum()
print(f"\nMinority group percentage: {minority_count / len(data) * 100:.1f}%")

## 3. Train a Model

In [None]:
# Ask the data loader for the train/test partitions of the features, the
# 'income' target and the 'protected_race' attribute.
ml_splits = data_loader.prepare_ml_data(data, 'income', 'protected_race')
X_train, X_test, y_train, y_test, protected_train, protected_test = ml_splits

# Fit a 100-tree random forest; the fixed seed keeps reruns reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Hard class predictions, plus the predicted probability of the second class
# (column index 1 of predict_proba).
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Accuracy on the held-out test partition.
test_accuracy = model.score(X_test, y_test)
print(f"Model accuracy: {test_accuracy:.3f}")

## 4. Bias Detection Analysis

In [None]:
# Create the detector used by this cell and by the later sections.
bias_detector = BiasDetector()

# Three fairness metrics computed from the test-set predictions and the
# protected attribute; equalized odds additionally needs the true labels.
disparate_impact = bias_detector.calculate_disparate_impact(y_pred, protected_test)
statistical_parity = bias_detector.calculate_statistical_parity(y_pred, protected_test)
equalized_odds = bias_detector.calculate_equalized_odds(y_test, y_pred, protected_test)

print("Bias Detection Results:")
print(f"Disparate Impact: {disparate_impact:.3f}")
print(f"Statistical Parity Difference: {statistical_parity:.3f}")
print(f"Equalized Odds Difference: {equalized_odds:.3f}")

# Verdict per metric. Thresholds used here: disparate impact below 0.8,
# |statistical parity| above 0.1, equalized odds above 0.1.
print("\nInterpretation:")
print(
    "⚠️ Disparate impact violation detected!"
    if disparate_impact < 0.8
    else "✅ Disparate impact within acceptable range"
)
print(
    "⚠️ Statistical parity violation detected!"
    if abs(statistical_parity) > 0.1
    else "✅ Statistical parity within acceptable range"
)
print(
    "⚠️ Equalized odds violation detected!"
    if equalized_odds > 0.1
    else "✅ Equalized odds within acceptable range"
)

## 5. Comprehensive Bias Report

In [None]:
# Generate comprehensive bias report
bias_report = bias_detector.generate_bias_report(y_test, y_pred, protected_test)


def _format_metric_value(value):
    """Return the text to print for a single report value.

    Real numbers - Python ints/floats as well as NumPy integer and floating
    scalars - are rendered with three decimals. Every other value (strings,
    lists, None, booleans, ...) is rendered with its normal string form, so an
    unexpected value type cannot crash the cell with a format error.
    """
    is_real_number = isinstance(value, (int, float, np.integer, np.floating))
    # bool is a subclass of int; print True/False rather than 1.000/0.000.
    if is_real_number and not isinstance(value, bool):
        return f"{value:.3f}"
    return f"{value}"


print("Comprehensive Bias Report:")
print("=" * 50)

for metric, value in bias_report.items():
    # The interpretations entry is printed as its own section below.
    if metric == 'interpretations':
        continue
    if isinstance(value, dict):
        # Nested group of metrics: upper-cased heading, then indented entries.
        print(f"\n{metric.upper()}:")
        for sub_metric, sub_value in value.items():
            print(f"  {sub_metric}: {_format_metric_value(sub_value)}")
    else:
        print(f"{metric}: {_format_metric_value(value)}")

print("\nINTERPRETATIONS:")
# .get() keeps the cell running when the report carries no interpretations.
for interpretation in bias_report.get('interpretations', []):
    print(f"• {interpretation}")

## 6. Visualization

In [None]:
# Create bias visualization
# Ask the detector for a figure built from the test-set predictions, the
# protected attribute and the true labels, then display it.
# NOTE(review): the figure type returned by plot_bias_metrics is not visible
# here - if it is a matplotlib Figure, .show() may only emit a warning under the
# inline notebook backend; confirm against the BiasDetector implementation.
bias_fig = bias_detector.plot_bias_metrics(y_pred, protected_test, y_test)
bias_fig.show()

## 7. Synthetic Biased Data Example

In [None]:
def evaluate_bias_strength(bias_level, n_samples=1000, test_size=0.3, random_state=42):
    """Train a model on synthetic data of one bias strength and measure its fairness.

    All intermediate objects (splits, model, predictions) stay local to this
    function, so running the experiment does not overwrite the notebook-level
    X_train / X_test / y_train / y_test / model / y_pred from the Adult dataset
    sections; those cells can still be re-run afterwards with consistent inputs.

    Parameters
    ----------
    bias_level : float
        Passed to generate_synthetic_biased_data as ``bias_strength``.
    n_samples : int, default 1000
        Passed to generate_synthetic_biased_data as ``n_samples``.
    test_size : float, default 0.3
        Fraction of the synthetic rows held out for evaluation.
    random_state : int, default 42
        Seed used for both the train/test split and the random forest.

    Returns
    -------
    dict
        Keys 'bias_level', 'disparate_impact' and 'statistical_parity'; the
        statistical parity value is stored as an absolute value.
    """
    synthetic_data = generate_synthetic_biased_data(n_samples=n_samples, bias_strength=bias_level)

    # Features are every column except the target and the protected attribute.
    features = synthetic_data.drop(['target', 'protected_attr'], axis=1)
    target = synthetic_data['target']
    protected = synthetic_data['protected_attr']

    # Only the pieces needed below are kept; the held-out labels and the
    # training-side protected attribute are not used by the two metrics.
    X_syn_train, X_syn_test, y_syn_train, _, _, prot_syn_test = train_test_split(
        features, target, protected, test_size=test_size, random_state=random_state
    )

    syn_model = RandomForestClassifier(random_state=random_state)
    syn_model.fit(X_syn_train, y_syn_train)
    syn_pred = syn_model.predict(X_syn_test)

    # Uses the notebook-level bias_detector created in Section 4.
    di = bias_detector.calculate_disparate_impact(syn_pred, prot_syn_test)
    sp = bias_detector.calculate_statistical_parity(syn_pred, prot_syn_test)

    return {
        'bias_level': bias_level,
        'disparate_impact': di,
        'statistical_parity': abs(sp)
    }


# Generate synthetic biased data with different bias levels
bias_levels = [0.0, 0.3, 0.6, 0.9]
results = [evaluate_bias_strength(bias_level) for bias_level in bias_levels]

# Display results
results_df = pd.DataFrame(results)
print("Effect of Bias Strength on Fairness Metrics:")
print(results_df.round(3))

In [None]:
# Two side-by-side panels showing how each fairness metric moves as the
# synthetic bias strength increases.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel:
# (axes, results_df column, threshold, threshold label, y-label, title, extra line kwargs)
panel_specs = [
    (
        ax1, 'disparate_impact', 0.8, 'Fairness Threshold (0.8)',
        'Disparate Impact', 'Disparate Impact vs Bias Strength', {},
    ),
    (
        ax2, 'statistical_parity', 0.1, 'Fairness Threshold (0.1)',
        'Statistical Parity Difference', 'Statistical Parity vs Bias Strength',
        {'color': 'orange'},
    ),
]

for panel_ax, column, threshold, threshold_label, y_label, title, line_kwargs in panel_specs:
    # Metric curve; the first panel keeps matplotlib's default line colour.
    panel_ax.plot(
        results_df['bias_level'], results_df[column],
        'o-', linewidth=2, markersize=8, **line_kwargs
    )
    # Dashed red reference line at the threshold for this metric.
    panel_ax.axhline(y=threshold, color='r', linestyle='--', label=threshold_label)
    panel_ax.set_xlabel('Bias Strength')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(title)
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()