In [None]:
# Keep Binder session alive during workshop
import time
from IPython.display import Javascript, display

def keep_alive():
    """Inject a browser-side timer that logs a heartbeat message every 5 minutes.

    The JavaScript is handed to the notebook front end through IPython's
    ``display``; nothing is returned to Python.
    """
    # 300000 ms = 5 minutes between console messages.
    # NOTE(review): the timer only writes to the browser console; confirm that
    # this is enough to stop the hosting service from culling an idle session.
    heartbeat_script = 'setInterval(function(){ console.log("keeping alive"); }, 300000);'
    display(Javascript(heartbeat_script))


keep_alive()

In [None]:
# AIF360 Bias Detection & Mitigation Workshop
# For Directors and Managers: Understanding Ethical AI Toolboxes

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# AIF360 imports
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing

In [None]:
# Optional AIF360 extras: record an availability flag for each one instead of
# failing the notebook when the installed build does not ship it.
try:
    from aif360.explainers import MetricTextExplainer
except ImportError:
    EXPLAINER_AVAILABLE = False
    print(" MetricTextExplainer not available in this environment")
else:
    EXPLAINER_AVAILABLE = True

try:
    import aif360.sklearn.metrics as aif_sklearn_metrics
except ImportError:
    SKLEARN_METRICS_AVAILABLE = False
else:
    SKLEARN_METRICS_AVAILABLE = True

# Welcome banner followed by a 60-character rule.
print(" Welcome to the AIF360 Example!", "=" * 60, sep="\n")

In [None]:
# =============================================================================
# SECTION 1: BUSINESS CONTEXT - WHY THIS MATTERS TO LEADERSHIP
# =============================================================================

# Section banner, rule, and the framing questions shown to the audience.
section_title = "\n BUSINESS CONTEXT FOR LEADERSHIP"
leadership_brief = """
KEY QUESTIONS FOR DIRECTORS/MANAGERS:
• What are the financial/legal risks of biased AI systems?
• How do we measure and communicate bias to stakeholders?
• What tools can our data science teams use to mitigate bias?
• How do we balance fairness with business performance?

This notebook demonstrates AIF360's capabilities using a hiring scenario.
"""

for chunk in (section_title, "=" * 40, leadership_brief):
    print(chunk)

In [None]:
# =============================================================================
# SECTION 2: DATA LOADING AND PREPARATION
# =============================================================================

print("\n DATA LOADING AND PREPARATION")
print("=" * 35)

# Define column names for Adult dataset (supplied to read_csv via `names=`)
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
               'marital-status', 'occupation', 'relationship', 'race', 'sex',
               'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

# Load the data files from your repo
print(" Loading Adult dataset from local files...")
# skipinitialspace=True drops the blank that follows each comma, so a missing
# field reaches the NA check as '?' rather than ' ?'. The marker must be given
# in that stripped form; otherwise nothing is flagged as missing and the
# dropna() below removes no rows.
df_train = pd.read_csv('data/adult/adult.data', names=column_names, na_values='?', skipinitialspace=True)
# skiprows=1 skips the first line of adult.test.
# NOTE(review): presumably a non-data banner line - confirm against the file.
df_test = pd.read_csv('data/adult/adult.test', names=column_names, na_values='?', skipinitialspace=True, skiprows=1)

# Combine datasets
df_raw = pd.concat([df_train, df_test], ignore_index=True)
print(f" Loaded {len(df_raw):,} records")

# Clean the data
print(" Cleaning and preparing data...")

# Remove missing values. The explicit copy makes df_clean an independent frame,
# so the column assignments below never write into a view of df_raw and the
# cell stays idempotent on re-run.
df_clean = df_raw.dropna().copy()
print(f"After removing missing values: {len(df_clean):,} records")

# Clean income column (remove periods from test set)
df_clean['income'] = df_clean['income'].str.replace('.', '', regex=False)

# Any label other than these two would silently be mapped to target == 0
# below, so fail loudly instead.
unexpected_income = set(df_clean['income'].unique()) - {'<=50K', '>50K'}
assert not unexpected_income, f"Unexpected income labels: {sorted(unexpected_income)}"

# Create binary target variable (0: <=50K, 1: >50K)
df_clean['target'] = (df_clean['income'] == '>50K').astype(int)

# Create binary sex variable (0: Female, 1: Male)
df_clean['sex_binary'] = (df_clean['sex'] == 'Male').astype(int)

# Encode categorical variables for AIF360 compatibility
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race', 'native-country']

# The encoder is re-fit per column; only the integer codes are kept.
le = LabelEncoder()
df_processed = df_clean.copy()

for col in categorical_cols:
    df_processed[col] = le.fit_transform(df_processed[col].astype(str))

# Create the final cleaned dataset that will be used throughout
df_final = df_processed[['age', 'workclass', 'fnlwgt', 'education', 'education-num',
                        'marital-status', 'occupation', 'relationship', 'race',
                        'sex_binary', 'capital-gain', 'capital-loss', 'hours-per-week',
                        'native-country', 'target']].copy()

print(" Data preparation complete!")
print(f"Final dataset: {df_final.shape[0]} rows × {df_final.shape[1]} columns")
print("\n Demographics in final dataset:")
print(f"Female (0): {(df_final['sex_binary'] == 0).sum():,} ({(df_final['sex_binary'] == 0).mean()*100:.1f}%)")
print(f"Male (1): {(df_final['sex_binary'] == 1).sum():,} ({(df_final['sex_binary'] == 1).mean()*100:.1f}%)")
print(f"High income (>50K): {df_final['target'].sum():,} ({df_final['target'].mean()*100:.1f}%)")


In [None]:
# =============================================================================
# SECTION 3: CREATE AIF360 DATASET AND INITIAL BIAS ANALYSIS
# =============================================================================

print("\n SECTION 3: INITIAL BIAS MEASUREMENT")
print("=" * 40)

# Wrap the prepared frame in an AIF360 StandardDataset: `target` is the label
# (1 = favorable) and `sex_binary` is the protected attribute (1 = privileged).
dataset = StandardDataset(
    df=df_final,
    label_name='target',
    favorable_classes=[1],
    protected_attribute_names=['sex_binary'],
    privileged_classes=[[1]],  # Male = 1
    categorical_features=['workclass', 'education', 'marital-status', 'occupation',
                         'relationship', 'race', 'native-country']
)

print(" Created AIF360 dataset structure")

# 70/30 shuffled split with a fixed seed
train_dataset, test_dataset = dataset.split([0.7], shuffle=True, seed=123)

# Group definitions reused by every metric and algorithm in the notebook
privileged_groups = [{'sex_binary': 1}]    # Male
unprivileged_groups = [{'sex_binary': 0}]  # Female

# Dataset-level bias metrics on the training split
metric_orig_train = BinaryLabelDatasetMetric(
    train_dataset,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups
)

print(" BIAS METRICS IN ORIGINAL TRAINING DATA:")
disparate_impact = metric_orig_train.disparate_impact()
statistical_parity_diff = metric_orig_train.statistical_parity_difference()

print(f"Disparate Impact: {disparate_impact:.3f}")
print(f"Statistical Parity Difference: {statistical_parity_diff:.3f}")

for guidance_line in (
    "\n WHAT THESE NUMBERS MEAN FOR LEADERSHIP:",
    "• Disparate Impact < 0.8 = Potential legal risk (80% rule)",
    "• Statistical Parity Difference > 0.1 = Significant bias concern",
):
    print(guidance_line)

# Risk bands used by this notebook: below 0.8 high, below 0.9 medium, else low
if disparate_impact < 0.8:
    risk_level = " HIGH RISK"
elif disparate_impact < 0.9:
    risk_level = " MEDIUM RISK"
else:
    risk_level = " LOW RISK"
print(f"• Current Risk Level: {risk_level}")

# Figure: outcome rate per gender (left) and a text-only risk card (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# convert_to_dataframe() returns a tuple; the DataFrame is its first element
train_df = train_dataset.convert_to_dataframe()[0]

# Mean of the binary target per sex_binary value = share of favorable outcomes
outcome_by_gender = train_df.groupby('sex_binary')['target'].mean()
ax1.bar(['Female', 'Male'], outcome_by_gender, color=['#ff7f7f', '#7f7fff'])
ax1.set(
    title='High-Income Rate by Gender\n(Training Data)',
    ylabel='Rate of High-Income (>50K)',
    ylim=(0, 0.5),
)
# Percentage label just above each bar
for bar_pos, rate in enumerate(outcome_by_gender):
    ax1.text(bar_pos, rate + 0.01, f'{rate:.1%}', ha='center', fontweight='bold')

# Risk card: two bold lines plus the threshold reminder, all centred
for y_pos, message, size in (
    (0.7, f'Disparate Impact: {disparate_impact:.3f}', 14),
    (0.5, risk_level, 16),
):
    ax2.text(0.5, y_pos, message,
             ha='center', va='center', fontsize=size, fontweight='bold')
ax2.text(0.5, 0.3, 'Legal threshold: 0.8', ha='center', va='center', fontsize=12)
ax2.set(xlim=(0, 1), ylim=(0, 1), title='Legal Risk Assessment')
ax2.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# SECTION 4: AIF360 BIAS MITIGATION - REWEIGHING
# =============================================================================

print("\n SECTION 4: AIF360 BIAS MITIGATION")
print("=" * 37)

# Short overview of the three families of mitigation techniques
for overview_line in (
    "AIF360 offers multiple bias mitigation techniques:",
    "• PRE-processing: Fix bias in training data (Reweighing)",
    "• IN-processing: Build fairness into model training",
    "• POST-processing: Adjust model outputs for fairness",
    "\nDemonstrating PRE-processing with Reweighing...",
):
    print(overview_line)

# Seed NumPy's global RNG, then configure Reweighing with the notebook's groups
np.random.seed(123)
group_spec = {
    'unprivileged_groups': unprivileged_groups,
    'privileged_groups': privileged_groups,
}
reweighing = Reweighing(**group_spec)

# Fit on the training split and transform it in one step
train_dataset_reweighed = reweighing.fit_transform(train_dataset)
print("Applied Reweighing to training data")

# Same dataset-level metrics as before, now on the reweighed training data
metric_reweighed = BinaryLabelDatasetMetric(train_dataset_reweighed, **group_spec)

print("\n BIAS METRICS AFTER REWEIGHING:")
disparate_impact_new = metric_reweighed.disparate_impact()
statistical_parity_diff_new = metric_reweighed.statistical_parity_difference()

print(f"Original Disparate Impact: {disparate_impact:.3f}")
print(f"After Reweighing: {disparate_impact_new:.3f}")
# Relative change in disparate impact, expressed in percent
relative_change = (disparate_impact_new - disparate_impact) / abs(disparate_impact)
improvement = relative_change * 100
print(f"Improvement: {improvement:+.1f}%")

print(f"\nOriginal Statistical Parity Diff: {statistical_parity_diff:.3f}")
print(f"After Reweighing: {statistical_parity_diff_new:.3f}")

In [None]:
# =============================================================================
# SECTION 5: MODEL TRAINING AND BUSINESS IMPACT ANALYSIS
# =============================================================================

print("\n SECTION 5: BUSINESS IMPACT ANALYSIS")
print("=" * 35)

print("Training models to compare business performance...")


def _legal_risk_label(di_value):
    """Map a disparate-impact ratio to the risk label used in this notebook.

    Below 0.8 returns " HIGH", below 0.9 returns " MEDIUM", anything else
    returns " LOW".
    """
    if di_value < 0.8:
        return " HIGH"
    if di_value < 0.9:
        return " MEDIUM"
    return " LOW"


# Prepare data for sklearn. The scaler is fit exactly once, on the training
# features, and then reused for the reweighed training set and the test set so
# all three are scaled with the same statistics.
scaler = StandardScaler()

# Original model (without bias mitigation)
X_train_orig = scaler.fit_transform(train_dataset.features)
y_train_orig = train_dataset.labels.ravel()

model_orig = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=123)
model_orig.fit(X_train_orig, y_train_orig)

# Bias-mitigated model (with reweighing): the mitigation enters through
# sample_weight. transform() (not fit_transform) keeps the scaler's statistics
# tied to the single fit above.
X_train_reweighed = scaler.transform(train_dataset_reweighed.features)
y_train_reweighed = train_dataset_reweighed.labels.ravel()
sample_weights = train_dataset_reweighed.instance_weights

model_reweighed = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=123)
model_reweighed.fit(X_train_reweighed, y_train_reweighed, sample_weight=sample_weights)

# Test both models
X_test = scaler.transform(test_dataset.features)
y_test = test_dataset.labels.ravel()

y_pred_orig = model_orig.predict(X_test)
y_pred_reweighed = model_reweighed.predict(X_test)

# Calculate performance metrics
accuracy_orig = accuracy_score(y_test, y_pred_orig)
accuracy_reweighed = accuracy_score(y_test, y_pred_reweighed)

print(" EXECUTIVE DASHBOARD: MODEL COMPARISON")
print("=" * 43)

print(" BUSINESS PERFORMANCE:")
print(f"Original Model Accuracy: {accuracy_orig:.1%}")
print(f"Reweighed Model Accuracy: {accuracy_reweighed:.1%}")
print(f"Performance Trade-off: {accuracy_reweighed - accuracy_orig:+.1%}")

# Calculate fairness metrics for predictions.
# AIF360 datasets store labels as an (n, 1) column, while sklearn's predict()
# returns a flat (n,) vector; reshape before assigning so the prediction
# datasets stay structurally valid. deepcopy=True keeps each prediction dataset
# independent of test_dataset.
test_pred_orig = test_dataset.copy(deepcopy=True)
test_pred_orig.labels = y_pred_orig.reshape(-1, 1)
metric_test_orig = BinaryLabelDatasetMetric(
    test_pred_orig,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups
)

test_pred_reweighed = test_dataset.copy(deepcopy=True)
test_pred_reweighed.labels = y_pred_reweighed.reshape(-1, 1)
metric_test_reweighed = BinaryLabelDatasetMetric(
    test_pred_reweighed,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups
)

print("\n FAIRNESS METRICS ON TEST PREDICTIONS:")
di_orig = metric_test_orig.disparate_impact()
di_reweighed = metric_test_reweighed.disparate_impact()

print(f"Original Model Disparate Impact: {di_orig:.3f}")
print(f"Reweighed Model Disparate Impact: {di_reweighed:.3f}")

# Risk levels
orig_risk = _legal_risk_label(di_orig)
reweighed_risk = _legal_risk_label(di_reweighed)

print(f"\n LEGAL RISK ASSESSMENT:")
print(f"Original Model: {orig_risk}")
print(f"Reweighed Model: {reweighed_risk}")

In [None]:
# =============================================================================
# SECTION 5B: AIF360'S COMPREHENSIVE FAIRNESS METRICS
# =============================================================================

print("\n SECTION 5B: AIF360'S MULTI-METRIC FAIRNESS ANALYSIS")
print("=" * 54)
print("AIF360's key advantage: Multiple fairness definitions in one tool...")

# One ClassificationMetric per model: ground-truth test set vs. its predictions
metric_groups = dict(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
cm_orig = ClassificationMetric(test_dataset, test_pred_orig, **metric_groups)
cm_reweighed = ClassificationMetric(test_dataset, test_pred_reweighed, **metric_groups)

print("\n AIF360'S FAIRNESS METRICS COMPARISON:")
print("=" * 45)

# (row label, ClassificationMetric method name, target description)
metric_specs = [
    ("Disparate Impact", "disparate_impact", ">0.8 (legal threshold)"),
    ("Statistical Parity Difference", "statistical_parity_difference", "~0.0 (equal rates)"),
    ("Equal Opportunity Difference", "equal_opportunity_difference", "~0.0 (equal TPR)"),
    ("Average Odds Difference", "average_odds_difference", "~0.0 (balanced errors)"),
    ("Theil Index", "theil_index", "Lower is better"),
]

# Rows of (label, original value, reweighed value, target description)
fairness_metrics = [
    (row_label, getattr(cm_orig, method)(), getattr(cm_reweighed, method)(), goal)
    for row_label, method, goal in metric_specs
]

# Fixed-width comparison table
print(f"{'Metric':<30} {'Original':<10} {'Reweighed':<10} {'Target':<20}")
print("-" * 75)
for row in fairness_metrics:
    row_label, before, after, goal = row
    print(f"{row_label:<30} {before:<10.3f} {after:<10.3f} {goal:<20}")

print(f"\nAccuracy                       {accuracy_orig:<10.3f} {accuracy_reweighed:<10.3f} {'Higher is better':<20}")

# Plain-language reading of each metric for the leadership audience
for note in (
    "\n WHY THESE AIF360 METRICS MATTER TO LEADERSHIP:",
    "• Disparate Impact: Legal compliance metric (EEOC 80% rule)",
    "• Statistical Parity: Equal selection rates across groups",
    "• Equal Opportunity: Fair treatment of qualified candidates",
    "• Average Odds: Balanced across all prediction types",
    "• Theil Index: Overall systemic inequality measure",
):
    print(note)

# Per-group true/false positive rates for each model
print("\n AIF360'S GROUP-SPECIFIC ANALYSIS:")
print("=" * 40)

for heading, model_cm in (
    ("ORIGINAL MODEL - Group Performance:", cm_orig),
    ("\nREWEIGHED MODEL - Group Performance:", cm_reweighed),
):
    print(heading)
    for rate_name, rate_fn in (
        ("True Positive Rate", model_cm.true_positive_rate),
        ("False Positive Rate", model_cm.false_positive_rate),
    ):
        # privileged=False selects the unprivileged group (labelled Female here)
        for group_name, is_privileged in (("Female", False), ("Male", True)):
            print(f"{group_name} {rate_name}: {rate_fn(privileged=is_privileged):.3f}")

# Simple AIF360-focused visualization
# Two panels: disparate impact per model (left), grouped multi-metric view (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: one bar per model, coloured by the notebook's risk bands
models = ['Original', 'Reweighed']
di_values = [cm_orig.disparate_impact(), cm_reweighed.disparate_impact()]
colors = []
for di_value in di_values:
    if di_value < 0.8:
        colors.append('red')
    elif di_value < 0.9:
        colors.append('orange')
    else:
        colors.append('green')

bars = ax1.bar(models, di_values, color=colors, alpha=0.7)
ax1.axhline(y=0.8, color='red', linestyle='--', alpha=0.7, label='Legal Threshold (0.8)')
ax1.set(ylabel='Disparate Impact', title='AIF360: Disparate Impact Analysis')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Value label centred just above each bar
for bar, val in zip(bars, di_values):
    label_x = bar.get_x() + bar.get_width()/2
    ax1.text(label_x, val + 0.02, f'{val:.3f}',
             ha='center', va='bottom', fontweight='bold')

# Right panel: disparate impact is plotted as-is, the three difference metrics
# by absolute value
metrics_names = ['Disp Impact', 'Stat Parity', 'Equal Opp', 'Avg Odds']
orig_abs_values, reweighed_abs_values = (
    [
        model_cm.disparate_impact(),
        abs(model_cm.statistical_parity_difference()),
        abs(model_cm.equal_opportunity_difference()),
        abs(model_cm.average_odds_difference()),
    ]
    for model_cm in (cm_orig, cm_reweighed)
)

x = np.arange(len(metrics_names))
width = 0.35

# Original bars sit half a width left of each tick, reweighed bars half a width right
for offset, heights, series_label, series_color in (
    (-width/2, orig_abs_values, 'Original', '#ff7f7f'),
    (width/2, reweighed_abs_values, 'Reweighed', '#7f7fff'),
):
    ax2.bar(x + offset, heights, width, label=series_label, alpha=0.7, color=series_color)
ax2.set(
    xlabel='AIF360 Fairness Metrics',
    ylabel='Metric Values',
    title='AIF360: Multi-Metric Fairness Comparison',
)
ax2.set_xticks(x)
ax2.set_xticklabels(metrics_names, fontsize=9)
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Catalogue of further mitigation algorithms, then the heading for the
# automated-explanation section that follows
algorithm_catalogue = [
    "\n ADDITIONAL AIF360 BIAS MITIGATION ALGORITHMS:",
    "(Available in your data science toolkit)",
    "• PRE-processing: Reweighing, Disparate Impact Remover, Learning Fair Representations",
    "• IN-processing: Adversarial Debiasing, Fair Adversarial Networks, Prejudice Remover",
    "• POST-processing: Equalized Odds, Calibrated Equalized Odds, Reject Option Classification",
]
explanation_heading = ["\n AIF360'S AUTOMATED FAIRNESS EXPLANATION:", "=" * 48]

print("\n".join(algorithm_catalogue + explanation_heading))

def _bias_wording(di_value, fair_wording):
    """Describe a disparate-impact ratio in words.

    Below 0.8 is "significant bias", below 0.9 is "moderate bias"; otherwise the
    caller-supplied ``fair_wording`` is returned.
    """
    if di_value < 0.8:
        return "significant bias"
    if di_value < 0.9:
        return "moderate bias"
    return fair_wording


if not EXPLAINER_AVAILABLE:
    # No explainer in this environment: print a short manual assessment instead.
    print(" MetricTextExplainer not available in this environment")
    print(" Comprehensive Fairness Assessment:")

    print("\n Original Model Analysis:")
    di_orig_text = _bias_wording(cm_orig.disparate_impact(), "acceptable fairness")
    print(f"• Shows {di_orig_text} against female candidates")
    print(f"• Disparate Impact: {cm_orig.disparate_impact():.3f}")

    print("\n Reweighed Model Analysis:")
    di_reweighed_text = _bias_wording(cm_reweighed.disparate_impact(), "good fairness")
    print(f"• Demonstrates {di_reweighed_text}")
    print(f"• Disparate Impact: {cm_reweighed.disparate_impact():.3f}")

    if cm_reweighed.disparate_impact() > cm_orig.disparate_impact():
        short_recommendation = 'Deploy reweighed model'
    else:
        short_recommendation = 'Consider additional bias mitigation'
    print(f"\n Recommendation: {short_recommendation}")

else:
    try:
        # One text explainer per model, built from its ClassificationMetric
        explainer = MetricTextExplainer(cm_orig)
        explainer_reweighed = MetricTextExplainer(cm_reweighed)

        for banner, model_cm, model_explainer in (
            (" Original Model Assessment:", cm_orig, explainer),
            ("\n Reweighed Model Assessment:", cm_reweighed, explainer_reweighed),
        ):
            print(banner)
            print(f"Disparate Impact: {model_cm.disparate_impact():.3f}")
            print(model_explainer.disparate_impact())

            # These two explanations are printed only if the explainer offers them
            for metric_title, metric_attr in (
                ("Statistical Parity Difference", "statistical_parity_difference"),
                ("Equal Opportunity Difference", "equal_opportunity_difference"),
            ):
                print(f"\n{metric_title}: {getattr(model_cm, metric_attr)():.3f}")
                if hasattr(model_explainer, metric_attr):
                    print(getattr(model_explainer, metric_attr)())

    except Exception as explainer_error:
        # Any explainer failure falls back to a manual write-up of the same metrics.
        print(f" Explainer error: {explainer_error}")
        print(" Note: MetricTextExplainer may have version-specific requirements")
        print(" Fairness Assessment Summary (Enhanced Manual Analysis):")

        print("\n DETAILED BIAS ANALYSIS:")

        # Original model assessment
        di_orig_text = _bias_wording(cm_orig.disparate_impact(), "acceptable fairness")
        print("\n Original Model Analysis:")
        print(f"• Disparate Impact: {cm_orig.disparate_impact():.3f} - Shows {di_orig_text}")
        print(f"• Statistical Parity Difference: {cm_orig.statistical_parity_difference():.3f}")
        print(f"• Equal Opportunity Difference: {cm_orig.equal_opportunity_difference():.3f}")

        # Risk interpretation, using the same 0.8 / 0.9 bands
        if cm_orig.disparate_impact() < 0.8:
            risk_note = "•  HIGH LEGAL RISK: Model falls below 80% rule threshold"
        elif cm_orig.disparate_impact() < 0.9:
            risk_note = "•  MODERATE RISK: Model should be improved for better compliance"
        else:
            risk_note = "• ACCEPTABLE: Model meets basic fairness thresholds"
        print(risk_note)

        # Reweighed model assessment
        di_reweighed_text = _bias_wording(cm_reweighed.disparate_impact(), "good fairness")
        print("\n Reweighed Model Analysis:")
        print(f"• Disparate Impact: {cm_reweighed.disparate_impact():.3f} - Demonstrates {di_reweighed_text}")
        print(f"• Statistical Parity Difference: {cm_reweighed.statistical_parity_difference():.3f}")
        print(f"• Equal Opportunity Difference: {cm_reweighed.equal_opportunity_difference():.3f}")

        # Change in disparate impact between the two models
        improvement_di = cm_reweighed.disparate_impact() - cm_orig.disparate_impact()
        print(f"• Disparate Impact Improvement: {improvement_di:+.3f}")

        # Final recommendation
        print("\n BIAS MITIGATION ASSESSMENT:")
        if not improvement_di > 0:
            print(" CONCERNING: Limited or no bias reduction")
            print(" RECOMMENDATION: Try different mitigation algorithms (e.g., post-processing)")
        else:
            print(" POSITIVE: Bias reduction achieved")
            if cm_reweighed.disparate_impact() >= 0.8:
                print(" SUCCESS: Now meets 80% rule threshold")
                print(" RECOMMENDATION: Deploy reweighed model")
            else:
                print(" PARTIAL: Still below 80% threshold but improved")
                print(" RECOMMENDATION: Deploy with additional monitoring, consider combining with other techniques")

# Add executive summary of results
print("\n EXECUTIVE SUMMARY OF BIAS MITIGATION:")
# Disparate impact before and after mitigation, plus absolute/relative change
di_before = cm_orig.disparate_impact()
di_after = cm_reweighed.disparate_impact()
improvement_di = di_after - di_before
improvement_percent = (improvement_di / di_before) * 100

print(" BIAS REDUCTION RESULTS:")
print(f"• Disparate Impact: {di_before:.3f} → {di_after:.3f}")
print(f"• Absolute Improvement: {improvement_di:+.3f}")
print(f"• Relative Improvement: {improvement_percent:+.1f}%")

# Ordered ladder of (condition, verdict, recommendation); the first rung whose
# condition holds wins, and the final rung always matches.
outcome_ladder = (
    (di_after >= 0.8,
     " SUCCESS: Model now meets legal compliance threshold (0.8)",
     "DEPLOY with confidence"),
    (improvement_di > 0.1,
     " GOOD PROGRESS: Significant improvement but still needs work",
     "DEPLOY with additional bias monitoring"),
    (improvement_di > 0.05,
     " MODEST PROGRESS: Some improvement achieved",
     "DEPLOY with caution, plan additional mitigation"),
    (True,
     " LIMITED SUCCESS: Minimal bias reduction",
     "DO NOT DEPLOY - try different techniques"),
)
verdict, recommendation = next(
    (message, action) for applies, message, action in outcome_ladder if applies
)
print(verdict)

print(f" BUSINESS RECOMMENDATION: {recommendation}")

# The explanation bullet depends on whether MetricTextExplainer was importable
if EXPLAINER_AVAILABLE:
    explanation_bullet = "• Automated explanations of fairness results"
else:
    explanation_bullet = "• Manual fairness assessment framework"

capability_lines = [
    "\n ADVANCED AIF360 CAPABILITIES DEMONSTRATED:",
    "• Multi-metric fairness evaluation (5+ fairness definitions)",
    "• Automated risk assessment and categorization",
    "• Group-wise performance analysis",
    explanation_bullet,
    "• Executive-friendly visualization dashboards",
    "• Comprehensive compliance reporting",
]
print("\n".join(capability_lines))

In [None]:
# =============================================================================
# SECTION 6: EXECUTIVE SUMMARY & RECOMMENDATIONS
# =============================================================================

# Closing summary, assembled line by line and printed in one go
rule = "=" * 60
accuracy_delta = accuracy_reweighed - accuracy_orig

summary_lines = [
    "\n" + rule,
    " EXECUTIVE SUMMARY & RECOMMENDATIONS",
    rule,
    "\n WHAT AIF360 PROVIDES YOUR ORGANIZATION:",
    "• Standardized bias measurement across different fairness definitions",
    "• Pre-built algorithms to reduce bias in ML models",
    "• Clear metrics that translate to legal/compliance requirements",
    "• Integration with popular ML frameworks (sklearn, etc.)",
    "\n KEY DECISION POINTS FOR LEADERSHIP:",
    f"1. Performance vs. Fairness Trade-off: {accuracy_delta:+.1%} accuracy change",
    f"2. Legal Risk Mitigation: {orig_risk} → {reweighed_risk}",
    "3. Implementation Effort: Medium (requires data science team training)",
    "4. Ongoing Monitoring: Automated bias metrics in production",
    "\n RECOMMENDED NEXT STEPS:",
    "1. Establish bias monitoring KPIs for your ML models",
    "2. Train data science teams on AIF360 implementation",
    "3. Integrate bias testing into ML model validation process",
    "4. Create executive dashboard for ongoing fairness monitoring",
    "\n" + rule,
    rule,
]
print("\n".join(summary_lines))

In [None]:
# Prompts for the closing group discussion
discussion_prompts = (
    "\n DISCUSSION QUESTIONS:",
    "• What level of performance trade-off is acceptable for reduced bias?",
    "• How should we communicate fairness metrics to business stakeholders?",
    "• What governance processes need to change to include bias testing?",
    "• How do we balance different fairness definitions for different use cases?",
)
for prompt in discussion_prompts:
    print(prompt)
