# In [None]:
# Political Text Classification Bias Analysis
# A comprehensive notebook for analyzing bias in Republican vs Democrat classification models

# ## Setup and Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from sklearn.metrics import precision_score, recall_score, balanced_accuracy_score
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Plotting defaults: reset matplotlib to its stock style first, then install
# seaborn's "husl" palette (the order matters, since the style reset would
# otherwise overwrite the palette).
plt.style.use("default")
sns.set_palette("husl")

print(
    "📚 Libraries imported successfully!",
    "🎯 Ready to analyze classification bias in political text data",
    sep="\n",
)

# ## Configuration

# --- Input / output locations ---
CSV_FILE = "tfidf_lr_performance_avg.csv"  # performance CSV to analyse
OUTPUT_DIR = "lr_bias_analysis"  # folder that receives the plots and the report
# Party label -> class index; edit this if the data uses a different encoding
PARTY_MAP = dict(D=0, R=1)

# --- Analysis parameters ---
SIGNIFICANCE_LEVEL = 0.05  # alpha used when flagging a bias as significant
FIGURE_SIZE = (20, 15)  # figsize of the multi-panel figure

# Echo the active configuration
for _caption, _setting in (
    ("📁 Input file", CSV_FILE),
    ("🏛️ Party mapping", PARTY_MAP),
    ("📊 Output directory", OUTPUT_DIR),
):
    print(f"{_caption}: {_setting}")

# ## Data Loading and Preprocessing

def load_and_preprocess_data(csv_path, party_map):
    """
    Read the performance CSV and derive per-class bias metrics from it.

    The file must provide the confusion-matrix columns ``tn``, ``fp``, ``fn``
    and ``tp``. Entries in those columns that cannot be parsed as numbers are
    replaced by 0 before any metric is computed.

    Args:
        csv_path: Location of the CSV file, passed to ``pd.read_csv``.
        party_map: Mapping from party label to class index.

    Returns:
        Tuple ``(df, reverse_party_map)``: the loaded DataFrame extended with
        the derived metric columns, and ``party_map`` inverted
        (class index -> party label).

    Raises:
        ValueError: If any of the four confusion-matrix columns is missing.
        Any other exception raised while loading or computing is printed and
        re-raised unchanged.
    """
    try:
        print(f"🔄 Loading data from {csv_path}...")
        frame = pd.read_csv(csv_path)
        print(f"✅ Loaded {len(frame)} records")
        print(f"📋 Columns: {list(frame.columns)}")

        # The four confusion-matrix counts are mandatory
        cm_columns = ['tn', 'fp', 'fn', 'tp']
        absent = [name for name in cm_columns if name not in frame.columns]
        if absent:
            raise ValueError(f"❌ Missing required columns: {absent}")

        # Coerce the counts to numbers; anything unparsable or missing becomes 0
        for name in cm_columns:
            frame[name] = pd.to_numeric(frame[name], errors='coerce').fillna(0)

        # Class index -> party label, used for labelling downstream
        label_by_class = {index: label for label, index in party_map.items()}

        # Tiny constant added to every denominator so empty cells give 0, not a division error
        eps = 1e-10

        tn, fp, fn, tp = (frame[name] for name in cm_columns)

        # Marginal totals of the confusion matrix
        predicted_neg = tn + fn
        predicted_pos = tp + fp
        actual_neg = tn + fp
        actual_pos = tp + fn

        # Precision: TN / (TN + FN) for class 0, TP / (TP + FP) for class 1
        precision_0 = tn / (predicted_neg + eps)
        precision_1 = tp / (predicted_pos + eps)
        frame['precision_class_0'] = precision_0
        frame['precision_class_1'] = precision_1

        # Recall: TN / (TN + FP) for class 0, TP / (TP + FN) for class 1
        recall_0 = tn / (actual_neg + eps)
        recall_1 = tp / (actual_pos + eps)
        frame['recall_class_0'] = recall_0
        frame['recall_class_1'] = recall_1

        # Bias metrics, always class 1 minus class 0
        frame['precision_difference'] = precision_1 - precision_0
        frame['recall_difference'] = recall_1 - recall_0

        # Balanced accuracy is the mean of the two recalls
        frame['balanced_accuracy'] = (recall_0 + recall_1) / 2

        # Class distributions
        total = predicted_pos + tn + fn
        frame['total_predictions'] = total
        frame['predicted_class_0'] = predicted_neg
        frame['predicted_class_1'] = predicted_pos
        frame['actual_class_0'] = actual_neg
        frame['actual_class_1'] = actual_pos

        # Prediction bias: predicted share of class 1 minus its actual share
        predicted_share = predicted_pos / (total + eps)
        actual_share = actual_pos / (total + eps)
        frame['predicted_rate_class_1'] = predicted_share
        frame['actual_rate_class_1'] = actual_share
        frame['prediction_bias'] = predicted_share - actual_share

        # Error rates
        fpr = fp / (actual_neg + eps)
        fnr = fn / (actual_pos + eps)
        frame['false_positive_rate'] = fpr
        frame['false_negative_rate'] = fnr
        frame['fpr_fnr_difference'] = fpr - fnr

        print("✅ Successfully calculated bias metrics")
        return frame, label_by_class

    except Exception as e:
        print(f"❌ Error loading data: {e}")
        raise

# Load the data
df, reverse_party_map = load_and_preprocess_data(csv_path=CSV_FILE, party_map=PARTY_MAP)

# ## Data Overview

print("📊 DATASET OVERVIEW")
print("=" * 50)
print(f"📈 Total records: {len(df)}")
print(f"🗓️ Congress years: {sorted(df['year'].unique())}")
# Headline performance columns, reported as mean ± standard deviation
for _label, _column in (("accuracy", "accuracy"), ("F1 score", "f1_score"), ("AUC", "auc")):
    _scores = df[_column]
    print(f"🎯 Mean {_label}: {_scores.mean():.4f} ± {_scores.std():.4f}")

# Summary statistics of the raw confusion-matrix counts
print("\n📋 CONFUSION MATRIX STATISTICS")
print("-" * 30)
cm_stats = df[['tn', 'fp', 'fn', 'tp']].describe()
print(cm_stats)

# ## Statistical Bias Analysis

def calculate_bias_statistics(df, significance_level=0.05):
    """
    Summarise the bias metrics in ``df`` and test each one against zero.

    Args:
        df: DataFrame with a 'year' column and the metric columns
            'precision_difference', 'recall_difference', 'prediction_bias'
            and 'fpr_fnr_difference'.
        significance_level: A bias is flagged as significant when its p-value
            is strictly below this threshold.

    Returns:
        dict with, for every metric column ``<col>`` and its short name
        ``<short>`` (precision, recall, prediction, fpr_fnr):

        * ``mean_<col>`` and ``std_<col>``: mean and standard deviation
        * ``<short>_bias_p_value``: p-value of a one-sample t-test of the
          column against 0
        * ``significant_<short>_bias``: builtin ``bool`` flag

        plus ``temporal_consistency``, the standard deviation of the per-year
        means of each metric (``<short>_std_across_years``).
    """
    # short name used in result keys -> DataFrame column holding the metric
    metric_columns = {
        'precision': 'precision_difference',
        'recall': 'recall_difference',
        'prediction': 'prediction_bias',
        'fpr_fnr': 'fpr_fnr_difference',
    }

    results = {}
    for short, column in metric_columns.items():
        values = df[column]
        results[f'mean_{column}'] = values.mean()
        results[f'std_{column}'] = values.std()

        # One-sample t-test against 0 (no bias)
        p_value = stats.ttest_1samp(values, 0).pvalue
        results[f'{short}_bias_p_value'] = p_value
        # bool() turns numpy.bool_ into a plain bool so the result stays
        # JSON-serialisable; a NaN p-value compares False, i.e. "not significant"
        results[f'significant_{short}_bias'] = bool(p_value < significance_level)

    # Temporal consistency: how much the yearly mean of each metric varies
    yearly_means = df.groupby('year')[list(metric_columns.values())].mean()
    results['temporal_consistency'] = {
        f'{short}_std_across_years': yearly_means[column].std()
        for short, column in metric_columns.items()
    }

    return results

# Calculate bias statistics
bias_stats = calculate_bias_statistics(df, significance_level=SIGNIFICANCE_LEVEL)

print("🔍 BIAS ANALYSIS RESULTS")
print("=" * 50)
# Differences are reported as class 1 minus class 0
_class_gap = f"({reverse_party_map[1]} - {reverse_party_map[0]})"
for _title, _metric in (
    (f"Precision Bias {_class_gap}", "precision_difference"),
    (f"Recall Bias {_class_gap}", "recall_difference"),
    ("Prediction Bias", "prediction_bias"),
):
    print(f"📊 {_title}: {bias_stats['mean_' + _metric]:.4f} ± {bias_stats['std_' + _metric]:.4f}")

print("\n🧪 STATISTICAL SIGNIFICANCE (p-values)")
print("-" * 40)
for _title, _short in (("Precision", "precision"), ("Recall", "recall"), ("Prediction", "prediction")):
    _p_value = bias_stats[f'{_short}_bias_p_value']
    _verdict = '✅ Significant' if bias_stats[f'significant_{_short}_bias'] else '❌ Not significant'
    print(f"{_title} Bias: {_p_value:.6f} {_verdict}")

# ## Comprehensive Visualization

# Make sure the output folder and its plots/ subfolder exist before anything is saved
for _suffix in ("", "/plots"):
    Path(f"{OUTPUT_DIR}{_suffix}").mkdir(parents=True, exist_ok=True)

def create_comprehensive_bias_plots(df, reverse_party_map, output_dir):
    """
    Draw the 3x3 bias-analysis figure, save it as a PNG and display it.

    Panels, in order: per-class precision/recall over time, precision-bias
    histogram, recall-bias histogram, prediction bias over time, average
    confusion matrix, balanced vs regular accuracy, accuracy vs precision
    bias, class distribution, and per-class error rates over time.

    Args:
        df: DataFrame containing 'year', 'accuracy', the confusion-matrix
            counts ('tn', 'fp', 'fn', 'tp') and the derived metric columns
            read below.
        reverse_party_map: Mapping class index (0 / 1) -> party label, used in
            legends, tick labels and axis labels.
        output_dir: Base output directory. The image is written to
            ``<output_dir>/plots/comprehensive_bias_analysis.png``; the
            ``plots`` folder is created here if it does not exist yet.

    Returns:
        The matplotlib figure that was drawn.
    """
    label_0 = reverse_party_map[0]
    label_1 = reverse_party_map[1]

    fig = plt.figure(figsize=FIGURE_SIZE)

    # A single groupby feeds every "over time" panel. Its index is used as the
    # x axis so x and y always have the same length, even if 'year' has gaps
    # or missing values.
    yearly = df.groupby('year')[[
        'precision_class_0', 'precision_class_1',
        'recall_class_0', 'recall_class_1',
        'prediction_bias',
        'false_positive_rate', 'false_negative_rate',
    ]].mean()
    years = yearly.index.to_numpy()

    # 1. Per-Class Performance Over Time
    ax1 = fig.add_subplot(3, 3, 1)
    ax1.plot(years, yearly['precision_class_0'], 'b-o', label=f'{label_0} Precision', alpha=0.8, linewidth=2)
    ax1.plot(years, yearly['precision_class_1'], 'r-o', label=f'{label_1} Precision', alpha=0.8, linewidth=2)
    ax1.plot(years, yearly['recall_class_0'], 'b--s', label=f'{label_0} Recall', alpha=0.8, linewidth=2)
    ax1.plot(years, yearly['recall_class_1'], 'r--s', label=f'{label_1} Recall', alpha=0.8, linewidth=2)
    ax1.set_title('Per-Class Performance Over Time', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Congress Year')
    ax1.set_ylabel('Score')
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax1.grid(True, alpha=0.3)

    # 2. Precision Bias Distribution
    ax2 = fig.add_subplot(3, 3, 2)
    precision_mean = df['precision_difference'].mean()
    ax2.hist(df['precision_difference'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
    ax2.axvline(0, color='red', linestyle='--', linewidth=2, label='No Bias')
    ax2.axvline(precision_mean, color='orange', linewidth=3, label='Mean Bias')
    ax2.set_title(f'Precision Bias Distribution\n(Mean: {precision_mean:.4f})', fontsize=14, fontweight='bold')
    ax2.set_xlabel(f'Precision Difference ({label_1} - {label_0})')
    ax2.set_ylabel('Frequency')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Recall Bias Distribution
    ax3 = fig.add_subplot(3, 3, 3)
    recall_mean = df['recall_difference'].mean()
    ax3.hist(df['recall_difference'], bins=20, alpha=0.7, color='lightcoral', edgecolor='black')
    ax3.axvline(0, color='red', linestyle='--', linewidth=2, label='No Bias')
    ax3.axvline(recall_mean, color='orange', linewidth=3, label='Mean Bias')
    ax3.set_title(f'Recall Bias Distribution\n(Mean: {recall_mean:.4f})', fontsize=14, fontweight='bold')
    ax3.set_xlabel(f'Recall Difference ({label_1} - {label_0})')
    ax3.set_ylabel('Frequency')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # 4. Prediction Bias Over Time
    ax4 = fig.add_subplot(3, 3, 4)
    ax4.plot(years, yearly['prediction_bias'], 'g-o', linewidth=3, markersize=8, alpha=0.8)
    ax4.axhline(0, color='red', linestyle='--', linewidth=2, label='No Bias')
    ax4.fill_between(years, yearly['prediction_bias'], 0, alpha=0.3, color='green')
    ax4.set_title('Prediction Bias Over Time', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Congress Year')
    ax4.set_ylabel(f'Prediction Bias (toward {label_1})')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    # 5. Average Confusion Matrix Heatmap (rows: true class, columns: predicted class)
    ax5 = fig.add_subplot(3, 3, 5)
    avg_cm = np.array([[df['tn'].mean(), df['fp'].mean()],
                       [df['fn'].mean(), df['tp'].mean()]])
    sns.heatmap(avg_cm, annot=True, fmt='.1f',
                xticklabels=[f'Pred {label_0}', f'Pred {label_1}'],
                yticklabels=[f'True {label_0}', f'True {label_1}'],
                cmap='Blues', ax=ax5, cbar_kws={'label': 'Average Count'})
    ax5.set_title('Average Confusion Matrix', fontsize=14, fontweight='bold')

    # 6. Balanced vs Regular Accuracy
    ax6 = fig.add_subplot(3, 3, 6)
    ax6.scatter(df['accuracy'], df['balanced_accuracy'], alpha=0.6, s=50, c='purple')
    # Diagonal reference line spanning the range covered by both axes
    min_acc = min(df['accuracy'].min(), df['balanced_accuracy'].min())
    max_acc = max(df['accuracy'].max(), df['balanced_accuracy'].max())
    ax6.plot([min_acc, max_acc], [min_acc, max_acc], 'r--', alpha=0.8, linewidth=2, label='Perfect Agreement')
    ax6.set_xlabel('Regular Accuracy')
    ax6.set_ylabel('Balanced Accuracy')
    ax6.set_title('Balanced vs Regular Accuracy', fontsize=14, fontweight='bold')
    ax6.legend()
    ax6.grid(True, alpha=0.3)

    # 7. Bias vs Performance Correlation
    ax7 = fig.add_subplot(3, 3, 7)
    ax7.scatter(df['accuracy'], df['precision_difference'], alpha=0.6, s=50, c='purple')
    ax7.set_xlabel('Overall Accuracy')
    ax7.set_ylabel(f'Precision Bias ({label_1} - {label_0})')
    ax7.set_title('Accuracy vs Precision Bias', fontsize=14, fontweight='bold')
    ax7.axhline(0, color='red', linestyle='--', alpha=0.8)
    ax7.grid(True, alpha=0.3)

    # 8. Class Distribution in Data
    ax8 = fig.add_subplot(3, 3, 8)
    class_0_ratio = df['actual_class_0'] / df['total_predictions']
    class_1_ratio = df['actual_class_1'] / df['total_predictions']
    ax8.hist([class_0_ratio, class_1_ratio], bins=15, alpha=0.7,
             label=[f'{label_0} in Data', f'{label_1} in Data'],
             color=['blue', 'red'])
    ax8.set_title('Class Distribution in Test Data', fontsize=14, fontweight='bold')
    ax8.set_xlabel('Proportion')
    ax8.set_ylabel('Frequency')
    ax8.legend()
    ax8.grid(True, alpha=0.3)

    # 9. Error Rates by Class Over Time
    # FPR is the share of true class-0 rows predicted as class 1; FNR the reverse
    ax9 = fig.add_subplot(3, 3, 9)
    ax9.plot(years, yearly['false_positive_rate'], 'b-o', label=f'{label_0} Error Rate (FPR)', alpha=0.8, linewidth=2)
    ax9.plot(years, yearly['false_negative_rate'], 'r-o', label=f'{label_1} Error Rate (FNR)', alpha=0.8, linewidth=2)
    ax9.set_title('Error Rates by Class Over Time', fontsize=14, fontweight='bold')
    ax9.set_xlabel('Congress Year')
    ax9.set_ylabel('Error Rate')
    ax9.legend()
    ax9.grid(True, alpha=0.3)

    fig.tight_layout()

    # Save the plot. The target folder is created here so the function works
    # for any output_dir, not only one prepared by the caller.
    Path(output_dir, "plots").mkdir(parents=True, exist_ok=True)
    plot_path = f"{output_dir}/plots/comprehensive_bias_analysis.png"
    fig.savefig(plot_path, dpi=300, bbox_inches='tight', facecolor='white')
    print(f"💾 Comprehensive plot saved to: {plot_path}")

    plt.show()

    return fig

# Render, save and display the nine-panel bias figure
print("🎨 Creating comprehensive bias visualization...")
fig = create_comprehensive_bias_plots(
    df=df,
    reverse_party_map=reverse_party_map,
    output_dir=OUTPUT_DIR,
)

# ## Detailed Bias Report

def generate_bias_report(df, bias_stats, reverse_party_map, output_path):
    """
    Build the plain-text bias report, write it to disk and return it.

    Args:
        df: DataFrame with 'year', 'accuracy', 'f1_score' and 'auc' columns.
        bias_stats: Dictionary holding the ``mean_*`` / ``std_*`` values, the
            ``*_bias_p_value`` entries, the ``significant_*_bias`` flags and
            the ``temporal_consistency`` sub-dictionary for the precision,
            recall and prediction metrics.
        reverse_party_map: Mapping class index (0 / 1) -> party label.
        output_path: File the report is written to (UTF-8). Missing parent
            directories are created.

    Returns:
        The full report as a single string.
    """
    class_0 = reverse_party_map[0]
    class_1 = reverse_party_map[1]
    gap_label = f"({class_1} - {class_0})"

    def _leading_class(mean_value):
        # Every metric is "class 1 minus class 0" (or the class-1 rate minus
        # its actual rate), so a positive mean points at class 1 and a
        # negative mean at class 0.
        return class_1 if mean_value > 0 else class_0

    report = [
        "=" * 70,
        "POLITICAL TEXT CLASSIFICATION BIAS ANALYSIS REPORT",
        "=" * 70,
        "",
    ]

    # Dataset overview
    report += [
        "DATASET OVERVIEW:",
        "-" * 20,
        f"Total records analyzed: {len(df)}",
        f"Congress years covered: {sorted(df['year'].unique())}",
        f"Mean accuracy: {df['accuracy'].mean():.4f} ± {df['accuracy'].std():.4f}",
        f"Mean F1 score: {df['f1_score'].mean():.4f} ± {df['f1_score'].std():.4f}",
        f"Mean AUC: {df['auc'].mean():.4f} ± {df['auc'].std():.4f}",
        "",
    ]

    # Bias metrics
    report += [
        "BIAS METRICS:",
        "-" * 15,
        f"Precision Bias {gap_label}: {bias_stats['mean_precision_difference']:.4f} ± {bias_stats['std_precision_difference']:.4f}",
        f"Recall Bias {gap_label}: {bias_stats['mean_recall_difference']:.4f} ± {bias_stats['std_recall_difference']:.4f}",
        f"Prediction Bias: {bias_stats['mean_prediction_bias']:.4f} ± {bias_stats['std_prediction_bias']:.4f}",
        "",
    ]

    # Statistical significance
    report += [
        "STATISTICAL SIGNIFICANCE (p-values):",
        "-" * 40,
        f"Precision Bias: {bias_stats['precision_bias_p_value']:.6f}",
        f"Recall Bias: {bias_stats['recall_bias_p_value']:.6f}",
        f"Prediction Bias: {bias_stats['prediction_bias_p_value']:.6f}",
        "",
    ]

    # Interpretation. Only the class name depends on the sign of the mean; the
    # wording stays fixed. Flipping both the class and the verb would describe
    # the opposite of what the numbers show.
    report += ["INTERPRETATION:", "-" * 15]

    if bias_stats['significant_precision_bias']:
        favoured = _leading_class(bias_stats['mean_precision_difference'])
        report.append(f"• SIGNIFICANT precision bias detected favoring {favoured}")
    else:
        report.append("• No statistically significant precision bias detected")

    if bias_stats['significant_recall_bias']:
        better_recalled = _leading_class(bias_stats['mean_recall_difference'])
        report.append(f"• SIGNIFICANT recall bias: model is better at identifying {better_recalled}")
    else:
        report.append("• No statistically significant recall bias detected")

    if bias_stats['significant_prediction_bias']:
        # With two classes, under-predicting class 1 is over-predicting class 0
        over_predicted = _leading_class(bias_stats['mean_prediction_bias'])
        report.append(f"• SIGNIFICANT prediction bias: model over-predicts {over_predicted}")
    else:
        report.append("• No statistically significant prediction bias detected")

    report.append("")

    # Temporal consistency
    temporal = bias_stats['temporal_consistency']
    report += [
        "TEMPORAL CONSISTENCY:",
        "-" * 20,
        f"Precision bias std across years: {temporal['precision_std_across_years']:.4f}",
        f"Recall bias std across years: {temporal['recall_std_across_years']:.4f}",
        f"Prediction bias std across years: {temporal['prediction_std_across_years']:.4f}",
        "",
    ]

    # Recommendations
    report += ["RECOMMENDATIONS:", "-" * 15]

    bias_found = (
        bias_stats['significant_precision_bias']
        or bias_stats['significant_recall_bias']
        or bias_stats['significant_prediction_bias']
    )
    if bias_found:
        report += [
            "• BIAS DETECTED - Consider implementing mitigation strategies:",
            "  - Use balanced class weights during training",
            "  - Optimize classification thresholds separately for each class",
            "  - Apply post-processing calibration techniques",
            "  - Consider adversarial debiasing methods",
            "  - Increase training data for underrepresented scenarios",
        ]
    else:
        report += [
            "• Model appears relatively unbiased",
            "• Continue monitoring bias metrics in future evaluations",
            "• Consider testing on additional validation sets",
        ]

    report += ["", "=" * 70]

    report_text = "\n".join(report)

    # Save to file. The text contains non-ASCII characters ("±", "•"), so the
    # encoding is fixed rather than left to the platform default.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(f"📄 Detailed report saved to: {output_path}")

    return report_text

# Generate the detailed report
report_path = "{}/bias_analysis_report.txt".format(OUTPUT_DIR)
report_text = generate_bias_report(
    df=df,
    bias_stats=bias_stats,
    reverse_party_map=reverse_party_map,
    output_path=report_path,
)

# Display the report in the notebook
print("📋 DETAILED BIAS ANALYSIS REPORT", "=" * 50, report_text, sep="\n")

# ## Summary and Next Steps

_banner = "🎯" * 25
print("\n" + _banner)
print("ANALYSIS COMPLETE!")
print(_banner)

print(f"\n📁 All results saved to: {OUTPUT_DIR}/")
print(f"   📊 Plots: {OUTPUT_DIR}/plots/")
print(f"   📄 Report: {OUTPUT_DIR}/bias_analysis_report.txt")

# Party names are resolved from the configured class mapping instead of
# assuming class 1 is Republican, so the summary stays correct when PARTY_MAP
# is changed. Labels without a known long form are printed as they are.
_party_long_names = {"D": "Democrats", "R": "Republicans"}
_class_names = {
    _class_index: _party_long_names.get(_label, str(_label))
    for _class_index, _label in reverse_party_map.items()
}

print("\n🔍 KEY FINDINGS:")
# (metric name, key of its mean in bias_stats, message printed before the party name)
for _kind, _mean_key, _message in (
    ("precision", "mean_precision_difference", "Significant precision bias favoring"),
    ("recall", "mean_recall_difference", "Significant recall bias favoring"),
    ("prediction", "mean_prediction_bias", "Significant prediction bias toward"),
):
    if bias_stats[f"significant_{_kind}_bias"]:
        # All metrics are measured as class 1 relative to class 0
        _party = _class_names[1] if bias_stats[_mean_key] > 0 else _class_names[0]
        print(f"   ⚠️  {_message} {_party}")
    else:
        print(f"   ✅ No significant {_kind} bias")

print("\n💡 NEXT STEPS:")
if any(
    bias_stats[f"significant_{_kind}_bias"]
    for _kind in ("precision", "recall", "prediction")
):
    print("   🔧 Implement bias mitigation techniques")
    print("   📊 Re-evaluate with balanced class weights")
    print("   🎯 Optimize classification thresholds")
else:
    print("   ✅ Model appears unbiased - continue monitoring")
    print("   📈 Consider testing on additional datasets")

print("\n🚀 Happy analyzing!")

# ## Optional: Interactive Exploration

# Uncomment the following section if you want to explore the data interactively
#
# The snippet is stored as an inert string literal, so nothing in it runs. To
# activate it, remove the enclosing triple quotes. Inside the string the
# newlines in the print calls are written as "\\n"; change them to "\n" once
# the code is live, otherwise a literal backslash-n is printed.

"""
# Interactive data exploration
print("\\n🔍 INTERACTIVE EXPLORATION")
print("=" * 30)
print("The following variables are available for further analysis:")
print("- df: Main dataframe with all bias metrics")
print("- bias_stats: Dictionary with calculated bias statistics")
print("- reverse_party_map: Party label mapping")

# Example: Examine specific years with high bias
high_bias_years = df[abs(df['precision_difference']) > 0.1]['year'].unique()
if len(high_bias_years) > 0:
    print(f"\\n📅 Years with high precision bias (>0.1): {sorted(high_bias_years)}")
    
# Example: Correlation analysis
correlation = df['accuracy'].corr(df['precision_difference'])
print(f"\\n📈 Correlation between accuracy and precision bias: {correlation:.4f}")
"""