# Coverage Analysis: ANC4 and SBA by Track Status

**Purpose**: Calculate population-weighted coverage for ANC4 and SBA by track status.

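This notebook analyzes two maternal health coverage indicators, ANC4 (antenatal care, four or more visits) and SBA (skilled birth attendance), and compares on-track with off-track countries. Each group's coverage is a population-weighted average in which every country's coverage is weighted by its number of births (`Births_2022`).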

**Author**: Data Analysis Team  
**Date**: 2025

## 1. Import Libraries and Setup

In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Lift pandas' display limits so wide DataFrames are shown in full
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Plot appearance: matplotlib's default style with seaborn's "husl" palette
plt.style.use('default')
sns.set_palette("husl")

print("Libraries imported successfully!")

## 2. Load and Explore Data

In [None]:
def load_data(data_path=None):
    """
    Load the merged health dataset.

    Parameters:
    -----------
    data_path : str or pathlib.Path, optional
        Location of the CSV file. When omitted, the notebook's default
        ``../02_processed_data/merged_health_data.csv`` is used (resolved
        against the current working directory).

    Returns:
    --------
    pandas.DataFrame : The file's contents as parsed by ``pd.read_csv``

    Raises:
    -------
    FileNotFoundError : If the path does not point to an existing file
    """
    if data_path is None:
        data_path = "../02_processed_data/merged_health_data.csv"
    data_path = Path(data_path)

    # is_file() also rejects directories, which read_csv could not open anyway
    if not data_path.is_file():
        raise FileNotFoundError(f"Data file not found: {data_path}")

    df = pd.read_csv(data_path)
    print(f"Loaded dataset with {len(df)} countries")
    return df

# Read the dataset once; the cells below all work on this `df`
df = load_data()

# Shape and column names of what was just loaded
print(
    "\nDataset Info:",
    f"Shape: {df.shape}",
    f"Columns: {df.columns.tolist()}",
    sep="\n",
)

In [None]:
# Preview the top of the dataset; the DataFrame is rendered as the cell output
print("First 5 rows of the dataset:")
df.head(n=5)

In [None]:
# Count non-missing values for each column the analysis relies on
n_rows = len(df)
print("Data Availability Summary:")
print("=" * 40)
for label, column in (('ANC4', 'ANC4'), ('SBA', 'SBA'), ('Births', 'Births_2022')):
    n_available = df[column].notna().sum()
    print(f"Countries with {label} data: {n_available} / {n_rows}")

# Number of rows in each track-status group
print("\nTrack Status Distribution:")
status_counts = df['Mortality_Status_Binary'].value_counts()
print(status_counts)

## 3. Define Analysis Functions

In [None]:
def calculate_population_weighted_coverage(df, indicator, track_status):
    """
    Calculate population-weighted coverage for a specific indicator and track status.

    Rows are used only when ``Mortality_Status_Binary`` equals ``track_status``
    and both the indicator column and ``Births_2022`` are non-missing.

    Parameters:
    -----------
    df : pandas.DataFrame
        The dataset containing coverage and births data. Must provide the
        columns ``Mortality_Status_Binary``, ``Births_2022``, ``Country`` and
        the column named by ``indicator``.
    indicator : str
        The coverage indicator ('ANC4' or 'SBA')
    track_status : str
        The track status ('on-track' or 'off-track')

    Returns:
    --------
    dict : Dictionary containing calculated statistics with the keys
        'track_status', 'indicator', 'n_countries', 'total_births',
        'weighted_coverage', 'min_coverage', 'max_coverage',
        'median_coverage' and 'countries'. When no row qualifies, the counts
        are 0, the statistics are NaN and 'countries' is empty. When rows
        qualify but their births sum to zero, only 'weighted_coverage' is NaN.
    """
    usable_rows = (
        (df['Mortality_Status_Binary'] == track_status)
        & df[indicator].notna()
        & df['Births_2022'].notna()
    )
    subset = df[usable_rows]

    if subset.empty:
        return {
            'track_status': track_status,
            'indicator': indicator,
            'n_countries': 0,
            'total_births': 0,
            'weighted_coverage': np.nan,
            'min_coverage': np.nan,
            'max_coverage': np.nan,
            'median_coverage': np.nan,
            'countries': []
        }

    weights = subset['Births_2022']
    coverage_values = subset[indicator]

    # Negative births values are weighted by their magnitude (behaviour kept
    # from the original implementation).
    abs_weights = weights.abs()

    # Formula: Σ(coverage_i × births_i) / Σ(births_i)
    # np.average raises ZeroDivisionError when the weights sum to zero, so an
    # all-zero births group reports NaN instead of aborting the analysis.
    if abs_weights.sum() > 0:
        weighted_coverage = np.average(coverage_values, weights=abs_weights)
    else:
        weighted_coverage = np.nan

    return {
        'track_status': track_status,
        'indicator': indicator,
        'n_countries': len(subset),
        # Signed sum of the births column, as reported by the original code
        'total_births': weights.sum(),
        'weighted_coverage': weighted_coverage,
        'min_coverage': coverage_values.min(),
        'max_coverage': coverage_values.max(),
        'median_coverage': coverage_values.median(),
        # Country names are returned for reference
        'countries': subset['Country'].tolist()
    }

# Progress message: reaching this line means the definition above executed
print("Analysis functions defined successfully!")

## 4. ANC4 Coverage Analysis

In [None]:
def analyze_coverage_by_indicator(df, indicator):
    """
    Analyze coverage for a specific indicator across track statuses.

    Prints a report for the 'on-track' and 'off-track' groups and, when both
    groups have a weighted coverage value, the gap between them.

    Parameters:
    -----------
    df : pandas.DataFrame
        The dataset passed on to ``calculate_population_weighted_coverage``
    indicator : str
        The coverage indicator ('ANC4' or 'SBA')

    Returns:
    --------
    dict : Keys 'on-track' and 'off-track' hold the per-group statistics.
        'coverage_gap' (on-track minus off-track, in percentage points) and
        'relative_difference' (gap as a percentage of off-track coverage) are
        added only when both weighted coverages are available.
        'relative_difference' is NaN when off-track coverage is exactly zero.
    """
    print(f"\n{'='*60}")
    print(f"ANALYZING {indicator} COVERAGE")
    print(f"{'='*60}")

    results = {}

    # Analyze both track statuses
    for status in ['on-track', 'off-track']:
        result = calculate_population_weighted_coverage(df, indicator, status)
        results[status] = result

        print(f"\n{status.upper()} COUNTRIES:")
        print(f"  Number of countries: {result['n_countries']}")
        print(f"  Total births represented: {result['total_births']:,.0f}")
        print(f"  Population-weighted coverage: {result['weighted_coverage']:.1f}%")
        print(f"  Coverage range: {result['min_coverage']:.1f}% - {result['max_coverage']:.1f}%")
        print(f"  Median coverage: {result['median_coverage']:.1f}%")

    on_track_coverage = results['on-track']['weighted_coverage']
    off_track_coverage = results['off-track']['weighted_coverage']

    # An identity test against np.nan only recognises that one object; any
    # other NaN (e.g. a float64 result) would slip through. pd.isna checks
    # the value itself.
    if not (pd.isna(on_track_coverage) or pd.isna(off_track_coverage)):
        coverage_gap = on_track_coverage - off_track_coverage

        # A relative difference is undefined against a zero baseline
        if off_track_coverage != 0:
            relative_difference = coverage_gap / off_track_coverage * 100
        else:
            relative_difference = np.nan

        print(f"\nCOVERAGE GAP ANALYSIS:")
        print(f"  On-track vs Off-track gap: {coverage_gap:.1f} percentage points")
        print(f"  Relative difference: {relative_difference:.1f}%")

        results['coverage_gap'] = coverage_gap
        results['relative_difference'] = relative_difference

    return results

# Analyze ANC4 coverage; `anc4_results` is reused by the summary, plotting and reporting cells
anc4_results = analyze_coverage_by_indicator(df, 'ANC4')

## 5. SBA Coverage Analysis

In [None]:
# Analyze SBA coverage; `sba_results` is reused by the summary, plotting and reporting cells
sba_results = analyze_coverage_by_indicator(df, 'SBA')

## 6. Create Summary Tables

In [None]:
def create_summary_table(anc4_results, sba_results):
    """
    Build a comparison table from the ANC4 and SBA result dictionaries.

    One row is emitted per indicator and track status present in the
    results, followed by one 'gap_analysis' row per indicator that carries a
    'coverage_gap' entry. In gap rows the gap is stored in
    'Weighted_Coverage_Percent' and the remaining statistics are NaN.

    Returns:
    --------
    pandas.DataFrame : The assembled table (empty when nothing is available)
    """
    labelled_results = (('ANC4', anc4_results), ('SBA', sba_results))

    def status_row(label, status, stats):
        return {
            'Indicator': label,
            'Track_Status': status,
            'N_Countries': stats['n_countries'],
            'Total_Births': stats['total_births'],
            'Weighted_Coverage_Percent': stats['weighted_coverage'],
            'Min_Coverage_Percent': stats['min_coverage'],
            'Max_Coverage_Percent': stats['max_coverage'],
            'Median_Coverage_Percent': stats['median_coverage']
        }

    def gap_row(label, gap):
        return {
            'Indicator': label,
            'Track_Status': 'gap_analysis',
            'N_Countries': np.nan,
            'Total_Births': np.nan,
            'Weighted_Coverage_Percent': gap,
            'Min_Coverage_Percent': np.nan,
            'Max_Coverage_Percent': np.nan,
            'Median_Coverage_Percent': np.nan
        }

    # Per-status rows come first, for every indicator
    rows = [
        status_row(label, status, res[status])
        for label, res in labelled_results
        for status in ('on-track', 'off-track')
        if status in res
    ]

    # Gap rows are appended after all per-status rows
    rows.extend(
        gap_row(label, res['coverage_gap'])
        for label, res in labelled_results
        if 'coverage_gap' in res
    )

    return pd.DataFrame(rows)

# Build the comparison table; the bare name on the last line renders it as the cell output
summary_table = create_summary_table(anc4_results, sba_results)
print("\nSUMMARY TABLE:", "=" * 80, sep="\n")
summary_table

## 7. Visualizations

In [None]:
# Figure 1: population-weighted coverage by track status, one panel per indicator
status_colors = ['#2E8B57', '#DC143C']
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

anc4_data = {
    'On-track': anc4_results['on-track']['weighted_coverage'],
    'Off-track': anc4_results['off-track']['weighted_coverage']
}
sba_data = {
    'On-track': sba_results['on-track']['weighted_coverage'],
    'Off-track': sba_results['off-track']['weighted_coverage']
}

for panel, indicator_name, coverage in ((ax1, 'ANC4', anc4_data), (ax2, 'SBA', sba_data)):
    panel.bar(list(coverage.keys()), list(coverage.values()), color=status_colors, alpha=0.7)
    panel.set_title(f'{indicator_name} Coverage by Track Status\n(Population-weighted)',
                    fontsize=14, fontweight='bold')
    panel.set_ylabel('Coverage (%)', fontsize=12)
    panel.set_ylim(0, 100)
    for i, value in enumerate(coverage.values()):
        # A group without usable data has NaN coverage: no bar, so no label
        if pd.isna(value):
            continue
        panel.text(i, value + 2, f'{value:.1f}%', ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

# Figure 2: coverage gaps. 'coverage_gap' exists only when both track-status
# groups produced a weighted coverage, so indicators without one are left out
# rather than raising KeyError.
gap_colors = {'ANC4': '#FF6B6B', 'SBA': '#4ECDC4'}
gaps = {
    name: res['coverage_gap']
    for name, res in (('ANC4', anc4_results), ('SBA', sba_results))
    if 'coverage_gap' in res and not pd.isna(res['coverage_gap'])
}

if gaps:
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    bars = ax.bar(list(gaps.keys()), list(gaps.values()),
                  color=[gap_colors[name] for name in gaps], alpha=0.7)
    ax.set_title('Coverage Gaps: On-track vs Off-track Countries', fontsize=16, fontweight='bold')
    ax.set_ylabel('Coverage Gap (percentage points)', fontsize=12)

    # Keep zero inside the axis and leave 20% headroom on both sides, so
    # negative gaps (off-track ahead of on-track) remain visible.
    lowest = min(0, min(gaps.values()))
    highest = max(0, max(gaps.values()))
    if lowest == highest:
        highest = 1  # every gap is exactly zero; avoid a zero-height axis
    ax.set_ylim(lowest * 1.2, highest * 1.2)
    if lowest < 0:
        ax.axhline(0, color='black', linewidth=0.8)

    for i, value in enumerate(gaps.values()):
        if value >= 0:
            ax.text(i, value + 1, f'{value:.1f}pp', ha='center', fontweight='bold')
        else:
            # Negative bars grow downwards, so their label goes underneath
            ax.text(i, value - 1, f'{value:.1f}pp', ha='center', va='top', fontweight='bold')

    plt.tight_layout()
    plt.show()
else:
    print("Coverage gaps are unavailable for both indicators; skipping the gap chart.")

## 8. Population Impact Analysis

In [None]:
# Grouped bar chart: births represented by each track-status group, per indicator
fig, ax = plt.subplots(1, 1, figsize=(12, 8))

indicators = ['ANC4', 'SBA']
on_track_births = [res['on-track']['total_births'] for res in (anc4_results, sba_results)]
off_track_births = [res['off-track']['total_births'] for res in (anc4_results, sba_results)]

# Two bars per indicator, placed either side of the tick position
x = np.arange(len(indicators))
width = 0.35

bars1 = ax.bar(x - width/2, on_track_births, width,
               label='On-track Countries', color='#2E8B57', alpha=0.7)
bars2 = ax.bar(x + width/2, off_track_births, width,
               label='Off-track Countries', color='#DC143C', alpha=0.7)

ax.set_title('Population Represented by Track Status\n(Countries with data for each indicator)',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Indicator', fontsize=12)
# NOTE(review): the axis label states thousands; confirm the unit of Births_2022
ax.set_ylabel('Total Births (thousands)', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(indicators)
ax.legend()

# Write each bar's value just above its top edge
def add_value_labels(bars):
    for rect in bars:
        top = rect.get_height()
        center = rect.get_x() + rect.get_width()/2.
        ax.text(center, top + 500, f'{top:,.0f}',
                ha='center', va='bottom', fontweight='bold')

for bar_group in (bars1, bars2):
    add_value_labels(bar_group)

plt.tight_layout()
plt.show()

## 9. Save Results

In [None]:
def save_detailed_results(df, anc4_results, sba_results):
    """
    Write the summary table and an annotated country-level table to CSV.

    Both files go to ``../05_output/reports`` (created if missing):
    ``coverage_analysis_summary.csv`` and ``coverage_analysis_detailed.csv``.
    The detailed table is a copy of ``df`` with data-availability flags and
    coverage categories added; ``df`` itself is not modified.

    Returns:
    --------
    tuple : (summary table, detailed country-level DataFrame)
    """
    def categorize_coverage(value):
        # Band a coverage value; missing values get their own label
        if pd.isna(value):
            return 'No Data'
        if value < 50:
            return 'Low (<50%)'
        if value < 80:
            return 'Medium (50-79%)'
        return 'High (≥80%)'

    reports_dir = Path("../05_output/reports")
    reports_dir.mkdir(parents=True, exist_ok=True)

    # Summary table
    summary = create_summary_table(anc4_results, sba_results)
    summary_file = reports_dir / "coverage_analysis_summary.csv"
    summary.to_csv(summary_file, index=False)
    print(f"Summary results saved to: {summary_file}")

    # Country-level table, built on a copy of the input
    country_level = df.copy()

    flag_sources = (
        ('ANC4_Data_Available', 'ANC4'),
        ('SBA_Data_Available', 'SBA'),
        ('Births_Data_Available', 'Births_2022'),
    )
    for flag_name, source_column in flag_sources:
        country_level[flag_name] = country_level[source_column].notna()

    for indicator in ('ANC4', 'SBA'):
        country_level[f'{indicator}_Category'] = country_level[indicator].apply(categorize_coverage)

    detailed_file = reports_dir / "coverage_analysis_detailed.csv"
    country_level.to_csv(detailed_file, index=False)
    print(f"Detailed results saved to: {detailed_file}")

    return summary, country_level

# Save results: writes both CSV reports and rebinds `summary_table` to the saved summary
summary_table, detailed_df = save_detailed_results(df, anc4_results, sba_results)

## 10. Detailed Interpretation

In [None]:
def print_interpretation(anc4_results, sba_results):
    """
    Print a narrative reading of the ANC4 and SBA results.

    Sections 1 and 2 are printed only for indicators that carry a
    'coverage_gap' entry, section 3 only when both do. Section 4 is always
    printed and reports, per track status, the larger of the two indicators'
    positive 'total_births' values (0 when none is available).
    """
    def describe_indicator(heading, results):
        # Skip indicators for which no gap was computed
        if 'coverage_gap' not in results:
            return

        gap = results['coverage_gap']
        on_cov = results['on-track']['weighted_coverage']
        off_cov = results['off-track']['weighted_coverage']

        print(heading)
        print(f"   • On-track countries achieve {on_cov:.1f}% coverage")
        print(f"   • Off-track countries achieve {off_cov:.1f}% coverage")
        print(f"   • Coverage gap: {gap:.1f} percentage points")

        # Band the gap: above 20pp is large, above 10pp moderate, otherwise small
        if gap > 20:
            verdict = "   • INTERPRETATION: Large coverage gap indicates significant disparities"
        elif gap > 10:
            verdict = "   • INTERPRETATION: Moderate coverage gap suggests room for improvement"
        else:
            verdict = "   • INTERPRETATION: Relatively small coverage gap"
        print(verdict)

    def largest_total(status):
        # Largest positive births total reported for this status, else 0
        candidates = [
            res[status]['total_births']
            for res in (anc4_results, sba_results)
            if status in res and res[status]['total_births'] > 0
        ]
        return max(candidates, default=0)

    banner = '=' * 80
    print(f"\n{banner}")
    print("COVERAGE ANALYSIS INTERPRETATION")
    print(banner)

    print("\nKEY FINDINGS:")
    print("-" * 40)

    describe_indicator("1. ANTENATAL CARE (ANC4+):", anc4_results)
    describe_indicator("\n2. SKILLED BIRTH ATTENDANCE (SBA):", sba_results)

    # Comparative analysis needs a gap for both indicators
    if 'coverage_gap' in anc4_results and 'coverage_gap' in sba_results:
        anc4_gap = anc4_results['coverage_gap']
        sba_gap = sba_results['coverage_gap']

        print("\n3. COMPARATIVE ANALYSIS:")
        if anc4_gap > sba_gap:
            print("   • ANC4 shows larger coverage gaps than SBA")
            print("   • This suggests prenatal care access is more challenging than delivery care")
        elif sba_gap > anc4_gap:
            print("   • SBA shows larger coverage gaps than ANC4")
            print("   • This suggests delivery care access is more challenging than prenatal care")
        else:
            print("   • ANC4 and SBA show similar coverage gaps")

    # Population impact
    print("\n4. POPULATION IMPACT:")
    births_on = largest_total('on-track')
    births_off = largest_total('off-track')

    print(f"   • On-track countries represent ~{births_on:,.0f} births annually")
    print(f"   • Off-track countries represent ~{births_off:,.0f} births annually")

    if births_off > births_on:
        print("   • CRITICAL: More births occur in off-track countries with lower coverage")
        print("   • This amplifies the global impact of coverage gaps")

# Print the narrative interpretation of both indicators' results
print_interpretation(anc4_results, sba_results)

## 11. Country-Level Analysis

In [None]:
# Row masks shared by the three listings below
track_status_col = df['Mortality_Status_Binary']
is_off_track = track_status_col == 'off-track'
is_on_track = track_status_col == 'on-track'
has_anc4 = df['ANC4'].notna()
has_sba = df['SBA'].notna()

# Ten off-track countries with the lowest ANC4 coverage
print("COUNTRIES WITH LOWEST ANC4 COVERAGE (Off-track):")
print("="*60)
anc4_off_track = (
    df.loc[is_off_track & has_anc4]
    .nsmallest(10, 'ANC4')[['Country', 'ANC4', 'Births_2022']]
)
print(anc4_off_track.to_string(index=False))

# Ten off-track countries with the lowest SBA coverage
print("\n\nCOUNTRIES WITH LOWEST SBA COVERAGE (Off-track):")
print("="*60)
sba_off_track = (
    df.loc[is_off_track & has_sba]
    .nsmallest(10, 'SBA')[['Country', 'SBA', 'Births_2022']]
)
print(sba_off_track.to_string(index=False))

# Ten on-track countries with both indicators available, ranked by ANC4 only
print("\n\nCOUNTRIES WITH HIGHEST COVERAGE (On-track):")
print("="*60)
high_performers = (
    df.loc[is_on_track & has_anc4 & has_sba]
    .nlargest(10, 'ANC4')[['Country', 'ANC4', 'SBA', 'Births_2022']]
)
print(high_performers.to_string(index=False))

## 12. Summary and Conclusions

In [None]:
# Closing report: headline statistics followed by policy notes and next steps
print("\n" + "="*80)
print("FINAL SUMMARY AND CONCLUSIONS")
print("="*80)

# 'coverage_gap' is only present when both track-status groups produced a
# weighted coverage, so look it up defensively instead of risking KeyError.
final_gaps = {
    'ANC4': anc4_results.get('coverage_gap'),
    'SBA': sba_results.get('coverage_gap'),
}

print("\n📊 KEY STATISTICS:")
for gap_label, gap_value in final_gaps.items():
    if gap_value is None:
        print(f"• {gap_label} Coverage Gap: not available (insufficient data)")
    else:
        print(f"• {gap_label} Coverage Gap: {gap_value:.1f} percentage points")
print(f"• Population in Off-track Countries: {max(anc4_results['off-track']['total_births'], sba_results['off-track']['total_births']):,.0f} births")
print(f"• Population in On-track Countries: {max(anc4_results['on-track']['total_births'], sba_results['on-track']['total_births']):,.0f} births")

print("\n🎯 POLICY IMPLICATIONS:")
if any(gap_value is None for gap_value in final_gaps.values()):
    print("• Coverage gaps could not be computed for every indicator; check data availability before drawing conclusions")
else:
    # Name the indicators according to the computed gaps rather than assuming
    # ANC4 has the larger one. The sort is stable, so a tie keeps ANC4 first.
    larger_gap, smaller_gap = sorted(final_gaps, key=final_gaps.get, reverse=True)
    qualifier = "while smaller" if final_gaps[smaller_gap] < final_gaps[larger_gap] else "while similar"
    print(f"• Urgent need to improve {larger_gap} coverage in off-track countries")
    print(f"• {smaller_gap} coverage gaps, {qualifier}, still represent millions of births")
print("• Population-weighted analysis reveals the true scale of global disparities")
print("• Targeted interventions needed for countries with lowest coverage")

print("\n📈 NEXT STEPS:")
print("• Identify specific barriers to care in lowest-performing countries")
print("• Develop targeted intervention strategies")
print("• Monitor progress through regular coverage assessments")
print("• Share best practices from high-performing countries")

print("\n✅ ANALYSIS COMPLETED SUCCESSFULLY")
print("Output files available in: 05_output/reports/")