# Addis Ababa: Temporal Patterns — Aethalometer BC, FTIR EC & HIPS Fabs

This notebook applies the same temporal pattern analyses as notebook 02 but compares
**three independent BC/EC measurement methods** side by side:

| Method | Parameter | Units | Technique |
|--------|-----------|-------|-----------|
| **Aethalometer** | IR BCc (880 nm) | µg/m³ | Real-time optical absorption |
| **FTIR** | EC | µg/m³ | Fourier-transform infrared spectroscopy of filter samples |
| **HIPS** | Fabs / MAC | µg/m³ | Hybrid integrating plate/sphere absorption |

## Tasks Covered:
1. **Monthly patterns** — monthly variation for each method
2. **Weekly patterns** — weekday vs weekend for each method
3. **Ethiopian seasonal patterns** — seasonal distributions side by side
4. **Time series comparison** — all three methods on one timeline
5. **Inter-method agreement** — scatter plots, correlations, ratios

---

## Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import calendar

# Add scripts folder to path
# __file__ is normally undefined inside a notebook kernel, so the project
# root is taken to be the current working directory. (The previous
# os.path.abspath('__file__') call used a quoted string and therefore resolved
# to this same directory without ever consulting __file__.)
notebook_dir = os.getcwd()
scripts_path = os.path.join(notebook_dir, 'scripts')
if scripts_path not in sys.path:
    # Front of the path so the local modules take precedence on import.
    sys.path.insert(0, scripts_path)

# Import from existing modular scripts
from config import SITES, MAC_VALUE
from data_matching import (
    load_aethalometer_data,
    load_filter_data,
    add_base_filter_id,
    match_all_parameters,
    pivot_filter_by_id,
)
from outliers import apply_exclusion_flags, get_clean_data

# Configure matplotlib
# NOTE(review): the 'seaborn-v0_8-*' style names need a reasonably recent
# matplotlib release — confirm against the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

# Create output directories
def setup_directories():
    """
    Create the notebook's output folders and return their paths.

    The folders are created relative to the current working directory;
    folders that already exist are left untouched.

    Returns:
        dict with keys 'plots' and 'data' mapping to the relative folder paths.
    """
    output_paths = dict(
        plots='output/plots/addis_ababa/filter_comparison',
        data='output/data/addis_ababa',
    )
    for key in output_paths:
        os.makedirs(output_paths[key], exist_ok=True)
    return output_paths

# Output folders are created once here; later cells save plots under dirs['plots'].
dirs = setup_directories()
print("Setup complete!", f"MAC value: {MAC_VALUE} m²/g", sep="\n")

## Site Configuration

In [None]:
# Configuration
# Site entry from the shared config; its 'code' and 'location' fields are read below.
ADDIS_CONFIG = SITES['Addis_Ababa']
SITE_CODE = ADDIS_CONFIG['code']  # 'ETAD'

# Calendar month numbers (1-12) assigned to each Ethiopian season.
# Together the three lists cover every month exactly once.
SEASONS = {
    'Dry Season': [10, 11, 12, 1, 2],
    'Belg Rainy Season': [3, 4, 5],
    'Kiremt Rainy Season': [6, 7, 8, 9]
}
# Fixed display order for seasons in tables and plots.
SEASONS_ORDER = ['Dry Season', 'Belg Rainy Season', 'Kiremt Rainy Season']
# Colour used for each season in bars, boxes, scatter points and shading.
SEASON_COLORS = {'Dry Season': '#E67E22', 'Belg Rainy Season': '#27AE60', 'Kiremt Rainy Season': '#3498DB'}

# The three BC/EC measurement columns after matching
# Keys are the column names looked up in the matched DataFrame; 'label' and
# 'color' are used for legends, titles and line/bar colours.
METHODS = {
    'ir_bcc':    {'label': 'Aeth IR BCc',       'color': '#1f77b4', 'unit': 'µg/m³'},
    'ftir_ec':   {'label': 'FTIR EC',           'color': '#d62728', 'unit': 'µg/m³'},
    'hips_fabs': {'label': 'HIPS Fabs/MAC',     'color': '#2ca02c', 'unit': 'µg/m³'},
}

print(f"Site: {ADDIS_CONFIG['location']} ({SITE_CODE})")
print(f"Methods: {', '.join(m['label'] for m in METHODS.values())}")

## Data Loading

Load aethalometer data and filter data (FTIR EC, HIPS Fabs), then match by date.

In [None]:
# Load aethalometer data
aethalometer_data = load_aethalometer_data()
df_aeth = aethalometer_data['Addis_Ababa']
print(f"Aethalometer: {len(df_aeth):,} daily records")

# Load filter data
filter_data = load_filter_data()
filter_data = add_base_filter_id(filter_data)
print(f"Filter data: {len(filter_data)} total measurements")

# Match all parameters for Addis Ababa (aeth + FTIR EC + HIPS Fabs + iron)
df = match_all_parameters('Addis_Ababa', SITE_CODE, df_aeth, filter_data)

# Add temporal columns
df['Month'] = df['date'].dt.month
df['DayOfWeek'] = df['date'].dt.dayofweek
df['DayOfYear'] = df['date'].dt.dayofyear

# Explicit month -> season lookup. A month that is missing (e.g. from a NaT
# date) stays NaN instead of being silently labelled as the last season.
month_to_season = {
    month: season
    for season, months in SEASONS.items()
    for month in months
}
df['Ethiopian_Season'] = df['Month'].map(month_to_season)

# Report what we have
print(f"\nMatched dataset: {len(df)} samples")
print(f"Date range: {df['date'].min().date()} to {df['date'].max().date()}")
print(f"\nAvailability per method:")
for col, info in METHODS.items():
    n = df[col].notna().sum() if col in df.columns else 0
    print(f"  {info['label']}: {n} samples")

# Identify samples where all three methods are available
method_cols = [c for c in METHODS if c in df.columns]
if len(method_cols) == len(METHODS):
    df['all_three'] = df[method_cols].notna().all(axis=1)
else:
    # A method column is absent altogether, so no sample can have all methods.
    # (Testing only the columns that exist would wrongly flag rows as complete.)
    missing_cols = [c for c in METHODS if c not in df.columns]
    print(f"\nMissing method columns: {', '.join(missing_cols)}")
    df['all_three'] = False
n_all = df['all_three'].sum()
print(f"\nSamples with all three methods: {n_all}")
print(f"\nSeason breakdown:")
print(df['Ethiopian_Season'].value_counts().to_string())

---

# Task 1: Monthly Patterns — All Three Methods

**Goal**: Compare monthly BC/EC concentration patterns across aethalometer, FTIR, and HIPS.

In [None]:
def plot_monthly_all_methods(df, methods=METHODS):
    """
    Overlay the mean monthly concentration of every available method on one axis.

    Each method found in ``df`` is drawn as a line through its per-month means,
    with a translucent band spanning mean ± one standard deviation. Months
    with no observation for a method are left out of that method's line.

    Args:
        df: DataFrame with a 'Month' column and one column per method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    present = [name for name in methods if name in df.columns]
    for name in present:
        style = methods[name]
        by_month = df.groupby('Month')[name].agg(['mean', 'std', 'count'])
        # Keep only the months that actually contain data for this method.
        by_month = by_month[by_month['count'] > 0]
        centre = by_month['mean']
        spread = by_month['std']

        ax.plot(centre.index, centre.values, marker='o', color=style['color'],
                linewidth=2, label=style['label'])
        ax.fill_between(centre.index, centre - spread, centre + spread,
                        alpha=0.15, color=style['color'])

    month_numbers = range(1, 13)
    ax.set_xticks(month_numbers)
    ax.set_xticklabels([calendar.month_abbr[m] for m in month_numbers], rotation=45)
    ax.set_xlabel('Month', fontsize=12)
    ax.set_ylabel('Concentration (µg/m³)', fontsize=12)
    ax.set_title('Monthly BC/EC Patterns: Aethalometer vs FTIR vs HIPS', fontsize=14, fontweight='bold')
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig


def plot_monthly_panel(df, methods=METHODS):
    """
    Panel plot: one subplot per method showing monthly mean bars colored by season.

    Bars show the monthly mean, error bars show ± one standard deviation, and
    each panel is annotated with the method's total sample count. Methods that
    are absent from ``df`` or hold no data are skipped.

    Args:
        df: DataFrame with a 'Month' column and one column per method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure, or None when no method has any data to plot.
    """
    cols = [c for c in methods if c in df.columns and df[c].notna().any()]
    if not cols:
        # plt.subplots(1, 0) raises, so report and bail out the same way
        # plot_method_ratios does when it has nothing to draw.
        print("No methods with data for monthly panel")
        return None

    n = len(cols)
    # squeeze=False always returns a 2-D axes array, so one panel needs no special case.
    fig, axes = plt.subplots(1, n, figsize=(6*n, 5), sharey=True, squeeze=False)

    # Month -> bar colour. The first season listing a month wins, which matches
    # scanning SEASONS from top to bottom.
    month_colors = {}
    for season, months in SEASONS.items():
        for month in months:
            month_colors.setdefault(month, SEASON_COLORS[season])

    for idx, (ax, col) in enumerate(zip(axes[0], cols)):
        info = methods[col]

        # Single groupby pass for all three statistics.
        monthly = df.groupby('Month')[col].agg(['mean', 'std', 'count'])
        monthly = monthly[monthly['count'] > 0]
        valid = monthly.index

        # Color bars by season (gray if a month is not assigned to any season)
        bar_colors = [month_colors.get(m, 'gray') for m in valid]

        ax.bar(valid, monthly['mean'], color=bar_colors, alpha=0.7, edgecolor='black')
        ax.errorbar(valid, monthly['mean'], yerr=monthly['std'],
                    fmt='none', color='black', capsize=3)

        ax.set_xlabel('Month', fontsize=11)
        if idx == 0:
            # The y-axis is shared, so only the left-most panel is labelled.
            ax.set_ylabel('Concentration (µg/m³)', fontsize=11)
        ax.set_title(info['label'], fontsize=12, fontweight='bold', color=info['color'])
        ax.set_xticks(valid)
        ax.set_xticklabels([calendar.month_abbr[m] for m in valid], rotation=45)
        ax.grid(True, alpha=0.3, axis='y')

        # Add sample count
        total_n = df[col].notna().sum()
        ax.text(0.95, 0.95, f'n={total_n}', transform=ax.transAxes,
                fontsize=9, va='top', ha='right',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

    plt.suptitle('Monthly BC/EC by Method', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 1: MONTHLY PATTERNS — ALL THREE METHODS", "=" * 80, sep="\n")

# Monthly statistics table: one line per month that has data, per method
print("\nMonthly means (µg/m³):")
for col, info in METHODS.items():
    if col not in df.columns:
        continue
    monthly = df.groupby('Month')[col].agg(['mean', 'count'])
    months_with_data = monthly.loc[monthly['count'] > 0]
    print(f"\n  {info['label']}:")
    # Plain tuples: (month number, mean, count)
    for m, month_mean, month_count in months_with_data.itertuples(name=None):
        print(f"    {calendar.month_abbr[m]}: {month_mean:.3f} (n={int(month_count)})")

plots_dir = dirs['plots']

fig1 = plot_monthly_all_methods(df)
plt.savefig(os.path.join(plots_dir, 'monthly_all_methods.png'), dpi=150, bbox_inches='tight')
plt.show()

fig2 = plot_monthly_panel(df)
plt.savefig(os.path.join(plots_dir, 'monthly_panel.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 2: Weekly Patterns — Weekday vs Weekend

**Goal**: Compare weekday/weekend differences across all three methods.

In [None]:
def analyze_weekend_effect_all(df, methods=METHODS):
    """
    Compare weekday and weekend concentrations for every method.

    For each method with at least 5 non-null values, and at least 2 values in
    both the weekday (DayOfWeek 0-4) and weekend (DayOfWeek 5-6) groups, the
    group means, the percentage reduction from weekday to weekend and an
    independent two-sample t-test p-value are printed and collected.

    Args:
        df: DataFrame with a 'DayOfWeek' column and one column per method key.
        methods: Mapping of column name -> dict providing 'label'.

    Returns:
        dict keyed by method column, each value holding 'weekday_avg',
        'weekend_avg', 'reduction_pct', 'p_value', 'n_weekday', 'n_weekend'.
    """
    print("\nWeekend Effect Analysis — All Methods:")
    print("=" * 80)

    summary = {}
    for column, meta in methods.items():
        usable = column in df.columns and df[column].notna().sum() >= 5
        if not usable:
            continue

        series = df[column]
        weekday_vals = series[df['DayOfWeek'] < 5].dropna()
        weekend_vals = series[df['DayOfWeek'] >= 5].dropna()
        n_weekday, n_weekend = len(weekday_vals), len(weekend_vals)

        # The t-test needs at least two observations in each group.
        if min(n_weekday, n_weekend) < 2:
            continue

        weekday_mean = weekday_vals.mean()
        weekend_mean = weekend_vals.mean()
        # Positive values mean the weekend average is lower than the weekday one.
        pct_drop = ((weekday_mean - weekend_mean) / weekday_mean) * 100
        _, p_value = stats.ttest_ind(weekday_vals, weekend_vals)

        summary[column] = {
            'weekday_avg': weekday_mean,
            'weekend_avg': weekend_mean,
            'reduction_pct': pct_drop,
            'p_value': p_value,
            'n_weekday': n_weekday,
            'n_weekend': n_weekend,
        }

        marker = '*' if p_value < 0.05 else ''
        print(f"\n  {meta['label']}:")
        print(f"    Weekday: {weekday_mean:.3f} µg/m³ (n={n_weekday})")
        print(f"    Weekend: {weekend_mean:.3f} µg/m³ (n={n_weekend})")
        print(f"    Reduction: {pct_drop:.1f}%{marker} (p={p_value:.3e})")

    return summary


def plot_weekly_all_methods(df, methods=METHODS):
    """
    Day-of-week boxplots: one panel (column) per method.

    Weekday boxes (Mon-Fri) and weekend boxes (Sat-Sun) are filled with
    different colours, outliers are hidden, and each panel is annotated with
    the method's total sample count. Methods with fewer than 5 non-null values
    are skipped.

    Args:
        df: DataFrame with a 'DayOfWeek' column (0=Mon ... 6=Sun) and one
            column per method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure, or None when no method has enough data.
    """
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    cols = [c for c in methods if c in df.columns and df[c].notna().sum() >= 5]
    if not cols:
        # plt.subplots(1, 0) raises, so report and return nothing instead.
        print("No methods with enough data for day-of-week boxplots")
        return None

    n = len(cols)
    # squeeze=False always returns a 2-D axes array, so one panel needs no special case.
    fig, axes = plt.subplots(1, n, figsize=(6*n, 5), sharey=True, squeeze=False)

    for idx, (ax, col) in enumerate(zip(axes[0], cols)):
        info = methods[col]

        plot_data = [df.loc[df['DayOfWeek'] == dow, col].dropna() for dow in range(7)]

        # Tick labels are set on the axis rather than through boxplot(labels=...):
        # that keyword is deprecated in recent matplotlib in favour of
        # tick_labels, whereas this form works on old and new releases alike.
        bp = ax.boxplot(plot_data, patch_artist=True, showfliers=False)
        ax.set_xticks(range(1, len(day_names) + 1))
        ax.set_xticklabels(day_names)

        for i, patch in enumerate(bp['boxes']):
            # Boxes 0-4 are weekdays, 5-6 the weekend.
            patch.set_facecolor('#AED6F1' if i < 5 else '#F5B7B1')
            patch.set_alpha(0.8)

        ax.set_title(info['label'], fontsize=12, fontweight='bold', color=info['color'])
        ax.set_xlabel('Day of Week')
        if idx == 0:
            # The y-axis is shared, so only the left-most panel is labelled.
            ax.set_ylabel('Concentration (µg/m³)')
        ax.grid(True, alpha=0.3, axis='y')

        n_samples = df[col].notna().sum()
        ax.text(0.95, 0.95, f'n={n_samples}', transform=ax.transAxes,
                fontsize=9, va='top', ha='right',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

    plt.suptitle('Day-of-Week Patterns by Method', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 2: WEEKLY PATTERNS", "=" * 80, sep="\n")

# Print the weekday/weekend statistics, then draw and save the boxplots.
weekend_results = analyze_weekend_effect_all(df)
fig = plot_weekly_all_methods(df)
weekly_plot_path = os.path.join(dirs['plots'], 'weekly_all_methods.png')
plt.savefig(weekly_plot_path, dpi=150, bbox_inches='tight')
plt.show()

---

# Task 3: Ethiopian Seasonal Patterns — All Three Methods

**Goal**: Compare seasonal BC/EC distributions across all three methods.

In [None]:
def analyze_seasonal_all(df, methods=METHODS):
    """
    Print and collect per-season summary statistics for every method.

    Seasons are visited in SEASONS_ORDER. A method is reported for a season
    only when it has at least one non-null value in that season.

    Args:
        df: DataFrame with an 'Ethiopian_Season' column and one column per
            method key.
        methods: Mapping of column name -> dict providing 'label'.

    Returns:
        Nested dict ``{season: {method column: stats}}`` where stats holds
        'n', 'mean', 'median', 'std', 'q25' and 'q75'. Every season appears
        as a key, even when it has no data.
    """
    print("\nSeasonal Statistics — All Methods:")
    print("=" * 80)

    present = [(name, meta) for name, meta in methods.items() if name in df.columns]

    results = {}
    for season in SEASONS_ORDER:
        in_season = df[df['Ethiopian_Season'] == season]
        print(f"\n{season}:")

        season_stats = {}
        for name, meta in present:
            observed = in_season[name].dropna()
            if observed.empty:
                continue

            summary = {
                'n': len(observed),
                'mean': observed.mean(),
                'median': observed.median(),
                'std': observed.std(),
                'q25': observed.quantile(0.25),
                'q75': observed.quantile(0.75),
            }
            season_stats[name] = summary
            print(f"  {meta['label']:20s}: mean={summary['mean']:.3f} ± {summary['std']:.3f}, "
                  f"median={summary['median']:.3f} [{summary['q25']:.3f}–{summary['q75']:.3f}], n={summary['n']}")

        results[season] = season_stats

    return results


def plot_seasonal_boxplot_all(df, methods=METHODS):
    """
    Side-by-side seasonal boxplots, one panel per method.

    Each panel shows one box per Ethiopian season (in SEASONS_ORDER), filled
    with the season colour and annotated with the number of samples. Outliers
    are hidden. Methods with fewer than 3 non-null values are skipped.

    Args:
        df: DataFrame with an 'Ethiopian_Season' column and one column per
            method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure, or None when no method has enough data.
    """
    cols = [c for c in methods if c in df.columns and df[c].notna().sum() >= 3]
    if not cols:
        # plt.subplots(1, 0) raises, so report and return nothing instead.
        print("No methods with enough data for seasonal boxplots")
        return None

    n_methods = len(cols)
    # squeeze=False always returns a 2-D axes array, so one panel needs no special case.
    fig, axes = plt.subplots(1, n_methods, figsize=(6*n_methods, 6), sharey=True, squeeze=False)
    season_labels = [s.replace(' Season', '') for s in SEASONS_ORDER]

    for idx, (ax, col) in enumerate(zip(axes[0], cols)):
        info = methods[col]

        plot_data = [
            df.loc[df['Ethiopian_Season'] == season, col].dropna()
            for season in SEASONS_ORDER
        ]

        # Tick labels are set on the axis rather than through boxplot(labels=...):
        # that keyword is deprecated in recent matplotlib in favour of
        # tick_labels, whereas this form works on old and new releases alike.
        bp = ax.boxplot(plot_data, patch_artist=True, showfliers=False)
        ax.set_xticks(range(1, len(season_labels) + 1))
        ax.set_xticklabels(season_labels)

        for patch, season in zip(bp['boxes'], SEASONS_ORDER):
            patch.set_facecolor(SEASON_COLORS[season])
            patch.set_alpha(0.7)

        ax.set_title(info['label'], fontsize=12, fontweight='bold', color=info['color'])
        ax.set_xlabel('Ethiopian Season')
        if idx == 0:
            # The y-axis is shared, so only the left-most panel is labelled.
            ax.set_ylabel('Concentration (µg/m³)')
        ax.grid(True, alpha=0.3, axis='y')
        ax.tick_params(axis='x', rotation=15)

        # Add counts. x is in data coordinates (box position) and y is a
        # fraction of the axes height, so the labels stay near the top even
        # when the shared y-limits change as later panels are drawn.
        for position, samples in enumerate(plot_data, start=1):
            ax.text(position, 0.95, f'n={len(samples)}',
                    transform=ax.get_xaxis_transform(), ha='center', fontsize=8)

    plt.suptitle('Seasonal BC/EC Distributions by Method', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig


def plot_seasonal_grouped_bar(df, methods=METHODS):
    """
    Grouped bar chart: seasons on x-axis, one bar per method.

    Bar heights are seasonal means with ± one standard deviation error bars.
    A season with no data for a method gets a zero-height bar. Methods with
    fewer than 3 non-null values are skipped.

    Args:
        df: DataFrame with an 'Ethiopian_Season' column and one column per
            method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure holding the chart.
    """
    cols = [c for c in methods if c in df.columns and df[c].notna().sum() >= 3]

    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(SEASONS_ORDER))
    n_bars = len(cols)
    # 0.25 for up to three methods; narrower beyond that so neighbouring
    # season groups never overlap.
    width = min(0.25, 0.8 / max(n_bars, 1))
    # Offsets symmetric about each season tick for any number of methods
    # (three methods give -width, 0, +width; a single method is centred at 0).
    offsets = (np.arange(n_bars) - (n_bars - 1) / 2) * width

    for i, col in enumerate(cols):
        info = methods[col]
        per_season = [
            df.loc[df['Ethiopian_Season'] == season, col].dropna()
            for season in SEASONS_ORDER
        ]
        means = [vals.mean() if len(vals) > 0 else 0 for vals in per_season]
        stds = [vals.std() if len(vals) > 0 else 0 for vals in per_season]

        ax.bar(x + offsets[i], means, width * 0.9, yerr=stds, capsize=3,
               label=info['label'], color=info['color'], alpha=0.8, edgecolor='black')

    ax.set_xlabel('Ethiopian Season', fontsize=12)
    ax.set_ylabel('Mean Concentration (µg/m³)', fontsize=12)
    ax.set_title('Seasonal BC/EC: Method Comparison', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(SEASONS_ORDER, rotation=15)
    if cols:
        # Skip the legend when nothing was drawn to avoid an empty-legend warning.
        ax.legend()
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    return fig

print("=" * 80, "TASK 3: ETHIOPIAN SEASONAL PATTERNS", "=" * 80, sep="\n")
seasonal_stats = analyze_seasonal_all(df)

plots_dir = dirs['plots']

# Per-method seasonal distributions
fig1 = plot_seasonal_boxplot_all(df)
plt.savefig(os.path.join(plots_dir, 'seasonal_boxplot_all_methods.png'), dpi=150, bbox_inches='tight')
plt.show()

# Seasonal means of all methods side by side
fig2 = plot_seasonal_grouped_bar(df)
plt.savefig(os.path.join(plots_dir, 'seasonal_grouped_bar.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 4: Time Series Comparison

**Goal**: Overlay all three methods on the same timeline to see temporal co-variation and divergences.

In [None]:
def plot_timeseries_all_methods(df, methods=METHODS):
    """
    Time series with all three methods overlaid.

    Each method found in ``df`` is drawn as a marker+line series over its
    non-null samples in date order. The background is shaded by Ethiopian
    season between the first and last date in ``df``.

    Args:
        df: DataFrame with a datetime 'date' column and one column per method key.
        methods: Mapping of column name -> dict providing 'label' and 'color'.

    Returns:
        The matplotlib Figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(16, 6))

    for col, info in methods.items():
        if col not in df.columns:
            continue
        # Sort so the connecting line always runs forward in time.
        valid = df[['date', col]].dropna().sort_values('date')
        ax.plot(valid['date'], valid[col], 'o-', markersize=4, alpha=0.7,
                color=info['color'], linewidth=1, label=info['label'])

    # Seasonal shading
    date_min = df['date'].min() if len(df) > 0 else pd.NaT
    date_max = df['date'].max() if len(df) > 0 else pd.NaT
    if pd.notna(date_min) and pd.notna(date_max):
        month_to_season = {
            month: season
            for season, months in SEASONS.items()
            for month in months
        }
        one_day = pd.Timedelta(days=1)
        days = pd.date_range(date_min, date_max, freq='D')
        day_seasons = [month_to_season.get(m) for m in days.month]

        # Merge consecutive days of the same season into a single span. This
        # covers the same area as one patch per day but draws a handful of
        # patches instead of one per day.
        run_start = 0
        for i in range(1, len(days) + 1):
            run_ended = i == len(days) or day_seasons[i] != day_seasons[run_start]
            if not run_ended:
                continue
            season = day_seasons[run_start]
            if season is not None:
                ax.axvspan(days[run_start], days[i - 1] + one_day, alpha=0.05,
                           color=SEASON_COLORS.get(season, 'gray'), linewidth=0)
            run_start = i

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Concentration (µg/m³)', fontsize=12)
    ax.set_title('BC/EC Time Series: Aethalometer vs FTIR vs HIPS', fontsize=14, fontweight='bold')
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig


def plot_method_ratios(df, methods=METHODS):
    """
    Time series of inter-method ratios, one panel per method pair.

    For every pair whose two columns exist in ``df``, the ratio
    numerator / denominator is plotted against date, coloured by season, with
    a dashed 1:1 reference line and a dotted line at the median ratio. Samples
    with a zero denominator have no defined ratio and are excluded from both
    the plot and the reported count.

    Args:
        df: DataFrame with 'date' and 'Ethiopian_Season' columns plus the
            method columns.
        methods: Accepted for signature parity with the other plot helpers;
            not used by this function.

    Returns:
        The matplotlib Figure, or None when no pair of columns is available.
    """
    pairs = [
        ('hips_fabs', 'ftir_ec', 'HIPS / FTIR EC'),
        ('ir_bcc', 'ftir_ec', 'Aeth BC / FTIR EC'),
        ('hips_fabs', 'ir_bcc', 'HIPS / Aeth BC'),
    ]
    valid_pairs = [(n, d, l) for n, d, l in pairs
                   if n in df.columns and d in df.columns]

    if not valid_pairs:
        print("No valid pairs for ratio analysis")
        return None

    # squeeze=False always returns a 2-D axes array, so one panel needs no special case.
    fig, axes = plt.subplots(len(valid_pairs), 1, figsize=(16, 4*len(valid_pairs)),
                             sharex=True, squeeze=False)
    axes = axes[:, 0]

    for ax, (num_col, den_col, label) in zip(axes, valid_pairs):
        valid = df[['date', num_col, den_col, 'Ethiopian_Season']].dropna()
        # A zero denominator becomes NaN so the division yields NaN, not inf.
        ratio = valid[num_col] / valid[den_col].replace(0, np.nan)

        # Keep only defined ratios so the plotted points, the median and the
        # reported n all describe the same set of samples.
        defined = ratio.notna()
        valid, ratio = valid[defined], ratio[defined]

        for season in SEASONS_ORDER:
            mask = valid['Ethiopian_Season'] == season
            if not mask.any():
                # No legend entry for seasons without points.
                continue
            ax.scatter(valid.loc[mask, 'date'], ratio[mask], s=40, alpha=0.7,
                       color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3,
                       label=season)

        ax.axhline(y=1.0, color='black', linestyle='--', linewidth=1.5, alpha=0.5, label='1:1')

        median_ratio = ratio.median()
        if len(ratio) > 0:
            ax.axhline(y=median_ratio, color='gray', linestyle=':', linewidth=1, alpha=0.7)

        ax.text(0.02, 0.95, f'Median ratio: {median_ratio:.3f}\nn={len(ratio)}',
                transform=ax.transAxes, fontsize=10, va='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

        ax.set_ylabel(label, fontsize=11)
        ax.legend(fontsize=8, loc='upper right')
        ax.grid(True, alpha=0.3)

    axes[-1].set_xlabel('Date', fontsize=12)
    plt.suptitle('Inter-Method Ratios Over Time', fontsize=14, fontweight='bold', y=1.01)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 4: TIME SERIES COMPARISON", "=" * 80, sep="\n")

plots_dir = dirs['plots']

fig1 = plot_timeseries_all_methods(df)
plt.savefig(os.path.join(plots_dir, 'timeseries_all_methods.png'), dpi=150, bbox_inches='tight')
plt.show()

# plot_method_ratios returns None when no method pair is available,
# in which case there is nothing to save or show.
fig2 = plot_method_ratios(df)
if fig2:
    plt.savefig(os.path.join(plots_dir, 'method_ratios.png'), dpi=150, bbox_inches='tight')
    plt.show()

---

# Task 5: Inter-Method Agreement

**Goal**: Scatter plots and correlations between all three BC/EC measurement pairs, colored by season. Includes regression statistics and 1:1 line comparison.

In [None]:
def plot_inter_method_scatter(df):
    """
    Pairwise scatter plots between all three methods with regression and 1:1 line.

    For each pair whose two columns exist in ``df``, samples are coloured by
    season, an ordinary least-squares line and a 1:1 reference are drawn, and
    the overall regression plus per-season Pearson correlations (seasons with
    at least 5 samples) are printed. Pairs with fewer than 3 complete samples
    get a placeholder panel.

    Args:
        df: DataFrame with an 'Ethiopian_Season' column plus the method
            columns 'ftir_ec', 'ir_bcc' and/or 'hips_fabs'.

    Returns:
        The matplotlib Figure, or None when no pair of columns is available.
    """
    pairs = [
        ('ftir_ec', 'ir_bcc', 'FTIR EC (µg/m³)', 'Aeth IR BCc (µg/m³)'),
        ('ftir_ec', 'hips_fabs', 'FTIR EC (µg/m³)', 'HIPS Fabs/MAC (µg/m³)'),
        ('ir_bcc', 'hips_fabs', 'Aeth IR BCc (µg/m³)', 'HIPS Fabs/MAC (µg/m³)'),
    ]
    valid_pairs = [(x, y, xl, yl) for x, y, xl, yl in pairs
                   if x in df.columns and y in df.columns]

    if not valid_pairs:
        # plt.subplots(1, 0) raises, so report and return nothing, as
        # plot_method_ratios does.
        print("No valid pairs for scatter analysis")
        return None

    n = len(valid_pairs)
    # squeeze=False always returns a 2-D axes array, so one panel needs no special case.
    fig, axes = plt.subplots(1, n, figsize=(7*n, 6), squeeze=False)

    print("\nInter-Method Correlations:")
    print("=" * 80)

    for ax, (x_col, y_col, xlabel, ylabel) in zip(axes[0], valid_pairs):
        valid = df[[x_col, y_col, 'Ethiopian_Season']].dropna()

        if len(valid) < 3:
            ax.text(0.5, 0.5, f'n={len(valid)} (too few)', transform=ax.transAxes,
                    ha='center', fontsize=12)
            ax.set_title(f'{ylabel} vs {xlabel}')
            continue

        # Scatter by season
        for season in SEASONS_ORDER:
            sdata = valid[valid['Ethiopian_Season'] == season]
            if len(sdata) > 0:
                ax.scatter(sdata[x_col], sdata[y_col], s=50, alpha=0.7,
                           color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3,
                           label=f'{season} (n={len(sdata)})')

        x = valid[x_col].values
        y = valid[y_col].values
        # Same upper limit on both axes so the 1:1 line is a true diagonal.
        ax_max = max(x.max(), y.max()) * 1.05

        # Regression. linregress cannot fit a line when every x is identical,
        # so that case is reported instead of attempted.
        can_fit = x.max() > x.min()
        if can_fit:
            slope, intercept, r, p, _ = stats.linregress(x, y)
            x_fit = np.linspace(0, ax_max, 100)
            ax.plot(x_fit, slope * x_fit + intercept, 'k-', linewidth=1.5, alpha=0.6)
            sig = '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else 'ns'
            stats_text = (f'y = {slope:.3f}x + {intercept:.3f}\n'
                          f'R² = {r**2:.3f} ({sig})\nn = {len(valid)}')
        else:
            stats_text = f'regression undefined (constant x)\nn = {len(valid)}'

        ax.plot([0, ax_max], [0, ax_max], 'k--', alpha=0.3, linewidth=1, label='1:1')
        ax.text(0.03, 0.97, stats_text,
                transform=ax.transAxes, fontsize=9, va='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

        ax.set_xlim(0, ax_max)
        ax.set_ylim(0, ax_max)
        ax.set_xlabel(xlabel, fontsize=11)
        ax.set_ylabel(ylabel, fontsize=11)
        ax.set_title(f'{ylabel} vs {xlabel}', fontsize=11, fontweight='bold')
        ax.legend(fontsize=8, loc='lower right')
        ax.grid(True, alpha=0.3)

        print(f"\n  {ylabel} vs {xlabel}:")
        if can_fit:
            print(f"    n={len(valid)}, R²={r**2:.3f}, slope={slope:.3f}, p={p:.2e}")
        else:
            print(f"    n={len(valid)}, regression undefined (constant x)")

        # Per-season correlations
        for season in SEASONS_ORDER:
            sdata = valid[valid['Ethiopian_Season'] == season]
            if len(sdata) >= 5:
                rs, ps = stats.pearsonr(sdata[x_col], sdata[y_col])
                print(f"    {season}: n={len(sdata)}, r={rs:.3f}, p={ps:.2e}")

    plt.suptitle('Inter-Method Agreement: BC/EC Measurement Comparison',
                 fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig


def print_summary_table(df, methods=METHODS):
    """
    Print descriptive statistics and pairwise correlations for all methods.

    The first table lists n, mean, median, standard deviation, minimum and
    maximum for every method column that has data. The second part prints the
    Pearson correlation for each pair of methods sharing at least 3 complete
    samples; a trailing '*' marks p < 0.05.

    Args:
        df: DataFrame with one column per method key.
        methods: Mapping of column name -> dict providing 'label'.

    Returns:
        None. Output is written to stdout.
    """
    print("\n" + "="*80)
    print("SUMMARY: METHOD COMPARISON")
    print("="*80)

    available = [name for name in methods if name in df.columns]

    header = f"\n{'Method':<20s} {'n':>5s} {'Mean':>8s} {'Median':>8s} {'Std':>8s} {'Min':>8s} {'Max':>8s}"
    print(header)
    print("-" * 65)
    for name in available:
        series = df[name].dropna()
        if series.empty:
            continue
        label = methods[name]['label']
        row = (f"{label:<20s} {len(series):>5d} {series.mean():>8.3f} {series.median():>8.3f} "
               f"{series.std():>8.3f} {series.min():>8.3f} {series.max():>8.3f}")
        print(row)

    # Pairwise correlations over every unordered pair, in column order
    print(f"\nPairwise Pearson correlations:")
    pairings = [(a, b) for i, a in enumerate(available) for b in available[i + 1:]]
    for first, second in pairings:
        both = df[[first, second]].dropna()
        if len(both) < 3:
            continue
        r, p = stats.pearsonr(both[first], both[second])
        star = '*' if p < 0.05 else ''
        print(f"  {methods[first]['label']} vs {methods[second]['label']}: "
              f"r={r:.3f}{star} (n={len(both)}, p={p:.2e})")

print("=" * 80, "TASK 5: INTER-METHOD AGREEMENT", "=" * 80, sep="\n")

# Scatter/regression panels first, then the printed summary table.
fig = plot_inter_method_scatter(df)
scatter_plot_path = os.path.join(dirs['plots'], 'inter_method_scatter.png')
plt.savefig(scatter_plot_path, dpi=150, bbox_inches='tight')
plt.show()

print_summary_table(df)

---

# Summary

## Data Sources:
- **Aethalometer IR BCc**: Daily 9am-to-9am resampled MA350 data (ng/m³ → µg/m³)
- **FTIR EC**: Filter-based EC from FTIR spectroscopy (µg/m³)
- **HIPS Fabs**: Filter-based hybrid absorption (Mm⁻¹ → µg/m³ via MAC = 10 m²/g)

## Analyses:
1. **Monthly patterns** — monthly variation for each method
2. **Weekly patterns** — weekday vs weekend differences
3. **Seasonal patterns** — Ethiopian season distributions by method
4. **Time series** — all three methods on same timeline + inter-method ratios
5. **Inter-method agreement** — pairwise scatter plots, regressions (R²), and per-season Pearson r

In [None]:
# Final banner marking the end of the notebook run.
print(
    "=" * 80,
    "NOTEBOOK 02.5: TEMPORAL PATTERNS — AETH + FTIR EC + HIPS Fabs COMPLETE",
    "=" * 80,
    sep="\n",
)