# Addis Ababa: AERONET Columnar Aerosol Analysis

This notebook compares surface BC measurements (aethalometer) with columnar aerosol properties from AERONET.

## Tasks Covered:
1. **AOD overview** - time series and seasonal patterns of aerosol optical depth
2. **BC vs AOD correlation** - surface BC concentration vs columnar AOD by season
3. **Angstrom Exponent comparison** - columnar AE (440-870nm) vs surface AAE
4. **Fine Mode Fraction vs BC** - SDA fine mode fraction relationship with surface BC
5. **Precipitable Water vs BC** - column water vapor as washout proxy

## Data Sources:
- **Surface**: MA350 aethalometer 1-min BC (ng/m³ -> µg/m³)
- **Columnar**: AERONET Version 3 Level 2.0 (quality-assured) from AAU_Jackros_ET site

---

## Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from matplotlib.dates import MonthLocator, DateFormatter

# Add scripts folder to path
# The quoted literal '__file__' is used (presumably because `__file__` is not
# defined inside a notebook kernel). abspath() resolves that literal against the
# current working directory, so notebook_dir is the directory the kernel runs in.
notebook_dir = os.path.dirname(os.path.abspath('__file__'))
scripts_path = os.path.join(notebook_dir, 'scripts')
# Prepend only once so re-running this cell does not grow sys.path.
if scripts_path not in sys.path:
    sys.path.insert(0, scripts_path)

# Project-local modules; these imports only resolve after the sys.path edit above.
from config import SITES
from data_matching import load_etad_factors_with_filter_ids
print("Loaded config and data_matching")

# Configure matplotlib
# Global plotting defaults; individual figures below override figsize as needed.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 11

# Create output directories
def setup_directories():
    """Create the notebook's output folders and return their locations.

    Returns:
        dict: ``'plots'`` and ``'data'`` mapped to relative directory paths.
        Each directory is created if missing; existing ones are left as-is.
    """
    output_dirs = dict(
        plots='output/plots/addis_ababa/aeronet',
        data='output/data/addis_ababa',
    )
    for key in output_dirs:
        # exist_ok=True makes repeated runs of the cell harmless.
        os.makedirs(output_dirs[key], exist_ok=True)
    return output_dirs

# Create the output folders now; `dirs['plots']` is used by later cells when saving figures.
dirs = setup_directories()
print("Setup complete!")

## Configuration

In [None]:
# Site-level settings. Only the 'seasons' mapping (season name -> calendar month
# numbers) is read by the loader functions in this notebook; 'timezone' and
# 'primary_bc_col' are not referenced in the cells shown here.
ADDIS_CONFIG = {
    'name': 'Addis_Ababa',
    'timezone': 'Africa/Addis_Ababa',
    'primary_bc_col': 'IR BCc',
    'seasons': {
        'Dry Season': [10, 11, 12, 1, 2],
        'Belg Rainy Season': [3, 4, 5],
        'Kiremt Rainy Season': [6, 7, 8, 9]
    }
}

# Fixed display order and colors so every seasonal plot uses the same layout/palette.
SEASONS_ORDER = ['Dry Season', 'Belg Rainy Season', 'Kiremt Rainy Season']
SEASON_COLORS = {'Dry Season': '#E67E22', 'Belg Rainy Season': '#27AE60', 'Kiremt Rainy Season': '#3498DB'}

# AERONET missing value sentinel
# Replaced with NaN by the AERONET loaders after reading the files.
AERONET_MISSING = -999.

print(f"Site: {ADDIS_CONFIG['name']}")

## Data Loading

In [None]:
# Absolute, machine-specific input paths (a local Google Drive mount).
# NOTE(review): these must be edited to run the notebook on another machine.
# BC_FILEPATH is a pickle read with pd.read_pickle; the two AERONET paths are
# text files read with pd.read_csv.
BC_FILEPATH = "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/Aethelometry Data/JacrosMA350 60s Data20250804082112/df_jacros_cleaned_API_and_OG_manual_BC_all_wl.pkl"
AERONET_AOD_PATH = "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/AERONET/Jacros/20220101_20251231_AAU_Jackros_ET Daily/20220101_20251231_AAU_Jackros_ET.lev20"
AERONET_SDA_PATH = "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/AERONET/Jacros/20220101_20251231_AAU_Jackros_ET Daily/20220101_20251231_AAU_Jackros_ET.ONEILL_lev20"

def load_aethalometer_addis(filepath):
    """Read the pickled aethalometer table and return a cleaned, time-indexed frame.

    The frame is indexed by ``datetime_local`` (sorted). The BC columns that
    are present are divided by 1000, ``Month``, ``Hour`` and
    ``Ethiopian_Season`` columns are added, and BC values that are negative or
    above mean + 3*std (computed after removing negatives) are set to NaN.

    Args:
        filepath: Path to a pickle readable by ``pd.read_pickle``.

    Returns:
        pandas.DataFrame indexed by ``datetime_local``.
    """
    season_months = ADDIS_CONFIG['seasons']

    def season_of(month):
        # Any month outside the Dry and Belg lists falls through to Kiremt.
        if month in season_months['Dry Season']:
            return 'Dry Season'
        if month in season_months['Belg Rainy Season']:
            return 'Belg Rainy Season'
        return 'Kiremt Rainy Season'

    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(filepath)
    frame['datetime_local'] = pd.to_datetime(frame['datetime_local'])
    frame.set_index('datetime_local', inplace=True)
    frame.sort_index(inplace=True)

    candidate_cols = ('UV BCc', 'IR BCc', 'UV BC1', 'IR BC1')
    present_bc = [name for name in candidate_cols if name in frame.columns]

    for name in present_bc:
        frame[name] = frame[name] / 1000  # ng/m³ -> µg/m³

    frame['Month'] = frame.index.month
    frame['Hour'] = frame.index.hour
    frame['Ethiopian_Season'] = frame['Month'].map(season_of)

    for name in present_bc:
        # Negatives are removed first so they do not distort the mean/std
        # used for the upper outlier cut.
        is_negative = frame[name] < 0
        frame.loc[is_negative, name] = np.nan
        center, spread = frame[name].mean(), frame[name].std()
        is_outlier = frame[name] > center + 3*spread
        frame.loc[is_outlier, name] = np.nan
    return frame


def load_aeronet_aod(filepath):
    """Read an AERONET daily AOD file into a date-indexed DataFrame.

    The first six lines of the file are skipped, the ``Date(dd:mm:yyyy)``
    column becomes a sorted ``Date`` index, the ``AERONET_MISSING`` sentinel
    is replaced by NaN, and ``Month`` / ``Ethiopian_Season`` columns are added.

    Args:
        filepath: Path to the AERONET AOD text file.

    Returns:
        pandas.DataFrame indexed by ``Date``.
    """
    season_months = ADDIS_CONFIG['seasons']

    def season_of(month):
        # Any month outside the Dry and Belg lists falls through to Kiremt.
        if month in season_months['Dry Season']:
            return 'Dry Season'
        if month in season_months['Belg Rainy Season']:
            return 'Belg Rainy Season'
        return 'Kiremt Rainy Season'

    table = pd.read_csv(filepath, skiprows=6)

    # Dates are colon-separated day:month:year.
    table['Date'] = pd.to_datetime(table['Date(dd:mm:yyyy)'], format='%d:%m:%Y')
    table = table.set_index('Date').sort_index()

    # Sentinel values -> NaN so they are ignored by later statistics.
    table = table.replace(AERONET_MISSING, np.nan)

    table['Month'] = table.index.month
    table['Ethiopian_Season'] = table['Month'].map(season_of)
    return table


def load_aeronet_sda(filepath):
    """Read an AERONET SDA daily file into a date-indexed DataFrame.

    Mirrors the AOD loader, except that the date column is named
    ``Date_(dd:mm:yyyy)``: six header lines are skipped, the date becomes a
    sorted ``Date`` index, ``AERONET_MISSING`` is replaced by NaN, and
    ``Month`` / ``Ethiopian_Season`` columns are added.

    Args:
        filepath: Path to the AERONET SDA text file.

    Returns:
        pandas.DataFrame indexed by ``Date``.
    """
    season_months = ADDIS_CONFIG['seasons']

    def season_of(month):
        # Any month outside the Dry and Belg lists falls through to Kiremt.
        if month in season_months['Dry Season']:
            return 'Dry Season'
        if month in season_months['Belg Rainy Season']:
            return 'Belg Rainy Season'
        return 'Kiremt Rainy Season'

    table = pd.read_csv(filepath, skiprows=6)

    table['Date'] = pd.to_datetime(table['Date_(dd:mm:yyyy)'], format='%d:%m:%Y')
    table = table.set_index('Date').sort_index()
    table = table.replace(AERONET_MISSING, np.nan)

    table['Month'] = table.index.month
    table['Ethiopian_Season'] = table['Month'].map(season_of)
    return table


# Load all data
# Each loader returns a time-indexed DataFrame; the prints report row counts
# and the date span covered by each source.
df = load_aethalometer_addis(BC_FILEPATH)
print(f"BC data: {len(df):,} records ({df.index.min().date()} to {df.index.max().date()})")

aod_df = load_aeronet_aod(AERONET_AOD_PATH)
print(f"AERONET AOD: {len(aod_df)} days ({aod_df.index.min().date()} to {aod_df.index.max().date()})")

sda_df = load_aeronet_sda(AERONET_SDA_PATH)
print(f"AERONET SDA: {len(sda_df)} days ({sda_df.index.min().date()} to {sda_df.index.max().date()})")

# Compute daily BC averages for merging with AERONET daily data
# BC columns are averaged per calendar day; the season label is taken from the
# first record of each day. Days with no records come out as NaN rows, which is
# why the count below uses notna().
bc_daily = df[['IR BCc', 'UV BCc', 'Ethiopian_Season']].resample('D').agg({
    'IR BCc': 'mean',
    'UV BCc': 'mean',
    'Ethiopian_Season': 'first'
})
# Strip timezone for merge with timezone-naive AERONET index
# tz_localize(None) keeps the local wall-clock dates and only drops the tz info.
bc_daily.index = bc_daily.index.tz_localize(None)
print(f"\nDaily BC averages: {bc_daily['IR BCc'].notna().sum()} valid days")

In [None]:
# --- Load PMF Factor Contributions ---
# Maps the factor column names returned by the loader to short "<source>_frac" names.
FACTOR_TO_FRAC = {
    'GF3 (Charcoal)':              'charcoal_frac',
    'GF2 (Wood Burning)':          'wood_frac',
    'GF5 (Fossil Fuel Combustion)':'fossil_fuel_frac',
    'GF4 (Polluted Marine)':       'polluted_marine_frac',
    'GF1 (Sea Salt Mixed)':        'sea_salt_frac',
}

factors_df = load_etad_factors_with_filter_ids()
factors_df = factors_df.rename(columns=FACTOR_TO_FRAC)
frac_cols = list(FACTOR_TO_FRAC.values())

# Normalize to relative source contributions (raw GFs are PM2.5 mass fractions, not relative)
frac_sum = factors_df[frac_cols].sum(axis=1)
for col in frac_cols:
    factors_df[col] = factors_df[col] / frac_sum

# Date-keyed lookup table used to broadcast per-filter fractions onto every BC record.
# NOTE(review): assumes factors_df['date'] holds unique, tz-naive midnight
# timestamps comparable with the normalized BC index -- confirm against the loader.
factor_map = factors_df.set_index('date')[frac_cols]

# Reduce each BC timestamp to its calendar date (tz dropped) to match the lookup keys.
merge_dates = df.index.normalize()
if merge_dates.tz is not None:
    merge_dates = merge_dates.tz_localize(None)

for col in frac_cols:
    df[col] = merge_dates.map(factor_map[col])

# Add dominant source
# Most BC records fall on days without factor data, so their fraction columns
# are all NaN. DataFrame.idxmax on all-NaN rows is deprecated in recent pandas
# and documented to raise in a future version, so the argmax is taken only
# over rows that have at least one fraction; the other rows stay NaN.
frac_values = df[frac_cols].to_numpy(dtype=float)
has_factor = ~np.isnan(frac_values).all(axis=1)
source_names = np.array([col.replace('_frac', '') for col in frac_cols], dtype=object)
dominant_source = np.full(len(df), np.nan, dtype=object)
dominant_source[has_factor] = source_names[np.nanargmax(frac_values[has_factor], axis=1)]
df['dominant_source'] = dominant_source
df['dominant_fraction'] = df[frac_cols].max(axis=1)

# Counts below are BC records (rows of df), not distinct filter days.
n_with = int(has_factor.sum())
print(f"Factor data merged: {n_with} rows with factor data out of {len(df)} total")
print(f"Dominant fraction: mean={df['dominant_fraction'].dropna().mean():.1%}, "
      f"≥50%: {(df['dominant_fraction'] >= 0.50).sum()}, "
      f"≥30%: {(df['dominant_fraction'] >= 0.30).sum()}")

---

# Task 1: AOD Overview

**Goal**: Visualize AOD time series and seasonal patterns at the Addis Ababa AERONET site.

In [None]:
def plot_aod_overview(aod_df):
    """Draw the AOD time series figure and the seasonal AOD 500nm boxplot.

    Also prints per-season count, mean, median and std of AOD 500nm.

    Args:
        aod_df: Date-indexed AOD table with ``AOD_*nm`` columns and an
            ``Ethiopian_Season`` column.

    Returns:
        tuple: ``(fig1, fig2)`` -- the time series figure and the boxplot figure.
    """
    band_specs = [
        ('AOD_440nm', '440nm', '#9C27B0'),
        ('AOD_500nm', '500nm', '#2196F3'),
        ('AOD_675nm', '675nm', '#4CAF50'),
        ('AOD_870nm', '870nm', '#F44336'),
    ]

    # Figure 1: raw points plus a smoothed line per wavelength
    fig1, ts_ax = plt.subplots(figsize=(15, 6))
    for column, legend_label, band_color in band_specs:
        if column not in aod_df.columns:
            continue
        observed = aod_df[column].dropna()
        ts_ax.plot(observed.index, observed.values, 'o', markersize=2, alpha=0.5,
                   color=band_color, label=legend_label)
        # The window counts valid observations (rows), not calendar days,
        # because missing days were dropped before rolling.
        smoothed = observed.rolling(window=30, min_periods=7).mean()
        ts_ax.plot(smoothed.index, smoothed.values, '-', linewidth=2, color=band_color)

    ts_ax.set_xlabel('Date', fontsize=12)
    ts_ax.set_ylabel('Aerosol Optical Depth', fontsize=12)
    ts_ax.set_title('AERONET AOD Time Series - AAU Jackros ET (Level 2.0)', fontsize=14, fontweight='bold')
    ts_ax.legend(loc='upper right', title='Wavelength')
    ts_ax.grid(True, alpha=0.3)
    plt.tight_layout()

    # Figure 2: AOD 500nm distribution per season
    fig2, box_ax = plt.subplots(figsize=(10, 6))
    aod500_by_season = [
        aod_df[aod_df['Ethiopian_Season'] == season]['AOD_500nm'].dropna()
        for season in SEASONS_ORDER
    ]

    boxes = box_ax.boxplot(aod500_by_season, labels=SEASONS_ORDER, patch_artist=True, showfliers=True,
                           flierprops=dict(marker='o', markersize=3, alpha=0.4))
    for patch, season in zip(boxes['boxes'], SEASONS_ORDER):
        patch.set_facecolor(SEASON_COLORS[season])
        patch.set_alpha(0.7)

    # Sample-size labels near the top of the axes, one per box (boxes sit at x = 1, 2, 3).
    for position, sample in enumerate(aod500_by_season, start=1):
        box_ax.text(position, box_ax.get_ylim()[1] * 0.95, f'n={len(sample)}', ha='center', fontsize=9)

    box_ax.set_ylabel('AOD 500nm', fontsize=12)
    box_ax.set_title('AOD 500nm by Ethiopian Season', fontsize=14, fontweight='bold')
    box_ax.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()

    # Summary stats
    print("\nAOD 500nm Statistics by Season:")
    print("=" * 60)
    for season, sample in zip(SEASONS_ORDER, aod500_by_season):
        if len(sample) > 0:
            print(f"  {season}: n={len(sample)}, mean={sample.mean():.3f}, median={sample.median():.3f}, std={sample.std():.3f}")

    return fig1, fig2

# Run Task 1: build both AOD figures, save them as PNGs under dirs['plots'], then display.
print("="*80)
print("TASK 1: AOD OVERVIEW")
print("="*80)
fig1, fig2 = plot_aod_overview(aod_df)
fig1.savefig(os.path.join(dirs['plots'], 'aod_timeseries.png'), dpi=150, bbox_inches='tight')
fig2.savefig(os.path.join(dirs['plots'], 'aod_seasonal_boxplot.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 2: Surface BC vs Columnar AOD

**Goal**: Correlate daily surface BC concentrations with AERONET columnar AOD by season.

In [None]:
def analyze_bc_aod_correlation(bc_daily, aod_df, bc_col='IR BCc'):
    """Join daily BC with AOD on date and report Pearson correlations with AOD 500nm.

    Args:
        bc_daily: Date-indexed daily BC table containing ``bc_col``.
        aod_df: Date-indexed AOD table with ``AOD_500nm``, ``AOD_870nm`` and
            ``Ethiopian_Season`` columns.
        bc_col: Name of the BC column to correlate.

    Returns:
        tuple: ``(merged, correlations)`` where ``merged`` holds the matched
        days and ``correlations`` maps each season with more than 5 matched
        days, plus ``'All'``, to a dict with keys ``'r'``, ``'p'`` and ``'n'``.
    """
    bc_part = bc_daily[[bc_col]]
    aod_part = aod_df[['AOD_500nm', 'AOD_870nm', 'Ethiopian_Season']]
    merged = bc_part.merge(aod_part, left_index=True, right_index=True, how='inner')
    # Keep only days where both quantities being correlated are present.
    merged = merged.dropna(subset=[bc_col, 'AOD_500nm'])

    print(f"\nMerged BC-AOD dataset: {len(merged)} matched days")
    print("=" * 60)

    correlations = {}
    for season in SEASONS_ORDER:
        in_season = merged[merged['Ethiopian_Season'] == season]
        n_days = len(in_season)
        if n_days <= 5:
            # Too few matched days for this season; it is left out of the result.
            continue
        r, p = stats.pearsonr(in_season[bc_col], in_season['AOD_500nm'])
        correlations[season] = dict(r=r, p=p, n=n_days)
        marker = '*' if p < 0.05 else ''
        print(f"  {season}: n={n_days}, r={r:.3f}{marker}, p={p:.3e}")

    # Overall
    n_total = len(merged)
    r_all, p_all = stats.pearsonr(merged[bc_col], merged['AOD_500nm'])
    correlations['All'] = dict(r=r_all, p=p_all, n=n_total)
    print(f"\n  Overall: n={n_total}, r={r_all:.3f}, p={p_all:.3e}")

    return merged, correlations


def plot_bc_vs_aod(merged, correlations, bc_col='IR BCc'):
    """Draw per-season and all-season scatter panels of daily BC against AOD 500nm.

    Args:
        merged: Matched daily table with ``bc_col``, ``AOD_500nm`` and
            ``Ethiopian_Season`` columns.
        correlations: Mapping of season name (and ``'All'``) to a dict with
            keys ``'r'``, ``'p'`` and ``'n'``; ``'All'`` must be present.
        bc_col: Name of the BC column plotted on the x axis.

    Returns:
        The matplotlib figure with four panels (three seasons + all seasons).
    """
    fig, axes = plt.subplots(1, 4, figsize=(22, 5))

    def rows_for(season_name):
        return merged[merged['Ethiopian_Season'] == season_name]

    def annotate_stats(target_ax, entry):
        target_ax.text(0.05, 0.95, f"r={entry['r']:.3f}\np={entry['p']:.2e}\nn={entry['n']}",
                       transform=target_ax.transAxes, fontsize=9, va='top',
                       bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

    # Per season
    for panel_idx, (panel, season) in enumerate(zip(axes, SEASONS_ORDER)):
        subset = rows_for(season)

        panel.scatter(subset[bc_col], subset['AOD_500nm'], alpha=0.6, s=30,
                      color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3)

        # A fit line is drawn only for seasons that also received a correlation.
        if season in correlations and len(subset) > 5:
            coeffs = np.polyfit(subset[bc_col], subset['AOD_500nm'], 1)
            fitted = np.poly1d(coeffs)
            bc_grid = np.linspace(subset[bc_col].min(), subset[bc_col].max(), 100)
            panel.plot(bc_grid, fitted(bc_grid), 'k--', linewidth=2, alpha=0.7)
            annotate_stats(panel, correlations[season])

        panel.set_xlabel('Daily Mean BC (µg/m³)', fontsize=10)
        # Only the left-most panel carries the shared y label.
        if panel_idx == 0:
            panel.set_ylabel('AOD 500nm', fontsize=10)
        else:
            panel.set_ylabel('', fontsize=10)
        panel.set_title(season, fontsize=11, fontweight='bold')
        panel.grid(True, alpha=0.3)

    # Overall
    overall = axes[3]
    for season in SEASONS_ORDER:
        subset = rows_for(season)
        overall.scatter(subset[bc_col], subset['AOD_500nm'], alpha=0.5, s=30,
                        color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3, label=season)

    annotate_stats(overall, correlations['All'])
    overall.set_xlabel('Daily Mean BC (µg/m³)', fontsize=10)
    overall.set_title('All Seasons', fontsize=11, fontweight='bold')
    overall.legend(fontsize=8)
    overall.grid(True, alpha=0.3)

    plt.suptitle('Surface BC vs Columnar AOD (500nm)', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Run Task 2: correlate daily BC with AOD, plot, save and display.
print("="*80)
print("TASK 2: BC vs AOD CORRELATION")
print("="*80)
merged_aod, aod_correlations = analyze_bc_aod_correlation(bc_daily, aod_df)
fig = plot_bc_vs_aod(merged_aod, aod_correlations)
# plt.savefig writes the current pyplot figure, i.e. the one plot_bc_vs_aod just created.
plt.savefig(os.path.join(dirs['plots'], 'bc_vs_aod.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 3: Angstrom Exponent Comparison

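**Goal**: Compare the columnar Angstrom Exponent (AE, 440-870nm, from the spectral dependence of AOD) with the surface Absorption Angstrom Exponent (AAE, 375-880nm, from the aethalometer UV and IR channels). The two are related but not interchangeable: columnar AE describes the wavelength dependence of total extinction and mainly reflects particle size, while surface AAE describes the wavelength dependence of absorption and mainly reflects the composition/source of the absorbing aerosol.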

In [None]:
def compute_daily_aae(df_minute, uv_col='UV BCc', ir_col='IR BCc'):
    """Derive a daily-mean spectral exponent from the UV and IR BC columns.

    Per record the value is ``ln(IR / UV) / ln(880 / 375)``, computed only
    where both columns are positive and the IR/UV ratio lies strictly between
    0.1 and 10. Values are clipped to [-1, 3] and then averaged per day.

    NOTE(review): the ratio is taken on the BC columns as IR over UV, so equal
    UV and IR values yield 0 rather than the ~1 usually quoted as the AAE of
    pure BC. Confirm this is the intended AAE convention.

    Args:
        df_minute: Time-indexed table containing ``uv_col`` and ``ir_col``.
        uv_col: Name of the UV-channel BC column.
        ir_col: Name of the IR-channel BC column.

    Returns:
        pandas.Series of daily means on a timezone-naive daily index.
    """
    log_wavelength_span = np.log(880 / 375)

    pair = df_minute[[uv_col, ir_col]].copy()
    both_positive = (pair[uv_col] > 0) & (pair[ir_col] > 0)
    pair = pair[both_positive]

    ir_over_uv = pair[ir_col] / pair[uv_col]
    plausible = (ir_over_uv > 0.1) & (ir_over_uv < 10)
    ir_over_uv = ir_over_uv[plausible]

    exponent = (np.log(ir_over_uv) / log_wavelength_span).clip(-1, 3)

    # Daily mean, then drop tz info so the result can be joined to a naive date index.
    daily = exponent.resample('D').mean()
    daily.index = daily.index.tz_localize(None)
    return daily


def analyze_ae_comparison(aae_daily, aod_df):
    """Join daily surface AAE with the columnar 440-870nm Angstrom Exponent and print correlations.

    Args:
        aae_daily: Date-indexed Series of daily surface AAE.
        aod_df: Date-indexed AOD table with ``440-870_Angstrom_Exponent`` and
            ``Ethiopian_Season`` columns.

    Returns:
        pandas.DataFrame of matched days with ``Surface_AAE``, ``Columnar_AE``
        and ``Ethiopian_Season`` columns (rows with any missing value removed).
    """
    ae_col = '440-870_Angstrom_Exponent'

    surface = aae_daily.rename('Surface_AAE').to_frame()
    columnar = aod_df[[ae_col, 'Ethiopian_Season']]
    merged = pd.merge(surface, columnar, left_index=True, right_index=True, how='inner')
    merged = merged.dropna().rename(columns={ae_col: 'Columnar_AE'})

    print(f"\nAngstrom Exponent Comparison: {len(merged)} matched days")
    print("=" * 60)

    surface_vals = merged['Surface_AAE']
    columnar_vals = merged['Columnar_AE']
    r, p = stats.pearsonr(surface_vals, columnar_vals)
    print(f"  Overall: r={r:.3f}, p={p:.3e}")
    print(f"  Surface AAE:  mean={surface_vals.mean():.3f}, std={surface_vals.std():.3f}")
    print(f"  Columnar AE:  mean={columnar_vals.mean():.3f}, std={columnar_vals.std():.3f}")

    for season in SEASONS_ORDER:
        in_season = merged[merged['Ethiopian_Season'] == season]
        if len(in_season) <= 5:
            # Seasons with too few matched days are not reported.
            continue
        rs, ps = stats.pearsonr(in_season['Surface_AAE'], in_season['Columnar_AE'])
        print(f"  {season}: n={len(in_season)}, r={rs:.3f}, p={ps:.3e}")

    return merged


def plot_ae_comparison(merged):
    """Draw a season-colored scatter and a time series of surface AAE and columnar AE.

    Args:
        merged: Date-indexed table with ``Surface_AAE``, ``Columnar_AE`` and
            ``Ethiopian_Season`` columns.

    Returns:
        The matplotlib figure with two panels (scatter, time series).
    """
    fig, (scatter_ax, series_ax) = plt.subplots(1, 2, figsize=(16, 6))

    # Scatter
    for season in SEASONS_ORDER:
        subset = merged[merged['Ethiopian_Season'] == season]
        scatter_ax.scatter(subset['Columnar_AE'], subset['Surface_AAE'], alpha=0.5, s=40,
                           color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3, label=season)

    r, _ = stats.pearsonr(merged['Surface_AAE'], merged['Columnar_AE'])
    scatter_ax.text(0.05, 0.95, f"r={r:.3f}\nn={len(merged)}", transform=scatter_ax.transAxes, fontsize=10,
                    va='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

    # 1:1 reference line spanning the combined range of both quantities.
    both = merged[['Surface_AAE', 'Columnar_AE']]
    lower = min(both.min())
    upper = max(both.max())
    scatter_ax.plot([lower, upper], [lower, upper], 'k--', alpha=0.5, linewidth=1.5, label='1:1 line')
    scatter_ax.set_xlabel('Columnar AE (440-870nm)', fontsize=12)
    scatter_ax.set_ylabel('Surface AAE (375-880nm)', fontsize=12)
    scatter_ax.set_title('Surface AAE vs Columnar AE', fontsize=13, fontweight='bold')
    scatter_ax.legend(fontsize=9)
    scatter_ax.grid(True, alpha=0.3)

    # Time series
    dates = merged.index
    series_ax.plot(dates, merged['Surface_AAE'], 'o-', markersize=3, alpha=0.6, color='#E74C3C', label='Surface AAE')
    series_ax.plot(dates, merged['Columnar_AE'], 's-', markersize=3, alpha=0.6, color='#3498DB', label='Columnar AE')
    series_ax.set_xlabel('Date', fontsize=12)
    series_ax.set_ylabel('Angstrom Exponent', fontsize=12)
    series_ax.set_title('Angstrom Exponent Time Series', fontsize=13, fontweight='bold')
    series_ax.legend(fontsize=10)
    series_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

# Run Task 3: daily surface AAE from the minute-level BC table, compared with columnar AE.
print("="*80)
print("TASK 3: ANGSTROM EXPONENT COMPARISON")
print("="*80)
aae_daily = compute_daily_aae(df)
merged_ae = analyze_ae_comparison(aae_daily, aod_df)
fig = plot_ae_comparison(merged_ae)
# plt.savefig writes the current pyplot figure, i.e. the one plot_ae_comparison just created.
plt.savefig(os.path.join(dirs['plots'], 'ae_comparison.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 4: Fine Mode Fraction vs BC

**Goal**: Analyze the relationship between SDA fine mode fraction (eta at 500nm) and surface BC. Higher fine mode fraction suggests combustion-dominated aerosol.

In [None]:
def analyze_fmf_bc(bc_daily, sda_df, bc_col='IR BCc'):
    """Correlate daily surface BC with SDA fine mode fraction and fine mode AOD.

    Daily BC is inner-joined with the SDA table on date, keeping days where
    both BC and fine mode fraction are present. Overall and per-season
    Pearson correlations are printed.

    Args:
        bc_daily: Date-indexed daily BC table containing ``bc_col``.
        sda_df: Date-indexed SDA table with the fine mode fraction, fine mode
            AOD, coarse mode AOD and ``Ethiopian_Season`` columns.
        bc_col: Name of the BC column to correlate.

    Returns:
        tuple: ``(merged, fmf_col, fine_aod_col)`` -- the matched table and the
        names of the fine mode fraction and fine mode AOD columns.
    """
    fmf_col = 'FineModeFraction_500nm[eta]'
    fine_aod_col = 'Fine_Mode_AOD_500nm[tau_f]'
    coarse_aod_col = 'Coarse_Mode_AOD_500nm[tau_c]'

    merged = pd.merge(
        bc_daily[[bc_col]],
        sda_df[[fmf_col, fine_aod_col, coarse_aod_col, 'Ethiopian_Season']],
        left_index=True, right_index=True, how='inner'
    ).dropna(subset=[bc_col, fmf_col])

    print(f"\nFine Mode Fraction vs BC: {len(merged)} matched days")
    print("=" * 60)

    r, p = stats.pearsonr(merged[bc_col], merged[fmf_col])
    print(f"  Overall BC vs FMF: r={r:.3f}, p={p:.3e}")

    # Fine mode AOD may be missing on days where FMF is present. Rows are
    # dropped pairwise so both inputs to pearsonr have the same length and
    # stay aligned day-by-day (dropping NaN from one side only would make the
    # two arrays differ in length).
    fine_valid = merged.dropna(subset=[fine_aod_col])
    r2, p2 = stats.pearsonr(fine_valid[bc_col], fine_valid[fine_aod_col])
    print(f"  Overall BC vs Fine AOD: r={r2:.3f}, p={p2:.3e}")

    print(f"\n  FMF: mean={merged[fmf_col].mean():.3f}, std={merged[fmf_col].std():.3f}")

    for season in SEASONS_ORDER:
        sdata = merged[merged['Ethiopian_Season'] == season]
        # Seasons with 5 or fewer matched days are not reported.
        if len(sdata) > 5:
            rs, ps = stats.pearsonr(sdata[bc_col], sdata[fmf_col])
            print(f"  {season}: n={len(sdata)}, FMF mean={sdata[fmf_col].mean():.3f}, r(BC,FMF)={rs:.3f}")

    return merged, fmf_col, fine_aod_col


def plot_fmf_bc(merged, fmf_col, fine_aod_col, bc_col='IR BCc'):
    """Draw BC vs fine mode fraction, BC vs fine mode AOD, and a seasonal FMF boxplot.

    Args:
        merged: Matched daily table with ``bc_col``, ``fmf_col``,
            ``fine_aod_col`` and ``Ethiopian_Season`` columns.
        fmf_col: Name of the fine mode fraction column.
        fine_aod_col: Name of the fine mode AOD column.
        bc_col: Name of the BC column plotted on the x axis.

    Returns:
        The matplotlib figure with three panels.
    """
    fig, (fmf_ax, fine_ax, box_ax) = plt.subplots(1, 3, figsize=(18, 5))

    def scatter_by_season(target_ax, table, y_col):
        # One scatter call per season so each gets its own color and legend entry.
        for season in SEASONS_ORDER:
            subset = table[table['Ethiopian_Season'] == season]
            target_ax.scatter(subset[bc_col], subset[y_col], alpha=0.5, s=30,
                              color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3, label=season)

    # BC vs FMF
    scatter_by_season(fmf_ax, merged, fmf_col)
    r, _ = stats.pearsonr(merged[bc_col], merged[fmf_col])
    fmf_ax.text(0.05, 0.05, f"r={r:.3f}\nn={len(merged)}", transform=fmf_ax.transAxes, fontsize=10,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))
    fmf_ax.set_xlabel('Daily Mean BC (µg/m³)', fontsize=11)
    fmf_ax.set_ylabel('Fine Mode Fraction (500nm)', fontsize=11)
    fmf_ax.set_title('BC vs Fine Mode Fraction', fontsize=12, fontweight='bold')
    fmf_ax.legend(fontsize=8)
    fmf_ax.grid(True, alpha=0.3)

    # BC vs Fine AOD (only days where fine mode AOD is present)
    with_fine_aod = merged.dropna(subset=[fine_aod_col])
    scatter_by_season(fine_ax, with_fine_aod, fine_aod_col)
    r2, _ = stats.pearsonr(with_fine_aod[bc_col], with_fine_aod[fine_aod_col])
    fine_ax.text(0.05, 0.95, f"r={r2:.3f}\nn={len(with_fine_aod)}", transform=fine_ax.transAxes, fontsize=10,
                 va='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))
    fine_ax.set_xlabel('Daily Mean BC (µg/m³)', fontsize=11)
    fine_ax.set_ylabel('Fine Mode AOD (500nm)', fontsize=11)
    fine_ax.set_title('BC vs Fine Mode AOD', fontsize=12, fontweight='bold')
    fine_ax.legend(fontsize=8)
    fine_ax.grid(True, alpha=0.3)

    # Seasonal FMF boxplot
    fmf_by_season = [
        merged[merged['Ethiopian_Season'] == season][fmf_col].dropna()
        for season in SEASONS_ORDER
    ]
    short_labels = [season.replace(' Season', '') for season in SEASONS_ORDER]
    boxes = box_ax.boxplot(fmf_by_season, labels=short_labels,
                           patch_artist=True, showfliers=False)
    for patch, season in zip(boxes['boxes'], SEASONS_ORDER):
        patch.set_facecolor(SEASON_COLORS[season])
        patch.set_alpha(0.7)
    box_ax.set_ylabel('Fine Mode Fraction (500nm)', fontsize=11)
    box_ax.set_title('FMF by Season', fontsize=12, fontweight='bold')
    box_ax.grid(True, alpha=0.3, axis='y')

    plt.suptitle('SDA Fine Mode Analysis vs Surface BC', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Run Task 4: fine mode fraction / fine mode AOD vs daily BC, then plot, save and display.
print("="*80)
print("TASK 4: FINE MODE FRACTION vs BC")
print("="*80)
merged_fmf, fmf_col, fine_aod_col = analyze_fmf_bc(bc_daily, sda_df)
fig = plot_fmf_bc(merged_fmf, fmf_col, fine_aod_col)
# plt.savefig writes the current pyplot figure, i.e. the one plot_fmf_bc just created.
plt.savefig(os.path.join(dirs['plots'], 'fmf_vs_bc.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 5: Precipitable Water vs BC

**Goal**: Use column precipitable water (from the AERONET 935nm channel) as a proxy for atmospheric moisture, and examine its relationship with surface BC as an indicator of washout.

In [None]:
def analyze_precipitable_water_bc(bc_daily, aod_df, bc_col='IR BCc'):
    """Join daily BC with precipitable water on date and report Pearson correlations.

    Args:
        bc_daily: Date-indexed daily BC table containing ``bc_col``.
        aod_df: Date-indexed AOD table with ``Precipitable_Water(cm)`` and
            ``Ethiopian_Season`` columns.
        bc_col: Name of the BC column to correlate.

    Returns:
        tuple: ``(merged, pw_col, correlations)`` -- the matched table, the
        precipitable water column name, and a mapping of each season with more
        than 5 matched days to a dict with keys ``'r'``, ``'p'`` and ``'n'``.
    """
    pw_col = 'Precipitable_Water(cm)'

    bc_part = bc_daily[[bc_col]]
    pw_part = aod_df[[pw_col, 'Ethiopian_Season']]
    merged = pd.merge(bc_part, pw_part, left_index=True, right_index=True, how='inner')
    merged = merged.dropna(subset=[bc_col, pw_col])

    print(f"\nPrecipitable Water vs BC: {len(merged)} matched days")
    print("=" * 60)

    r, p = stats.pearsonr(merged[bc_col], merged[pw_col])
    print(f"  Overall: r={r:.3f}, p={p:.3e}")
    print(f"  PW: mean={merged[pw_col].mean():.3f} cm, std={merged[pw_col].std():.3f}")

    correlations = {}
    for season in SEASONS_ORDER:
        in_season = merged[merged['Ethiopian_Season'] == season]
        n_days = len(in_season)
        if n_days <= 5:
            # Too few matched days for this season; it is left out of the result.
            continue
        rs, ps = stats.pearsonr(in_season[bc_col], in_season[pw_col])
        correlations[season] = dict(r=rs, p=ps, n=n_days)
        marker = '*' if ps < 0.05 else ''
        print(f"  {season}: n={n_days}, PW mean={in_season[pw_col].mean():.2f} cm, r={rs:.3f}{marker}")

    return merged, pw_col, correlations


def plot_precipitable_water_bc(merged, pw_col, correlations, bc_col='IR BCc'):
    """Draw BC vs precipitable water, a dual-axis time series, and a seasonal PW boxplot.

    Args:
        merged: Matched daily table with ``bc_col``, ``pw_col`` and
            ``Ethiopian_Season`` columns.
        pw_col: Name of the precipitable water column.
        correlations: Per-season correlation dict; accepted for interface
            symmetry but not read by this function.
        bc_col: Name of the BC column.

    Returns:
        The matplotlib figure with three panels.
    """
    fig, (scatter_ax, pw_ax, box_ax) = plt.subplots(1, 3, figsize=(18, 5))

    # Scatter: BC vs PW (precipitable water on x, BC on y)
    for season in SEASONS_ORDER:
        subset = merged[merged['Ethiopian_Season'] == season]
        scatter_ax.scatter(subset[pw_col], subset[bc_col], alpha=0.5, s=30,
                           color=SEASON_COLORS[season], edgecolors='black', linewidth=0.3, label=season)

    r, _ = stats.pearsonr(merged[bc_col], merged[pw_col])
    scatter_ax.text(0.95, 0.95, f"r={r:.3f}\nn={len(merged)}", transform=scatter_ax.transAxes, fontsize=10,
                    va='top', ha='right', bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))
    scatter_ax.set_xlabel('Precipitable Water (cm)', fontsize=11)
    scatter_ax.set_ylabel('Daily Mean BC (µg/m³)', fontsize=11)
    scatter_ax.set_title('BC vs Precipitable Water', fontsize=12, fontweight='bold')
    scatter_ax.legend(fontsize=8)
    scatter_ax.grid(True, alpha=0.3)

    # PW time series with BC overlay on a secondary y axis
    bc_ax = pw_ax.twinx()
    dates = merged.index
    pw_ax.plot(dates, merged[pw_col], 'o-', markersize=2, alpha=0.5, color='#3498DB', label='Precipitable Water')
    bc_ax.plot(dates, merged[bc_col], 'o-', markersize=2, alpha=0.5, color='#E74C3C', label='BC')
    pw_ax.set_xlabel('Date', fontsize=11)
    pw_ax.set_ylabel('Precipitable Water (cm)', fontsize=11, color='#3498DB')
    bc_ax.set_ylabel('BC (µg/m³)', fontsize=11, color='#E74C3C')
    pw_ax.set_title('PW and BC Time Series', fontsize=12, fontweight='bold')
    # Combine the handles of both axes so a single legend covers both lines.
    pw_handles, pw_labels = pw_ax.get_legend_handles_labels()
    bc_handles, bc_labels = bc_ax.get_legend_handles_labels()
    pw_ax.legend(pw_handles + bc_handles, pw_labels + bc_labels, fontsize=8, loc='upper right')
    pw_ax.grid(True, alpha=0.3)

    # Seasonal PW boxplot
    pw_by_season = [
        merged[merged['Ethiopian_Season'] == season][pw_col].dropna()
        for season in SEASONS_ORDER
    ]
    short_labels = [season.replace(' Season', '') for season in SEASONS_ORDER]
    boxes = box_ax.boxplot(pw_by_season, labels=short_labels,
                           patch_artist=True, showfliers=False)
    for patch, season in zip(boxes['boxes'], SEASONS_ORDER):
        patch.set_facecolor(SEASON_COLORS[season])
        patch.set_alpha(0.7)
    box_ax.set_ylabel('Precipitable Water (cm)', fontsize=11)
    box_ax.set_title('PW by Season', fontsize=12, fontweight='bold')
    box_ax.grid(True, alpha=0.3, axis='y')

    plt.suptitle('Precipitable Water and Surface BC', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Run Task 5: precipitable water vs daily BC, then plot, save and display.
print("="*80)
print("TASK 5: PRECIPITABLE WATER vs BC")
print("="*80)
merged_pw, pw_col, pw_correlations = analyze_precipitable_water_bc(bc_daily, aod_df)
fig = plot_precipitable_water_bc(merged_pw, pw_col, pw_correlations)
# plt.savefig writes the current pyplot figure, i.e. the one plot_precipitable_water_bc just created.
plt.savefig(os.path.join(dirs['plots'], 'precipitable_water_vs_bc.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Summary

## Data Sources:
- **Surface BC**: MA350 aethalometer 1-min data (1.56M records), aggregated to daily means
- **AERONET AOD**: Version 3 Level 2.0 daily (625 days, Oct 2022 - Nov 2024)
- **AERONET SDA**: Fine/coarse mode decomposition Level 2.0 daily (624 days)

## Analyses:
1. **AOD Overview** - multi-wavelength AOD time series and seasonal boxplots
2. **BC vs AOD** - surface BC correlation with columnar AOD 500nm by season
3. **Angstrom Exponent** - surface AAE (absorption) vs columnar AE (extinction) comparison
4. **Fine Mode Fraction** - SDA fine mode fraction and fine AOD vs surface BC
5. **Precipitable Water** - column water vapor as moisture/washout indicator vs BC

In [None]:
# Closing banner marking the end of the notebook run.
print("="*80)
print("NOTEBOOK 04: AERONET ANALYSIS COMPLETE")
print("="*80)