# Addis Ababa: Meteorological Analysis

This notebook analyzes how precipitation and temperature relate to black carbon (BC) concentrations measured in Addis Ababa.

## Tasks Covered:
1. **Precipitation patterns** - monthly/daily rainfall analysis
2. **BC-precipitation relationship** - BC levels by rain intensity
3. **Dynamic season definitions** - precipitation-based seasons
4. **Temperature correlations** - BC vs temperature by season
5. **Precipitation threshold sensitivity** - test different season definitions

---

## Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from matplotlib.dates import MonthLocator, DateFormatter
import itertools

# Add scripts folder to path so the project-local modules imported below resolve.
# NOTE: '__file__' is a quoted string literal here, not the module attribute, so
# abspath() resolves it against the current working directory; notebook_dir is
# therefore the directory the kernel is running in.
notebook_dir = os.path.dirname(os.path.abspath('__file__'))
scripts_path = os.path.join(notebook_dir, 'scripts')
# Only insert once, so repeated execution does not stack duplicate entries.
if scripts_path not in sys.path:
    sys.path.insert(0, scripts_path)

from config import SITES
from data_matching import load_etad_factors_with_filter_ids
print("Loaded config and data_matching")

# Global matplotlib look: dark-grid style, 12x8 inch default figures, 11 pt text
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

# Create output directories
def setup_directories():
    """
    Create the plot and data output folders and return their paths.

    Returns a dict mapping 'plots' and 'data' to the (relative) directory
    created for each. Existing directories are left untouched.
    """
    plots_dir = 'output/plots/addis_ababa'
    data_dir = 'output/data/addis_ababa'

    for path in (plots_dir, data_dir):
        # exist_ok makes this safe to call repeatedly
        os.makedirs(path, exist_ok=True)

    return {'plots': plots_dir, 'data': data_dir}

# Create the output folders up front; later cells save figures under dirs['plots'].
dirs = setup_directories()
print("Setup complete!")

## Configuration

In [None]:
# Site settings for Addis Ababa, including the static (calendar-month) seasons.
ADDIS_CONFIG = dict(
    name='Addis_Ababa',
    timezone='Africa/Addis_Ababa',
    primary_bc_col='IR BCc',
    seasons={
        'Dry Season': [10, 11, 12, 1, 2],
        'Belg Rainy Season': [3, 4, 5],
        'Kiremt Rainy Season': [6, 7, 8, 9],
    },
)

# Display order and plot colour per season (names match ADDIS_CONFIG['seasons'])
SEASONS_ORDER = ['Dry Season', 'Belg Rainy Season', 'Kiremt Rainy Season']
SEASON_COLORS = {
    'Dry Season': '#E67E22',
    'Belg Rainy Season': '#27AE60',
    'Kiremt Rainy Season': '#3498DB',
}

# (lower, upper) precipitation threshold pairs tried for the dynamic seasons
PRECIP_THRESHOLD_SETS = [
    (3, 10),   # Conservative
    (5, 15),   # Standard
    (7, 20),   # Liberal
]

print(f"Site: {ADDIS_CONFIG['name']}")
print(f"Threshold sets to test: {PRECIP_THRESHOLD_SETS}")

## Data Loading Functions

In [None]:
# Absolute, machine-specific input locations: the BC pickle and the Meteostat CSV.
# Edit both to point at your local copies before running the notebook.
BC_FILEPATH = "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/Aethelometry Data/JacrosMA350 60s Data20250804082112/df_jacros_cleaned_API_and_OG_manual_BC_all_wl.pkl"
METEO_FILEPATH = "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/Weather Data/Meteostat/addis_ababa_weather_data_cleaned.csv"

def load_aethalometer_addis(filepath):
    """
    Read the Addis Ababa aethalometer pickle and return a cleaned DataFrame.

    Steps, in order: index by 'datetime_local' (sorted), divide the BC columns
    by 1000 (ng/m³ -> µg/m³), add 'Month', 'Hour' and the static
    'Ethiopian_Season' label, then set negative values and values more than
    three standard deviations above the column mean to NaN.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(filepath)

    # Chronologically ordered datetime index
    frame['datetime_local'] = pd.to_datetime(frame['datetime_local'])
    frame = frame.set_index('datetime_local').sort_index()

    # Only the BC columns actually present in the file are processed
    bc_cols = [name for name in ('UV BCc', 'IR BCc', 'UV BC1', 'IR BC1')
               if name in frame.columns]

    # ng/m³ -> µg/m³
    for col in bc_cols:
        frame[col] = frame[col] / 1000

    frame['Month'] = frame.index.month
    frame['Hour'] = frame.index.hour

    def _static_season(month):
        # Any month that is neither Dry nor Belg falls through to Kiremt
        if month in ADDIS_CONFIG['seasons']['Dry Season']:
            return 'Dry Season'
        if month in ADDIS_CONFIG['seasons']['Belg Rainy Season']:
            return 'Belg Rainy Season'
        return 'Kiremt Rainy Season'

    frame['Ethiopian_Season'] = frame['Month'].map(_static_season)

    # Outlier masking: negatives first, so they do not enter the mean/std
    for col in bc_cols:
        frame.loc[frame[col] < 0, col] = np.nan
        upper_limit = frame[col].mean() + 3 * frame[col].std()
        frame.loc[frame[col] > upper_limit, col] = np.nan

    return frame


def load_meteorological_data(filepath, target_tz='Africa/Addis_Ababa'):
    """
    Load and process Meteostat meteorological data for Addis Ababa.
    
    Columns used: timestamp, precipitation_mm, temperature_c
    Meteostat provides hourly data in UTC.
    """
    meteo_df = pd.read_csv(filepath, parse_dates=['timestamp'])
    meteo_df.set_index('timestamp', inplace=True)
    
    # Meteostat timestamps are UTC - localize and convert
    meteo_df.index = meteo_df.index.tz_localize('UTC').tz_convert(target_tz)
    
    # Calculate daily and monthly precipitation
    daily_precip = meteo_df['precipitation_mm'].resample('D').sum()
    monthly_precip = meteo_df['precipitation_mm'].resample('ME').sum()
    
    return meteo_df, daily_precip, monthly_precip

# Load BC data (binds the notebook-global `df` used by every later cell)
df = load_aethalometer_addis(BC_FILEPATH)
print(f"BC data loaded: {len(df):,} records")
print(f"BC date range: {df.index.min()} to {df.index.max()}")

# Load meteorological data: hourly frame plus daily and monthly precipitation totals
meteo_df, daily_precip, monthly_precip = load_meteorological_data(METEO_FILEPATH)
print(f"\nMeteo data loaded: {len(meteo_df):,} records")
print(f"Meteo date range: {meteo_df.index.min()} to {meteo_df.index.max()}")
# Non-null counts show how much of each variable is actually usable
print(f"Precipitation: {meteo_df['precipitation_mm'].notna().sum():,} non-null hourly values")
print(f"Temperature: {meteo_df['temperature_c'].notna().sum():,} non-null hourly values")

In [None]:
# --- Load PMF Factor Contributions ---
# Maps the factor column names from the loader to short '*_frac' column names.
FACTOR_TO_FRAC = {
    'GF3 (Charcoal)':              'charcoal_frac',
    'GF2 (Wood Burning)':          'wood_frac',
    'GF5 (Fossil Fuel Combustion)':'fossil_fuel_frac',
    'GF4 (Polluted Marine)':       'polluted_marine_frac',
    'GF1 (Sea Salt Mixed)':        'sea_salt_frac',
}

factors_df = load_etad_factors_with_filter_ids()
factors_df = factors_df.rename(columns=FACTOR_TO_FRAC)
frac_cols = list(FACTOR_TO_FRAC.values())

# Normalize to relative source contributions (raw GFs are PM2.5 mass fractions, not relative)
frac_sum = factors_df[frac_cols].sum(axis=1)
for col in frac_cols:
    factors_df[col] = factors_df[col] / frac_sum

factor_map = factors_df.set_index('date')[frac_cols]

# Match on calendar date; drop the time zone so the keys compare against
# the factor table's dates.
# NOTE(review): assumes the 'date' column holds tz-naive dates - confirm.
merge_dates = df.index.normalize()
if merge_dates.tz is not None:
    merge_dates = merge_dates.tz_localize(None)

for col in frac_cols:
    df[col] = merge_dates.map(factor_map[col])

# Add dominant source.
# idxmax() is only evaluated on rows that have at least one factor value:
# calling it on all-NaN rows is deprecated in pandas 2.x and raises in later
# versions. Rows without factor data keep NaN, as before.
has_factor_data = df[frac_cols].notna().any(axis=1).to_numpy()
dominant_source = np.full(len(df), np.nan, dtype=object)
if has_factor_data.any():
    dominant_source[has_factor_data] = (
        df.loc[has_factor_data, frac_cols]
        .idxmax(axis=1)
        .str.replace('_frac', '', regex=False)
        .to_numpy()
    )
df['dominant_source'] = dominant_source
df['dominant_fraction'] = df[frac_cols].max(axis=1)

n_with = df[frac_cols].notna().any(axis=1).sum()
print(f"Factor data merged: {n_with} rows with factor data out of {len(df)} total")
print(f"Dominant fraction: mean={df['dominant_fraction'].dropna().mean():.1%}, "
      f"≥50%: {(df['dominant_fraction'] >= 0.50).sum()}, "
      f"≥30%: {(df['dominant_fraction'] >= 0.30).sum()}")

---

# Task 1: Precipitation Patterns

**Goal**: Visualize precipitation patterns during the BC measurement period.

In [None]:
def plot_precipitation_patterns(daily_precip, monthly_precip, bc_df):
    """
    Create visualizations of monthly and daily precipitation patterns.

    Parameters:
    -----------
    daily_precip : Series
        Daily precipitation totals with a datetime index
    monthly_precip : Series
        Monthly precipitation totals labelled at month end
    bc_df : DataFrame
        BC data; only its index range is used to select the plotted period

    Returns:
    --------
    (fig1, fig2, daily_filtered, monthly_filtered)
        The monthly bar chart, the daily chart with a 30-day moving average,
        and the two precipitation series restricted to the BC period.
    """
    # Filter to BC measurement period
    start_date = bc_df.index.min()
    end_date = bc_df.index.max()

    # Daily totals are labelled at midnight and monthly totals at month end.
    # Slicing with the raw first/last BC timestamps would therefore drop the
    # first day (label earlier than start_date) and the final month (label
    # later than end_date). Widen the bounds to whole days / whole months so
    # every day and month that overlaps the BC record is kept.
    first_day = start_date.normalize()
    last_month_end = end_date.normalize() + pd.offsets.MonthEnd(0)

    daily_filtered = daily_precip[first_day:end_date]
    monthly_filtered = monthly_precip[first_day:last_month_end]

    # Figure 1: Monthly precipitation
    fig1, ax = plt.subplots(figsize=(15, 6))

    bars = ax.bar(range(len(monthly_filtered)), monthly_filtered.values,
                  color='#2196F3', alpha=0.8, edgecolor='black', linewidth=0.5)

    ax.set_xlabel('Month', fontsize=12)
    ax.set_ylabel('Total Precipitation (mm)', fontsize=12)
    ax.set_title('Monthly Total Precipitation During BC Measurement Period', fontsize=14, fontweight='bold')
    ax.set_xticks(range(len(monthly_filtered)))
    ax.set_xticklabels([d.strftime('%Y-%m') for d in monthly_filtered.index], rotation=45, ha='right')
    ax.grid(True, alpha=0.3, axis='y')

    # Add value labels (dry months are left unlabelled)
    for bar in bars:
        height = bar.get_height()
        if height > 0:
            ax.text(bar.get_x() + bar.get_width()/2., height,
                   f'{int(height)}', ha='center', va='bottom', fontsize=8)

    plt.tight_layout()

    # Figure 2: Daily precipitation with rolling average
    fig2, ax = plt.subplots(figsize=(15, 6))

    ax.fill_between(daily_filtered.index, daily_filtered.values,
                   color='#90CAF9', alpha=0.4, label='Daily Precipitation')

    # The average starts once 7 days are available rather than waiting for 30
    rolling_avg = daily_filtered.rolling(window=30, min_periods=7).mean()
    ax.plot(rolling_avg.index, rolling_avg.values,
           color='#1565C0', linewidth=2, label='30-day Moving Average')

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Precipitation (mm)', fontsize=12)
    ax.set_title('Daily Precipitation with 30-day Moving Average', fontsize=14, fontweight='bold')
    ax.xaxis.set_major_locator(MonthLocator(interval=3))
    ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()

    # Summary statistics
    print("\nPrecipitation Summary:")
    print("=" * 50)
    print(f"Total precipitation: {daily_filtered.sum():.1f} mm")
    print(f"Average daily: {daily_filtered.mean():.2f} mm")
    print(f"Maximum daily: {daily_filtered.max():.1f} mm")
    print(f"Days with precipitation: {(daily_filtered > 0).sum()}")

    return fig1, fig2, daily_filtered, monthly_filtered

# Task 1 driver: build both precipitation figures, save them as PNGs, then display.
print("="*80)
print("TASK 1: PRECIPITATION PATTERNS")
print("="*80)
fig1, fig2, daily_filtered, monthly_filtered = plot_precipitation_patterns(daily_precip, monthly_precip, df)
fig1.savefig(os.path.join(dirs['plots'], 'monthly_precipitation.png'), dpi=150, bbox_inches='tight')
fig2.savefig(os.path.join(dirs['plots'], 'daily_precipitation.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 2: BC-Precipitation Relationship

**Goal**: Analyze how BC concentrations vary with precipitation intensity.

In [None]:
def analyze_bc_precipitation_relationship(bc_df, meteo_df, bc_col='IR BCc'):
    """
    Join BC with precipitation on matching timestamps and summarize BC per
    rain-intensity category.

    Returns the merged frame (with a 'precip_category' column) and a table of
    mean / std / count of BC for each category that occurs.
    """
    category_labels = ['No Rain', 'Light (<1mm)', 'Moderate (1-5mm)', 'Heavy (>5mm)']
    # Intervals are right-closed: (-inf, 0], (0, 1], (1, 5], (5, inf)
    bin_edges = [-np.inf, 0, 1, 5, np.inf]

    # Inner join keeps only timestamps present in both indexes
    bc_subset = bc_df[[bc_col, 'Ethiopian_Season']]
    merged = bc_subset.merge(
        meteo_df['precipitation_mm'],
        left_index=True,
        right_index=True,
        how='inner',
    )

    merged['precip_category'] = pd.cut(
        merged['precipitation_mm'], bins=bin_edges, labels=category_labels
    )

    # Per-category BC statistics (categories without rows are omitted)
    grouped_bc = merged.groupby('precip_category', observed=True)[bc_col]
    stats_df = grouped_bc.agg(['mean', 'std', 'count'])

    print("\nBC Concentrations by Precipitation Category:")
    print("=" * 60)
    print(stats_df.round(3))

    # Share of merged rows that falls in each category
    total = len(merged)
    print("\nTime in each category:")
    for cat in merged['precip_category'].cat.categories:
        n = merged['precip_category'].eq(cat).sum()
        print(f"  {cat}: {n:,} ({n/total*100:.1f}%)")

    return merged, stats_df


def plot_bc_by_precipitation(merged, bc_col='IR BCc'):
    """
    Draw one violin per precipitation category showing the BC distribution.

    Categories with no non-null BC values are left out. Returns the figure.
    """
    category_colors = {
        'No Rain': '#FFF176',
        'Light (<1mm)': '#81C784',
        'Moderate (1-5mm)': '#4FC3F7',
        'Heavy (>5mm)': '#1565C0',
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    # Keep only the categories that actually have BC values to plot
    populated = []
    for label, shade in category_colors.items():
        values = merged.loc[merged['precip_category'] == label, bc_col].dropna()
        if not values.empty:
            populated.append((label, shade, values))

    valid_categories = [label for label, _, _ in populated]
    positions = range(len(valid_categories))

    parts = ax.violinplot(
        [values for _, _, values in populated],
        positions=positions,
        showmeans=True,
        showmedians=True,
    )

    # Color the violins
    for body, (_, shade, _) in zip(parts['bodies'], populated):
        body.set_facecolor(shade)
        body.set_alpha(0.7)

    ax.set_xticks(positions)
    ax.set_xticklabels(valid_categories)
    ax.set_xlabel('Precipitation Category', fontsize=12)
    ax.set_ylabel('BC Concentration (µg/m³)', fontsize=12)
    ax.set_title('BC Concentrations by Precipitation Intensity', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    # Row counts per category (includes rows whose BC value is missing)
    for position, label in enumerate(valid_categories):
        n = (merged['precip_category'] == label).sum()
        ax.text(position, ax.get_ylim()[1] * 0.95, f'n={n:,}', ha='center', fontsize=9)

    plt.tight_layout()
    return fig

# Task 2 driver: merge BC with hourly precipitation, then plot BC by rain category.
# NOTE(review): the merge inside the analysis is an exact-timestamp inner join, so
# only BC records whose timestamps also occur in meteo_df are kept - confirm the
# two indexes share resolution and time zone.
print("="*80)
print("TASK 2: BC-PRECIPITATION RELATIONSHIP")
print("="*80)
merged_precip, precip_stats = analyze_bc_precipitation_relationship(df, meteo_df)
fig = plot_bc_by_precipitation(merged_precip)
# plt.savefig writes the current figure, i.e. the one just created above
plt.savefig(os.path.join(dirs['plots'], 'bc_by_precipitation.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 3: Dynamic Season Definitions

**Goal**: Define seasons based on actual precipitation patterns rather than fixed months.

In [None]:
def define_dynamic_seasons(daily_precip, lower_threshold=5, upper_threshold=15,
                           window=30, min_periods=15):
    """
    Define seasons dynamically based on precipitation patterns.

    Each day is classified from the mean daily precipitation over the trailing
    window. The mean (rather than the window sum compared against
    threshold * 30) is used so that partially filled windows - which occur
    while fewer than `window` days are available - are judged on the same
    mm/day scale as full windows instead of being biased toward drier classes.

    Parameters:
    -----------
    daily_precip : Series
        Daily precipitation data
    lower_threshold : float
        mm/day threshold for dry vs light rainy
    upper_threshold : float
        mm/day threshold for light vs heavy rainy
    window : int
        Length of the trailing window in days (default 30)
    min_periods : int
        Minimum number of days required in the window before a day is
        classified (default 15); earlier days are left unlabelled (NaN)

    Returns:
    --------
    Series with dynamic season labels, upsampled to hourly by forward fill
    """
    # Trailing mean precipitation rate in mm/day
    rolling_rate = daily_precip.rolling(window=window, min_periods=min_periods).mean()

    # Define seasons based on the rolling precipitation rate
    seasons = pd.Series(index=daily_precip.index, dtype='str')
    seasons[rolling_rate <= lower_threshold] = 'Dry Season'
    seasons[(rolling_rate > lower_threshold) & (rolling_rate <= upper_threshold)] = 'Belg Rainy Season'
    seasons[rolling_rate > upper_threshold] = 'Kiremt Rainy Season'

    # Resample to hourly: each day's label is carried forward through its hours
    hourly_seasons = seasons.resample('h').ffill()

    return hourly_seasons


def compare_season_definitions(bc_df, dynamic_seasons, bc_col='IR BCc'):
    """
    Print BC statistics per season under the static (month-based) and the
    dynamic (precipitation-based) labelling.

    Returns a copy of bc_df with an added 'Dynamic_Season' column.
    """
    # Work on a copy; each BC timestamp takes the most recent dynamic label
    bc_filtered = bc_df.copy()
    bc_filtered['Dynamic_Season'] = dynamic_seasons.reindex(bc_filtered.index, method='ffill')

    print("\nComparison: Static vs Dynamic Season Definitions")
    print("=" * 70)

    for season in SEASONS_ORDER:
        print(f"\n{season}:")

        # Static
        in_static = bc_filtered['Ethiopian_Season'] == season
        static_data = bc_filtered.loc[in_static, bc_col]
        print(f"  Static:  n={len(static_data):,}, mean={static_data.mean():.3f}, median={static_data.median():.3f}")

        # Dynamic
        in_dynamic = bc_filtered['Dynamic_Season'] == season
        dynamic_data = bc_filtered.loc[in_dynamic, bc_col]
        if dynamic_data.empty:
            print(f"  Dynamic: n=0 (no data classified in this season)")
        else:
            print(f"  Dynamic: n={len(dynamic_data):,}, mean={dynamic_data.mean():.3f}, median={dynamic_data.median():.3f}")

    return bc_filtered


def plot_season_comparison(bc_filtered, bc_col='IR BCc'):
    """
    Draw one panel per season with box plots of BC under the static and,
    where available, the dynamic season labelling. Returns the figure.
    """
    static_color, dynamic_color = '#2196F3', '#FFA726'

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    for idx, (ax, season) in enumerate(zip(axes, SEASONS_ORDER)):
        static_data = bc_filtered.loc[bc_filtered['Ethiopian_Season'] == season, bc_col].dropna()
        dynamic_data = bc_filtered.loc[bc_filtered['Dynamic_Season'] == season, bc_col].dropna()
        has_dynamic = len(dynamic_data) > 0

        # Only the static box is drawn when no record carries this dynamic label
        if has_dynamic:
            samples, names = [static_data, dynamic_data], ['Static', 'Dynamic']
        else:
            samples, names = [static_data], ['Static']

        bp = ax.boxplot(samples, labels=names, patch_artist=True, showfliers=False)
        for box, shade in zip(bp['boxes'], (static_color, dynamic_color)):
            box.set_facecolor(shade)
            box.set_alpha(0.7)

        if has_dynamic:
            ax.text(0.95, 0.95, f'Dynamic: n={len(dynamic_data):,}\nMean: {dynamic_data.mean():.2f}',
                    transform=ax.transAxes, fontsize=9, va='top', ha='right',
                    bbox=dict(facecolor='#FFA726', alpha=0.2))
        else:
            ax.text(0.5, 0.5, 'No dynamic\ndata', transform=ax.transAxes, ha='center', fontsize=10)

        ax.set_title(f'{season}', fontsize=12, fontweight='bold')
        # Only the left-most panel carries the y-axis label
        ax.set_ylabel('BC Concentration (µg/m³)' if idx == 0 else '')
        ax.grid(True, alpha=0.3, axis='y')

        ax.text(0.05, 0.95, f'Static: n={len(static_data):,}\nMean: {static_data.mean():.2f}',
                transform=ax.transAxes, fontsize=9, va='top',
                bbox=dict(facecolor='#2196F3', alpha=0.2))

    plt.suptitle('Static vs Dynamic Season Definitions: BC Concentrations', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Task 3 driver: classify seasons with the default thresholds (5 and 15),
# compare them against the static month-based seasons, and save the box plots.
print("="*80)
print("TASK 3: DYNAMIC SEASON DEFINITIONS")
print("="*80)
dynamic_seasons = define_dynamic_seasons(daily_precip)
bc_with_dynamic = compare_season_definitions(df, dynamic_seasons)
fig = plot_season_comparison(bc_with_dynamic)
# plt.savefig writes the current figure, i.e. the one just created above
plt.savefig(os.path.join(dirs['plots'], 'season_comparison.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 4: Temperature Correlations

**Goal**: Analyze BC-temperature relationships by season.

In [None]:
def analyze_temperature_bc_correlation(bc_df, meteo_df, bc_col='IR BCc'):
    """
    Compute the Pearson correlation between temperature and BC per season.

    Returns the merged, NaN-free frame and a dict keyed by season with
    'r', 'p' and 'n'. Seasons with two or fewer rows are skipped.
    """
    # Inner join on timestamp, then drop rows with any missing value
    bc_subset = bc_df[[bc_col, 'Ethiopian_Season']]
    merged = bc_subset.merge(
        meteo_df['temperature_c'],
        left_index=True,
        right_index=True,
        how='inner',
    ).dropna()

    print("\nTemperature-BC Correlations by Season:")
    print("=" * 60)

    correlations = {}
    for season in SEASONS_ORDER:
        season_data = merged[merged['Ethiopian_Season'] == season]

        # Too few points for a correlation
        if len(season_data) <= 2:
            continue

        corr, p_value = stats.pearsonr(season_data['temperature_c'], season_data[bc_col])
        correlations[season] = {'r': corr, 'p': p_value, 'n': len(season_data)}

        # Asterisk marks significance at the 5% level
        sig = '*' if p_value < 0.05 else ''
        print(f"\n{season}:")
        print(f"  n = {len(season_data):,}")
        print(f"  r = {corr:.3f}{sig}")
        print(f"  p = {p_value:.3e}")

    return merged, correlations


def plot_temperature_bc_correlation(merged, correlations, bc_col='IR BCc'):
    """
    Draw one hexbin panel per season of BC against temperature, with a
    least-squares trend line and the correlation statistics. Returns the figure.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    for idx, (ax, season) in enumerate(zip(axes, SEASONS_ORDER)):
        season_data = merged[merged['Ethiopian_Season'] == season]

        # Empty season: title plus placeholder text only
        if season_data.empty:
            ax.text(0.5, 0.5, 'No data', transform=ax.transAxes, ha='center')
            ax.set_title(f'{season}', fontsize=12, fontweight='bold')
            continue

        temperature = season_data['temperature_c']
        bc_values = season_data[bc_col]

        hb = ax.hexbin(temperature, bc_values, gridsize=30, cmap='YlOrRd')

        # Trend line: first-degree fit evaluated across the observed range
        coefficients = np.polyfit(temperature, bc_values, 1)
        x_range = np.linspace(temperature.min(), temperature.max(), 100)
        ax.plot(x_range, np.polyval(coefficients, x_range), 'b--', linewidth=2, alpha=0.8)

        # Correlation stats (only for seasons that were analyzed)
        if season in correlations:
            c = correlations[season]
            ax.text(0.05, 0.95, f"r = {c['r']:.3f}\np = {c['p']:.2e}\nn = {c['n']:,}",
                   transform=ax.transAxes, fontsize=10, va='top',
                   bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

        ax.set_xlabel('Temperature (°C)', fontsize=11)
        ax.set_ylabel('BC Concentration (µg/m³)' if idx == 0 else '', fontsize=11)
        ax.set_title(f'{season}', fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)

        plt.colorbar(hb, ax=ax, label='Count')

    plt.suptitle('BC vs Temperature Correlation by Season', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Task 4 driver: per-season Pearson correlation of BC with temperature, plus hexbin plots.
print("="*80)
print("TASK 4: TEMPERATURE CORRELATIONS")
print("="*80)
merged_temp, temp_correlations = analyze_temperature_bc_correlation(df, meteo_df)
fig = plot_temperature_bc_correlation(merged_temp, temp_correlations)
# plt.savefig writes the current figure, i.e. the one just created above
plt.savefig(os.path.join(dirs['plots'], 'temperature_correlation.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 5: Precipitation Threshold Sensitivity

**Goal**: Test different precipitation thresholds for defining seasons.

In [None]:
def test_precipitation_thresholds(daily_precip, bc_df, threshold_sets, bc_col='IR BCc'):
    """
    Test different precipitation threshold combinations.

    For every (lower, upper) pair the dynamic seasons are recomputed, aligned
    to the BC timestamps, and BC statistics are collected per season.

    Parameters:
    -----------
    daily_precip : Series
        Daily precipitation data
    bc_df : DataFrame
        BC data with a sorted datetime index
    threshold_sets : iterable of (lower, upper)
        Threshold pairs passed on to define_dynamic_seasons
    bc_col : str
        BC column to summarize

    Returns:
    --------
    dict mapping (lower, upper) -> {season: {'n', 'mean', 'median', 'std'}}.
    A season with no BC values reports n=0 and NaN statistics, so that "no
    data" cannot be mistaken for a measured concentration of 0.
    """
    results = {}
    # Selecting the column once avoids copying the whole frame per threshold set
    bc_values = bc_df[bc_col]

    print("\nPrecipitation Threshold Sensitivity Analysis")
    print("=" * 80)

    for lower, upper in threshold_sets:
        print(f"\nThresholds: lower={lower}mm, upper={upper}mm")
        print("-" * 40)

        # Define seasons with this threshold
        dynamic_seasons = define_dynamic_seasons(daily_precip, lower, upper)

        # Each BC timestamp takes the most recent season label
        aligned = dynamic_seasons.reindex(bc_df.index, method='ffill')

        # Calculate stats for each season
        threshold_results = {}
        for season in SEASONS_ORDER:
            in_season = (aligned == season).to_numpy()
            season_data = bc_values[in_season].dropna()
            # pandas returns NaN for mean/median/std of an empty Series
            threshold_results[season] = {
                'n': len(season_data),
                'mean': season_data.mean(),
                'median': season_data.median(),
                'std': season_data.std(),
            }
            print(f"  {season}: n={len(season_data):,}, mean={threshold_results[season]['mean']:.3f}")

        results[(lower, upper)] = threshold_results

    return results


def calculate_threshold_agreement(daily_precip, threshold_sets):
    """
    Calculate agreement between different threshold classifications.

    For every pair of threshold sets, prints the percentage of days on which
    both assign the same season. Days that either classification leaves
    unlabelled (NaN, before the rolling window has enough data) are excluded:
    NaN never compares equal, so counting them would register every such day
    as a disagreement and understate the agreement.

    Parameters:
    -----------
    daily_precip : Series
        Daily precipitation data
    threshold_sets : iterable of (lower, upper)
        Threshold pairs passed on to define_dynamic_seasons

    Returns:
    --------
    dict mapping (lower, upper) -> daily Series of season labels
    """
    # Define seasons for each threshold set (hourly labels reduced back to daily)
    season_series = {}
    for lower, upper in threshold_sets:
        seasons = define_dynamic_seasons(daily_precip, lower, upper)
        season_series[(lower, upper)] = seasons.resample('D').last()

    print("\nThreshold Classification Agreement:")
    print("=" * 60)

    # Calculate pairwise agreement
    for (l1, u1), (l2, u2) in itertools.combinations(threshold_sets, 2):
        s1 = season_series[(l1, u1)]
        s2 = season_series[(l2, u2)]

        # Align on shared days, then keep days labelled by both
        common = s1.index.intersection(s2.index)
        first, second = s1.loc[common], s2.loc[common]
        classified = first.notna() & second.notna()

        if classified.any():
            agreement = (first[classified] == second[classified]).mean() * 100
        else:
            agreement = float('nan')

        print(f"({l1},{u1}) vs ({l2},{u2}): {agreement:.1f}% agreement")

    return season_series


def plot_threshold_comparison(results, bc_col='IR BCc'):
    """
    Draw one bar chart per threshold set showing mean BC (± std) per season.

    `results` is the mapping returned by test_precipitation_thresholds;
    `bc_col` is accepted for signature symmetry and is not used. Returns the figure.
    """
    n_thresholds = len(results)
    # squeeze=False always yields an array, so a single panel needs no special case
    fig, axes = plt.subplots(1, n_thresholds, figsize=(6*n_thresholds, 5), squeeze=False)
    axes = axes.ravel()

    x_positions = range(len(SEASONS_ORDER))
    short_labels = [s.replace(' Season', '') for s in SEASONS_ORDER]
    colors = [SEASON_COLORS[s] for s in SEASONS_ORDER]

    for idx, (ax, ((lower, upper), seasonal_stats)) in enumerate(zip(axes, results.items())):
        means = [seasonal_stats[s]['mean'] for s in SEASONS_ORDER]
        stds = [seasonal_stats[s]['std'] for s in SEASONS_ORDER]

        bars = ax.bar(x_positions, means, yerr=stds,
                     color=colors, alpha=0.7, edgecolor='black', capsize=5)

        ax.set_xticks(x_positions)
        ax.set_xticklabels(short_labels, rotation=15)
        ax.set_xlabel('Season', fontsize=11)
        ax.set_ylabel('Mean BC (µg/m³)' if idx == 0 else '', fontsize=11)
        ax.set_title(f'Thresholds: {lower}mm, {upper}mm', fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')

        # Sample size above each error bar; empty seasons get no label
        for s, bar, spread in zip(SEASONS_ORDER, bars, stds):
            n = seasonal_stats[s]['n']
            if n > 0:
                ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + spread + 0.1,
                       f'n={n:,}', ha='center', fontsize=8)

    plt.suptitle('BC Concentrations with Different Precipitation Thresholds', 
                fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

# Task 5 driver: repeat the dynamic-season BC statistics for every threshold set,
# report pairwise agreement between the classifications, and save the bar charts.
print("="*80)
print("TASK 5: PRECIPITATION THRESHOLD SENSITIVITY")
print("="*80)
threshold_results = test_precipitation_thresholds(daily_precip, df, PRECIP_THRESHOLD_SETS)
season_series = calculate_threshold_agreement(daily_precip, PRECIP_THRESHOLD_SETS)
fig = plot_threshold_comparison(threshold_results)
# plt.savefig writes the current figure, i.e. the one just created above
plt.savefig(os.path.join(dirs['plots'], 'threshold_sensitivity.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Summary

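## Functions Defined:
- `setup_directories()` - Create output folders
- `load_aethalometer_addis()` - Load and clean BC data
- `load_meteorological_data()` - Load weather data
- `plot_precipitation_patterns()` - Visualize rainfall patterns
- `analyze_bc_precipitation_relationship()` - BC vs rain intensity
- `plot_bc_by_precipitation()` - Violin plot of BC by rain category
- `define_dynamic_seasons()` - Precipitation-based season definition
- `compare_season_definitions()` - Static vs dynamic seasons
- `plot_season_comparison()` - Box plots of static vs dynamic seasons
- `analyze_temperature_bc_correlation()` - BC-temperature correlations
- `plot_temperature_bc_correlation()` - Hexbin plots of BC vs temperature
- `test_precipitation_thresholds()` - Threshold sensitivity analysis
- `calculate_threshold_agreement()` - Agreement between threshold sets
- `plot_threshold_comparison()` - Bar charts of BC by threshold set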

## To Run This Notebook:
1. Update the file paths for BC and meteorological data (`BC_FILEPATH` and `METEO_FILEPATH`)
2. Uncomment the data loading and analysis cells
3. Run all cells

In [None]:
# Closing banner with a reminder of the run steps.
# The path constants are named as they are defined in this notebook
# (BC_FILEPATH / METEO_FILEPATH) so the instruction can be followed literally.
print("="*80)
print("NOTEBOOK COMPLETE")
print("="*80)
print("\nTo run this analysis:")
print("1. Update 'BC_FILEPATH' and 'METEO_FILEPATH' with your data paths")
print("2. Uncomment the analysis cells")
print("3. Run all cells")