# Addis Ababa: Temporal Patterns Analysis

This notebook covers diurnal, weekly, and seasonal patterns in BC concentrations.

## Tasks Covered:
1. **Diurnal patterns** - hourly BC variations by wavelength
2. **Weekly patterns** - weekday vs weekend analysis
3. **Ethiopian seasonal patterns** - BC by Dry/Belg/Kiremt seasons
4. **Extreme events analysis** - high BC event characterization
5. **Rate of change analysis** - BC buildup and decay patterns

---

## Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import calendar

# Make the local ``scripts`` folder importable. A notebook has no ``__file__``,
# so the folder is resolved relative to the current working directory.
notebook_dir = os.getcwd()
scripts_path = os.path.join(notebook_dir, 'scripts')
if scripts_path not in sys.path:
    sys.path.insert(0, scripts_path)

# Optional project helpers: report whether each one could be imported
try:
    from config import SITES
except ImportError:
    print("Config not found - using inline definitions")
else:
    print("Loaded config")

try:
    from plotting import PlotConfig
except ImportError:
    print("Plotting utilities not found - will define inline")
else:
    print("Loaded plotting utilities")

# Global matplotlib look: style sheet, default figure size and font size
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

# Create output directories
def setup_directories():
    """Create the output folders for this site and return their paths.

    Returns:
        dict: ``{'plots': ..., 'data': ...}`` mapping each output kind to its
        relative directory path. Directories that already exist are left
        untouched.
    """
    plots_dir = 'output/plots/addis_ababa'
    data_dir = 'output/data/addis_ababa'

    for target in (plots_dir, data_dir):
        os.makedirs(target, exist_ok=True)

    return {'plots': plots_dir, 'data': data_dir}

# Create the output folders once; `dirs` is reused by later cells when saving figures
dirs = setup_directories()
print("Setup complete!")

## Site Configuration

In [None]:
# Site-level settings for Addis Ababa: identifiers, aethalometer channels,
# the BC columns expected in the data, and the month -> season definition.
ADDIS_CONFIG = dict(
    name='Addis_Ababa',
    timezone='Africa/Addis_Ababa',
    # Channel name -> wavelength in nm
    wavelengths=dict(UV=375, Blue=470, Green=528, Red=625, IR=880),
    # Every channel has three BC columns: "<channel> BC1", "... BC2", "... BCc"
    bc_columns=[
        f'{channel} {variant}'
        for channel in ('UV', 'Blue', 'Green', 'Red', 'IR')
        for variant in ('BC1', 'BC2', 'BCc')
    ],
    # Primary BC measurement for most analyses
    primary_bc_col='IR BCc',
    # Season name -> calendar months (1-12)
    seasons={
        'Dry Season': [10, 11, 12, 1, 2],
        'Belg Rainy Season': [3, 4, 5],
        'Kiremt Rainy Season': [6, 7, 8, 9],
    },
)

# Display order of the seasons and the colour used for each in plots
SEASONS_ORDER = list(ADDIS_CONFIG['seasons'])
SEASON_COLORS = dict(zip(SEASONS_ORDER, ('#E67E22', '#27AE60', '#3498DB')))

print(f"Site: {ADDIS_CONFIG['name']}")
print(f"Primary BC column: {ADDIS_CONFIG['primary_bc_col']}")

## Data Loading

Uses the same loading function from the source apportionment notebook.

In [None]:
# Machine-specific location of the cleaned aethalometer pickle; edit for your setup.
# Adjacent string literals are concatenated into one path.
DATA_FILEPATH = (
    "/Users/ahmadjalil/Library/CloudStorage/GoogleDrive-ahzs645@gmail.com/"
    "My Drive/University/Research/Grad/UC Davis Ann/NASA MAIA/Data/"
    "Aethelometry Data/JacrosMA350 60s Data20250804082112/"
    "df_jacros_cleaned_API_and_OG_manual_BC_all_wl.pkl"
)

def load_aethalometer_addis(filepath):
    """
    Read the pickled Addis Ababa aethalometer table and prepare it for analysis.

    Processing steps, in order:
      1. parse ``datetime_local``, use it as the (sorted) index;
      2. divide every configured BC column that is present by 1000
         (ng/m³ -> µg/m³);
      3. add ``Month``, ``Hour``, ``DayOfWeek`` and ``Ethiopian_Season`` columns;
      4. per BC column, replace negative values with NaN, then replace values
         above mean + 3*std (computed after step one of the cleaning) with NaN.

    Returns the processed DataFrame.
    """
    df = pd.read_pickle(filepath)

    # Time index
    df['datetime_local'] = pd.to_datetime(df['datetime_local'])
    df = df.set_index('datetime_local').sort_index()

    # Only touch the configured BC columns that actually exist in this file
    present_bc = [name for name in ADDIS_CONFIG['bc_columns'] if name in df.columns]

    # Unit conversion: ng/m³ -> µg/m³
    for name in present_bc:
        df[name] = df[name] / 1000

    # Calendar helpers derived from the index
    timestamps = df.index
    df['Month'] = timestamps.month
    df['Hour'] = timestamps.hour
    df['DayOfWeek'] = timestamps.dayofweek

    # Season lookup: Dry is checked first, then Belg; anything else is Kiremt
    season_months = ADDIS_CONFIG['seasons']

    def _season_for(month):
        if month in season_months['Dry Season']:
            return 'Dry Season'
        if month in season_months['Belg Rainy Season']:
            return 'Belg Rainy Season'
        return 'Kiremt Rainy Season'

    df['Ethiopian_Season'] = df['Month'].map(_season_for)

    # Outlier screening: negatives first, then the 3-sigma ceiling on what is left
    for name in present_bc:
        values = df[name]
        values = values.mask(values < 0)
        ceiling = values.mean() + 3 * values.std()
        df[name] = values.mask(values > ceiling)

    return df

# Load the dataset once at notebook level; the analysis cells below read this `df`
df = load_aethalometer_addis(DATA_FILEPATH)
print(f"Loaded {len(df):,} records")
print(f"Date range: {df.index.min()} to {df.index.max()}")

---

# Task 1: Diurnal Patterns

**Goal**: Analyze hourly BC concentration patterns.

In [None]:
def plot_diurnal_pattern(df, bc_cols=('IR BCc', 'UV BCc'), title_suffix=""):
    """
    Plot average diurnal pattern for specified BC columns.

    For every requested column that exists in ``df``, draws the mean per
    ``Hour`` as a line and a shaded band of mean +/- one standard deviation.

    Args:
        df: DataFrame with an ``Hour`` column and the BC columns to plot.
        bc_cols: Iterable of column names to plot; names missing from ``df``
            are skipped. The default is an immutable tuple so it cannot be
            modified between calls.
        title_suffix: Text appended to the figure title.

    Returns:
        The matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    colors = ['darkred', 'purple', 'blue', 'green', 'orange']

    for i, col in enumerate(bc_cols):
        if col not in df.columns:
            continue

        # Group once and derive both statistics from the same grouping
        by_hour = df.groupby('Hour')[col]
        hourly_avg = by_hour.mean()
        hourly_std = by_hour.std()

        # Column names look like "<channel> <variant>"; label by channel and wavelength
        wave = col.split()[0]
        wavelength = ADDIS_CONFIG['wavelengths'].get(wave, '')
        label = f'{wave} ({wavelength}nm)' if wavelength else col

        # Colour is chosen by position in bc_cols, cycling if there are more than five
        color = colors[i % len(colors)]
        ax.plot(hourly_avg.index, hourly_avg.values, marker='o',
                color=color, linewidth=2, label=label)
        ax.fill_between(hourly_avg.index,
                        hourly_avg - hourly_std,
                        hourly_avg + hourly_std,
                        alpha=0.2, color=color)

    ax.set_xlabel('Hour of Day', fontsize=12)
    ax.set_ylabel('BC Concentration (µg/m³)', fontsize=12)
    ax.set_title(f'Average Diurnal Pattern of Black Carbon{title_suffix}', fontsize=14, fontweight='bold')
    ax.set_xticks(range(24))
    ax.set_xticklabels([f'{h:02d}:00' for h in range(24)], rotation=45)
    # Only add a legend when something was drawn (avoids a matplotlib warning)
    if ax.get_legend_handles_labels()[0]:
        ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig


def plot_diurnal_by_season(df, bc_col='IR BCc'):
    """
    Plot diurnal patterns separated by Ethiopian season.

    One panel per entry of ``SEASONS_ORDER`` showing the hourly mean of
    ``bc_col`` with a +/- one standard deviation band, annotated with the
    number of valid samples and the hour of the highest mean.

    Args:
        df: DataFrame with ``Ethiopian_Season``, ``Hour`` and ``bc_col`` columns.
        bc_col: Name of the BC column to plot.

    Returns:
        The matplotlib Figure.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    for idx, season in enumerate(SEASONS_ORDER):
        ax = axes[idx]
        season_data = df[df['Ethiopian_Season'] == season]

        # Group once and derive both statistics from the same grouping
        by_hour = season_data.groupby('Hour')[bc_col]
        hourly_avg = by_hour.mean()
        hourly_std = by_hour.std()

        ax.plot(hourly_avg.index, hourly_avg.values, marker='o',
                color=SEASON_COLORS[season], linewidth=2)
        ax.fill_between(hourly_avg.index,
                        hourly_avg - hourly_std,
                        hourly_avg + hourly_std,
                        alpha=0.3, color=SEASON_COLORS[season])

        ax.set_xlabel('Hour of Day', fontsize=11)
        ax.set_ylabel('BC Concentration (µg/m³)' if idx == 0 else '', fontsize=11)
        ax.set_title(f'{season}', fontsize=12, fontweight='bold')
        ax.set_xticks(range(0, 24, 3))
        ax.grid(True, alpha=0.3)

        # Add stats. A season with no valid data has no peak hour; idxmax()
        # and the integer format below would fail, so report "n/a" instead.
        n = int(season_data[bc_col].notna().sum())
        if hourly_avg.notna().any():
            peak_label = f'{hourly_avg.idxmax():02d}:00'
        else:
            peak_label = 'n/a'
        ax.text(0.95, 0.95, f'n={n:,}\nPeak: {peak_label}',
                transform=ax.transAxes, fontsize=9, va='top', ha='right',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.9))

    plt.suptitle('Diurnal BC Patterns by Ethiopian Season', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 1: DIURNAL PATTERNS", "=" * 80, sep="\n")

# Overall diurnal cycle for the IR and UV columns
fig1 = plot_diurnal_pattern(df, bc_cols=['IR BCc', 'UV BCc'])
fig1.savefig(os.path.join(dirs['plots'], 'diurnal_pattern.png'), dpi=150, bbox_inches='tight')
plt.show()

# Same cycle, one panel per season
fig2 = plot_diurnal_by_season(df)
fig2.savefig(os.path.join(dirs['plots'], 'diurnal_by_season.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 2: Weekly Patterns

**Goal**: Analyze weekday vs weekend BC differences.

In [None]:
def analyze_weekend_effect(df, bc_col='IR BCc'):
    """
    Compare weekday and weekend BC levels within each season.

    Rows with ``DayOfWeek`` below 5 count as weekday, 5 and above as weekend.
    For every season in ``SEASONS_ORDER`` the function prints and stores both
    means, the percentage reduction from weekday to weekend, the p-value of
    an independent two-sample t-test, and the sample sizes.

    Returns a dict keyed by season name.
    """
    results = {}

    print("\nWeekend Effect Analysis:")
    print("=" * 60)

    for season in SEASONS_ORDER:
        in_season = df[df['Ethiopian_Season'] == season]
        day_of_week = in_season['DayOfWeek']

        # Valid (non-NaN) samples for each part of the week
        weekday_vals = in_season.loc[day_of_week < 5, bc_col].dropna()
        weekend_vals = in_season.loc[day_of_week >= 5, bc_col].dropna()

        weekday_avg = weekday_vals.mean()
        weekend_avg = weekend_vals.mean()
        reduction = (weekday_avg - weekend_avg) / weekday_avg * 100

        # Significance of the weekday/weekend difference
        _, p_value = stats.ttest_ind(weekday_vals, weekend_vals)

        summary = {
            'weekday_avg': weekday_avg,
            'weekend_avg': weekend_avg,
            'reduction_pct': reduction,
            'p_value': p_value,
            'n_weekday': len(weekday_vals),
            'n_weekend': len(weekend_vals),
        }
        results[season] = summary

        # A trailing '*' marks p < 0.05
        if p_value < 0.05:
            sig = '*'
        else:
            sig = ''
        print(f"\n{season}:")
        print(f"  Weekday avg: {weekday_avg:.3f} µg/m³ (n={summary['n_weekday']:,})")
        print(f"  Weekend avg: {weekend_avg:.3f} µg/m³ (n={summary['n_weekend']:,})")
        print(f"  Reduction: {reduction:.1f}%{sig} (p={p_value:.3e})")

    return results


def plot_weekly_heatmap(df, bc_col='IR BCc'):
    """
    Create weekly pattern heatmap (hour x day of week) by season.

    One panel per entry of ``SEASONS_ORDER``; each cell is the mean of
    ``bc_col`` for that hour (rows) and day of week (columns, Monday first).

    Args:
        df: DataFrame with ``Ethiopian_Season``, ``Hour``, ``DayOfWeek`` and
            ``bc_col`` columns.
        bc_col: Name of the BC column to aggregate.

    Returns:
        The matplotlib Figure.
    """
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))

    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    hours = list(range(24))
    hour_names = [f'{h:02d}:00' for h in hours]

    for idx, season in enumerate(SEASONS_ORDER):
        ax = axes[idx]
        season_data = df[df['Ethiopian_Season'] == season]

        # Create pivot table
        weekly_pattern = season_data.pivot_table(
            values=bc_col,
            index='Hour',
            columns='DayOfWeek',
            aggfunc='mean'
        )
        # The tick labels below assume a full 24 x 7 grid. pivot_table omits
        # hours/days without data, which would shift the labels onto the
        # wrong cells, so force the complete grid (missing cells become NaN).
        weekly_pattern = weekly_pattern.reindex(index=hours, columns=list(range(7)))

        if weekly_pattern.notna().to_numpy().any():
            sns.heatmap(weekly_pattern.astype(float), ax=ax, cmap='YlOrRd',
                        xticklabels=day_names,
                        yticklabels=hour_names,
                        cbar_kws={'label': 'BC (µg/m³)'})
        else:
            # Nothing to colour for this season
            ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                    transform=ax.transAxes)

        ax.set_title(f'{season}', fontsize=12, fontweight='bold')
        ax.set_xlabel('Day of Week')
        ax.set_ylabel('Hour of Day' if idx == 0 else '')

    plt.suptitle('Weekly BC Patterns by Ethiopian Season', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 2: WEEKLY PATTERNS", "=" * 80, sep="\n")

# Printed weekday/weekend comparison per season
weekend_results = analyze_weekend_effect(df)

# Hour x day-of-week heatmaps, saved next to the other plots
fig = plot_weekly_heatmap(df)
fig.savefig(os.path.join(dirs['plots'], 'weekly_heatmap.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 3: Ethiopian Seasonal Patterns

**Goal**: Comprehensive analysis of BC patterns across Ethiopian seasons.

In [None]:
def analyze_seasonal_statistics(df, bc_col='IR BCc'):
    """
    Calculate detailed statistics for each Ethiopian season.

    Args:
        df: DataFrame with ``Ethiopian_Season``, ``Hour`` and ``bc_col`` columns.
        bc_col: Name of the BC column to summarise.

    Returns:
        dict keyed by season name (in ``SEASONS_ORDER``). Each value holds
        ``n``, ``mean``, ``median``, ``std``, ``min``, ``max``, ``q25``,
        ``q75`` (computed on non-NaN values) and ``peak_hour``, the hour with
        the highest mean. ``peak_hour`` is ``None`` when the season has no
        valid data.
    """
    results = {}

    print("\nSeasonal Statistics:")
    print("=" * 80)

    for season in SEASONS_ORDER:
        # Filter once and reuse for both the summary and the hourly means
        season_rows = df[df['Ethiopian_Season'] == season]
        season_data = season_rows[bc_col].dropna()

        # idxmax() cannot pick a peak from an empty or all-NaN series
        hourly_mean = season_rows.groupby('Hour')[bc_col].mean()
        peak_hour = hourly_mean.idxmax() if hourly_mean.notna().any() else None

        results[season] = {
            'n': len(season_data),
            'mean': season_data.mean(),
            'median': season_data.median(),
            'std': season_data.std(),
            'min': season_data.min(),
            'max': season_data.max(),
            'q25': season_data.quantile(0.25),
            'q75': season_data.quantile(0.75),
            'peak_hour': peak_hour
        }

        r = results[season]
        peak_label = f"{r['peak_hour']:02d}:00" if r['peak_hour'] is not None else "n/a"
        print(f"\n{season}:")
        print(f"  n = {r['n']:,}")
        print(f"  Mean +/- Std: {r['mean']:.3f} +/- {r['std']:.3f} µg/m³")
        print(f"  Median [IQR]: {r['median']:.3f} [{r['q25']:.3f} - {r['q75']:.3f}] µg/m³")
        print(f"  Range: {r['min']:.3f} - {r['max']:.3f} µg/m³")
        print(f"  Peak hour: {peak_label}")

    return results


def plot_seasonal_boxplot(df, bc_col='IR BCc'):
    """
    Create boxplot comparing BC across Ethiopian seasons.

    Draws one box per entry of ``SEASONS_ORDER`` (outliers hidden), coloured
    with ``SEASON_COLORS`` and annotated with the number of valid samples.

    Args:
        df: DataFrame with ``Ethiopian_Season`` and ``bc_col`` columns.
        bc_col: Name of the BC column to plot.

    Returns:
        The matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Prepare data: non-NaN values per season, in display order
    plot_data = [
        df.loc[df['Ethiopian_Season'] == season, bc_col].dropna()
        for season in SEASONS_ORDER
    ]

    bp = ax.boxplot(plot_data, patch_artist=True, showfliers=False)

    # Label the boxes through the axis rather than boxplot's `labels=`
    # keyword, which newer matplotlib releases deprecate. Boxes sit at
    # positions 1..N by default.
    positions = list(range(1, len(SEASONS_ORDER) + 1))
    ax.set_xticks(positions)
    ax.set_xticklabels(SEASONS_ORDER)

    # Color boxes
    for patch, season in zip(bp['boxes'], SEASONS_ORDER):
        patch.set_facecolor(SEASON_COLORS[season])
        patch.set_alpha(0.7)

    ax.set_xlabel('Ethiopian Season', fontsize=12)
    ax.set_ylabel('BC Concentration (µg/m³)', fontsize=12)
    ax.set_title('BC Concentration Distribution by Ethiopian Season', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    # Add counts just below the top of the axes, reusing the prepared samples
    label_y = ax.get_ylim()[1] * 0.95
    for position, samples in zip(positions, plot_data):
        ax.text(position, label_y, f'n={len(samples):,}', ha='center', fontsize=9)

    plt.tight_layout()
    return fig


def plot_seasonal_timeseries(df, bc_col='IR BCc'):
    """
    Plot rolling average time series with seasonal shading.

    Shows the daily mean of ``bc_col`` (thin grey line), its centred 30-day
    rolling mean (at least 7 days required), and background shading coloured
    by the season each day's month belongs to.

    Args:
        df: DataFrame indexed by time (must support ``resample('D')``) with
            a ``bc_col`` column.
        bc_col: Name of the BC column to plot.

    Returns:
        The matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(15, 6))

    # Daily average
    daily_avg = df[bc_col].resample('D').mean()
    rolling_avg = daily_avg.rolling(window=30, center=True, min_periods=7).mean()

    # Plot
    ax.plot(daily_avg.index, daily_avg, alpha=0.3, color='gray', linewidth=0.5)
    ax.plot(rolling_avg.index, rolling_avg, color='darkblue', linewidth=2, label='30-day rolling mean')

    # Seasonal shading: one span per contiguous run of days in the same
    # season, instead of one patch per day.
    month_to_season = {
        month: season
        for season, months in ADDIS_CONFIG['seasons'].items()
        for month in months
    }
    day_season = pd.Series(daily_avg.index.month.to_numpy(), index=daily_avg.index).map(month_to_season)
    # A new run starts wherever the season differs from the previous day
    run_id = day_season.ne(day_season.shift()).cumsum()
    one_day = pd.Timedelta(days=1)
    for _, run in day_season.groupby(run_id):
        season = run.iloc[0]
        if pd.isna(season):
            # Month not assigned to any configured season: leave unshaded
            continue
        ax.axvspan(run.index[0], run.index[-1] + one_day, alpha=0.1,
                   color=SEASON_COLORS.get(season, 'gray'), linewidth=0)

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('BC Concentration (µg/m³)', fontsize=12)
    ax.set_title('BC Time Series with Seasonal Context', fontsize=14, fontweight='bold')
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

print("=" * 80, "TASK 3: ETHIOPIAN SEASONAL PATTERNS", "=" * 80, sep="\n")

# Printed per-season summary
seasonal_stats = analyze_seasonal_statistics(df)

# Distribution comparison across seasons
fig1 = plot_seasonal_boxplot(df)
fig1.savefig(os.path.join(dirs['plots'], 'seasonal_boxplot.png'), dpi=150, bbox_inches='tight')
plt.show()

# Daily / rolling time series with season shading
fig2 = plot_seasonal_timeseries(df)
fig2.savefig(os.path.join(dirs['plots'], 'seasonal_timeseries.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 4: Extreme Events Analysis

**Goal**: Characterize high BC concentration events.

In [None]:
def analyze_extreme_events(df, bc_col='IR BCc', percentile=95):
    """
    Select the rows whose BC value exceeds a percentile threshold and report them.

    The threshold is the ``percentile``-th percentile of ``bc_col``. The
    function prints the threshold, the number of rows above it, their share
    per season (in ``SEASONS_ORDER``) and the five hours with the most such
    rows.

    Returns a tuple ``(extreme_events, threshold)`` where ``extreme_events``
    is a copy of the selected rows.
    """
    values = df[bc_col]
    threshold = values.quantile(percentile / 100)
    extreme_mask = values > threshold
    extreme_events = df.loc[extreme_mask].copy()
    n_extreme = extreme_mask.sum()

    print(f"\nExtreme Events Analysis (>{percentile}th percentile):")
    print("=" * 60)
    print(f"Threshold: {threshold:.3f} µg/m³")
    print(f"Total extreme measurements: {n_extreme:,}")

    # How the extreme rows are spread over the seasons
    print("\nExtreme events by season:")
    per_season = extreme_events['Ethiopian_Season'].value_counts()
    for season in SEASONS_ORDER:
        count = per_season.get(season, 0)
        pct = count / n_extreme * 100
        print(f"  {season}: {count:,} ({pct:.1f}%)")

    # The five hours of the day with the most extreme rows
    print("\nPeak hours for extreme events:")
    per_hour = extreme_events['Hour'].value_counts().sort_index()
    for hour, count in per_hour.nlargest(5).items():
        print(f"  {hour:02d}:00 - {count:,} events")

    return extreme_events, threshold


def plot_extreme_event_distribution(df, extreme_events, threshold, bc_col='IR BCc'):
    """
    Draw two bar charts of the extreme rows: counts per hour and per season.

    ``extreme_events`` needs ``Hour`` and ``Ethiopian_Season`` columns;
    ``threshold`` is only used in the figure title. ``df`` and ``bc_col`` are
    accepted but not used by the function body.

    Returns the matplotlib Figure.
    """
    fig, (hour_ax, season_ax) = plt.subplots(1, 2, figsize=(14, 5))
    y_label = 'Number of Extreme Events'

    # Left panel: number of extreme rows in each hour of the day
    per_hour = extreme_events['Hour'].value_counts().sort_index()
    hour_ax.bar(per_hour.index, per_hour.values, color='#E74C3C', alpha=0.7, edgecolor='black')
    hour_ax.set_xlabel('Hour of Day', fontsize=11)
    hour_ax.set_ylabel(y_label, fontsize=11)
    hour_ax.set_title('Extreme Events by Hour', fontsize=12, fontweight='bold')
    hour_ax.set_xticks(range(0, 24, 2))
    hour_ax.grid(True, alpha=0.3, axis='y')

    # Right panel: number of extreme rows in each season, in display order
    season_of_event = extreme_events['Ethiopian_Season']
    per_season = [int((season_of_event == name).sum()) for name in SEASONS_ORDER]
    bar_colors = [SEASON_COLORS[name] for name in SEASONS_ORDER]
    season_ax.bar(SEASONS_ORDER, per_season, color=bar_colors, alpha=0.7, edgecolor='black')
    season_ax.set_xlabel('Ethiopian Season', fontsize=11)
    season_ax.set_ylabel(y_label, fontsize=11)
    season_ax.set_title('Extreme Events by Season', fontsize=12, fontweight='bold')
    season_ax.tick_params(axis='x', rotation=15)
    season_ax.grid(True, alpha=0.3, axis='y')

    plt.suptitle(f'Extreme BC Events (>{threshold:.2f} µg/m³)', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 4: EXTREME EVENTS ANALYSIS", "=" * 80, sep="\n")

# Select the extreme rows (default percentile) and print their breakdown
extreme_events, threshold = analyze_extreme_events(df)

# Bar charts of the same breakdown, saved next to the other plots
fig = plot_extreme_event_distribution(df, extreme_events, threshold)
fig.savefig(os.path.join(dirs['plots'], 'extreme_events.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Task 5: Rate of Change Analysis

**Goal**: Analyze BC concentration buildup and decay patterns.

In [None]:
def analyze_rate_of_change(df, bc_col='IR BCc'):
    """
    Calculate and analyze rate of change in BC concentrations.

    The rate is the difference between consecutive rows of ``bc_col``. When
    ``df`` has a DatetimeIndex the difference is divided by the elapsed time
    in minutes, so a step across a gap in the record is not reported as a
    one-minute change. For evenly spaced one-minute data this equals the
    plain difference. Rows with a non-positive time step (for example
    duplicated timestamps) get NaN. With any other index type the plain
    row-to-row difference is used.

    Args:
        df: DataFrame with ``bc_col`` and ``Hour`` columns; not modified.
        bc_col: Name of the BC column to differentiate.

    Returns:
        tuple ``(rate, hourly_roc)``: ``rate`` is a Series named
        ``BC_RateOfChange`` aligned with ``df``; ``hourly_roc`` is its mean
        per ``Hour``.
    """
    # Work on a copy so the caller's frame does not gain a column
    df_temp = df.copy()
    change = df_temp[bc_col].diff()

    if isinstance(df_temp.index, pd.DatetimeIndex):
        elapsed_min = df_temp.index.to_series().diff().dt.total_seconds() / 60.0
        # Zero or negative steps cannot give a meaningful rate
        elapsed_min = elapsed_min.where(elapsed_min > 0).to_numpy()
        change = change / elapsed_min

    df_temp['BC_RateOfChange'] = change

    print("\nRate of Change Analysis:")
    print("=" * 60)

    roc = df_temp['BC_RateOfChange'].dropna()
    print("Overall statistics:")
    print(f"  Mean rate: {roc.mean():.4f} µg/m³/min")
    print(f"  Std: {roc.std():.4f} µg/m³/min")

    # Hourly patterns
    print("\nAverage rate by hour (positive = increasing):")
    hourly_roc = df_temp.groupby('Hour')['BC_RateOfChange'].mean()
    increasing_hours = hourly_roc[hourly_roc > 0].index.tolist()
    decreasing_hours = hourly_roc[hourly_roc < 0].index.tolist()
    print(f"  Increasing hours: {increasing_hours}")
    print(f"  Decreasing hours: {decreasing_hours}")

    return df_temp['BC_RateOfChange'], hourly_roc


def plot_rate_of_change(df, roc, hourly_roc, bc_col='IR BCc'):
    """
    Draw the hourly mean rate of change and the distribution of all rates.

    ``hourly_roc`` is plotted as bars (green when positive, red otherwise);
    ``roc`` is clipped to [-0.5, 0.5] and shown as a 100-bin histogram.
    ``df`` and ``bc_col`` are accepted but not used by the function body.

    Returns the matplotlib Figure.
    """
    fig, (hour_ax, dist_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: mean rate per hour, coloured by sign
    rising, falling = '#2ECC71', '#E74C3C'
    bar_colors = np.where(hourly_roc.values > 0, rising, falling).tolist()
    hour_ax.bar(hourly_roc.index, hourly_roc.values, color=bar_colors, alpha=0.7, edgecolor='black')
    hour_ax.axhline(y=0, color='black', linestyle='-', linewidth=1)
    hour_ax.set_xlabel('Hour of Day', fontsize=11)
    hour_ax.set_ylabel('Avg Rate of Change (µg/m³/min)', fontsize=11)
    hour_ax.set_title('Average Hourly Rate of Change', fontsize=12, fontweight='bold')
    hour_ax.set_xticks(range(0, 24, 2))
    hour_ax.grid(True, alpha=0.3, axis='y')

    # Right panel: histogram of the clipped rates with a marker at zero
    clipped = roc.clip(-0.5, 0.5)
    dist_ax.hist(clipped, bins=100, alpha=0.7, color='steelblue', edgecolor='black', linewidth=0.5)
    dist_ax.axvline(x=0, color='red', linestyle='--', linewidth=2)
    dist_ax.set_xlabel('Rate of Change (µg/m³/min)', fontsize=11)
    dist_ax.set_ylabel('Frequency', fontsize=11)
    dist_ax.set_title('Distribution of Rate of Change', fontsize=12, fontweight='bold')
    dist_ax.grid(True, alpha=0.3)

    plt.suptitle('BC Concentration Rate of Change Analysis', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig

print("=" * 80, "TASK 5: RATE OF CHANGE ANALYSIS", "=" * 80, sep="\n")

# Per-row rates plus their hourly means (also prints a short summary)
roc, hourly_roc = analyze_rate_of_change(df)

# Bar chart and histogram of the rates, saved next to the other plots
fig = plot_rate_of_change(df, roc, hourly_roc)
fig.savefig(os.path.join(dirs['plots'], 'rate_of_change.png'), dpi=150, bbox_inches='tight')
plt.show()

---

# Summary

## Functions Defined:
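- `setup_directories()` - Create the plot/data output folders
- `load_aethalometer_addis()` - Load and preprocess the aethalometer data
- `plot_diurnal_pattern()` - Hourly BC patterns
- `plot_diurnal_by_season()` - Diurnal patterns by Ethiopian season
- `analyze_weekend_effect()` - Weekday vs weekend analysis
- `plot_weekly_heatmap()` - Hour x Day heatmap by season
- `analyze_seasonal_statistics()` - Seasonal statistical summary
- `plot_seasonal_boxplot()` - Seasonal distribution comparison
- `plot_seasonal_timeseries()` - Daily and 30-day rolling time series with seasonal shading
- `analyze_extreme_events()` - High BC event characterization
- `plot_extreme_event_distribution()` - Extreme events by hour and by season
- `analyze_rate_of_change()` - BC buildup/decay patterns
- `plot_rate_of_change()` - Hourly and overall rate-of-change plots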

## To Run This Notebook:
1. Set the `DATA_FILEPATH` variable in the Data Loading cell to the path of your data file
2. Run all cells in order (the data loading and analysis cells are already active; nothing needs to be uncommented)

In [None]:
# Closing banner plus a reminder of how to re-run the notebook.
# The instructions name the variable the loading cell actually reads
# (DATA_FILEPATH); the analysis cells run as-is, so no "uncomment" step.
print("="*80)
print("NOTEBOOK COMPLETE")
print("="*80)
print("\nTo run this analysis:")
print("1. Update 'DATA_FILEPATH' with your Addis Ababa aethalometer data path")
print("2. Run all cells")