# Sentinel-5P Air Pollution Analysis - Delhi NCR

## Complete Analysis Workflow

This notebook provides a complete, reproducible workflow for analyzing Sentinel-5P TROPOMI data over Delhi NCR.

### Overview
- **Time Period:** January 2022 - January 2024 (24 months)
- **Pollutants:** NO₂, SO₂, CO, HCHO
- **Analysis:** Trajectory analysis, hotspot detection, source attribution

### Workflow Steps
1. Setup and data verification
2. Data processing
3. Trajectory analysis (local vs. advected pollution)
4. Hotspot analysis
5. Visualization
6. Interpretation

## Step 1: Setup and Imports

In [None]:
import sys
import os
from pathlib import Path

# Make project modules importable. The project root is taken to be two
# directory levels above the current working directory.
project_root = Path().resolve().parent.parent
# Prepend both entries in one step: 'scripts' first, then the root itself.
sys.path[:0] = [str(project_root / 'scripts'), str(project_root)]

# Standard imports
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Project imports
import config

# Display options: no cap on the number of columns shown and no fixed
# display width (both options are set to None).
pd.options.display.max_columns = None
pd.options.display.width = None

print("Setup complete!")

## Step 2: Verify Data Availability

In [None]:
def check_data_availability(base_dir=None):
    """Print, for each required dataset, whether matching files exist.

    Every dataset is described by a glob pattern. For each one the function
    prints a ✓/✗ marker with the number of matches, followed by the base
    names of up to three matching files (in sorted path order) and a count
    of any remaining ones.

    Args:
        base_dir: Optional directory (str or path-like) that the patterns
            are resolved against. When ``None`` (the default) the patterns
            are relative to the current working directory, so the report
            depends on where the notebook kernel was started.

    Returns:
        None. The report is written to standard output only.
    """
    from glob import glob

    required_files = {
        'ERA5': 'data/era5/*_daily.nc',
        'Sentinel-5P Composites': 'data/processed/*_monthly_composite.nc',
        'Time Series': 'data/processed/*_timeseries.csv',
        'Classified Data': 'data/processed/*_classified.csv',
    }

    print("Checking data availability...")
    print("="*60)

    for name, pattern in required_files.items():
        if base_dir is not None:
            # Anchor the pattern so the check does not depend on the cwd.
            pattern = os.path.join(base_dir, pattern)

        # Sort once; the preview below lists the first matches in this order.
        files = sorted(glob(pattern))
        status = "✓" if files else "✗"
        print(f"{status} {name}: {len(files)} files")

        for f in files[:3]:  # Show first 3
            print(f"    - {os.path.basename(f)}")
        if len(files) > 3:
            print(f"    ... and {len(files) - 3} more")

    print("="*60)

check_data_availability()

## Step 3: Load and Explore Data

In [None]:
# Read the per-pollutant time series CSVs that are present on disk and
# print a short summary for each one.
pollutants = ['NO2', 'SO2', 'CO', 'HCHO']
time_series = {}

for code in pollutants:
    file_path = f'data/processed/{code}_timeseries.csv'
    if not os.path.exists(file_path):
        continue  # no file for this pollutant; nothing to load or report

    # First CSV column becomes the index, parsed as dates.
    df = pd.read_csv(file_path, index_col=0, parse_dates=True)
    time_series[code] = df

    print(f"\n{code} Time Series:")
    first_stamp, last_stamp = df.index.min(), df.index.max()
    print(f"  Period: {first_stamp} to {last_stamp}")
    mean_value = df['value'].mean()
    unit = config.POLLUTANTS[code]['unit']
    print(f"  Mean value: {mean_value:.4f} {unit}")
    print(f"  Data points: {len(df)}")

## Step 4: Trajectory Analysis Results

In [None]:
# Load classified data (local vs. advected) and print, per pollutant, how the
# records split between the two regimes.
classified_data = {}

for code in pollutants:
    file_path = f'data/processed/{code}_classified.csv'
    if not os.path.exists(file_path):
        continue  # no classified file for this pollutant

    # First CSV column becomes the index, parsed as dates.
    df = pd.read_csv(file_path, index_col=0, parse_dates=True)
    classified_data[code] = df

    # Summary statistics
    local = df[df['regime'] == 'local']
    advected = df[df['regime'] == 'advected']
    total = len(df)

    print(f"\n{code} Regime Classification:")
    if total == 0:
        # A header-only CSV yields an empty frame; the percentage
        # computations below would raise ZeroDivisionError on it.
        print("  No classified records found")
        continue

    print(f"  Local: {len(local)} months ({len(local)/total*100:.1f}%)")
    print(f"  Advected: {len(advected)} months ({len(advected)/total*100:.1f}%)")
    # Means are only reported when both regimes are represented.
    if len(local) > 0 and len(advected) > 0:
        unit = config.POLLUTANTS[code]['unit']
        print(f"  Local mean: {local['value'].mean():.4f} {unit}")
        print(f"  Advected mean: {advected['value'].mean():.4f} {unit}")

## Step 5: Key Findings Summary

In [None]:
# Print a per-pollutant summary of the regime classification, followed by
# pointers to the detailed outputs.
print("="*70)
print("KEY FINDINGS SUMMARY")
print("="*70)

for code in pollutants:
    if code not in classified_data:
        continue  # pollutant was never loaded; nothing to summarise

    df = classified_data[code]
    total = len(df)
    if total == 0:
        # Guard against ZeroDivisionError on an empty classified table.
        print(f"\n{config.POLLUTANTS[code]['name']}:")
        print("  - No classified data available")
        continue

    local_pct = len(df[df['regime'] == 'local']) / total * 100

    print(f"\n{config.POLLUTANTS[code]['name']}:")
    print(f"  - Local pollution: {local_pct:.1f}% of time")
    # NOTE: every record not labelled 'local' is counted as regional
    # transport here, including rows with any other or missing regime label.
    print(f"  - Regional transport: {100-local_pct:.1f}% of time")
    print(f"  - Mean concentration: {df['value'].mean():.4f} {config.POLLUTANTS[code]['unit']}")

print("\n" + "="*70)
print("For detailed analysis, see:")
print("  - outputs/reports/Interpretive_Note_Sentinel5P_Delhi.md")
print("  - outputs/maps/ (seasonal anomalies, source attribution)")
print("  - outputs/time_series/ (detailed plots)")
print("="*70)