# Example: Modular Analysis Framework

This notebook demonstrates the new modular scripts for FTIR/HIPS/Aethalometer analysis.

## Key Features:
- **PlotConfig**: Set defaults once, apply to all plots
- **FilterId matching**: Match by physical filter, not just date
- **Flexible layouts**: Individual, grid, or combined plots
- **Site selection**: Plot all sites or specific ones

## 1. Setup and Imports

In [None]:
import sys
sys.path.insert(0, './scripts')

# Core imports
from config import SITES, MAC_VALUE
from data_matching import (
    load_aethalometer_data, 
    load_filter_data,
    add_base_filter_id,
    match_by_filter_id,
    match_aeth_filter_data,
    match_hips_with_smooth_raw
)
from flow_periods import (
    add_flow_period,
    has_before_after_data,
    print_flow_period_summary
)

# Plotting imports
from plotting import PlotConfig, crossplots, timeseries, distributions, comparisons

print("Imports successful!")

## 2. Configure Plot Defaults

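Set these once at the top of your notebook. All subsequent plots will use these settings unless a call overrides them with its own keyword arguments (for example, `layout='grid'` in Section 5).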

In [None]:
# Option 1: Plot all sites, individual figures
# (Options 2 and 3 are shown commented out in the next cell.)
PlotConfig.set(
    sites='all',
    layout='individual',
    figsize=(10, 8),
    show_stats=True,
    show_1to1=True
)

# Show current settings
PlotConfig.show()

In [None]:
# Option 2: Plot specific sites in a grid
# PlotConfig.set(
#     sites=['Beijing', 'JPL'],
#     layout='grid'
# )

# Option 3: Just one site
# PlotConfig.set(sites='JPL', layout='individual')

## 3. Load Data

In [None]:
# Load aethalometer data (all sites)
# Later cells index the result by site name (aethalometer_data[site_name]).
aethalometer_data = load_aethalometer_data()

In [None]:
# Load filter data
filter_data = load_filter_data()

# Add base_filter_id so measurements can be matched by physical filter
filter_data = add_base_filter_id(filter_data)
print("\nAdded base_filter_id column")

# Show one FilterId -> base_filter_id mapping as a sanity check.
# Guard against an empty table: .iloc[0] raises IndexError when there are no rows.
# (assumes filter_data is a pandas DataFrame, as the ['col'].iloc access implies)
if filter_data.empty:
    print("Example: (no filter rows loaded)")
else:
    print(f"Example: {filter_data['FilterId'].iloc[0]} -> {filter_data['base_filter_id'].iloc[0]}")

## 4. Match Data by FilterId (Recommended)

This ensures you're comparing measurements from the **same physical filter**.

In [None]:
# Build a per-site table of FTIR EC / HIPS / iron values matched by FilterId.
matched_by_filter = {}

for site_name, config in SITES.items():
    matched = match_by_filter_id(
        filter_data,
        site_code=config['code'],
        params=['EC_ftir', 'HIPS_Fabs', 'ChemSpec_Iron_PM2.5'],
    )

    # Nothing came back for this site: report it and move on.
    if matched is None:
        print(f"{site_name}: No matched data")
        continue

    # Convert HIPS to BC equivalent (divide by MAC)
    matched['hips_fabs'] = matched['hips_fabs'] / MAC_VALUE
    matched_by_filter[site_name] = matched
    print(f"{site_name}: {len(matched)} filters with matched data")

## 5. Time Series Plots

In [None]:
# BC time series - uses PlotConfig defaults (all sites, individual)
timeseries.bc(aethalometer_data, wavelength='IR')

In [None]:
# Override to show all sites in a grid
timeseries.bc(aethalometer_data, wavelength='IR', layout='grid')

In [None]:
# Multi-wavelength BC, restricted to JPL and Beijing via the sites argument
timeseries.bc_multiwavelength(aethalometer_data, sites=['JPL', 'Beijing'])

In [None]:
# Flow ratio over time
timeseries.flow_ratio(aethalometer_data, layout='grid')

## 6. Cross-Plots (Scatter)

In [None]:
# FTIR EC (x) vs HIPS (y) - should be close to 1:1 if same physical filter
# hips_fabs was divided by MAC_VALUE in Section 4, hence the "Fabs / MAC" label.
# The return value is kept in `results`; the cells shown here do not use it again.
results = crossplots.scatter(
    matched_by_filter,
    x_col='ftir_ec',
    y_col='hips_fabs',
    xlabel='FTIR EC (µg/m³)',
    ylabel='HIPS Fabs / MAC (µg/m³)',
    title='FTIR EC vs HIPS (same filter)',
    layout='grid'
)

In [None]:
# With iron as color gradient
crossplots.with_iron_gradient(
    matched_by_filter,
    x_col='ftir_ec',
    y_col='hips_fabs',
    xlabel='FTIR EC (µg/m³)',
    ylabel='HIPS Fabs / MAC (µg/m³)',
    title='FTIR vs HIPS (colored by Iron)',
    sites=['JPL']  # Just one site for this example
)

## 7. Match Aethalometer with HIPS (including smooth/raw info)

In [None]:
# Match HIPS with aethalometer data (by date)
# Every site in SITES gets exactly one status line, mirroring the FilterId
# matching loop in Section 4, so a missing site is visible instead of silent.
hips_aeth_matched = {}

for site_name, config in SITES.items():
    # No aethalometer record for this site: nothing to match against.
    if site_name not in aethalometer_data:
        print(f"{site_name}: No aethalometer data, skipping")
        continue

    matched = match_hips_with_smooth_raw(
        site_name,
        aethalometer_data[site_name],
        filter_data,
        config['code']
    )

    if matched is not None:
        hips_aeth_matched[site_name] = matched
        print(f"{site_name}: {len(matched)} matched pairs")
    else:
        print(f"{site_name}: No matched data")

In [None]:
# HIPS vs Aethalometer crossplot: aethalometer IR BCc on x, HIPS on y.
# NOTE(review): the y-axis label says "HIPS Fabs / MAC", but this notebook only
# divides hips_fabs by MAC_VALUE for matched_by_filter (Section 4), not for
# hips_aeth_matched. Confirm that match_hips_with_smooth_raw already returns
# hips_fabs in BC-equivalent units; otherwise the label and the values disagree.
crossplots.scatter(
    hips_aeth_matched,
    x_col='ir_bcc',
    y_col='hips_fabs',
    xlabel='Aethalometer IR BCc (µg/m³)',
    ylabel='HIPS Fabs / MAC (µg/m³)',
    title='HIPS vs Aethalometer',
    layout='grid'
)

## 8. Distributions

In [None]:
# BC distribution boxplot across sites
distributions.bc_boxplot(aethalometer_data, wavelength='IR')

In [None]:
# Smooth/raw difference histogram
distributions.smooth_raw_histogram(
    hips_aeth_matched,
    col='smooth_raw_abs_pct',
    thresholds=[1, 2.5, 4, 5],
    layout='grid'
)

## 9. Flow Period Analysis

In [None]:
# Show which sites have before/after data
print_flow_period_summary()

In [None]:
# Attach the flow_period column to each site's matched table and report the
# number of rows per period.
for site_name, df in hips_aeth_matched.items():
    with_period = add_flow_period(df, site_name, date_col='date')
    # Replacing the value of an existing key is safe while iterating items().
    hips_aeth_matched[site_name] = with_period
    periods = with_period['flow_period'].value_counts()
    print(f"{site_name}: {dict(periods)}")

In [None]:
# Flow period comparison (only for sites with before/after data)
# JPL is the only site with data in both periods
# NOTE(review): the site is hard-coded here; has_before_after_data is imported
# in the setup cell but never called in this notebook -- presumably it could
# drive this check instead. Confirm before relying on the comment above.
# The 'flow_period' column read below is added by add_flow_period in the
# previous cell.
if 'JPL' in hips_aeth_matched:
    comparisons.flow_periods(
        # Pass a dict containing only the JPL entry.
        {'JPL': hips_aeth_matched['JPL']},
        x_col='ir_bcc',
        y_col='hips_fabs',
        period_col='flow_period',
        xlabel='Aethalometer IR BCc (µg/m³)',
        ylabel='HIPS Fabs / MAC (µg/m³)'
    )

## 10. Threshold Analysis

In [None]:
# Analyze effect of smooth/raw thresholds
# Uses the same threshold list and the same 'smooth_raw_abs_pct' column as the
# smooth/raw histogram in Section 8.
comparisons.threshold_analysis(
    hips_aeth_matched,
    x_col='ir_bcc',
    y_col='hips_fabs',
    threshold_col='smooth_raw_abs_pct',
    thresholds=[1, 2.5, 4, 5],
    sites=['JPL'],  # Just one site for clarity
    xlabel='Aethalometer IR BCc (µg/m³)',
    ylabel='HIPS Fabs / MAC (µg/m³)'
)

## 11. Quick Reference: Available Functions

### PlotConfig
```python
PlotConfig.set(sites='all', layout='grid')  # Set defaults
PlotConfig.show()                            # Show current settings
PlotConfig.reset()                           # Reset to defaults
```

### crossplots
```python
crossplots.scatter(data, x_col, y_col)      # Generic scatter
crossplots.bc_vs_ec(data)                   # Preset: Aeth BC vs Filter EC
crossplots.hips_vs_ftir(data)               # Preset: HIPS vs FTIR
crossplots.with_iron_gradient(data, ...)   # Color by iron concentration
```

### timeseries
```python
timeseries.bc(data, wavelength='IR')        # BC time series
timeseries.bc_multiwavelength(data)         # All wavelengths
timeseries.flow_ratio(data)                 # Flow ratio over time
timeseries.data_completeness(data)          # Data availability
```

### distributions
```python
distributions.bc_boxplot(data)              # BC distribution
distributions.smooth_raw_histogram(data)    # Smooth/raw difference
distributions.correlation_matrix(data, cols) # Correlation heatmap
```

### comparisons
```python
comparisons.before_after_outliers(data)     # Outlier removal impact
comparisons.threshold_analysis(data, ...)   # Test different thresholds
comparisons.flow_periods(data)              # Before/after flow fix
```

### data_matching
```python
add_base_filter_id(filter_data)             # Strip -N suffix
match_by_filter_id(data, site_code, params) # Match by physical filter
match_hips_with_smooth_raw(...)             # HIPS + aethalometer
```

### flow_periods
```python
add_flow_period(df, site_name)              # Add flow_period column
has_before_after_data(site_name)            # Check data availability
print_flow_period_summary()                 # Show all sites status
```