# UFSCAT-Chem Fire Emission Model Demo

This notebook demonstrates the full pipeline for the `UFSCATChemFireGenerator` model, including:
1. Initialization
2. Generating mock input data
3. Running the pipeline (aggregation, feature generation, and XGBoost training)
4. Exporting the final lookup table (LUT)

In [None]:
import numpy as np
import xarray as xr
import pandas as pd
from sofiev_model.ufscat_fire_generator import UFSCATChemFireGenerator

## 1. Initialize the Model

In [None]:
# Build the generator on a coarse target grid so the demo runs quickly.
# Later cells read `fire_sys.target_lats` / `fire_sys.target_lons` to size the
# mock meteorology data.
# NOTE(review): target_res is presumably in degrees — confirm against the class.
fire_sys = UFSCATChemFireGenerator(target_res=0.5)

## 2. Generate Mock Data

In a real scenario, you would load your `gbbepx` satellite dataset and your `ufs` meteorology dataset here. For this demo, we'll create synthetic data that matches the expected structure.

In [None]:
def create_mock_satellite_data(lats, lons, time_steps=36, seed=None):
    """Create a synthetic stand-in for the 500m satellite dataset.

    The grid is whatever ``lats`` x ``lons`` describes; nothing here enforces
    a 500m spacing.

    Parameters
    ----------
    lats, lons : array-like of float
        1-D latitude / longitude coordinates in degrees.
    time_steps : int, default 36
        Number of month-end time stamps, starting at 2022-01-31.
    seed : int or None, default None
        Seed for a private random generator. With ``None`` the draws come
        from NumPy's global random state, so ``np.random.seed`` still
        controls them.

    Returns
    -------
    xr.Dataset
        ``FRP`` and ``LAI`` on ``(lat, lon, time)`` and ``IGBP`` on
        ``(lat, lon)``. ``FRP`` and ``LAI`` are non-negative; ``IGBP`` holds
        integer codes from 1 to 16.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    n_lat, n_lon = len(lats), len(lons)
    cube_shape = (n_lat, n_lon, time_steps)

    # An offset object instead of the "M" string alias: the alias is
    # deprecated since pandas 2.2, while MonthEnd() yields the same dates on
    # every pandas version.
    times = pd.date_range('2022-01-01', periods=time_steps, freq=pd.offsets.MonthEnd())
    coords = {'lat': lats, 'lon': lons, 'time': times}

    # Make FRP and LAI vary with location and time.
    # Every factor is reshaped to a full 3-D (lat, lon, time) layout so the
    # products below broadcast along the intended axes.
    lat_factor = np.abs(np.cos(np.deg2rad(np.asarray(lats, dtype=float)))).reshape(-1, 1, 1)
    lon_factor = (np.sin(np.deg2rad(np.asarray(lons, dtype=float))) + 1).reshape(1, -1, 1)
    # Seasonal cycle: |sin| repeats every 12 steps and never goes negative,
    # so FRP and LAI stay physically meaningful.
    time_factor = np.abs(np.sin(np.arange(time_steps) / 12 * np.pi)).reshape(1, 1, -1)

    frp_data = 100 * lat_factor * lon_factor * time_factor * rng.rand(*cube_shape)
    # LAI has no longitude dependence; expand it across lon. The copy turns
    # the read-only broadcast view into a normal writable array.
    lai_data = np.broadcast_to(5 * lat_factor * time_factor, cube_shape).copy()
    igbp_data = rng.randint(1, 17, size=(n_lat, n_lon))

    return xr.Dataset({
        'FRP': (['lat', 'lon', 'time'], frp_data),
        'LAI': (['lat', 'lon', 'time'], lai_data),
        'IGBP': (['lat', 'lon'], igbp_data)
    }, coords=coords)

In [None]:
def create_mock_met_data(lats, lons, time_steps=36, seed=None):
    """Create a synthetic stand-in for the 4km meteorology dataset.

    The grid is whatever ``lats`` x ``lons`` describes; nothing here enforces
    a 4km spacing.

    Parameters
    ----------
    lats, lons : sequence
        1-D latitude / longitude coordinates.
    time_steps : int, default 36
        Number of month-end time stamps, starting at 2022-01-31.
    seed : int or None, default None
        Seed for a private random generator. With ``None`` the draws come
        from NumPy's global random state, so ``np.random.seed`` still
        controls them.

    Returns
    -------
    xr.Dataset
        ``vpd`` (uniform in [0, 30)) and ``soil_m`` (uniform in [0.1, 0.5)),
        both on ``(lat, lon, time)``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # An offset object instead of the "M" string alias: the alias is
    # deprecated since pandas 2.2, while MonthEnd() yields the same dates on
    # every pandas version.
    times = pd.date_range('2022-01-01', periods=time_steps, freq=pd.offsets.MonthEnd())
    coords = {'lat': lats, 'lon': lons, 'time': times}

    cube_shape = (len(lats), len(lons), time_steps)
    vpd_data = rng.uniform(0, 30, size=cube_shape)
    soil_m_data = rng.uniform(0.1, 0.5, size=cube_shape)

    return xr.Dataset({
        'vpd': (['lat', 'lon', 'time'], vpd_data),
        'soil_m': (['lat', 'lon', 'time'], soil_m_data)
    }, coords=coords)

In [None]:
# Seed NumPy's global generator so the mock data (and everything derived from
# it) is identical on every Restart & Run All. Both mock-data helpers draw
# from this global state by default.
np.random.seed(42)

# Note: Using very coarse resolution for speed
# NOTE(review): this 2.0-step "500m" grid is coarser than the model's
# target_res=0.5 grid — confirm aggregate_raw_data copes with a source grid
# that is coarser than its target.
mock_lats_500m = np.arange(-90, 90, 2.0)
mock_lons_500m = np.arange(-180, 180, 2.0)
ds_500m = create_mock_satellite_data(mock_lats_500m, mock_lons_500m)

# The met data should be at the target resolution, so reuse the model's own
# target grid coordinates.
met_4km = create_mock_met_data(fire_sys.target_lats, fire_sys.target_lons)

# Label and dataset repr go on separate lines, exactly as two print calls would.
print("Mock 500m Satellite Data:", ds_500m, sep="\n")
print("\nMock 4km Meteorology Data:", met_4km, sep="\n")

## 3. Run the Full Pipeline

In [None]:
# Step 1: Aggregate raw data
# The aggregation is applied to one time step at a time: slice the satellite
# dataset along `time` and collect the per-step results in order.
ds_4km_slices = [
    fire_sys.aggregate_raw_data(ds_500m.isel(time=step))
    for step in range(ds_500m.sizes["time"])
]

# Stack the per-step results again, using the original time coordinate as the
# new concatenation dimension.
ds_4km_time = xr.concat(ds_4km_slices, dim=ds_500m.time)

print("Aggregated 4km Data over Time:", ds_4km_time, sep="\n")

In [None]:
# Step 2: Generate features
# Feed the time-stacked aggregated data and the mock meteorology to the model's
# feature builder; the result is the table used for training in Step 3.
training_df = fire_sys.generate_features(ds_4km_time, met_4km)

# Preview only the first rows rather than the whole table.
print("\nGenerated Training DataFrame:", training_df.head(), sep="\n")

In [None]:
# Step 3: Train the XGBoost model
# Trains on the feature table from Step 2; the returned model is kept because
# the export in Step 4 takes it as input.
xgb_model = fire_sys.train_xgboost(training_df)

## 4. Export the LUT

In [None]:
# Step 4: Export the binary LUT for Fortran
# Hands the trained model from Step 3 to the exporter.
# NOTE(review): the filename is relative, so the file presumably lands in the
# kernel's working directory — confirm against export_binary_lut.
fire_sys.export_binary_lut(xgb_model, filename="fire_scaling_lut_demo.bin")