In [1]:
# CELL 1: Install dependencies
# Only packages that cannot be located are installed, so re-running the
# notebook does not invoke pip when everything is already importable.
import importlib.util
import subprocess
import sys

pkgs = ["pandas", "numpy", "xarray", "netCDF4", "scipy", "ipywidgets"]
missing = [name for name in pkgs if importlib.util.find_spec(name) is None]
if missing:
    # Use the kernel's own interpreter so packages land in its environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet"] + missing)
print("Done!")

Done!


In [2]:
# CELL 2: Imports
import os
import re
import warnings
from pathlib import Path
from datetime import datetime, timedelta
from collections import defaultdict
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import RegularGridInterpolator
import ipywidgets as widgets
from IPython.display import display, clear_output

# Install a global 'ignore' filter so no warnings are displayed.
warnings.filterwarnings('ignore')

# Startup banner with the versions of the main libraries in use.
print(
    "=" * 60,
    "PACE Preprocessor for Fishing Correlation Analysis",
    f"numpy {np.__version__}, pandas {pd.__version__}, xarray {xr.__version__}",
    "=" * 60,
    sep="\n",
)

PACE Preprocessor for Fishing Correlation Analysis
numpy 2.3.5, pandas 2.3.3, xarray 2025.12.0


In [3]:
# CELL 3: Configuration and PACE product definitions

# PACE product configuration.
# Keys are the product tokens found in input file names
# (pace_{product}_{YYYYMMDD}.nc). Each entry gives:
#   'var'            - variable name to read from the input file
#   'output_name'    - name the variable gets in the output dataset
#   'has_wavelength' - whether the variable carries a wavelength dimension
#   'target_wl'      - (optional) single wavelength to extract
#   'keep_all_wl'    - (optional) keep the whole wavelength dimension
PACE_PRODUCTS = {
    'carbon': {'var': 'carbon_phyto', 'output_name': 'carbon_phyto', 'has_wavelength': False},
    'chl': {'var': 'chlor_a', 'output_name': 'chlor_a', 'has_wavelength': False},
    'iop': {'var': 'bbp_s', 'output_name': 'bbp_s', 'has_wavelength': False},
    'poc': {'var': 'poc', 'output_name': 'poc', 'has_wavelength': False},
    'kd': {'var': 'Kd', 'output_name': 'Kd_490', 'has_wavelength': True, 'target_wl': 490},
    'rrs': {'var': 'Rrs', 'output_name': 'Rrs', 'has_wavelength': True, 'keep_all_wl': True},
}

# Temporal window for composite (±days)
TEMPORAL_WINDOW = 4

print(f"Configured products: {list(PACE_PRODUCTS.keys())}")
print(f"Temporal window for composite: ±{TEMPORAL_WINDOW} days")

Configured products: ['carbon', 'chl', 'iop', 'poc', 'kd', 'rrs']
Temporal window for composite: ¬±4 days


In [4]:
# CELL 4: Core processing functions

def discover_pace_files(data_dir: Path) -> Dict[str, Dict[datetime, Path]]:
    """
    Discover PACE files organized by product and date.

    Only files directly inside ``data_dir`` whose *complete* name follows
    ``pace_{product}_{YYYYMMDD}.nc`` are kept (the match is case-insensitive
    and ``product`` consists of letters only). Names that merely start with
    the pattern, such as ``pace_chl_20250101.nc.tmp.nc``, are ignored, as are
    names whose eight digits do not form a valid calendar date.

    Args:
        data_dir: Directory to scan (not recursive).

    Returns:
        ``{product_lowercase: {date: path}}``. Empty when ``data_dir`` is not
        a directory or holds no matching file.
    """
    files_by_product: Dict[str, Dict[datetime, Path]] = defaultdict(dict)

    if not data_dir.is_dir():
        return dict(files_by_product)

    pattern = re.compile(r'pace_([a-z]+)_(\d{8})\.nc', re.IGNORECASE)

    # Sorted iteration makes the result independent of directory listing order.
    for f in sorted(data_dir.glob('*.nc')):
        match = pattern.fullmatch(f.name)
        if match is None:
            continue
        try:
            date = datetime.strptime(match.group(2), '%Y%m%d')
        except ValueError:
            # Eight digits that are not a real date (e.g. month 13).
            continue
        files_by_product[match.group(1).lower()][date] = f

    return dict(files_by_product)


def read_pace_variable(filepath: Path, var_name: str,
                       wavelength_target: Optional[int] = None,
                       keep_all_wavelengths: bool = False) -> Tuple:
    """
    Read one variable, with its lat/lon coordinates, from a PACE NetCDF file.

    Args:
        filepath: File opened with ``xr.open_dataset``.
        var_name: Name of the data variable to read.
        wavelength_target: When the variable has a ``wavelength`` dimension
            and ``keep_all_wavelengths`` is False, the band closest to this
            value is selected.
        keep_all_wavelengths: When True the ``wavelength`` dimension is kept
            and every band is returned.

    Returns:
        ``(values, lats, lons, wavelengths)``. ``values`` is a float32 array
        in which the fill value and any magnitude above 1e10 are replaced by
        NaN; ``wavelengths`` is None for variables without a ``wavelength``
        dimension. ``(None, None, None, None)`` is returned when the variable
        is absent or when reading fails; in the latter case the reason is
        logged as a warning instead of being silently discarded.
    """
    try:
        with xr.open_dataset(filepath) as ds:
            if var_name not in ds.data_vars:
                return None, None, None, None

            data = ds[var_name]
            lats = ds['lat'].values
            lons = ds['lon'].values
            wavelengths = None

            # Handle wavelength dimension
            if 'wavelength' in data.dims:
                wavelengths = ds['wavelength'].values
                if not keep_all_wavelengths and wavelength_target is not None:
                    # Pick the single band nearest to the requested wavelength.
                    idx = np.argmin(np.abs(wavelengths - wavelength_target))
                    data = data.isel(wavelength=idx)
                    wavelengths = np.array([wavelengths[idx]])

            # Convert and handle fill values
            values = data.values.astype(np.float32)
            fill_val = data.attrs.get('_FillValue', -32767)
            values = np.where(values == fill_val, np.nan, values)
            values = np.where(np.abs(values) > 1e10, np.nan, values)

            return values, lats, lons, wavelengths

    except Exception as exc:
        # Best-effort read: callers treat the all-None tuple as "no data",
        # but the cause is reported so unreadable files can be diagnosed.
        import logging
        logging.getLogger(__name__).warning(
            "Could not read %r from %s: %s", var_name, filepath, exc
        )
        return None, None, None, None


def get_dates_by_priority(target: datetime, available: Dict[datetime, Path],
                          window: int = 4) -> List[datetime]:
    """
    Return dates ordered by priority for temporal filling.

    Priority: exact date > closer dates > past dates on tie.

    Distances are measured in whole calendar days, so a time-of-day on
    ``target`` or on the available dates does not change the ranking or make
    the window asymmetric.

    Args:
        target: Date being processed.
        available: Mapping whose keys are the dates that have a file.
        window: Maximum distance in days (inclusive) from ``target``.

    Returns:
        The keys of ``available`` that fall within ``window`` days of
        ``target``, best candidate first.
    """
    target_day = target.date()
    ranked = []
    for dt in available:
        delta = (dt.date() - target_day).days
        if abs(delta) <= window:
            # Sort key: distance first, then past (0) before future (1).
            ranked.append((abs(delta), 0 if delta <= 0 else 1, dt))

    ranked.sort()
    return [dt for _, _, dt in ranked]


def process_single_date(target_date: datetime,
                        pace_files: Dict[str, Dict[datetime, Path]],
                        mode: str = 'daily',
                        window: int = 4) -> Optional[xr.Dataset]:
    """
    Process PACE data for a single date.

    For every entry of ``PACE_PRODUCTS`` that has files in ``pace_files`` one
    source file is chosen and its variable is read with
    ``read_pace_variable``:

    * ``mode == 'daily'``: only the file dated exactly ``target_date``.
    * any other mode (composite): the candidate dates returned by
      ``get_dates_by_priority`` are tried in order and the first file that
      yields the variable is used. The choice is made per product and per
      file, not per pixel.

    When both ``chlor_a`` and ``carbon_phyto`` were read as plain arrays, a
    ``chl_c_ratio`` variable (chlorophyll / carbon, non-finite results set to
    NaN) is added. The lat/lon coordinates are those of the first product
    that was read successfully.

    Args:
        target_date: Date to process
        pace_files: Dict of available files by product/date
        mode: 'daily' (strict, exact date only) or 'composite' (search within window)
        window: Temporal window for composite mode

    Returns:
        xarray Dataset with all variables, or None if no data
    """
    data_arrays = {}
    lats, lons = None, None
    rrs_wavelengths = None

    for product, config in PACE_PRODUCTS.items():
        if product not in pace_files:
            continue

        var_name = config['var']
        output_name = config['output_name']
        target_wl = config.get('target_wl')
        keep_all_wl = config.get('keep_all_wl', False)
        product_files = pace_files[product]

        # Determine which dates may be used, best candidate first
        if mode == 'daily':
            # Strict mode: only exact date
            candidate_dates = [target_date] if target_date in product_files else []
        else:
            # Composite mode: search within window
            candidate_dates = get_dates_by_priority(target_date, product_files, window)

        # Read data, falling back to the next candidate when a file cannot be
        # read or does not contain the variable.
        data, lat_arr, lon_arr, wl = None, None, None, None
        for candidate in candidate_dates:
            data, lat_arr, lon_arr, wl = read_pace_variable(
                product_files[candidate], var_name, target_wl, keep_all_wl
            )
            if data is not None:
                break

        if data is None:
            continue

        if keep_all_wl and wl is not None:
            # For Rrs, store with wavelength dimension
            data_arrays[output_name] = (data, wl)
            rrs_wavelengths = wl
        else:
            data_arrays[output_name] = data

        if lats is None:
            lats, lons = lat_arr, lon_arr

    if not data_arrays or lats is None:
        return None

    # Calculate derived variables
    if 'chlor_a' in data_arrays and 'carbon_phyto' in data_arrays:
        chl = data_arrays['chlor_a']
        carbon = data_arrays['carbon_phyto']
        if not isinstance(chl, tuple) and not isinstance(carbon, tuple):
            # Division by zero / NaN is expected here; the result is cleaned below.
            with np.errstate(divide='ignore', invalid='ignore'):
                chl_c = chl / carbon
                chl_c = np.where(np.isfinite(chl_c), chl_c, np.nan)
            data_arrays['chl_c_ratio'] = chl_c

    # Build xarray Dataset
    data_vars = {}
    for name, arr in data_arrays.items():
        if isinstance(arr, tuple):
            # Variable with wavelength dimension (Rrs)
            data_vars[name] = (['lat', 'lon', 'wavelength'], arr[0])
        else:
            data_vars[name] = (['lat', 'lon'], arr)

    coords = {'lat': lats, 'lon': lons}
    if rrs_wavelengths is not None:
        coords['wavelength'] = rrs_wavelengths

    ds = xr.Dataset(data_vars, coords=coords)

    # Add attributes
    ds.attrs['title'] = f'PACE OCI {mode.capitalize()} Composite for Fishing Analysis'
    ds.attrs['date'] = target_date.strftime('%Y-%m-%d')
    ds.attrs['mode'] = mode
    ds.attrs['source'] = 'NASA PACE OCI L3 products'
    if mode == 'composite':
        ds.attrs['temporal_window'] = f'±{window} days'

    # Variable attributes
    var_attrs = {
        'chlor_a': {'long_name': 'Chlorophyll-a concentration', 'units': 'mg m^-3'},
        'carbon_phyto': {'long_name': 'Phytoplankton Carbon', 'units': 'mg m^-3'},
        'bbp_s': {'long_name': 'Backscattering spectral slope (eta)', 'units': 'dimensionless'},
        'poc': {'long_name': 'Particulate Organic Carbon', 'units': 'mg m^-3'},
        'Kd_490': {'long_name': 'Diffuse attenuation coefficient at 490nm', 'units': 'm^-1'},
        'chl_c_ratio': {'long_name': 'Chlorophyll:Carbon ratio', 'units': 'mg Chl / mg C'},
        'Rrs': {'long_name': 'Remote sensing reflectance', 'units': 'sr^-1'},
    }
    for var in ds.data_vars:
        if var in var_attrs:
            ds[var].attrs.update(var_attrs[var])

    return ds


print("Processing functions defined.")

Processing functions defined.


In [5]:
# CELL 5: GUI Components - Folder and File Browsers

class FolderBrowser:
    """
    Interactive folder browser widget.

    ``self.cur`` is the directory currently listed and ``self.sel`` the
    directory last confirmed with the Select button (initially the start
    directory). ``self.w`` is the composed widget to display.

    Args:
        start: Directory shown first.
        label: Heading rendered above the browser.
        must_be_in: Optional directory the selection is expected to be inside
            of (or equal to); see ``is_valid``.
    """

    def __init__(self, start='.', label='Folder', must_be_in=None):
        self.cur = Path(start).resolve()
        self.sel = self.cur
        self.must_be_in = Path(must_be_in).resolve() if must_be_in else None
        self.label = label

        self.html = widgets.HTML(f"<code>{self.cur}</code>")
        self.dd = widgets.Select(
            options=self._list(),
            layout=widgets.Layout(width='100%', height='100px')
        )
        self.b_up = widgets.Button(description='↑ Up', layout=widgets.Layout(width='70px'))
        self.b_in = widgets.Button(description='→ Enter', layout=widgets.Layout(width='80px'))
        self.b_sel = widgets.Button(description='✓ Select', button_style='success',
                                    layout=widgets.Layout(width='80px'))
        self.txt = widgets.Text(placeholder='new folder', layout=widgets.Layout(width='150px'))
        self.b_new = widgets.Button(description='+New', layout=widgets.Layout(width='60px'))
        self.selhtml = widgets.HTML(f"<b>Selected:</b> <code>{self.sel}</code>")

        self.b_up.on_click(lambda b: self._up())
        self.b_in.on_click(lambda b: self._enter())
        self.b_sel.on_click(lambda b: self._select())
        self.b_new.on_click(lambda b: self._create())

        self.w = widgets.VBox([
            widgets.HTML(f"<b>{label}</b>"),
            self.html, self.dd,
            widgets.HBox([self.b_up, self.b_in, self.b_sel, self.txt, self.b_new]),
            self.selhtml
        ])

    def _list(self):
        """Return '.' followed by the sorted non-hidden sub-directory names of ``cur``."""
        try:
            subdirs = [
                x.name for x in sorted(self.cur.iterdir())
                if x.is_dir() and not x.name.startswith('.')
            ]
        except OSError:
            # Unreadable or vanished directory: show only the '.' entry.
            return ['.']
        return ['.'] + subdirs

    def _refresh(self):
        """Update the path display and the listing after ``cur`` changed."""
        self.html.value = f"<code>{self.cur}</code>"
        self.dd.options = self._list()

    def _up(self):
        """Move to the parent directory unless already at the root."""
        if self.cur.parent != self.cur:
            self.cur = self.cur.parent
            self._refresh()

    def _enter(self):
        """Descend into the highlighted sub-directory."""
        if self.dd.value and self.dd.value != '.':
            p = self.cur / self.dd.value
            if p.is_dir():
                self.cur = p
                self._refresh()

    def _select(self):
        """Confirm ``cur`` as the selection and flag it when it is not valid."""
        self.sel = self.cur
        status = ""
        if not self.is_valid():
            status = " <span style='color:red'>(⚠ must be inside data/)</span>"
        self.selhtml.value = f"<b>Selected:</b> <code>{self.sel}</code>{status}"

    def _create(self):
        """Create the folder named in the text box under ``cur`` and select it."""
        n = self.txt.value.strip()
        if not n:
            return
        p = self.cur / n
        try:
            p.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            # Report the failure in the widget instead of raising inside the
            # button callback; the current selection is left untouched.
            self.selhtml.value = (
                f"<b>Selected:</b> <code>{self.sel}</code>"
                f" <span style='color:red'>(could not create folder: {exc})</span>"
            )
            return
        self.cur = p
        self.sel = p
        self.txt.value = ''
        self._refresh()
        self._select()

    def path(self):
        """Return the selected directory."""
        return self.sel

    def is_valid(self):
        """
        Return True when the selection satisfies the ``must_be_in`` constraint.

        The check compares path components, so a sibling that merely shares a
        name prefix (``data2`` next to ``data``) is not accepted.
        """
        if self.must_be_in is None:
            return True
        return self.sel == self.must_be_in or self.must_be_in in self.sel.parents


class FileBrowser:
    """
    Interactive file browser widget for date list files.

    Directories and files whose suffix is in ``extensions`` are listed with
    an icon prefix; ``self.sel`` holds the chosen file (None until one is
    selected) and ``self.w`` is the composed widget to display.
    """

    # Icon prefixes used to tell directories and files apart in the listing.
    _DIR_PREFIX = '📁 '
    _FILE_PREFIX = '📄 '

    def __init__(self, start='.', extensions=None):
        self.cur = Path(start).resolve()
        self.sel = None
        self.ext = extensions or ['.txt', '.csv', '.dat']

        self.html = widgets.HTML(f"<code>{self.cur}</code>")
        self.dd = widgets.Select(
            options=self._list(),
            layout=widgets.Layout(width='100%', height='120px')
        )
        self.b_up = widgets.Button(description='↑ Up', layout=widgets.Layout(width='70px'))
        self.b_in = widgets.Button(description='→ Enter', layout=widgets.Layout(width='80px'))
        self.b_sel = widgets.Button(description='✓ Select File', button_style='success',
                                    layout=widgets.Layout(width='100px'))
        self.selhtml = widgets.HTML("<i>No file selected</i>")

        self.b_up.on_click(lambda b: self._up())
        self.b_in.on_click(lambda b: self._enter())
        self.b_sel.on_click(lambda b: self._select())

        self.w = widgets.VBox([
            widgets.HTML("<b>Date List File</b>"),
            self.html, self.dd,
            widgets.HBox([self.b_up, self.b_in, self.b_sel]),
            self.selhtml
        ])

    def _list(self):
        """
        Return the prefixed, sorted entries of ``cur``.

        Hidden entries and files with other suffixes are skipped. The
        placeholders '(empty)' and '(error)' are returned when nothing is
        listed or the directory cannot be read.
        """
        try:
            items = []
            for x in sorted(self.cur.iterdir()):
                if x.name.startswith('.'):
                    continue
                if x.is_dir():
                    items.append(f"{self._DIR_PREFIX}{x.name}")
                elif x.suffix.lower() in self.ext:
                    items.append(f"{self._FILE_PREFIX}{x.name}")
            return items if items else ['(empty)']
        except OSError:
            return ['(error)']

    def _refresh(self):
        """Update the path display and the listing after ``cur`` changed."""
        self.html.value = f"<code>{self.cur}</code>"
        self.dd.options = self._list()

    def _up(self):
        """Move to the parent directory unless already at the root."""
        if self.cur.parent != self.cur:
            self.cur = self.cur.parent
            self._refresh()

    def _enter(self):
        """Descend into the highlighted entry when it is a directory."""
        v = self.dd.value
        if v and v.startswith(self._DIR_PREFIX):
            # Strip only the leading icon so names containing it stay intact.
            p = self.cur / v[len(self._DIR_PREFIX):]
            if p.is_dir():
                self.cur = p
                self._refresh()

    def _select(self):
        """Record the highlighted entry as the selection when it is a file."""
        v = self.dd.value
        if v and v.startswith(self._FILE_PREFIX):
            self.sel = self.cur / v[len(self._FILE_PREFIX):]
            self.selhtml.value = f"<b>Selected:</b> <code>{self.sel}</code>"

    def file(self):
        """Return the selected file path, or None when nothing was selected."""
        return self.sel


print("Browser widgets defined.")

Browser widgets defined.


In [6]:
# CELL 6: Date parsing utilities

def parse_date_file(filepath):
    """
    Parse dates from a text/csv file.

    One date per line is expected. Text after a '#' is treated as a comment,
    blank lines are skipped, and so are lines starting (case-insensitively)
    with one of the header/separator prefixes below. Lines that cannot be
    parsed by ``pd.to_datetime`` are ignored.

    Every parsed value is reduced to midnight of its calendar day (any
    time-of-day or timezone is dropped) so the results compare equal to the
    dates derived from PACE file names.

    Args:
        filepath: Path of the file to read (decoded as UTF-8, a leading BOM
            is tolerated and undecodable bytes are replaced).

    Returns:
        Sorted list of unique ``datetime`` objects.
    """
    skip_prefixes = ('#', '=', '-', 'lista', 'total', 'date', 'unique', 'list', 'start', 'end')
    dates = set()

    with open(filepath, 'r', encoding='utf-8-sig', errors='replace') as f:
        for line in f:
            line = line.split('#')[0].strip()
            if not line:
                continue
            if line.lower().startswith(skip_prefixes):
                continue
            try:
                dt = pd.to_datetime(line)
                if pd.isna(dt):
                    # Strings such as "NaT" parse without error but are not dates.
                    continue
                if dt.tzinfo is not None:
                    dt = dt.tz_localize(None)
                dates.add(dt.normalize().to_pydatetime())
            except Exception:
                # Best-effort parsing: unparseable lines are skipped.
                continue

    return sorted(dates)


def get_dates_from_mode(mode, single_date, start_date, end_date, file_browser):
    """
    Build the list of dates to process for the chosen selection mode.

    'Single Date' yields that day at midnight, 'Date Range' yields every day
    from ``start_date`` to ``end_date``, and any other mode reads the file
    chosen in ``file_browser``. An empty list is returned whenever the
    required input is missing.
    """
    if mode == 'Single Date':
        if not single_date:
            return []
        midnight = datetime.min.time()
        return [datetime.combine(single_date, midnight)]

    if mode == 'Date Range':
        if not (start_date and end_date):
            return []
        days = pd.date_range(start_date, end_date, freq='D')
        return days.to_pydatetime().tolist()

    # Date List File
    chosen = file_browser.file()
    if chosen and chosen.exists():
        return parse_date_file(chosen)
    return []


print("Date utilities defined.")

Date utilities defined.


In [7]:
# CELL 7: Main processing function

def _save_dataset(ds, outfile):
    """
    Write ``ds`` to ``outfile`` as NetCDF and close it.

    If the write fails, a partially written file is removed (best effort) so
    a later run does not skip the date because the output "exists"; the
    original error is re-raised.
    """
    # Clear encoding before saving
    for var in list(ds.data_vars) + list(ds.coords):
        if var in ds:
            ds[var].encoding.clear()
    try:
        ds.to_netcdf(outfile)
    except Exception:
        try:
            if outfile.exists():
                outfile.unlink()
        except OSError:
            pass  # cleanup is best-effort; the write error is what matters
        raise
    finally:
        ds.close()


def _process_one_output(kind, date, out_dir, pace_files, stats):
    """
    Create the ``kind`` ('daily' or 'composite') file for ``date`` in ``out_dir``.

    Prints one log line and increments exactly one of the ``{kind}_ok``,
    ``{kind}_skip``, ``{kind}_nodata`` or ``{kind}_fail`` counters in ``stats``.
    """
    date_str = date.strftime('%Y%m%d')
    outfile = out_dir / f"pace_{kind}_{date_str}.nc"

    if outfile.exists():
        print(f"  ⏭ SKIP {kind} {date_str} (exists)")
        stats[f'{kind}_skip'] += 1
        return

    try:
        ds = process_single_date(date, pace_files, mode=kind, window=TEMPORAL_WINDOW)
        if ds is None:
            print(f"  ⚠ {kind} {date_str} (no data)")
            stats[f'{kind}_nodata'] += 1
            return
        n_vars = len(ds.data_vars)
        _save_dataset(ds, outfile)
        print(f"  ✅ {kind} {date_str} ({n_vars} vars)")
        stats[f'{kind}_ok'] += 1
    except Exception as e:
        print(f"  ❌ {kind} {date_str}: {type(e).__name__}: {str(e)[:200]}")
        stats[f'{kind}_fail'] += 1


def run_processing(pace_dir, daily_dir, composite_dir, dates,
                   process_daily, process_composite,
                   progress_bar, log_output):
    """
    Main processing loop with progress tracking.

    All messages are printed inside ``log_output``, which is cleared first.
    For every date one daily and/or one composite file is produced in the
    corresponding output directory; existing files are skipped.

    Args:
        pace_dir: Directory holding the PACE input files.
        daily_dir: Output directory for daily files (used if ``process_daily``).
        composite_dir: Output directory for composite files (used if
            ``process_composite``).
        dates: Dates to process.
        process_daily: Whether to generate daily files.
        process_composite: Whether to generate composite files.
        progress_bar: Widget whose ``max``/``value`` are updated per task.
        log_output: Output widget used as the logging context.
    """
    with log_output:
        clear_output()

        # Validate inputs
        if not dates:
            print("❌ No dates to process!")
            return

        if not process_daily and not process_composite:
            print("❌ Select at least one output type (Daily or Composite)!")
            return

        pace_path = Path(pace_dir)
        if not pace_path.exists():
            print(f"❌ PACE data directory not found: {pace_path}")
            return

        # Create output directories
        outputs = []
        if process_daily:
            daily_path = Path(daily_dir)
            daily_path.mkdir(parents=True, exist_ok=True)
            print(f"📁 Daily output: {daily_path}")
            outputs.append(('daily', daily_path))

        if process_composite:
            composite_path = Path(composite_dir)
            composite_path.mkdir(parents=True, exist_ok=True)
            print(f"📁 Composite output: {composite_path}")
            outputs.append(('composite', composite_path))

        # Discover PACE files
        print(f"\n🔍 Scanning PACE files in: {pace_path}")
        pace_files = discover_pace_files(pace_path)

        if not pace_files:
            print("❌ No PACE files found! Expected pattern: pace_{product}_{YYYYMMDD}.nc")
            return

        print("Found products:")
        for product, files in pace_files.items():
            print(f"   • {product}: {len(files)} files")

        # Setup progress
        progress_bar.max = len(dates) * len(outputs)
        progress_bar.value = 0

        print(f"\n📅 Processing {len(dates)} dates...")
        print(f"   Range: {min(dates).strftime('%Y-%m-%d')} to {max(dates).strftime('%Y-%m-%d')}")
        print("=" * 60)

        stats = {
            f'{kind}_{outcome}': 0
            for kind, _ in outputs
            for outcome in ('ok', 'skip', 'nodata', 'fail')
        }

        for date in dates:
            for kind, out_dir in outputs:
                _process_one_output(kind, date, out_dir, pace_files, stats)
                progress_bar.value += 1

        # Summary
        print("\n" + "=" * 60)
        print("📊 PROCESSING SUMMARY")
        print("=" * 60)
        for kind, _ in outputs:
            label = f"{kind.capitalize()}:"
            print(f"{label:<10} ✅ {stats[f'{kind}_ok']} created, "
                  f"⏭ {stats[f'{kind}_skip']} skipped, "
                  f"⚠ {stats[f'{kind}_nodata']} no data, "
                  f"❌ {stats[f'{kind}_fail']} failed")
        print("\n✅ Done!")


print("Main processing function defined.")

Main processing function defined.


In [8]:
# CELL 8: Build and display GUI

# Find data directory: the first existing candidate wins; './data' is the
# fallback when none of them exists yet.
data_dir = Path('./data')
for p in ['./data', '../data', '../../data']:
    if Path(p).is_dir():
        data_dir = Path(p).resolve()
        break

# Initialize browsers. The two output browsers are constrained to data_dir.
fb_pace = FolderBrowser(start=str(data_dir), label='📂 PACE Input Data Directory')
fb_daily = FolderBrowser(start=str(data_dir), label='📁 Daily Output (strict, no filling)',
                         must_be_in=str(data_dir))
fb_composite = FolderBrowser(start=str(data_dir),
                             label=f'📁 Composite Output (±{TEMPORAL_WINDOW} days filling)',
                             must_be_in=str(data_dir))
fb_datefile = FileBrowser(start=str(data_dir))

# Date mode widgets
w_mode = widgets.Dropdown(
    description='Mode:',
    options=['Single Date', 'Date Range', 'Date List File'],
    value='Single Date',
    layout=widgets.Layout(width='250px'),
)

w_single = widgets.DatePicker(description='Date:', value=datetime(2025, 3, 28).date())
w_start = widgets.DatePicker(description='Start:', value=datetime(2025, 3, 1).date())
w_end = widgets.DatePicker(description='End:', value=datetime(2025, 3, 31).date())

# Container whose content is swapped to match the selected mode.
w_datebox = widgets.VBox([w_single])

def on_mode_change(change):
    """Show the date input that belongs to the newly selected mode."""
    selected = change['new']
    if selected == 'Single Date':
        shown = [w_single]
    elif selected == 'Date Range':
        shown = [widgets.HBox([w_start, w_end])]
    else:
        shown = [fb_datefile.w]
    w_datebox.children = shown

w_mode.observe(on_mode_change, 'value')

# Output type checkboxes
w_do_daily = widgets.Checkbox(value=True, description='Generate Daily (strict)', indent=False)
w_do_composite = widgets.Checkbox(value=True,
                                  description=f'Generate Composite (±{TEMPORAL_WINDOW} days)',
                                  indent=False)

# Progress and log
w_progress = widgets.IntProgress(min=0, max=1, description='Progress:',
                                  layout=widgets.Layout(width='100%'))
w_log = widgets.Output(layout=widgets.Layout(border='1px solid #ccc',
                                              max_height='400px', overflow='auto'))

# Process button
w_btn = widgets.Button(
    description='🚀 PROCESS PACE DATA',
    button_style='success',
    layout=widgets.Layout(width='100%', height='50px')
)

def on_process_click(b):
    """
    Validate the current GUI selections and start processing.

    Processing is refused, with the reasons shown in the log, when both
    outputs point to the same directory or when an enabled output directory
    does not satisfy its browser's ``is_valid()`` constraint.
    """
    # Get dates
    dates = get_dates_from_mode(
        w_mode.value, w_single.value, w_start.value, w_end.value, fb_datefile
    )

    problems = []

    # Validate output directories are different
    if w_do_daily.value and w_do_composite.value:
        if fb_daily.path() == fb_composite.path():
            problems.append("❌ Daily and Composite output directories must be different!")

    # Validate output directories are where the browsers require them to be
    if w_do_daily.value and not fb_daily.is_valid():
        problems.append("❌ Daily output directory must be inside data/!")
    if w_do_composite.value and not fb_composite.is_valid():
        problems.append("❌ Composite output directory must be inside data/!")

    if problems:
        with w_log:
            clear_output()
            for message in problems:
                print(message)
        return

    # Run processing
    run_processing(
        pace_dir=fb_pace.path(),
        daily_dir=fb_daily.path(),
        composite_dir=fb_composite.path(),
        dates=dates,
        process_daily=w_do_daily.value,
        process_composite=w_do_composite.value,
        progress_bar=w_progress,
        log_output=w_log
    )

w_btn.on_click(on_process_click)

# Scan button to preview available data
w_scan_btn = widgets.Button(
    description='🔍 Scan PACE Files',
    button_style='info',
    layout=widgets.Layout(width='200px')
)

def on_scan_click(b):
    """List, in the log, the PACE files found in the selected input directory."""
    with w_log:
        clear_output()
        pace_path = fb_pace.path()
        print(f"Scanning: {pace_path}")
        print("=" * 50)

        pace_files = discover_pace_files(pace_path)

        if not pace_files:
            print("❌ No PACE files found!")
            print("Expected pattern: pace_{product}_{YYYYMMDD}.nc")
            return

        # Per-product file count and date span
        all_dates = set()
        for product, files in sorted(pace_files.items()):
            dates = sorted(files.keys())
            all_dates.update(dates)
            date_range = f"{dates[0].strftime('%Y-%m-%d')} to {dates[-1].strftime('%Y-%m-%d')}"
            print(f"📦 {product:10s}: {len(files):4d} files ({date_range})")

        # Overall span across all products
        all_dates = sorted(all_dates)
        print("\n" + "=" * 50)
        print(f"📅 Total unique dates: {len(all_dates)}")
        print(f"   Range: {all_dates[0].strftime('%Y-%m-%d')} to {all_dates[-1].strftime('%Y-%m-%d')}")

w_scan_btn.on_click(on_scan_click)

# Build UI: header, input browser, output browsers, date selection, and the
# process button with its progress bar and log.
ui = widgets.VBox([
    widgets.HTML("""
        <div style="background: linear-gradient(135deg, #1a5276 0%, #2e86ab 100%); 
                    padding: 15px; border-radius: 8px; margin-bottom: 15px;">
            <h2 style="color: white; margin: 0;">🛰️ PACE Data Preprocessor</h2>
            <p style="color: #d5dbdb; margin: 5px 0 0 0;">Generate NetCDF files for Correlation Dashboard</p>
        </div>
    """),

    # Input directory
    widgets.HTML("<h3>1️⃣ Input Data</h3>"),
    fb_pace.w,
    w_scan_btn,

    widgets.HTML("<hr>"),

    # Output directories
    widgets.HTML("<h3>2️⃣ Output Directories</h3>"),
    widgets.HTML("<i style='color:#666'>Both must be inside <code>data/</code> for the Correlation Dashboard to find them.</i>"),
    widgets.HBox([w_do_daily, w_do_composite]),
    widgets.HTML("<b>Daily Output</b> (exact date, NaN if not available):"),
    fb_daily.w,
    widgets.HTML(f"<b>Composite Output</b> (searches ±{TEMPORAL_WINDOW} days for valid data):"),
    fb_composite.w,

    widgets.HTML("<hr>"),

    # Date selection
    widgets.HTML("<h3>3️⃣ Date Selection</h3>"),
    w_mode,
    w_datebox,

    widgets.HTML("<hr>"),

    # Process button
    widgets.HTML("<h3>4️⃣ Process</h3>"),
    w_btn,
    w_progress,

    widgets.HTML("<br><b>Log:</b>"),
    w_log
])

display(ui)

VBox(children=(HTML(value='\n        <div style="background: linear-gradient(135deg, #1a5276 0%, #2e86ab 100%)‚Ä¶

---
## Output Structure

### Daily Files (`pace_daily_YYYYMMDD.nc`)
- Values from the exact date only
- NaN if product not available on that date
- Best for: strict temporal matching with fishing data

### Composite Files (`pace_composite_YYYYMMDD.nc`)
- Searches ±4 days for valid data (priority: D > D±1 > D±2 > ... > past on tie)
- Higher data coverage
- Best for: maximizing data availability

### Variables in each file:
| Variable | Description | Units |
|----------|-------------|-------|
| `chlor_a` | Chlorophyll-a concentration | mg m⁻³ |
| `carbon_phyto` | Phytoplankton Carbon (C_phyto) | mg m⁻³ |
| `bbp_s` | Backscattering spectral slope (η) | dimensionless |
| `poc` | Particulate Organic Carbon | mg m⁻³ |
| `Kd_490` | Diffuse attenuation at 490nm | m⁻¹ |
| `chl_c_ratio` | Chl:C ratio (growth rate proxy) | mg Chl / mg C |
| `Rrs` | Remote sensing reflectance (all wavelengths) | sr⁻¹ |

### Usage with Correlation Dashboard
1. Place output folders inside the `data/` directory
2. Use folder names like `pace_daily` and `pace_composite`
3. The dashboard will auto-detect them when you click "Scan Folders"