In [None]:
# In terminal:
# pip install "zarr<3" --force-reinstall

import os
import zipfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from glob import glob

import cdsapi
import dask
import numpy as np
import pandas as pd
import xarray as xr
import zarr
from dask import delayed
from tqdm import tqdm

print(zarr.__version__) # Must be version 2.18.7 for copernicusmarine to work
from copernicusmarine import subset

## Download Ocean Color Bio-Geo-Chemical Data

From: https://data.marine.copernicus.eu/product/OCEANCOLOUR_GLO_BGC_L3_MY_009_103/description  

Variables: 
* CHL - mass concentration of chlorophyll-a in seawater (mg/m3)
* Phytoplankton Function types and sizes:  
  * DIATO - mass concentration of diatoms expressed as chlorophyll in seawater (mg/m3)
  * DINO - mass concentration of dinophytes expressed as chlorophyll in seawater (mg/m3)
  * GREEN - mass concentration of green algae expressed as chlorophyll in seawater (mg/m3)
  * HAPTO - mass concentration of haptophytes expressed as chlorophyll in seawater (mg/m3)
  * MICRO - mass concentration of microphytoplankton expressed as chlorophyll in seawater (mg/m3)
  * NANO - mass concentration of nanophytoplankton expressed as chlorophyll in seawater (mg/m3)
  * PICO - mass concentration of picophytoplankton expressed as chlorophyll in seawater (mg/m3)
  * PROCHLO - mass concentration of prochlorophytes expressed as chlorophyll in seawater (mg/m3)
  * PROKAR - mass concentration of prokaryotes expressed as chlorophyll in seawater (mg/m3)
* SPM - mass concentration of suspended matter in sea water (g/m3)
* ZSD - secchi depth of sea water (m)
* KD490 - volume attenuation coefficient of downwelling radiative flux in sea water (m-1)
* BBP - volume backwards scattering coefficient of radiative flux in seawater due to particles (m-1)
* CDM - volume absorption coefficient of radiative flux in sea water due to dissolved organic matter and non algal particles (m-1)
* Remote sensing reflectance (RRS):
  * RRS412 - surface ratio of upwelling radiance emerging from seawater to downwelling radiative flux in air (sr-1)
  * RRS443 - surface ratio of upwelling radiance emerging from seawater to downwelling radiative flux in air (sr-1)
  * RRS490 - surface ratio of upwelling radiance emerging from seawater to downwelling radiative flux in air (sr-1)
  * RRS555 - surface ratio of upwelling radiance emerging from seawater to downwelling radiative flux in air (sr-1)
  * RRS670 - surface ratio of upwelling radiance emerging from seawater to downwelling radiative flux in air (sr-1)

In [None]:
# Fetch the plankton variables (CHL plus the phytoplankton group / size-class fields)
ds = subset(
    dataset_id="cmems_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D",
    variables=[
        "CHL",
        "DIATO", "DINO", "GREEN", "HAPTO",
        "MICRO", "NANO", "PICO",
        "PROCHLO", "PROKAR",
    ],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=16,
    maximum_latitude=44,
    minimum_longitude=-140,
    maximum_longitude=-100,
    # Output location and format
    output_directory="../data/",
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

In [None]:
# Fetch the optics variables (BBP and CDM) for the same region and period
ds = subset(
    dataset_id="cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D",
    variables=["BBP", "CDM"],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=16,
    maximum_latitude=44,
    minimum_longitude=-140,
    maximum_longitude=-100,
    # Output location and format
    output_directory="../data/",
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

In [None]:
# Fetch the five remote-sensing reflectance bands
ds = subset(
    dataset_id="cmems_obs-oc_glo_bgc-reflectance_my_l3-multi-4km_P1D",
    variables=[
        "RRS412",
        "RRS443",
        "RRS490",
        "RRS555",
        "RRS670",
    ],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=16,
    maximum_latitude=44,
    minimum_longitude=-140,
    maximum_longitude=-100,
    # Output location and format
    output_directory="../data/",
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

In [None]:
# Download transparency data (SPM, ZSD, KD490).
# These variables are requested from the transparency ("transp") dataset; the
# previous dataset_id was the reflectance one, copied from the cell above. The
# KD490 file loaded later in this notebook is named
# "cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D_KD490_...".
ds = subset(
    dataset_id="cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D",
    variables=["SPM", "ZSD", "KD490"],
    minimum_longitude=-140,
    maximum_longitude=-100,
    minimum_latitude=16,
    maximum_latitude=44,
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    output_directory="../data/",
    file_format="netcdf",  # default, can be omitted
    netcdf_compression_level=5,  # optional compression, from 0 to 9
)

## Download SST Data

Downloaded from GlobColour data search https://hermes.acri.fr/index.php?class=archive

In [None]:
# Fetch both SST variables into ./sst_subset
ds = subset(
    dataset_id="cmems_obs-sst_glo_phy_my_l3s_P1D-m",
    variables=[
        "sea_surface_temperature",
        "adjusted_sea_surface_temperature",
    ],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=22.02,
    maximum_latitude=41.02,
    minimum_longitude=-127.02,
    maximum_longitude=-109.02,
    # Output location and format
    output_directory="./sst_subset",
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

## Download PAR Data

Downloaded from GlobColour data search

In [None]:
# Load individual .nc files and process to match copernicus_marine

def preprocess_par_dataset(file_path):
    """
    Load one GlobColour PAR file and reshape it to match the copernicusmarine files.

    The date is parsed from the file name (second underscore-separated token,
    first 8 characters, read as yyyymmdd) and attached as a length-1 'time'
    dimension. Only 'PAR_mean' is kept; it is renamed to 'PAR', 'lat'/'lon'
    become 'latitude'/'longitude', and both coordinates are sorted ascending.
    Global attributes are reduced to the names listed in `keep_attrs`.

    Parameters:
        file_path (str): Path to a single .nc file.

    Returns:
        xr.Dataset: In-memory (NumPy-backed) dataset holding the 'PAR' variable.

    Raises:
        ValueError: If no date can be parsed from the file name.
    """
    keep_attrs = {
        'parameter_code', 'parameter', 'publication', 'sensor', 'grid_type',
        'spatial_resolution', 'nb_equ_bins', 'lat_step', 'lon_step',
        'max_north_grid', 'max_south_grid', 'max_west_grid', 'max_east_grid',
        'northernmost_latitude', 'southernmost_latitude',
        'westernmost_longitude', 'easternmost_longitude',
        'software_name', 'software_version', 'institution', 'references',
        'contact', 'copyright'
    }

    # Parse the date first so a badly named file fails before any file I/O.
    # Assumes the file name includes yyyymmdd like "L3m_19970909_..."
    try:
        basename = os.path.basename(file_path)
        yyyymmdd = basename.split('_')[1][:8]
        time = pd.to_datetime(yyyymmdd, format="%Y%m%d")
    except Exception as e:
        raise ValueError(f"Could not extract time from filename: {file_path}") from e

    # Open without dask chunks and pull 'PAR_mean' fully into memory, then let
    # the context manager close the file. Without this every processed file
    # keeps an open handle, which adds up quickly over a multi-year daily archive.
    with xr.open_dataset(file_path, chunks=None) as src:
        ds = src[['PAR_mean']].load()

    # Add 'time' as a new leading dimension
    ds = ds.expand_dims(time=[time])

    # Retain only the selected global attributes
    ds.attrs = {k: v for k, v in ds.attrs.items() if k in keep_attrs}

    # Format to match copernicusmarine
    ds = ds.rename({'lat': 'latitude', 'lon': 'longitude', 'PAR_mean': 'PAR'})

    # Sort latitude and longitude to match chl_ds order
    ds = ds.sortby('latitude')
    ds = ds.sortby('longitude')

    return ds

# NOTE(review): absolute, user-specific path — consider a configurable data directory.
data_dir = "/Users/deliacarpenter/globcolour_par"

# Every *.nc entry in the folder, as full paths in sorted order
file_paths = sorted(
    os.path.join(data_dir, name)
    for name in os.listdir(data_dir)
    if name.endswith('.nc')
)

# One lazy Dask task per file, all evaluated in a single compute() call
processed_datasets = list(map(delayed(preprocess_par_dataset), file_paths))
datasets = dask.compute(*processed_datasets)

# Stack the per-file datasets along 'time'
combined = xr.concat(datasets, dim='time')

In [None]:
# Write the merged PAR dataset to a zlib-compressed (level 5) NetCDF file

with tqdm(desc="Saving NetCDF", total=1) as pbar:
    combined.to_netcdf(
        "PAR_globcolour_4km_daily_merged_L3m.nc",
        engine="netcdf4",
        encoding={"PAR": dict(zlib=True, complevel=5)},
        compute=True,
    )
    # Single-step bar: tick once the write call has returned
    pbar.update(1)

In [None]:
# Open the merged file again to inspect what was written
par_ds = xr.open_dataset("PAR_globcolour_4km_daily_merged_L3m.nc")

## Download BBP Data

In [None]:
# Fetch BBP only, on the smaller study box, into ./bbp_subset
ds = subset(
    dataset_id="cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D",
    variables=["BBP"],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=22.02,
    maximum_latitude=41.02,
    minimum_longitude=-127.02,
    maximum_longitude=-109.02,
    # Output location and format
    output_directory="./bbp_subset",
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

In [None]:
# Derive phytoplankton carbon from bbp_443 (Behrenfeld et al. 2005)

import xarray as xr

# Load the downloaded BBP subset
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
ds = xr.open_dataset(
    '/Users/dcarp/Desktop/classes/2024-2025 Year/SIO 236 Satellite Remote Sensing/Final Project/bbp_subset/cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_BBP_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)

# C = 13000 * (BBP - 0.00035), stored next to BBP as the variable "C"
ds = ds.assign(C=(ds["BBP"] - 0.00035) * 13000)

ds

In [None]:
# Write BBP and C to NetCDF, compressing every variable at level 5

from tqdm import tqdm

output_filename = (
    "cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_"
    "BBP_and_phyto_C_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc"
)

# Identical zlib settings for each data variable (a separate dict per variable)
encoding = {name: dict(zlib=True, complevel=5) for name in ds.data_vars}

with tqdm(desc="Saving NetCDF", total=1) as pbar:
    ds.to_netcdf(
        output_filename,
        engine="netcdf4",
        encoding=encoding,
        compute=True,
    )
    # Single-step bar: tick once the write call has returned
    pbar.update(1)

## Get CHL:C ds

In [None]:
import numpy as np

# Open the chlorophyll subset and the BBP + phytoplankton-carbon file
chl_ds = xr.open_dataset(
    'cmems_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D_CHL_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
C_ds = xr.open_dataset(
    'cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_BBP_and_phyto_C_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)

# The two fields that enter the ratio
chl_var = chl_ds['CHL']
c_var = C_ds['C']  # adjust if the carbon variable is named differently

# Both arrays must have the same shape before dividing
assert chl_var.shape == c_var.shape, "CHL and C datasets do not match in shape"

# CHL:C wherever C is strictly positive; NaN everywhere else (C <= 0 or missing)
chlc_data = xr.where(c_var > 0, chl_var / c_var, np.nan)
chlc_ds = chlc_data.to_dataset(name='CHL:C')

output_filename = (
    "cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_"
    "CHLC_ratio_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc"
)

# Identical zlib settings for each data variable
encoding = {name: dict(zlib=True, complevel=5) for name in chlc_ds.data_vars}

print("Saving NetCDF...")
chlc_ds.to_netcdf(output_filename, engine="netcdf4", encoding=encoding, compute=True)

## Get Wind Speed Data

In [None]:
# Folder that receives the monthly downloads
output_dir = "wind_subset"
os.makedirs(output_dir, exist_ok=True)

client = cdsapi.Client()

dataset = "derived-era5-single-levels-daily-statistics"

# Request fields shared by every month; download_month() adds "year" and "month"
base_request = dict(
    product_type="reanalysis",
    variable=["10m_u_component_of_wind", "10m_v_component_of_wind"],
    day=[f"{d:02d}" for d in range(1, 32)],  # "01" .. "31"
    daily_statistic="daily_mean",
    time_zone="utc+00:00",
    area=[41.02, -127.02, 22.02, -109.02],  # North, West, South, East
    format="netcdf",
)

def download_month(year, month):
    """
    Download one month of the configured wind request into `output_dir`.

    Copies the module-level `base_request`, adds the year and the zero-padded
    month, and saves the result as "temp_<year>_<month>.nc". Errors raised by
    the retrieve/download call are caught and printed, so this function does
    not propagate download failures to the caller.

    Parameters:
        year (int): Year to request.
        month (int): Month to request (formatted as two digits).

    Returns:
        None
    """
    # Shallow copy is enough: only new top-level keys are added
    request = base_request.copy()
    request["year"] = str(year)
    request["month"] = f"{month:02d}"

    temp_filename = os.path.join(output_dir, f"temp_{year}_{month:02d}.nc")

    print(f"Requesting data for {year}-{month:02d}...")

    try:
        client.retrieve(dataset, request).download(temp_filename)
    except Exception as e:
        print(f"Error downloading {year}-{month}: {e}")

def main():
    """Queue one download per (year, month) for 1997 through 2020 and wait for all of them."""
    # At most 8 concurrent requests, to be nice to the server
    max_workers = 8

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submitted year by year, January to December within each year
        futures = [
            executor.submit(download_month, year, month)
            for year in range(1997, 2021)
            for month in range(1, 13)
        ]

        for future in as_completed(futures):
            # result() re-raises anything that escaped the worker
            try:
                future.result()
            except Exception as e:
                print(f"Error in a worker thread: {e}")

# Entry point: start the downloads when this code runs as the main module
if __name__ == "__main__":
    main()

In [None]:
# Extract each downloaded archive into its own sub-folder

# NOTE(review): absolute, user-specific paths — consider a configurable base directory.
zip_dir = "/Users/deliacarpenter/Desktop/Research/Primary Productivity CalCOFI/wind_subset"
extract_base_dir = "/Users/deliacarpenter/Desktop/Research/Primary Productivity CalCOFI/wind_subset/unzipped"

# The archives to extract carry a .nc extension
zip_files = glob(os.path.join(zip_dir, "*.nc"))

for zfile in zip_files:
    # Folder name = tokens 1 and 2 of the underscore-split file name.
    # The extension is not stripped: "temp_1997_01.nc" -> "1997_01.nc".
    name_parts = os.path.basename(zfile).split('_')
    extract_dir = os.path.join(extract_base_dir, '_'.join(name_parts[1:3]))
    os.makedirs(extract_dir, exist_ok=True)
    with zipfile.ZipFile(zfile) as archive:
        archive.extractall(extract_dir)

unzipped_dir = "/Users/deliacarpenter/Desktop/Research/Primary Productivity CalCOFI/wind_subset/unzipped"

# Recursively find all u and v component files inside subfolders
u_files = sorted(glob(os.path.join(unzipped_dir, "**", "*u_component*.nc"), recursive=True))
v_files = sorted(glob(os.path.join(unzipped_dir, "**", "*v_component*.nc"), recursive=True))

print(f"Found {len(u_files)} u-component files and {len(v_files)} v-component files.")

# The files are paired by position below. zip() stops at the shorter list, so a
# missing component file would silently drop or mis-pair months; fail loudly instead.
if not u_files and not v_files:
    raise FileNotFoundError(f"No wind component files found under {unzipped_dir}")
if len(u_files) != len(v_files):
    raise ValueError(
        f"Mismatched file counts: {len(u_files)} u-component vs "
        f"{len(v_files)} v-component files"
    )

datasets = []
for u_file, v_file in zip(u_files, v_files):
    ds_u = xr.open_dataset(u_file)
    ds_v = xr.open_dataset(v_file)

    # Merge u and v components into one dataset
    ds = xr.merge([ds_u, ds_v])
    datasets.append(ds)

# Concatenate along time dimension "valid_time"
combined_ds = xr.concat(datasets, dim="valid_time")
combined_ds = combined_ds.sortby("valid_time")
combined_ds = combined_ds.sortby('latitude') # ensure latitude is increasing for correct wind stress curl calculations

combined_ds

In [None]:
# Add wind stress curl to the wind xarray

def compute_wind_stress_curl(ds, rho_air=1.225, drag_coefficient=1.3e-3):
    """
    Compute wind stress curl from u10, v10 wind components.

    Wind stress uses a bulk formula with a constant drag coefficient,
    tau = rho_air * Cd * |U| * (u, v), and the curl is
    d(tau_y)/dx - d(tau_x)/dy on the latitude/longitude grid, with grid
    spacing converted from degrees to metres (111000 m per degree of
    latitude, scaled by cos(latitude) for longitude).

    Parameters:
    ds: xarray.Dataset with 'u10' and 'v10' variables laid out as
        (valid_time, latitude, longitude); needs at least two latitudes
        and two longitudes
    rho_air: air density in kg/m3 (default 1.225)
    drag_coefficient: constant drag coefficient Cd (default 1.3e-3,
        a typical value for moderate wind speeds)

    Returns:
    ds with new variable 'wind_stress_curl' (N/m3)
    """
    lat = ds.latitude.values
    lon = ds.longitude.values

    # Conversion factors (meters per degree)
    meters_per_deg_lat = 111000
    # Column vector (nlat, 1): the zonal scale must follow the latitude of
    # each row. Using a single row of cosines here would apply the first
    # latitude's spacing to the whole grid.
    meters_per_deg_lon = meters_per_deg_lat * np.cos(np.deg2rad(lat))[:, np.newaxis]

    # Grid spacing in metres, broadcast to (nlat, nlon)
    dy = meters_per_deg_lat * np.gradient(lat)
    dy2d = np.repeat(dy[:, np.newaxis], len(lon), axis=1)
    dx2d = np.gradient(lon)[np.newaxis, :] * meters_per_deg_lon

    wind_stress_curl = np.empty_like(ds.u10.values)

    # One time step at a time keeps the temporaries at a single 2-D slice
    for t in range(ds.valid_time.size):
        u = ds.u10.isel(valid_time=t).values
        v = ds.v10.isel(valid_time=t).values

        # Compute wind speed magnitude
        wind_speed = np.sqrt(u**2 + v**2)

        # Compute wind stress components (N/m²)
        tau_x = rho_air * drag_coefficient * u * wind_speed
        tau_y = rho_air * drag_coefficient * v * wind_speed

        # Index-space differences divided by the metric spacing
        dtau_y_dx = np.gradient(tau_y, axis=1) / dx2d
        dtau_x_dy = np.gradient(tau_x, axis=0) / dy2d

        # Curl of wind stress
        wind_stress_curl[t, :, :] = dtau_y_dx - dtau_x_dy

    ds = ds.assign(wind_stress_curl=(("valid_time", "latitude", "longitude"), wind_stress_curl))

    return ds

# Rebind combined_ds to the dataset that also carries 'wind_stress_curl'
combined_ds = compute_wind_stress_curl(combined_ds)

In [None]:
# Save wind ds as a .nc file, compressing every variable at level 5
encoding = {var: {"zlib": True, "complevel": 5} for var in combined_ds.data_vars}
output_filename = (
    'temporary_wind_ds.nc'
)
# The message previously said "PHYT", but this cell writes the wind dataset
print("Saving wind NetCDF...")
combined_ds.to_netcdf(
    output_filename,
    encoding=encoding,
    engine="netcdf4",
    compute=True,
)

## Download Nitrate Data

In [None]:
# Fetch nitrate (no3) at a single depth level; no output_directory is passed
ds = subset(
    dataset_id="cmems_mod_glo_bgc_my_0.25deg_P1D-m",
    variables=["no3"],
    # Time window
    start_datetime="1997-09-04T00:00:00",
    end_datetime="2020-01-25T00:00:00",
    # Bounding box (degrees)
    minimum_latitude=22.02,
    maximum_latitude=41.02,
    minimum_longitude=-127.02,
    maximum_longitude=-109.02,
    # Same value for both bounds selects one depth
    minimum_depth=0.5057600140571594,
    maximum_depth=0.5057600140571594,
    # Output format
    file_format="netcdf",  # the default; listed for clarity
    netcdf_compression_level=5,  # valid range is 0-9
)

## Get El Nino (ONI) Data

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import calendar

# Map each 3-month period code to its central month.
# The codes are listed in calendar order, so the 1-based position is the
# central month: DJF -> 1 (January), JFM -> 2 (February), ..., NDJ -> 12 (December).
period_to_month = {
    code: month_number
    for month_number, code in enumerate(
        ['DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ',
         'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ'],
        start=1,
    )
}

# Your complete ONI data
oni_data = {
    1950: {'DJF': -1.5, 'JFM': -1.3, 'FMA': -1.2, 'MAM': -1.2, 'AMJ': -1.1, 'MJJ': -0.9, 'JJA': -0.5, 'JAS': -0.4, 'ASO': -0.4, 'SON': -0.4, 'OND': -0.6, 'NDJ': -0.8},
    1951: {'DJF': -0.8, 'JFM': -0.5, 'FMA': -0.2, 'MAM': 0.2, 'AMJ': 0.4, 'MJJ': 0.6, 'JJA': 0.7, 'JAS': 0.9, 'ASO': 1.0, 'SON': 1.2, 'OND': 1.0, 'NDJ': 0.8},
    1952: {'DJF': 0.5, 'JFM': 0.4, 'FMA': 0.3, 'MAM': 0.3, 'AMJ': 0.2, 'MJJ': 0.0, 'JJA': -0.1, 'JAS': 0.0, 'ASO': 0.2, 'SON': 0.1, 'OND': 0.0, 'NDJ': 0.1},
    1953: {'DJF': 0.4, 'JFM': 0.6, 'FMA': 0.6, 'MAM': 0.7, 'AMJ': 0.8, 'MJJ': 0.8, 'JJA': 0.7, 'JAS': 0.7, 'ASO': 0.8, 'SON': 0.8, 'OND': 0.8, 'NDJ': 0.8},
    1954: {'DJF': 0.8, 'JFM': 0.5, 'FMA': 0.0, 'MAM': -0.4, 'AMJ': -0.5, 'MJJ': -0.5, 'JJA': -0.6, 'JAS': -0.8, 'ASO': -0.9, 'SON': -0.8, 'OND': -0.7, 'NDJ': -0.7},
    1955: {'DJF': -0.7, 'JFM': -0.6, 'FMA': -0.7, 'MAM': -0.8, 'AMJ': -0.8, 'MJJ': -0.7, 'JJA': -0.7, 'JAS': -0.7, 'ASO': -1.1, 'SON': -1.4, 'OND': -1.7, 'NDJ': -1.5},
    1956: {'DJF': -1.1, 'JFM': -0.8, 'FMA': -0.6, 'MAM': -0.5, 'AMJ': -0.5, 'MJJ': -0.5, 'JJA': -0.6, 'JAS': -0.6, 'ASO': -0.5, 'SON': -0.4, 'OND': -0.4, 'NDJ': -0.4},
    1957: {'DJF': -0.2, 'JFM': 0.1, 'FMA': 0.4, 'MAM': 0.7, 'AMJ': 0.9, 'MJJ': 1.1, 'JJA': 1.3, 'JAS': 1.3, 'ASO': 1.3, 'SON': 1.4, 'OND': 1.5, 'NDJ': 1.7},
    1958: {'DJF': 1.8, 'JFM': 1.7, 'FMA': 1.3, 'MAM': 0.9, 'AMJ': 0.7, 'MJJ': 0.6, 'JJA': 0.6, 'JAS': 0.4, 'ASO': 0.4, 'SON': 0.4, 'OND': 0.5, 'NDJ': 0.6},
    1959: {'DJF': 0.6, 'JFM': 0.6, 'FMA': 0.5, 'MAM': 0.3, 'AMJ': 0.2, 'MJJ': -0.1, 'JJA': -0.2, 'JAS': -0.3, 'ASO': -0.1, 'SON': 0.0, 'OND': 0.0, 'NDJ': 0.0},
    1960: {'DJF': -0.1, 'JFM': -0.1, 'FMA': -0.1, 'MAM': 0.0, 'AMJ': 0.0, 'MJJ': 0.0, 'JJA': 0.1, 'JAS': 0.2, 'ASO': 0.3, 'SON': 0.2, 'OND': 0.1, 'NDJ': 0.1},
    1961: {'DJF': 0.0, 'JFM': 0.0, 'FMA': 0.0, 'MAM': 0.1, 'AMJ': 0.2, 'MJJ': 0.3, 'JJA': 0.1, 'JAS': -0.1, 'ASO': -0.3, 'SON': -0.3, 'OND': -0.2, 'NDJ': -0.2},
    1962: {'DJF': -0.2, 'JFM': -0.2, 'FMA': -0.2, 'MAM': -0.3, 'AMJ': -0.3, 'MJJ': -0.2, 'JJA': 0.0, 'JAS': -0.1, 'ASO': -0.1, 'SON': -0.2, 'OND': -0.3, 'NDJ': -0.4},
    1963: {'DJF': -0.4, 'JFM': -0.2, 'FMA': 0.2, 'MAM': 0.3, 'AMJ': 0.3, 'MJJ': 0.5, 'JJA': 0.9, 'JAS': 1.1, 'ASO': 1.2, 'SON': 1.3, 'OND': 1.4, 'NDJ': 1.3},
    1964: {'DJF': 1.1, 'JFM': 0.6, 'FMA': 0.1, 'MAM': -0.3, 'AMJ': -0.6, 'MJJ': -0.6, 'JJA': -0.6, 'JAS': -0.7, 'ASO': -0.8, 'SON': -0.8, 'OND': -0.8, 'NDJ': -0.8},
    1965: {'DJF': -0.6, 'JFM': -0.3, 'FMA': -0.1, 'MAM': 0.2, 'AMJ': 0.5, 'MJJ': 0.8, 'JJA': 1.2, 'JAS': 1.5, 'ASO': 1.9, 'SON': 2.0, 'OND': 2.0, 'NDJ': 1.7},
    1966: {'DJF': 1.4, 'JFM': 1.2, 'FMA': 1.0, 'MAM': 0.7, 'AMJ': 0.4, 'MJJ': 0.2, 'JJA': 0.2, 'JAS': 0.1, 'ASO': -0.1, 'SON': -0.1, 'OND': -0.2, 'NDJ': -0.3},
    1967: {'DJF': -0.4, 'JFM': -0.5, 'FMA': -0.5, 'MAM': -0.4, 'AMJ': -0.2, 'MJJ': 0.0, 'JJA': 0.0, 'JAS': -0.2, 'ASO': -0.3, 'SON': -0.4, 'OND': -0.3, 'NDJ': -0.4},
    1968: {'DJF': -0.6, 'JFM': -0.7, 'FMA': -0.6, 'MAM': -0.4, 'AMJ': 0.0, 'MJJ': 0.3, 'JJA': 0.6, 'JAS': 0.5, 'ASO': 0.4, 'SON': 0.5, 'OND': 0.7, 'NDJ': 1.0},
    1969: {'DJF': 1.1, 'JFM': 1.1, 'FMA': 0.9, 'MAM': 0.8, 'AMJ': 0.6, 'MJJ': 0.4, 'JJA': 0.4, 'JAS': 0.5, 'ASO': 0.8, 'SON': 0.9, 'OND': 0.8, 'NDJ': 0.6},
    1970: {'DJF': 0.5, 'JFM': 0.3, 'FMA': 0.3, 'MAM': 0.2, 'AMJ': 0.0, 'MJJ': -0.3, 'JJA': -0.6, 'JAS': -0.8, 'ASO': -0.8, 'SON': -0.7, 'OND': -0.9, 'NDJ': -1.1},
    1971: {'DJF': -1.4, 'JFM': -1.4, 'FMA': -1.1, 'MAM': -0.8, 'AMJ': -0.7, 'MJJ': -0.7, 'JJA': -0.8, 'JAS': -0.8, 'ASO': -0.8, 'SON': -0.9, 'OND': -1.0, 'NDJ': -0.9},
    1972: {'DJF': -0.7, 'JFM': -0.4, 'FMA': 0.1, 'MAM': 0.4, 'AMJ': 0.7, 'MJJ': 0.9, 'JJA': 1.1, 'JAS': 1.4, 'ASO': 1.6, 'SON': 1.8, 'OND': 2.1, 'NDJ': 2.1},
    1973: {'DJF': 1.8, 'JFM': 1.2, 'FMA': 0.5, 'MAM': -0.1, 'AMJ': -0.5, 'MJJ': -0.9, 'JJA': -1.1, 'JAS': -1.3, 'ASO': -1.5, 'SON': -1.7, 'OND': -1.9, 'NDJ': -2.0},
    1974: {'DJF': -1.8, 'JFM': -1.6, 'FMA': -1.2, 'MAM': -1.0, 'AMJ': -0.9, 'MJJ': -0.8, 'JJA': -0.5, 'JAS': -0.4, 'ASO': -0.4, 'SON': -0.6, 'OND': -0.8, 'NDJ': -0.6},
    1975: {'DJF': -0.5, 'JFM': -0.6, 'FMA': -0.7, 'MAM': -0.7, 'AMJ': -0.8, 'MJJ': -1.0, 'JJA': -1.1, 'JAS': -1.2, 'ASO': -1.4, 'SON': -1.4, 'OND': -1.6, 'NDJ': -1.7},
    1976: {'DJF': -1.6, 'JFM': -1.2, 'FMA': -0.7, 'MAM': -0.5, 'AMJ': -0.3, 'MJJ': 0.0, 'JJA': 0.2, 'JAS': 0.4, 'ASO': 0.6, 'SON': 0.8, 'OND': 0.9, 'NDJ': 0.8},
    1977: {'DJF': 0.7, 'JFM': 0.6, 'FMA': 0.3, 'MAM': 0.2, 'AMJ': 0.2, 'MJJ': 0.3, 'JJA': 0.4, 'JAS': 0.4, 'ASO': 0.6, 'SON': 0.7, 'OND': 0.8, 'NDJ': 0.8},
    1978: {'DJF': 0.7, 'JFM': 0.4, 'FMA': 0.1, 'MAM': -0.2, 'AMJ': -0.3, 'MJJ': -0.3, 'JJA': -0.4, 'JAS': -0.4, 'ASO': -0.4, 'SON': -0.3, 'OND': -0.1, 'NDJ': 0.0},
    1979: {'DJF': 0.0, 'JFM': 0.1, 'FMA': 0.2, 'MAM': 0.3, 'AMJ': 0.2, 'MJJ': 0.0, 'JJA': 0.0, 'JAS': 0.2, 'ASO': 0.3, 'SON': 0.5, 'OND': 0.5, 'NDJ': 0.6},
    1980: {'DJF': 0.6, 'JFM': 0.5, 'FMA': 0.3, 'MAM': 0.4, 'AMJ': 0.5, 'MJJ': 0.5, 'JJA': 0.3, 'JAS': 0.0, 'ASO': -0.1, 'SON': 0.0, 'OND': 0.1, 'NDJ': 0.0},
    1981: {'DJF': -0.3, 'JFM': -0.5, 'FMA': -0.5, 'MAM': -0.4, 'AMJ': -0.3, 'MJJ': -0.3, 'JJA': -0.3, 'JAS': -0.2, 'ASO': -0.2, 'SON': -0.1, 'OND': -0.2, 'NDJ': -0.1},
    1982: {'DJF': 0.0, 'JFM': 0.1, 'FMA': 0.2, 'MAM': 0.5, 'AMJ': 0.7, 'MJJ': 0.7, 'JJA': 0.8, 'JAS': 1.1, 'ASO': 1.6, 'SON': 2.0, 'OND': 2.2, 'NDJ': 2.2},
    1983: {'DJF': 2.2, 'JFM': 1.9, 'FMA': 1.5, 'MAM': 1.3, 'AMJ': 1.1, 'MJJ': 0.7, 'JJA': 0.3, 'JAS': -0.1, 'ASO': -0.5, 'SON': -0.8, 'OND': -1.0, 'NDJ': -0.9},
    1984: {'DJF': -0.6, 'JFM': -0.4, 'FMA': -0.3, 'MAM': -0.4, 'AMJ': -0.5, 'MJJ': -0.4, 'JJA': -0.3, 'JAS': -0.2, 'ASO': -0.2, 'SON': -0.6, 'OND': -0.9, 'NDJ': -1.1},
    1985: {'DJF': -1.0, 'JFM': -0.8, 'FMA': -0.8, 'MAM': -0.8, 'AMJ': -0.8, 'MJJ': -0.6, 'JJA': -0.5, 'JAS': -0.5, 'ASO': -0.4, 'SON': -0.3, 'OND': -0.3, 'NDJ': -0.4},
    1986: {'DJF': -0.5, 'JFM': -0.5, 'FMA': -0.3, 'MAM': -0.2, 'AMJ': -0.1, 'MJJ': 0.0, 'JJA': 0.2, 'JAS': 0.4, 'ASO': 0.7, 'SON': 0.9, 'OND': 1.1, 'NDJ': 1.2},
    1987: {'DJF': 1.2, 'JFM': 1.2, 'FMA': 1.1, 'MAM': 0.9, 'AMJ': 1.0, 'MJJ': 1.2, 'JJA': 1.5, 'JAS': 1.7, 'ASO': 1.6, 'SON': 1.5, 'OND': 1.3, 'NDJ': 1.1},
    1988: {'DJF': 0.8, 'JFM': 0.5, 'FMA': 0.1, 'MAM': -0.3, 'AMJ': -0.9, 'MJJ': -1.3, 'JJA': -1.3, 'JAS': -1.1, 'ASO': -1.2, 'SON': -1.5, 'OND': -1.8, 'NDJ': -1.8},
    1989: {'DJF': -1.7, 'JFM': -1.4, 'FMA': -1.1, 'MAM': -0.8, 'AMJ': -0.6, 'MJJ': -0.4, 'JJA': -0.3, 'JAS': -0.3, 'ASO': -0.2, 'SON': -0.2, 'OND': -0.2, 'NDJ': -0.1},
    1990: {'DJF': 0.1, 'JFM': 0.2, 'FMA': 0.3, 'MAM': 0.3, 'AMJ': 0.3, 'MJJ': 0.3, 'JJA': 0.3, 'JAS': 0.4, 'ASO': 0.4, 'SON': 0.3, 'OND': 0.4, 'NDJ': 0.4},
    1991: {'DJF': 0.4, 'JFM': 0.3, 'FMA': 0.2, 'MAM': 0.3, 'AMJ': 0.5, 'MJJ': 0.6, 'JJA': 0.7, 'JAS': 0.6, 'ASO': 0.6, 'SON': 0.8, 'OND': 1.2, 'NDJ': 1.5},
    1992: {'DJF': 1.7, 'JFM': 1.6, 'FMA': 1.5, 'MAM': 1.3, 'AMJ': 1.1, 'MJJ': 0.7, 'JJA': 0.4, 'JAS': 0.1, 'ASO': -0.1, 'SON': -0.2, 'OND': -0.3, 'NDJ': -0.1},
    1993: {'DJF': 0.1, 'JFM': 0.3, 'FMA': 0.5, 'MAM': 0.7, 'AMJ': 0.7, 'MJJ': 0.6, 'JJA': 0.3, 'JAS': 0.3, 'ASO': 0.2, 'SON': 0.1, 'OND': 0.0, 'NDJ': 0.1},
    1994: {'DJF': 0.1, 'JFM': 0.1, 'FMA': 0.2, 'MAM': 0.3, 'AMJ': 0.4, 'MJJ': 0.4, 'JJA': 0.4, 'JAS': 0.4, 'ASO': 0.6, 'SON': 0.7, 'OND': 1.0, 'NDJ': 1.1},
    1995: {'DJF': 1.0, 'JFM': 0.7, 'FMA': 0.5, 'MAM': 0.3, 'AMJ': 0.1, 'MJJ': 0.0, 'JJA': -0.2, 'JAS': -0.5, 'ASO': -0.8, 'SON': -1.0, 'OND': -1.0, 'NDJ': -1.0},
    1996: {'DJF': -0.9, 'JFM': -0.8, 'FMA': -0.6, 'MAM': -0.4, 'AMJ': -0.3, 'MJJ': -0.3, 'JJA': -0.3, 'JAS': -0.3, 'ASO': -0.4, 'SON': -0.4, 'OND': -0.4, 'NDJ': -0.5},
    1997: {'DJF': -0.5, 'JFM': -0.4, 'FMA': -0.1, 'MAM': 0.3, 'AMJ': 0.8, 'MJJ': 1.2, 'JJA': 1.6, 'JAS': 1.9, 'ASO': 2.1, 'SON': 2.3, 'OND': 2.4, 'NDJ': 2.4},
    1998: {'DJF': 2.2, 'JFM': 1.9, 'FMA': 1.4, 'MAM': 1.0, 'AMJ': 0.5, 'MJJ': -0.1, 'JJA': -0.8, 'JAS': -1.1, 'ASO': -1.3, 'SON': -1.4, 'OND': -1.5, 'NDJ': -1.6},
    1999: {'DJF': -1.5, 'JFM': -1.3, 'FMA': -1.1, 'MAM': -1.0, 'AMJ': -1.0, 'MJJ': -1.0, 'JJA': -1.1, 'JAS': -1.1, 'ASO': -1.2, 'SON': -1.3, 'OND': -1.5, 'NDJ': -1.7},
    2000: {'DJF': -1.7, 'JFM': -1.4, 'FMA': -1.1, 'MAM': -0.8, 'AMJ': -0.7, 'MJJ': -0.6, 'JJA': -0.6, 'JAS': -0.5, 'ASO': -0.5, 'SON': -0.6, 'OND': -0.7, 'NDJ': -0.7},
    2001: {'DJF': -0.7, 'JFM': -0.5, 'FMA': -0.4, 'MAM': -0.3, 'AMJ': -0.3, 'MJJ': -0.1, 'JJA': -0.1, 'JAS': -0.1, 'ASO': -0.2, 'SON': -0.3, 'OND': -0.3, 'NDJ': -0.3},
    2002: {'DJF': -0.1, 'JFM': 0.0, 'FMA': 0.1, 'MAM': 0.2, 'AMJ': 0.4, 'MJJ': 0.7, 'JJA': 0.8, 'JAS': 0.9, 'ASO': 1.0, 'SON': 1.2, 'OND': 1.3, 'NDJ': 1.1},
    2003: {'DJF': 0.9, 'JFM': 0.6, 'FMA': 0.4, 'MAM': 0.0, 'AMJ': -0.3, 'MJJ': -0.2, 'JJA': 0.1, 'JAS': 0.2, 'ASO': 0.3, 'SON': 0.3, 'OND': 0.4, 'NDJ': 0.4},
    2004: {'DJF': 0.4, 'JFM': 0.3, 'FMA': 0.2, 'MAM': 0.2, 'AMJ': 0.2, 'MJJ': 0.3, 'JJA': 0.5, 'JAS': 0.6, 'ASO': 0.7, 'SON': 0.7, 'OND': 0.7, 'NDJ': 0.7},
    2005: {'DJF': 0.6, 'JFM': 0.6, 'FMA': 0.4, 'MAM': 0.4, 'AMJ': 0.3, 'MJJ': 0.1, 'JJA': -0.1, 'JAS': -0.1, 'ASO': -0.1, 'SON': -0.3, 'OND': -0.6, 'NDJ': -0.8},
    2006: {'DJF': -0.9, 'JFM': -0.8, 'FMA': -0.6, 'MAM': -0.4, 'AMJ': -0.1, 'MJJ': 0.0, 'JJA': 0.1, 'JAS': 0.3, 'ASO': 0.5, 'SON': 0.8, 'OND': 0.9, 'NDJ': 0.9},
    2007: {'DJF': 0.7, 'JFM': 0.2, 'FMA': -0.1, 'MAM': -0.3, 'AMJ': -0.4, 'MJJ': -0.5, 'JJA': -0.6, 'JAS': -0.8, 'ASO': -1.1, 'SON': -1.3, 'OND': -1.5, 'NDJ': -1.6},
    2008: {'DJF': -1.6, 'JFM': -1.5, 'FMA': -1.3, 'MAM': -1.0, 'AMJ': -0.8, 'MJJ': -0.6, 'JJA': -0.4, 'JAS': -0.2, 'ASO': -0.2, 'SON': -0.4, 'OND': -0.6, 'NDJ': -0.7},
    2009: {'DJF': -0.8, 'JFM': -0.8, 'FMA': -0.6, 'MAM': -0.3, 'AMJ': 0.0, 'MJJ': 0.3, 'JJA': 0.5, 'JAS': 0.6, 'ASO': 0.7, 'SON': 1.0, 'OND': 1.4, 'NDJ': 1.6},
    2010: {'DJF': 1.5, 'JFM': 1.2, 'FMA': 0.8, 'MAM': 0.4, 'AMJ': -0.2, 'MJJ': -0.7, 'JJA': -1.0, 'JAS': -1.3, 'ASO': -1.6, 'SON': -1.6, 'OND': -1.6, 'NDJ': -1.6},
    2011: {'DJF': -1.4, 'JFM': -1.2, 'FMA': -0.9, 'MAM': -0.7, 'AMJ': -0.6, 'MJJ': -0.4, 'JJA': -0.5, 'JAS': -0.6, 'ASO': -0.8, 'SON': -1.0, 'OND': -1.1, 'NDJ': -1.0},
    2012: {'DJF': -0.9, 'JFM': -0.7, 'FMA': -0.6, 'MAM': -0.5, 'AMJ': -0.3, 'MJJ': 0.0, 'JJA': 0.2, 'JAS': 0.4, 'ASO': 0.4, 'SON': 0.3, 'OND': 0.1, 'NDJ': -0.2},
    2013: {'DJF': -0.4, 'JFM': -0.4, 'FMA': -0.3, 'MAM': -0.3, 'AMJ': -0.4, 'MJJ': -0.4, 'JJA': -0.4, 'JAS': -0.3, 'ASO': -0.3, 'SON': -0.2, 'OND': -0.2, 'NDJ': -0.3},
    2014: {'DJF': -0.4, 'JFM': -0.5, 'FMA': -0.3, 'MAM': 0.0, 'AMJ': 0.2, 'MJJ': 0.2, 'JJA': 0.0, 'JAS': 0.1, 'ASO': 0.2, 'SON': 0.5, 'OND': 0.6, 'NDJ': 0.7},
    2015: {'DJF': 0.5, 'JFM': 0.5, 'FMA': 0.5, 'MAM': 0.7, 'AMJ': 0.9, 'MJJ': 1.2, 'JJA': 1.5, 'JAS': 1.9, 'ASO': 2.2, 'SON': 2.4, 'OND': 2.6, 'NDJ': 2.6},
    2016: {'DJF': 2.5, 'JFM': 2.1, 'FMA': 1.6, 'MAM': 0.9, 'AMJ': 0.4, 'MJJ': -0.1, 'JJA': -0.4, 'JAS': -0.5, 'ASO': -0.6, 'SON': -0.7, 'OND': -0.7, 'NDJ': -0.6},
    2017: {'DJF': -0.3, 'JFM': -0.2, 'FMA': 0.1, 'MAM': 0.2, 'AMJ': 0.3, 'MJJ': 0.3, 'JJA': 0.1, 'JAS': -0.1, 'ASO': -0.4, 'SON': -0.7, 'OND': -0.8, 'NDJ': -1.0},
    2018: {'DJF': -0.9, 'JFM': -0.9, 'FMA': -0.7, 'MAM': -0.5, 'AMJ': -0.2, 'MJJ': 0.0, 'JJA': 0.1, 'JAS': 0.2, 'ASO': 0.5, 'SON': 0.8, 'OND': 0.9, 'NDJ': 0.8},
    2019: {'DJF': 0.7, 'JFM': 0.7, 'FMA': 0.7, 'MAM': 0.7, 'AMJ': 0.5, 'MJJ': 0.5, 'JJA': 0.3, 'JAS': 0.1, 'ASO': 0.2, 'SON': 0.3, 'OND': 0.5, 'NDJ': 0.5},
    2020: {'DJF': 0.5, 'JFM': 0.5, 'FMA': 0.4, 'MAM': 0.2, 'AMJ': -0.1, 'MJJ': -0.3, 'JJA': -0.4, 'JAS': -0.6, 'ASO': -0.9, 'SON': -1.2, 'OND': -1.3, 'NDJ': -1.2},
    2021: {'DJF': -1.0, 'JFM': -0.9, 'FMA': -0.8, 'MAM': -0.7, 'AMJ': -0.5, 'MJJ': -0.4, 'JJA': -0.4, 'JAS': -0.5, 'ASO': -0.7, 'SON': -0.8, 'OND': -1.0, 'NDJ': -1.0},
    2022: {'DJF': -1.0, 'JFM': -0.9, 'FMA': -1.0, 'MAM': -1.1, 'AMJ': -1.0, 'MJJ': -0.9, 'JJA': -0.8, 'JAS': -0.9, 'ASO': -1.0, 'SON': -1.0, 'OND': -0.9, 'NDJ': -0.8},
    2023: {'DJF': -0.7, 'JFM': -0.4, 'FMA': -0.1, 'MAM': 0.2, 'AMJ': 0.5, 'MJJ': 0.8, 'JJA': 1.1, 'JAS': 1.3, 'ASO': 1.6, 'SON': 1.8, 'OND': 1.9, 'NDJ': 2.0},
    2024: {'DJF': 1.8, 'JFM': 1.5, 'FMA': 1.1, 'MAM': 0.7, 'AMJ': 0.4, 'MJJ': 0.2, 'JJA': 0.0, 'JAS': -0.1, 'ASO': -0.2, 'SON': -0.3, 'OND': -0.4, 'NDJ': -0.5},
    2025: {'DJF': -0.6, 'JFM': -0.4, 'FMA': -0.2, 'MAM': -0.1, 'AMJ': -0.1, 'MJJ': -0.1}
}

def create_daily_oni_dataframe(oni_table=None, period_months=None):
    """
    Create a daily DataFrame with ONI values repeated for each day of the month.

    Each (year, period) entry is assigned to the period's central month and
    copied onto every calendar day of that month. Entries whose value is None
    or NaN are skipped.

    Parameters:
        oni_table (dict, optional): {year: {period_code: value}}. Defaults to
            the module-level `oni_data`.
        period_months (dict, optional): {period_code: month number}. Defaults
            to the module-level `period_to_month`.

    Returns:
        pd.DataFrame: Columns 'Date' ("YYYY-MM-DD" strings) and 'NINO', one row
        per day, in the iteration order of the input table. The two columns
        are present even when there are no rows.

    Raises:
        KeyError: If a period code is missing from `period_months`.
    """
    if oni_table is None:
        oni_table = oni_data
    if period_months is None:
        period_months = period_to_month

    daily_data = []

    for year, year_data in oni_table.items():
        for period, oni_value in year_data.items():
            if oni_value is None or pd.isna(oni_value):
                continue

            month = period_months[period]

            # monthrange() already accounts for leap years
            days_in_month = calendar.monthrange(year, month)[1]

            # One record per day of this month
            daily_data.extend(
                {'Date': f"{year}-{month:02d}-{day:02d}", 'NINO': oni_value}
                for day in range(1, days_in_month + 1)
            )

    return pd.DataFrame(daily_data, columns=['Date', 'NINO'])

# Build the daily ONI table (one row per day, columns 'Date' and 'NINO')
df = create_daily_oni_dataframe()

# Optional CSV export, left disabled
#df.to_csv('oni_daily_data.csv', index=False)

## Reindex satellite datasets to match chl_ds

In [None]:
def reindex_satellite_ds(ds, ref_ds, dims=('latitude', 'longitude'), method_main='linear', method_fallback='nearest'):
    """
    Interpolate every data variable of a dataset onto the grid of a reference dataset.

    Only the coordinates named in ``dims`` are regridded; every other
    dimension of ``ds`` (for example a time axis) keeps its original
    coordinates. Each variable is interpolated twice, once per method, and
    NaNs left by the primary method are filled from the fallback result.

    Parameters:
        ds (xr.Dataset): The dataset to reindex.
        ref_ds (xr.Dataset): The reference dataset (e.g., chlorophyll) that
            supplies the target coordinates.
        dims (tuple): Names of the coordinates to align
            ('latitude', 'longitude' by default). Each name must exist in
            ``ref_ds``.
        method_main (str): Primary interpolation method (e.g., 'linear').
        method_fallback (str): Method used to fill cells where the primary
            interpolation gives NaN.

    Returns:
        xr.Dataset: Dataset holding the regridded variables. A variable that
        has none of ``dims`` is passed through unchanged. A variable whose
        interpolation raises is reported via ``print`` and left out of the
        result.

    Raises:
        KeyError: If a name in ``dims`` is not present in ``ref_ds``.
    """
    # Target coordinate values, looked up once. Plain arrays are passed so the
    # result is labelled only by the reference grid values.
    # NOTE(review): assumes each name in `dims` is a 1-D dimension coordinate
    # in both datasets — confirm for any non-default `dims`.
    target_coords = {dim: ref_ds[dim].values for dim in dims}

    regridded_vars = {}
    for var_name, var_data in ds.data_vars.items():
        try:
            # Interpolate only along the requested dims this variable actually has.
            var_targets = {
                dim: values
                for dim, values in target_coords.items()
                if dim in var_data.dims
            }
            if not var_targets:
                # Nothing to regrid for this variable; keep it as it is.
                regridded_vars[var_name] = var_data
                continue

            interp_main = var_data.interp(var_targets, method=method_main)
            interp_fallback = var_data.interp(var_targets, method=method_fallback)
            # Keep the primary result and patch its NaNs with the fallback.
            regridded_vars[var_name] = interp_main.fillna(interp_fallback)
        except Exception as e:
            # Best-effort: report the failing variable and carry on with the rest.
            print(f"Skipping variable '{var_name}' due to error: {e}")

    return xr.Dataset(regridded_vars)

In [None]:
# Open every input dataset on its original grid.
# Most files are read from the 'cmems satellite data original grid/' folder.
chl_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D_CHL_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
par_ds = xr.open_dataset(
    'cmems satellite data original grid/PAR_globcolour_4km_daily_merged_L3m.nc'
)

# SST: keep only the adjusted SST variable and expose it under the name 'SST'.
sst_ds = (
    xr.open_dataset(
        'cmems satellite data original grid/cmems_obs-sst_glo_phy_my_l3s_P1D-m_multi-vars_126.95W-109.05W_22.05N-40.95N_1997-09-04-2020-01-25.nc'
    )
    [['adjusted_sea_surface_temperature']]
    .rename({'adjusted_sea_surface_temperature': 'SST'})
)

kd490_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D_KD490_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
C_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_BBP_and_phyto_C_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)

# MLD: the file's 'mlotst' variable is renamed to 'MLD'.
mld_ds = (
    xr.open_dataset(
        'cmems satellite data original grid/cmems_mod_glo_phy_my_0.083deg_P1D-m_mlotst_127.00W-109.08W_22.08N-41.00N_1997-09-04-2020-01-25.nc'
    )
    .rename({'mlotst': 'MLD'})
)

chlc_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_CHLC_ratio_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
cdm_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D_CDM_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
spm_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D_SPM_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)

# NOTE(review): the next two paths have no 'cmems satellite data original grid/'
# prefix, unlike the other inputs — confirm these files live in the working directory.
phyt_ds = xr.open_dataset(
    'cmems_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D_multi-vars_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
wind_ds = xr.open_dataset('temporary_wind_ds.nc')

no3_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_mod_glo_bgc_my_0.25deg_P1D-m_no3_127.00W-109.25W_22.25N-41.00N_0.51m_1997-09-04-2020-01-25.nc'
)
rrs_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-reflectance_my_l3-multi-4km_P1D_multi-vars_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
zsd_ds = xr.open_dataset(
    'cmems satellite data original grid/cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D_ZSD_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)



In [None]:
# Regrid the satellite datasets onto the chl_ds grid with reindex_satellite_ds.
# Only the ZSD call at the bottom is active; the calls for the other datasets
# are kept commented out.
# NOTE(review): presumably the datasets are regridded and saved one at a time by
# toggling these comments — confirm before enabling several at once.
# print('reindexing par...')
# par_ds_interp = reindex_satellite_ds(par_ds, chl_ds)
# print('reindexing sst...')
# sst_ds_interp = reindex_satellite_ds(sst_ds, chl_ds)
# print('reindexing kd490...')
# kd490_ds_interp = reindex_satellite_ds(kd490_ds, chl_ds)
# print('reindexing C...')
# C_ds_interp = reindex_satellite_ds(C_ds, chl_ds)
# print('reindexing mld...')
# mld_ds_interp = reindex_satellite_ds(mld_ds, chl_ds)
# print('reindexing CHL:C...')
# chlc_ds_interp = reindex_satellite_ds(chlc_ds, chl_ds)
# print('reindexing CDM...')
# cdm_ds_interp = reindex_satellite_ds(cdm_ds, chl_ds)
# print('reindexing SPM...')
# spm_ds_interp = reindex_satellite_ds(spm_ds, chl_ds)
# print('reindexing PHYT...')
# phyt_ds_interp = reindex_satellite_ds(phyt_ds, chl_ds)
# print('reindexing wind...')
# wind_ds_interp = reindex_satellite_ds(wind_ds, chl_ds)
# wind_ds_interp = wind_ds_interp.rename({'valid_time': 'time'})
# wind_ds_interp = wind_ds_interp.drop_vars('number')
# print('reindexing no3...')
# no3_ds_interp = reindex_satellite_ds(no3_ds, chl_ds)
# print('reindexing rrs...')
# rrs_ds_interp = reindex_satellite_ds(rrs_ds, chl_ds)
print('reindexing zsd...')
zsd_ds_interp = reindex_satellite_ds(zsd_ds, chl_ds)

In [None]:
# Create an encoding dictionary to compress all variables
encoding = {var: {"zlib": True, "complevel": 5} for var in zsd_ds_interp.data_vars}
output_filename = (
    'reindexed_cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D_ZSD_126.98W-109.02W_22.02N-40.98N_1997-09-04-2020-01-25.nc'
)
print("Saving ZSD NetCDF...")
zsd_ds_interp.to_netcdf(
    output_filename,
    encoding=encoding,
    engine="netcdf4",
    compute=True,
)