# Interpolate ice concentration onto GLSEA/GLSEA3 grid

In [1]:
import xarray as xr
import numpy as np
from deepsensor_greatlakes.utils import standardize_dates

In [2]:
# Locations of the input and output Zarr stores on U-M HPC.
# The shared root directory is kept in one place so the notebook can be pointed
# at a different copy of the data by editing a single line.
DATA_ROOT = '/nfs/turbo/seas-dannes/SST-sensor-placement-input'

# Input: ice concentration store that this notebook reads.
ice_concentration_path = f'{DATA_ROOT}/NSIDC/ice_concentration.zarr'
# Output: store this notebook writes (ice concentration on the GLSEA grid).
processed_path = f'{DATA_ROOT}/NSIDC/ice_concentration_interp_on_glsea_grid.zarr'
# Input: combined GLSEA store; provides the target coordinates.
glsea_path = f'{DATA_ROOT}/GLSEA_combined.zarr'

In [3]:
# 1. Open the raw datasets with their native on-disk chunking.
# The variables come back as lazy dask arrays (see the printed repr), so the
# full dataset is not read into memory at this point.

# GLSEA: open the store and pass it straight through the project's date helper.
# NOTE(review): standardize_dates presumably normalizes the time coordinate;
# confirm against deepsensor_greatlakes.utils.
glsea_raw = standardize_dates(xr.open_zarr(glsea_path))
print(glsea_raw)

<xarray.Dataset> Size: 40GB
Dimensions:  (time: 10221, lat: 838, lon: 1181)
Coordinates:
  * lat      (lat) float64 7kB 38.87 38.89 38.9 38.92 ... 50.58 50.59 50.61
  * lon      (lon) float64 9kB -92.42 -92.41 -92.39 ... -75.91 -75.9 -75.88
  * time     (time) datetime64[s] 82kB 1995-01-01 1995-01-02 ... 2022-12-31
Data variables:
    crs      (time) |S1 10kB dask.array<chunksize=(366,), meta=np.ndarray>
    sst      (time, lat, lon) float32 40GB dask.array<chunksize=(366, 200, 200), meta=np.ndarray>
Attributes:
    CDI:          Climate Data Interface version 2.0.5 (https://mpimet.mpg.de...
    CDO:          Climate Data Operators version 2.0.5 (https://mpimet.mpg.de...
    Conventions:  CF-1.5
    GDAL:         GDAL 2.1.3, released 2017/20/01
    history:      Tue Feb 13 20:15:43 2024: cdo selyear,1995 GLSEA_SST_1995_2...
    source:       1995_001_glsea.asc
    title:        SST from Great Lakes Surface Environmental Analysis (GLSEA)


In [4]:
# Ice concentration: open the store and run it through the same date helper
# that is applied to the GLSEA dataset.
ice_concentration_raw = standardize_dates(xr.open_zarr(ice_concentration_path))
print(ice_concentration_raw)

<xarray.Dataset> Size: 155GB
Dimensions:            (time: 18434, lat: 1024, lon: 1024)
Coordinates:
  * lat                (lat) float64 8kB 50.6 50.59 50.58 ... 38.9 38.89 38.87
  * lon                (lon) float64 8kB -92.41 -92.39 -92.38 ... -75.89 -75.87
  * time               (time) datetime64[s] 147kB 1972-12-01 ... 2023-05-21
Data variables:
    ice_concentration  (time, lat, lon) float64 155GB dask.array<chunksize=(366, 200, 200), meta=np.ndarray>
Attributes: (12/23)
    coverage_area:            Great Lakes
    data_source:              NOAA
    description:              Great Lakes ice concentrations
    disclaimer:               Data collected and processed by NOAA and dissem...
    dissemination:            USNIC Website, CIS Website
    grid_resolution:          1.800 km
    ...                       ...
    product:                  GRID - Resolution 1800
    source:                   NAIS daily Great Lakes ice analysis
    source_url:               https://noaadata.apps

In [5]:
# Mask the land sentinel: every value equal to -1 is replaced with NaN so that
# later steps do not treat it as a real concentration.
not_land = ice_concentration_raw != -1
ice_concentration_raw = ice_concentration_raw.where(not_land, other=np.nan)
print(ice_concentration_raw)

<xarray.Dataset> Size: 155GB
Dimensions:            (time: 18434, lat: 1024, lon: 1024)
Coordinates:
  * lat                (lat) float64 8kB 50.6 50.59 50.58 ... 38.9 38.89 38.87
  * lon                (lon) float64 8kB -92.41 -92.39 -92.38 ... -75.89 -75.87
  * time               (time) datetime64[s] 147kB 1972-12-01 ... 2023-05-21
Data variables:
    ice_concentration  (time, lat, lon) float64 155GB dask.array<chunksize=(366, 200, 200), meta=np.ndarray>
Attributes: (12/23)
    coverage_area:            Great Lakes
    data_source:              NOAA
    description:              Great Lakes ice concentrations
    disclaimer:               Data collected and processed by NOAA and dissem...
    dissemination:            USNIC Website, CIS Website
    grid_resolution:          1.800 km
    ...                       ...
    product:                  GRID - Resolution 1800
    source:                   NAIS daily Great Lakes ice analysis
    source_url:               https://noaadata.apps

In [6]:
# 2. Restrict the ice concentration record to the period covered by GLSEA.
# The slice runs from the first to the last GLSEA timestamp, so ice dates
# outside that window (earlier or later) are dropped.
print("Slicing ice concentration data to match GLSEA time range...")
glsea_time = glsea_raw.time
start_time, end_time = glsea_time.min().values, glsea_time.max().values
glsea_period = slice(start_time, end_time)
ice_concentration_sliced = ice_concentration_raw.sel(time=glsea_period)

print(ice_concentration_sliced)

Slicing ice concentration data to match GLSEA time range...
<xarray.Dataset> Size: 86GB
Dimensions:            (time: 10227, lat: 1024, lon: 1024)
Coordinates:
  * lat                (lat) float64 8kB 50.6 50.59 50.58 ... 38.9 38.89 38.87
  * lon                (lon) float64 8kB -92.41 -92.39 -92.38 ... -75.89 -75.87
  * time               (time) datetime64[s] 82kB 1995-01-01 ... 2022-12-31
Data variables:
    ice_concentration  (time, lat, lon) float64 86GB dask.array<chunksize=(352, 200, 200), meta=np.ndarray>
Attributes: (12/23)
    coverage_area:            Great Lakes
    data_source:              NOAA
    description:              Great Lakes ice concentrations
    disclaimer:               Data collected and processed by NOAA and dissem...
    dissemination:            USNIC Website, CIS Website
    grid_resolution:          1.800 km
    ...                       ...
    product:                  GRID - Resolution 1800
    source:                   NAIS daily Great Lakes ice ana

In [7]:
# 3. Regrid the ice data onto the GLSEA grid.
# `.interp_like()` interpolates onto the coordinates of `glsea_raw`; with
# method="nearest" each target point takes the value of the closest source point.
# NOTE(review): judging by the printed dimensions, the time axis is matched to
# GLSEA as well (10227 -> 10221 steps), not only lat/lon. Confirm this is intended.
target_grid = glsea_raw
ice_concentration_interp = ice_concentration_sliced.interp_like(target_grid, method="nearest")

print(ice_concentration_interp)

<xarray.Dataset> Size: 81GB
Dimensions:            (time: 10221, lat: 838, lon: 1181)
Coordinates:
  * time               (time) datetime64[s] 82kB 1995-01-01 ... 2022-12-31
  * lat                (lat) float64 7kB 38.87 38.89 38.9 ... 50.58 50.59 50.61
  * lon                (lon) float64 9kB -92.42 -92.41 -92.39 ... -75.9 -75.88
Data variables:
    ice_concentration  (time, lat, lon) float64 81GB dask.array<chunksize=(105, 60, 1181), meta=np.ndarray>
Attributes: (12/23)
    coverage_area:            Great Lakes
    data_source:              NOAA
    description:              Great Lakes ice concentrations
    disclaimer:               Data collected and processed by NOAA and dissem...
    dissemination:            USNIC Website, CIS Website
    grid_resolution:          1.800 km
    ...                       ...
    product:                  GRID - Resolution 1800
    source:                   NAIS daily Great Lakes ice analysis
    source_url:               https://noaadata.apps.nsi

In [8]:
# 4. Scale, bound, downcast and rechunk the data ahead of the Zarr write.
# Explicit chunk sizes are used here (not `.chunk('auto')`); they are the same
# sizes the printed input datasets report (366 x 200 x 200).
# NOTE(review): an earlier note said rechunking was needed to avoid a ValueError
# when writing to Zarr; the exact error is not shown in this notebook.
output_chunks = {'time': 366, 'lat': 200, 'lon': 200}

# Divide by 100 (presumably percent -> fraction; confirm the source units).
ice_fraction = ice_concentration_interp / 100.0
# Bound every value to the closed interval [0.0, 1.0].
ice_fraction = ice_fraction.clip(min=0.0, max=1.0)
# Store as single precision.
ice_fraction = ice_fraction.astype('float32')
ice_concentration_final = ice_fraction.chunk(output_chunks)

print(ice_concentration_final)

<xarray.Dataset> Size: 40GB
Dimensions:            (time: 10221, lat: 838, lon: 1181)
Coordinates:
  * time               (time) datetime64[s] 82kB 1995-01-01 ... 2022-12-31
  * lat                (lat) float64 7kB 38.87 38.89 38.9 ... 50.58 50.59 50.61
  * lon                (lon) float64 9kB -92.42 -92.41 -92.39 ... -75.9 -75.88
Data variables:
    ice_concentration  (time, lat, lon) float32 40GB dask.array<chunksize=(366, 200, 200), meta=np.ndarray>
Attributes: (12/23)
    coverage_area:            Great Lakes
    data_source:              NOAA
    description:              Great Lakes ice concentrations
    disclaimer:               Data collected and processed by NOAA and dissem...
    dissemination:            USNIC Website, CIS Website
    grid_resolution:          1.800 km
    ...                       ...
    product:                  GRID - Resolution 1800
    source:                   NAIS daily Great Lakes ice analysis
    source_url:               https://noaadata.apps.nsi

In [9]:
# 5. Update the metadata so it describes the regridded, rescaled product.
print("Updating metadata...")

# Dataset-level (global) attributes.
# NOTE(review): global attributes carried over from the source (for example
# grid_resolution) still describe the original grid; consider revising them.
ice_concentration_final.attrs['title'] = 'Great Lakes Ice Concentration Interpolated to GLSEA Grid'
ice_concentration_final.attrs['time_range'] = f'{ice_concentration_final.time.min().values} to {ice_concentration_final.time.max().values}'
ice_concentration_final.attrs['units'] = 'Ice concentration (0.0 - 1.0)'

# Variable-level units. The data were divided by 100 and clipped to [0, 1]
# upstream, so the variable is a dimensionless fraction. Recording that on the
# variable itself (CF uses '1' for dimensionless quantities) means tools that
# read per-variable metadata do not see missing or stale units.
ice_concentration_final['ice_concentration'].attrs['units'] = '1'

Updating metadata...


In [10]:
# Run the lazy pipeline and write the result to `processed_path`.
# mode="w" overwrites any existing store at that location; compute=True
# performs the write immediately instead of returning a delayed object.
print("Starting to process and save the final dataset. This will take some time...")

ice_concentration_final.to_zarr(
    store=processed_path,
    mode="w",
    compute=True,
)

Starting to process and save the final dataset. This will take some time...


<xarray.backends.zarr.ZarrStore at 0x14e15899fe20>

In [11]:
# Report where the finished dataset was written.
completion_message = f"Successfully processed and saved the final dataset to {processed_path}!"
print(completion_message)

Successfully processed and saved the final dataset to /nfs/turbo/seas-dannes/SST-sensor-placement-input/NSIDC/ice_concentration_interp_on_glsea_grid.zarr!
