In [2]:
"""
Combine all ERSSTv6 monthly files (1950–2024) into one NetCDF dataset
Output: ersst.v6.195001_202412.nc

Install dask if needed
!conda install dask -c conda-forge -n meteo203 -y

"""

import xarray as xr
import os
import glob

# Input directory (populated by the Bash download step) and merged output file.
data_dir = "data_ersstv6"
out_file = "ersst.v6.195001_202412.nc"

# Collect the monthly files. Sorting the names gives a stable, reproducible
# file list (the YYYYMM stamp in each name makes name order chronological).
files = sorted(glob.glob(os.path.join(data_dir, "ersst.v6.*.nc")))

print(f"Found {len(files)} monthly files.")

# Fail early with an actionable message instead of letting open_mfdataset
# raise a generic error when the download step has not been run (or data_dir
# is wrong). FileNotFoundError is an OSError subclass, so existing handlers
# that catch OSError keep working.
if not files:
    raise FileNotFoundError(
        f"No files matching 'ersst.v6.*.nc' found in {data_dir!r}; "
        "run the download step first."
    )

# Open every file lazily and concatenate them using their coordinate values.
# NOTE: `chunks` is applied per input file. Each monthly file contributes a
# single time step, so the resulting dask chunks hold one month each (see the
# chunksize in the printed dataset), not 12. Call `ds.chunk({"time": 12})`
# after opening if larger chunks are wanted.
ds = xr.open_mfdataset(
    files,
    combine="by_coords",
    parallel=True,
    chunks={"time": 12},
)

# Inspect structure
print(ds)

# Write the merged dataset; this is where the lazy data is actually read.
ds.to_netcdf(out_file)
print(f"\n✅ Combined dataset saved as: {out_file}")


Found 900 monthly files.
<xarray.Dataset> Size: 115MB
Dimensions:  (time: 900, lev: 1, lat: 89, lon: 180)
Coordinates:
  * lat      (lat) float64 712B -88.0 -86.0 -84.0 -82.0 ... 82.0 84.0 86.0 88.0
  * lev      (lev) float64 8B 0.0
  * lon      (lon) float64 1kB 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0
  * time     (time) datetime64[ns] 7kB 1950-01-15 1950-02-15 ... 2024-12-15
Data variables:
    sst      (time, lev, lat, lon) float32 58MB dask.array<chunksize=(1, 1, 89, 180), meta=np.ndarray>
    ssta     (time, lev, lat, lon) float32 58MB dask.array<chunksize=(1, 1, 89, 180), meta=np.ndarray>
Attributes: (12/66)
    Conventions:                     CF-1.6, ACDD-1.3
    metadata_link:                   gov.noaa.ncei:C01737
    id:                              ersst.v6.195001.nc
    naming_authority:                gov.noaa.ncei
    title:                           NOAA monthly ERSSTv6 (in situ only)
    summary:                         Monthly ERSSTv6 is developed based on v5.

HDF5-DIAG: Error detected in HDF5 (1.14.6) thread 1:
  #000: H5F.c line 496 in H5Fis_accessible(): unable to determine if file is accessible as HDF5
    major: File accessibility
    minor: Not an HDF5 file
  #001: H5VLcallback.c line 3913 in H5VL_file_specific(): file specific failed
    major: Virtual Object Layer
    minor: Can't operate on object
  #002: H5VLcallback.c line 3848 in H5VL__file_specific(): file specific failed
    major: Virtual Object Layer
    minor: Can't operate on object
  #003: H5VLnative_file.c line 344 in H5VL__native_file_specific(): error in HDF5 file check
    major: File accessibility
    minor: Can't get value
  #004: H5Fint.c line 1055 in H5F__is_hdf5(): unable to open file
    major: File accessibility
    minor: Unable to initialize object
  #005: H5FD.c line 787 in H5FD_open(): can't open file
    major: Virtual File Layer
    minor: Unable to open file
  #006: H5FDsec2.c line 323 in H5FD__sec2_open(): unable to open file: name = '/home/ba/source/met


✅ Combined dataset saved as: ersst.v6.195001_202412.nc
