## Import recipe and setup logging

In [1]:
from noaa_oisst_recipe import recipe
from setup_logging import setup_logging

# Turn on logging via the project-local helper before running any recipe step.
# NOTE(review): presumably this is what enables the pangeo_forge_recipes
# INFO/DEBUG lines shown under the execution cells below — confirm in setup_logging.py.
setup_logging()

## Prune and inspect inputs

In [2]:
# Swap the imported recipe for its pruned copy; the listing in the next cell
# shows only two inputs (time-0 and time-1) remain.
# NOTE(review): this rebinds the imported name, so re-running the cell prunes the
# already-pruned copy — presumably harmless, but confirm before relying on it.
recipe = recipe.copy_pruned()

In [3]:
# List every input key next to the source URL it maps to.
for input_key, source_url in recipe.file_pattern.items():
    print(input_key, source_url)

time-0 https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc
time-1 https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810902.nc


## Setup storage targets

> This is where an `fsspec` implementation for IPFS would be substituted for `LocalFileSystem`.

In [4]:
import tempfile
from fsspec.implementations.local import LocalFileSystem
from pangeo_forge_recipes.storage import MetadataTarget, CacheFSSpecTarget

# One local filesystem object backs both storage locations.
fs_local = LocalFileSystem()

# Input cache: downloaded source files land in a throwaway directory.
cache_dir = tempfile.TemporaryDirectory()
recipe.input_cache = CacheFSSpecTarget(fs_local, cache_dir.name)

# Output target: the store is written to a second throwaway directory.
# Keep `cache_dir` / `target_dir` bound — a TemporaryDirectory is removed once
# the object is cleaned up, and `target_dir.name` is read again when the store
# is opened at the end of the notebook.
# NOTE(review): the pangeo-forge docs appear to use FSSpecTarget for
# `recipe.target` and MetadataTarget for `recipe.metadata_cache` — confirm
# that MetadataTarget is intended here.
target_dir = tempfile.TemporaryDirectory()
recipe.target = MetadataTarget(fs_local, target_dir.name)

## Execute recipe
See: https://pangeo-forge.readthedocs.io/en/latest/recipe_user_guide/execution.html#manual-execution

In [5]:
# Manual execution, step 1: pull every input into the input cache, one at a time.
for input_key in recipe.iter_inputs():
    recipe.cache_input(input_key)

pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching input 'time-0'
pangeo_forge_recipes.storage - INFO - Caching file 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc'
pangeo_forge_recipes.storage - INFO - Coping remote file 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc' to cache
pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /var/folders/mz/gxy_z7dx1k153xf0c3fks9_40000gp/T/tmpmsq1aar1/fe866b608e5c7eafba93f06954124ba1-https_www.ncei.noaa.gov_data_sea-surface-temperature-optimum-interpolation_v2.1_access_avhrr_198109_oisst-avhrr-v02r01.19810901.nc
pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <_io.BufferedWriter name='/var/folders/mz/gxy_z7dx1k153xf0c3fks9_40000gp/T/tmpmsq1aar1/fe866b608e5c7eafba93f06954124ba1-https_www.ncei.noaa.gov_data_sea-surface-temperature-

In [6]:
# Manual execution, step 2: set up the target before any chunk is stored.
# The log below shows a new dataset being created in the target from the cached inputs.
recipe.prepare_target()

pangeo_forge_recipes.recipes.xarray_zarr - INFO - Creating a new dataset in target
pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening inputs for chunk time-0
pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray time-0: 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc'
pangeo_forge_recipes.storage - INFO - Opening 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc' from cache
pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x158e9f460>
pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /var/folders/mz/gxy_z7dx1k153xf0c3fks9_40000gp/T/tmpmsq1aar1/fe866b608e5c7eafba93f06954124ba1-https_www.ncei.noaa.gov_data_sea-surface-temperature-optimum-interpolation_v2.1_access_avhrr_198109_oisst-avhrr-v02r

1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  return xr.open_zarr(target.get_mapper())


pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - <xarray.Dataset>
Dimensions:  (time: 2, zlev: 1, lat: 720, lon: 1440)
Coordinates:
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
  * time     (time) datetime64[ns] 1981-09-01T12:00:00 1981-09-02T12:00:00
  * zlev     (zlev) float32 0.0
Data variables:
    anom     (time, zlev, lat, lon) float32 dask.array<chunksize=(1, 1, 720, 1440), meta=np.ndarray>
    err      (time, zlev, lat, lon) float32 dask.array<chunksize=(1, 1, 720, 1440), meta=np.ndarray>
    ice      (time, zlev, lat, lon) float32 dask.array<chunksize=(1, 1, 720, 1440), meta=np.ndarray>
    sst      (time, zlev, lat, lon) float32 dask.array<chunksize=(1, 1, 720, 1440), meta=np.ndarray>
Attributes: (12/37)
    title:                      NOAA/NCEI 1/4 Degree Daily Optimum Interpolat...
    source:                     ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...
 

In [7]:
# Manual execution, step 3: write each chunk into the prepared target, in order.
for chunk_key in recipe.iter_chunks():
    recipe.store_chunk(chunk_key)

pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening inputs for chunk time-0
pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray time-0: 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc'
pangeo_forge_recipes.storage - INFO - Opening 'https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc' from cache
pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x158ef71f0>
pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /var/folders/mz/gxy_z7dx1k153xf0c3fks9_40000gp/T/tmpmsq1aar1/fe866b608e5c7eafba93f06954124ba1-https_www.ncei.noaa.gov_data_sea-surface-temperature-optimum-interpolation_v2.1_access_avhrr_198109_oisst-avhrr-v02r01.19810901.nc
pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <_

In [8]:
# Manual execution, step 4: finish the target; the log below shows this
# consolidates the Zarr metadata.
recipe.finalize_target()

pangeo_forge_recipes.recipes.xarray_zarr - INFO - Consolidating Zarr metadata


## Examine zarr store

**Question**: Would this require an IPFS backend for `xarray`?

In [9]:
import xarray as xr

# Read the finished store directly from the temporary target directory.
# consolidated=True makes xarray use the consolidated metadata written by the
# finalize step rather than falling back to non-consolidated metadata.
store_path = target_dir.name
ds = xr.open_zarr(store_path, consolidated=True)
ds

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (2, 1, 720, 1440) (2, 1, 720, 1440) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  1  1440  720  1,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (2, 1, 720, 1440) (2, 1, 720, 1440) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  1  1440  720  1,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (2, 1, 720, 1440) (2, 1, 720, 1440) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  1  1440  720  1,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (2, 1, 720, 1440) (2, 1, 720, 1440) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  1  1440  720  1,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(2, 1, 720, 1440)","(2, 1, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
