In [25]:
from os.path import join, exists, basename
import tempfile
from urllib import request
from os import makedirs

import numpy as np
import xarray as xr
import pandas as pd

In [26]:
def get_nwm_uri(date, data_type, cycle_runtime, forecast_hour):
    return (
        f'https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/nwm.{date}/short_range/'
        f'nwm.t{cycle_runtime}z.short_range.{data_type}.f{forecast_hour}.conus.nc')

date = '20220209'
# model cycle runtime (00-23)
cycle_runtime = '00'
# data_types = ['channel_rt', 'land', 'reservoir', 'terrain_rt']
data_type = 'terrain_rt'
# forecast hour (000-00)
forecast_hour = '002'
nwm_uri = get_nwm_uri(date, data_type, cycle_runtime, forecast_hour)
nwm_uri

'https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/nwm.20220209/short_range/nwm.t00z.short_range.terrain_rt.f002.conus.nc'

In [14]:
out_dir = '/opt/data/noaa/nwm-preds'
archive_dir = join(out_dir, 'archive')
day_dir = join(out_dir, 'day')
if not exists(archive_dir): makedirs(archive_dir)
if not exists(day_dir): makedirs(day_dir)
nwm_path = join(day_dir, basename(nwm_uri))

In [15]:
request.urlretrieve(nwm_uri, nwm_path)

('/opt/data/noaa/nwm-preds/day/nwm.t00z.short_range.terrain_rt.f002.conus.nc',
 <http.client.HTTPMessage at 0x40a2cea460>)

In [16]:
ds = xr.open_dataset(nwm_path)
ds

In [19]:
# the netcdf and zarr files around both ~50mb. but the RAM usage of the notebook goes from 1.5 to 5.5 GB while 
# this is running. weird.

# remove CRS since it is always the same and we can't append it.
ds = ds.drop('crs')

out_path = join(archive_dir, 'terrain_rt.zarr')
ds.to_zarr(out_path, append_dim='time')

<xarray.Dataset>
Dimensions:         (time: 1, reference_time: 1, x: 18432, y: 15360)
Coordinates:
  * time            (time) datetime64[ns] 2022-02-09T02:00:00
  * reference_time  (reference_time) datetime64[ns] 2022-02-09
  * x               (x) float64 -2.304e+06 -2.304e+06 ... 2.304e+06 2.304e+06
  * y               (y) float64 -1.92e+06 -1.92e+06 ... 1.92e+06 1.92e+06
Data variables:
    zwattablrt      (time, y, x) float64 ...
    sfcheadsubrt    (time, y, x) float64 ...
Attributes:
    TITLE:                      OUTPUT FROM NWM v2.1
    model_initialization_time:  2022-02-09_00:00:00
    model_output_valid_time:    2022-02-09_02:00:00
    model_total_valid_times:    18
    Conventions:                CF-1.6
    code_version:               v5.2.0-beta2
    NWM_version_number:         v2.1
    model_output_type:          terrain_rt
    model_configuration:        short_range
    proj4:                      +proj=lcc +units=m +a=6370000.0 +b=6370000.0 ...
    GDAL_DataType:       

<xarray.backends.zarr.ZarrStore at 0x48d9442350>

In [None]:
ts = pd.Timestamp.utcnow() - pd.Timedelta('1D')
date_str = ts.strftime("%Y%m%d")
date_str

In [21]:
ds2 = xr.open_zarr(out_path)
ds2

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 4.22 GiB 4.22 MiB Shape (2, 15360, 18432) (1, 480, 1152) Count 1025 Tasks 1024 Chunks Type float64 numpy.ndarray",18432  15360  2,

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 4.22 GiB 4.22 MiB Shape (2, 15360, 18432) (1, 480, 1152) Count 1025 Tasks 1024 Chunks Type float64 numpy.ndarray",18432  15360  2,

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray


In [22]:
ds2.time

In [24]:
ds2.zwattablrt

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 4.22 GiB 4.22 MiB Shape (2, 15360, 18432) (1, 480, 1152) Count 1025 Tasks 1024 Chunks Type float64 numpy.ndarray",18432  15360  2,

Unnamed: 0,Array,Chunk
Bytes,4.22 GiB,4.22 MiB
Shape,"(2, 15360, 18432)","(1, 480, 1152)"
Count,1025 Tasks,1024 Chunks
Type,float64,numpy.ndarray
