# Experimenting with xarray and S3

In [1]:
import xarray as xr

In [22]:
import boto3
from botocore import UNSIGNED
from botocore.config import Config
from datetime import datetime

BUCKET = "noaa-rtma-pds"


def get_rtma_prefix() -> str:
    """Prefix used for server-side filtering of responses.
     
    Uses the files in the "noaa-rtma-pds" bucket via AWS S3's paginate method.

    See https://registry.opendata.aws/noaa-rtma/ for more info on this dataset

    Example:
    If the current year is 2024, return 'akrtma.202' to match all the files from 2020 onwards."""
    year = datetime.now().year

    prefix = f"akrtma.{year}"

    return prefix[:-1]


def list_s3_files(s3_boto3_client, bucket_name, f, prefix=""):
    """
    List all files in the specified S3 bucket given a specified condition.

    :param bucket_name: Name of the S3 bucket.
    :param f: Function that takes a key and returns a boolean value.
    :param prefix: Optional prefix to narrow down the search.
    :return: List of `.grib2` files in the bucket.
    """
    paginator = s3_boto3_client.get_paginator("list_objects_v2")

    files = []

    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        if "Contents" in page:
            for obj in page["Contents"]:
                if f(obj["Key"]):
                    files.append(obj["Key"])

    return files


def is_grib_file(key: str) -> bool:
    return key.endswith(".grb2") or key.endswith(".idx")


grib_files = list_s3_files(
    boto3.client("s3", region_name="us-east-1", config=Config(signature_version=UNSIGNED)),
    BUCKET,
    f=is_grib_file,
    prefix=get_rtma_prefix(),
)
grib_files = [f"s3://{BUCKET}/{file}" for file in grib_files]


In [23]:
ds = xr.open_mfdataset(grib_files, engine="cfgrib")


Can't create file 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.5b7b6.idx'
Traceback (most recent call last):
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/site-packages/cfgrib/messages.py", line 538, in from_indexpath_or_filestream
    with compat_create_exclusive(indexpath) as new_index_file:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/site-packages/cfgrib/messages.py", line 504, in compat_create_exclusive
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.5b7b6.idx'
Can't read index file 's3://noaa-rtma-pds/akrtma.20200101/ak

FileNotFoundError: [Errno 2] No such file or directory: 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2'

In [26]:
def match(key):
    if key.startswith("s3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0"):
        return True
    return False

files = list(filter( match, grib_files))

In [28]:
files = [
    "s3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2",
    "s3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.idx",
]

"['s3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2', 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.idx']"

In [30]:

ds = xr.open_mfdataset(files, engine="cfgrib")

Can't create file 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.5b7b6.idx'
Traceback (most recent call last):
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/site-packages/cfgrib/messages.py", line 538, in from_indexpath_or_filestream
    with compat_create_exclusive(indexpath) as new_index_file:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "/Users/Hodgs004/miniforge3/envs/localsolve/lib/python3.12/site-packages/cfgrib/messages.py", line 504, in compat_create_exclusive
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2.5b7b6.idx'
Can't read index file 's3://noaa-rtma-pds/akrtma.20200101/ak

FileNotFoundError: [Errno 2] No such file or directory: 's3://noaa-rtma-pds/akrtma.20200101/akrtma.t00z.2dvaranl_ndfd_3p0.grb2'