## Download and virtualize GPM IMERG data

In [None]:
from pathlib import Path

import earthaccess
import fsspec
import xarray as xr
from common import earthaccess_args

### Setup earthaccess query parameters

In [None]:
# Select which entry of the shared earthaccess_args table this notebook uses.
dataset = "gpm_imerg"
dataset_args = earthaccess_args[dataset]
# Pull the three per-dataset settings out in one pass.
concept_id, filename, variable = (
    dataset_args[key] for key in ("concept_id", "filename", "variable")
)

### Authenticate via earthaccess

In [None]:
# Authenticate via earthaccess before searching for and downloading granules.
earthaccess.login()

### Download dataset

In [None]:
# Ask for a single granule of the selected collection on 2002-06-01.
results = earthaccess.search_data(
    temporal=("2002-06-01", "2002-06-01"),
    count=1,
    concept_id=concept_id,
)
# Download into ./earthaccess_data and keep the path of the first file.
local_files = earthaccess.download(results, "earthaccess_data")
fp = local_files[0]

### Virtualize dataset

In [None]:
from virtualizarr import open_virtual_dataset  # noqa

In [None]:
def virtualize_dataset(local_fp) -> str:
    """Create a kerchunk JSON reference file for a downloaded granule.

    The file at ``local_fp`` is opened as a virtual dataset, its chunk paths
    are rewritten from the local copy to the granule's direct-access S3
    location, the dataset is reduced to the selected variable, and the
    references are written as JSON under ``earthaccess_data/``.

    Relies on two notebook-level names: ``results`` (its first granule
    supplies the direct-access S3 link) and ``variable`` (the data variable
    to keep).

    Args:
        local_fp: Path of the locally downloaded granule file.

    Returns:
        Path of the JSON reference file that was written.
    """
    s3_uri = results[0].data_links(access="direct")[0]
    # Split once at the last "/": everything before it is the S3 "directory"
    # of the granule, everything after it is the granule's file name.
    s3_prefix, _, granule_name = s3_uri.rpartition("/")

    def local_to_s3_url(old_local_path: str) -> str:
        """Replace a local chunk path with the S3 URI of the same file name."""
        # Plain string joining keeps the "s3://" scheme intact on every
        # platform; building the URL with pathlib would use OS-specific
        # separators.
        return f"{s3_prefix}/{Path(old_local_path).name}"

    # Strip only the final extension (".nc", ".nc4", ...) regardless of its
    # length, instead of cutting a fixed number of characters.
    output_fp = f"earthaccess_data/{Path(granule_name).stem}.json"
    print(output_fp)

    virtual_ds = open_virtual_dataset(str(local_fp), indexes={})
    virtual_ds = virtual_ds.virtualize.rename_paths(local_to_s3_url)
    # Keep only the variable of interest in the written references.
    virtual_ds = virtual_ds[[variable]]
    virtual_ds.virtualize.to_kerchunk(output_fp, format="json")
    return output_fp

In [None]:
# Write the reference file for the downloaded granule and keep its path.
output_fp = virtualize_dataset(fp)

## Load results

In [None]:
# Kerchunk reference file for the GPM IMERG granule (the path printed by the
# virtualization step).
output_fp = "earthaccess_data/3B-DAY-E.MS.MRG.3IMERG.20020601-S000000-E235959.V07B.json"
earthaccess.login()
# S3 credentials have to be requested from the DAAC that hosts the granule.
# GPM IMERG is distributed by GES DISC. Use daac="PODAAC" only when loading a
# PODAAC-hosted reference such as the MUR-SST one:
#   "earthaccess_data/20020601090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.json"
s3_fs = earthaccess.get_s3fs_session(daac="GES_DISC")
storage_options = s3_fs.storage_options.copy()
fs = fsspec.filesystem("reference", fo=output_fp)

m = fs.get_mapper("")
ds = xr.open_dataset(
    m, engine="kerchunk", chunks={}, storage_options=storage_options
)  # normal xarray.Dataset object, wrapping dask/numpy arrays etc.