In [None]:
import coiled
import distributed
import pystac
import stacrs
from arraylake import Client as ArrayLakeClient
from odc.stac import load

In [None]:
# Optional helper, kept commented out: uncomment to list the x86_64 instance
# types reported by Coiled (handy when picking `worker_vm_types` for the cluster).
# coiled.list_instance_types(arch="x86_64")

In [None]:
url = "https://data.source.coop/ausantarctic/ghrsst-mur-v2/ghrsst-mur-v2.parquet"

dicts = await stacrs.read(url)
items = [pystac.Item.from_dict(d) for d in dicts["features"]]

# There's data going back to 2002, but we only want 2025
less_items = [i for i in items if i.datetime.year == 2025]

In [None]:
# Chunk layout: 1024 x 1024 tiles in space, one time step per chunk.
chunk_sizes = {"longitude": 1024, "latitude": 1024, "time": 1}

# Load only the `analysed_sst` measurement from the filtered items.
# fail_on_error=False asks odc-stac not to abort on read errors
# (presumably the affected data is left empty -- confirm against the odc-stac docs).
data = load(
    less_items,
    measurements=["analysed_sst"],
    chunks=chunk_sizes,
    fail_on_error=False,
)

# Display the dataset summary as the cell output.
data

In [None]:
# One-time setup, kept commented out: creates the Coiled software environment
# named "ghrsst-ingest-test" that the cluster cell selects via `software=`.
# Uncomment and run only if that environment does not exist yet.
# coiled.create_software_environment(
#     name="ghrsst-ingest-test",
#     pip=[
#         "arraylake",
#         "coiled",
#         "dask[complete]",
#         "icechunk",
#         "odc-stac",
#         "stacrs",
#         "xarray"
#     ],
# )

In [None]:
# Instance types the workers may be provisioned on.
worker_instance_types = ["m6i.xlarge", "r7a.xlarge", "c7a.xlarge", "m5d.xlarge"]

# Start (or attach to) the Coiled cluster used for the write.
cluster = coiled.Cluster(
    name="write-ghrsst-2",
    software="ghrsst-ingest-test",  # software environment defined for this ingest
    region="us-west-2",
    n_workers=[10, 200],  # [min, max] worker count
    worker_vm_types=worker_instance_types,
    spot_policy="spot_with_fallback",
    idle_timeout="10m",
)

In [None]:
# Manual teardown, kept commented out: uncomment to shut the cluster down
# right away instead of waiting for its idle timeout.
# cluster.shutdown()

In [None]:
# Connect a Dask client to the Coiled cluster.
# It is bound to its own name because `client` is assigned to the ArrayLake
# client further down; sharing the name would drop the only named handle to
# this Dask client, making it impossible to inspect or close explicitly.
dask_client = distributed.Client(cluster)

# Display the client summary as the cell output.
dask_client

In [None]:
# Name of the Arraylake repo that receives the data.
repo_name = "AustralianAntarcticDivision/ghrsst-testing-coiiled"

# Create the Arraylake client.
client = ArrayLakeClient()

# Look up the repo and open a writable session on its "main" branch.
repo = client.get_repo(repo_name)
session = repo.writable_session("main")

In [None]:
# Write the dataset into the repo and record the result as a commit.
# Pickling is allowed only for the duration of this block (presumably so the
# store can be sent to the Dask workers -- confirm against the icechunk docs).
with session.allow_pickling():
    # Fetch the store inside the context, exactly where the write happens.
    target_store = session.store
    data.to_zarr(target_store, mode="a", zarr_format=3, group="analysed_sst")
    session.commit("New commit")