In [1]:
import fsspec
import xarray as xr

import pystac
from datetime import datetime

In [2]:
# Locations for where the data is. Each .nc file can be a STAC Item
BUCKET = "imos-data"
PATH = "IMOS/Argo/aggregated_datasets/south_pacific"

In [3]:
# Use FSSpec to get a list of files in the bucket at the path
fs = fsspec.filesystem('s3', anon=True)
files = ['s3://' + f for f in fs.glob(f"s3://{BUCKET}/{PATH}/*.nc")]

# Pick one to ply with
file = files[0]

In [4]:
# FSSpec is being used to read data from S3
options = dict(
    mode="rb",
    anon=True,
    default_fill_cache=False,
    default_cache_type="none"
)

# This is slow... maybe the NetCDF4 library can be used instead
with fsspec.open(file, **options) as f:
    data = xr.open_dataset(f)

In [5]:
def get_extent_geojson(data):
    """Get GeoJSON extent from xarray dataset. Probably will only work for Argo"""
    min_x = float(data.LONGITUDE.min())
    max_x = float(data.LONGITUDE.max())
    min_y = float(data.LATITUDE.min())
    max_y = float(data.LATITUDE.max())

    extent = {
        "type": "Polygon",
        "coordinates": [
            [
                [min_x, min_y],
                [min_x, max_y],
                [max_x, max_y],
                [max_x, min_y],
                [min_x, min_y],            
            ]
        ],
    }

    bbox = [min_x, min_y, max_x, max_y]

    return extent, bbox

def np_dt64_to_dt(in_datetime):
    """Convert numpy datetime64 to datetime"""
    return datetime.fromtimestamp(in_datetime.astype(int) / 1e9).strftime("%Y-%m-%dT%H:%M:%SZ")


In [6]:
# Load the collection so we have a reference
collection = pystac.Collection.from_file("4402cb50-e20a-44ee-93e6-4728259250d2_stac-collection.json")

# Geometry stuff
extent, bbox = get_extent_geojson(data)

# This can have anything in it. The start and end dates are required.
properties = dict(
    start_datetime=np_dt64_to_dt(data.JULD.min()),
    end_datetime=np_dt64_to_dt(data.JULD.max()),
    description=data.attrs["description"],
)

# Create the actual item
item = pystac.Item(
    id=file.split("/")[-1].rstrip(".nc"),
    geometry=extent,
    properties=properties,
    bbox=bbox,
    datetime=None,
    collection=collection
)

# This should be real... and we should really put it on S3 too
item.set_self_href(file.replace(".nc", "_stac-item.json"))

# An asset is the actual data file
asset = pystac.Asset(
    href=file,
    media_type="application/x-netcdf",
    roles=["data"]
)
item.add_asset(file, asset)

# Validate what we're doing, hopefully it passes!
item.validate()

  return datetime.fromtimestamp(in_datetime.astype(int) / 1e9).strftime("%Y-%m-%dT%H:%M:%SZ")
  return datetime.fromtimestamp(in_datetime.astype(int) / 1e9).strftime("%Y-%m-%dT%H:%M:%SZ")


['https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json']

In [7]:
# And write to file.
item.save_object(dest_href=f"./{file.split('/')[-1].replace('.nc', '_stac-item')}.json")