# ML Dataset STAC extension

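This notebook shows how to use the `ml-dataset` STAC extension with PySTAC: we load an example catalog, attach the extension, fill in its fields, and then save and validate the result.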

In [19]:
!pip uninstall -y pystac
!pip install git+https://github.com/earthpulse/pystac.git@ml-dataset

Found existing installation: pystac 1.7.3
Uninstalling pystac-1.7.3:
  Successfully uninstalled pystac-1.7.3
Collecting git+https://github.com/earthpulse/pystac.git@ml-dataset
  Cloning https://github.com/earthpulse/pystac.git (to revision ml-dataset) to /private/var/folders/bb/d_59_md170v8ht3ncwr4v3vm0000gp/T/pip-req-build-4smv17ay
  Running command git clone --filter=blob:none --quiet https://github.com/earthpulse/pystac.git /private/var/folders/bb/d_59_md170v8ht3ncwr4v3vm0000gp/T/pip-req-build-4smv17ay
  Running command git checkout -b ml-dataset --track origin/ml-dataset
  Switched to a new branch 'ml-dataset'
  branch 'ml-dataset' set up to track 'origin/ml-dataset'.
  Resolved https://github.com/earthpulse/pystac.git to commit 717824f06ebb7fb67397c5af375a668ae7e2896d
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ld

In [30]:
import pystac
from pystac.extensions.ml_dataset import MLDatasetExtension

In [31]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# Load the example catalog published in the STAC spec repository; it will be
# used as the base object to which the ML Dataset extension is attached.
catalog = pystac.read_file(href="https://raw.githubusercontent.com/radiantearth/stac-spec/master/examples/catalog.json")

In [33]:
# Check whether the freshly loaded catalog already declares the ML Dataset
# extension (it does not yet at this point in the notebook).
print(f"Implements Extension: {MLDatasetExtension.has_extension(catalog)}")

Implements Extension: False


In [34]:
# Wrap the catalog with the ML Dataset extension. The catalog did not implement
# the extension above, so `add_if_missing=True` is passed — presumably this
# registers the extension on the catalog instead of raising (TODO confirm).
catalog_ml_dataset = MLDatasetExtension.ext(catalog, add_if_missing=True)

In [35]:
# Build one "child" link per dataset split. Each link points at the
# `catalog.json` inside a directory named after the split and uses the split
# name as its title.
split_training, split_validation = (
    pystac.Link(
        rel="child",
        target=f"{split_name}/catalog.json",
        media_type="application/json",
        title=split_name,
    )
    for split_name in ("flood-detection-training", "flood-detection-validation")
)


In [36]:
# Fill in the ML Dataset extension fields on the extended catalog.
catalog_ml_dataset.name = "Test ML Dataset"
catalog_ml_dataset.tasks = ["classification", "segmentation"]
catalog_ml_dataset.inputs_type = "satellite imagery"
catalog_ml_dataset.annotations_type = "raster"
catalog_ml_dataset.quality = "L0"
catalog_ml_dataset.version = "0.1.0"

# Attach the training and validation links created earlier as the dataset's splits.
catalog_ml_dataset.add_splits(
    (split_training, split_validation)
)

In [37]:
# Dump the extended catalog as a plain dictionary to inspect the result of the
# assignments made so far.
print(catalog_ml_dataset.to_dict())

{'type': 'Catalog', 'id': 'examples', 'stac_version': '1.0.0', 'description': 'This catalog is a simple demonstration of an example catalog that is used to organize a hierarchy of collections and their items.', 'links': [{'rel': 'self', 'href': 'https://raw.githubusercontent.com/radiantearth/stac-spec/master/examples/catalog.json', 'type': 'application/json'}, {'rel': 'root', 'href': './catalog.json', 'type': 'application/json', 'title': 'Example Catalog'}, {'rel': 'child', 'href': './extensions-collection/collection.json', 'type': 'application/json', 'title': 'Collection Demonstrating STAC Extensions'}, {'rel': 'child', 'href': './collection-only/collection.json', 'type': 'application/json', 'title': 'Collection with no items (standalone)'}, {'rel': 'child', 'href': './collection-only/collection-with-schemas.json', 'type': 'application/json', 'title': 'Collection with no items (standalone with JSON Schemas)'}, {'rel': 'item', 'href': './collectionless-item.json', 'type': 'application/

In [38]:
# Write the extended catalog to disk under the `flood-detection` destination.
catalog_ml_dataset.save(dest_href="flood-detection")

In [39]:
# Validate the catalog; the cell output lists the schema URIs that were checked
# (the core STAC catalog schema and the ml-dataset extension schema).
catalog_ml_dataset.validate()

['https://schemas.stacspec.org/v1.0.0/catalog-spec/json-schema/catalog.json',
 'https://raw.githubusercontent.com/earthpulse/ml-dataset/main/json-schema/schema.json']