# ML Dataset STAC extension

This notebook explores how to use the ML Dataset (`ml-dataset`) STAC extension through PySTAC's `MLDatasetExtension`.

In [None]:
!pip uninstall -y pystac
!pip install git+https://github.com/earthpulse/pystac.git@ml-dataset   # Use this branch until PR is merged

In [1]:
import pystac
from pystac.extensions.ml_dataset import MLDatasetExtension

In [2]:
# Load IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so source edits are picked up without restarting the
# kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the example catalog published in the stac-spec repository.
EXAMPLE_CATALOG_URL = (
    "https://raw.githubusercontent.com/radiantearth/stac-spec/master/examples/catalog.json"
)
catalog = pystac.read_file(EXAMPLE_CATALOG_URL)

In [4]:
# Check whether the catalog already declares the ML Dataset extension.
implements_ml_dataset = MLDatasetExtension.has_extension(catalog)
print(f"Implements Extension: {implements_ml_dataset}")

Implements Extension: False


In [5]:
# Get the ML Dataset extension view of the catalog. The has_extension check
# printed False for this catalog, so add_if_missing=True is passed to ask
# `ext` to add the extension to the catalog when it is not declared yet.
catalog_ml_dataset = MLDatasetExtension.ext(catalog, add_if_missing=True)

In [6]:
# Build one "child" link per dataset split. Each link points at the split's
# own catalog.json and uses the split name as its title.
split_training, split_validation = (
    pystac.Link(
        rel="child",
        target=f"{split_name}/catalog.json",
        media_type="application/json",
        title=split_name,
    )
    for split_name in ("flood-detection-training", "flood-detection-validation")
)


In [7]:
# Fill in the dataset description through the extension's fields.
catalog_ml_dataset.name = "Test ML Dataset"
catalog_ml_dataset.tasks = ["image classification", "segmentation"]
catalog_ml_dataset.inputs_type = ["satellite imagery"]
catalog_ml_dataset.annotations_type = "raster"
catalog_ml_dataset.quality = "L0"
catalog_ml_dataset.version = "0.1.0"

# Register both split links with the dataset in a single call.
dataset_splits = (split_training, split_validation)
catalog_ml_dataset.add_splits(dataset_splits)

In [8]:
# Serialize the extended catalog to a dict and print it for inspection.
catalog_as_dict = catalog_ml_dataset.to_dict()
print(catalog_as_dict)

{'type': 'Catalog', 'id': 'examples', 'stac_version': '1.0.0', 'description': 'This catalog is a simple demonstration of an example catalog that is used to organize a hierarchy of collections and their items.', 'links': [{'rel': 'self', 'href': 'https://raw.githubusercontent.com/radiantearth/stac-spec/master/examples/catalog.json', 'type': 'application/json'}, {'rel': 'root', 'href': './catalog.json', 'type': 'application/json', 'title': 'Example Catalog'}, {'rel': 'child', 'href': './extensions-collection/collection.json', 'type': 'application/json', 'title': 'Collection Demonstrating STAC Extensions'}, {'rel': 'child', 'href': './collection-only/collection.json', 'type': 'application/json', 'title': 'Collection with no items (standalone)'}, {'rel': 'child', 'href': './collection-only/collection-with-schemas.json', 'type': 'application/json', 'title': 'Collection with no items (standalone with JSON Schemas)'}, {'rel': 'item', 'href': './collectionless-item.json', 'type': 'application/

In [9]:
# Save the catalog using 'flood-detection' as the destination href.
# NOTE(review): this is a relative path, so it is presumably resolved against
# the notebook's working directory - confirm before relying on the location.
catalog_ml_dataset.save(dest_href='flood-detection')

In [10]:
# Validate the catalog. The cell output lists the JSON schemas it was checked
# against: the core STAC catalog schema and the ml-dataset extension schema.
catalog_ml_dataset.validate()

['https://schemas.stacspec.org/v1.0.0/catalog-spec/json-schema/catalog.json',
 'https://raw.githubusercontent.com/earthpulse/ml-dataset/main/json-schema/schema.json']