In [1]:
from pathlib import Path
import pprint

# run pip install -e . in the root directory to install this package
from stacbuilder import *

In [2]:
# Collection configuration
catalog_version = "v0.1"
collection_config_path = Path("config-collection.json")

# Input paths: the directory holding the source rasters and the glob
# (relative to that directory) that selects them.
tiff_input_path = Path("/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/")
tiffs_glob = '*/*.tif'

# Output paths
output_path = Path("results")
# Versioned test location: results/test/<catalog_version>
test_output_path = output_path / "test" / catalog_version
# Kept as a Path, like every other path in this cell, so downstream code can
# rely on one type for all directories.
publish_output_path = Path("/data/users/Public/victor.verhaert/ANINStac/MODIS_NDVI_DERIVATIONS")
# Forwarded as `overwrite=` to the build calls further down.
overwrite = True

In [3]:
# List the input files that match the glob under the input directory.
input_files = list_input_files(
    input_dir=tiff_input_path,
    glob=tiffs_glob,
    max_files=None,
)
print(f"Found {len(input_files)} input files. 5 first files:")
# Preview only the first five paths to keep the cell output short.
for input_file in input_files[:5]:
    print(input_file)

Found 24 input files. 5 first files:
/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-04-01.tif
/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-08-01.tif
/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-12-01.tif
/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-02-01.tif
/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-01-01.tif


In [4]:
# List the asset metadata extracted from the input files.
# max_files=5 presumably caps how many files are processed -- confirm in stacbuilder.
asset_metadata = list_asset_metadata(
    input_dir=tiff_input_path,
    glob=tiffs_glob,
    collection_config_path=collection_config_path,
    max_files=5,
)
# Pretty-print each metadata record as a plain dictionary.
for metadata in asset_metadata:
    metadata_dict = metadata.to_dict()
    pprint.pprint(metadata_dict)

{'asset_id': '2020-04-01',
 'asset_path': PosixPath('/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-04-01.tif'),
 'asset_type': 'MAX',
 'bbox_lat_lon': {'east': 32.89434523906337,
                  'epsg': 4326,
                  'north': -22.126488095214867,
                  'south': -34.83184523806912,
                  'west': 16.44791666756001},
 'bbox_projected': {'east': 32.89434523906337,
                    'epsg': 4326,
                    'north': -22.126488095214867,
                    'south': -34.83184523806912,
                    'west': 16.44791666756001},
 'collection_id': None,
 'datetime': datetime.datetime(2020, 4, 1, 0, 0, tzinfo=datetime.timezone.utc),
 'end_datetime': datetime.datetime(2020, 4, 30, 23, 59, 59, tzinfo=datetime.timezone.utc),
 'file_size': 39463151,
 'geometry_lat_lon': <POLYGON ((16.448 -34.832, 16.448 -22.126, 32.894 -22.126, 32.894 -34.832, 1...>,
 'href': '/data/users/Public/emile.sonneveld/ANIN/VCI/MODIS_NDVI/MAX/2020-04-01.

In [5]:
# Build the STAC items in memory and collect the files that could not be converted.
# NOTE(review): max_files=0 appears to mean "no limit" here (the recorded run
# found 12 items) -- confirm against stacbuilder.
stac_items, failed_files = list_stac_items(
    input_dir=tiff_input_path,
    glob=tiffs_glob,
    collection_config_path=collection_config_path,
    max_files=0,
)
print(f"Found {len(stac_items)} STAC items")
# Only report failures when there are any.
if failed_files:
    print(f"Failed files: {failed_files}")

Found 12 STAC items


In [6]:
# Show the first STAC item; the bare last expression becomes the cell's output.
print("First stac item:")
first_stac_item = stac_items[0]
first_stac_item

First stac item:


In [12]:
# Remove the previous test output directory so a rebuild starts from scratch.
from shutil import rmtree

# Guard the removal: when the directory does not exist yet (fresh checkout,
# or this cell is run twice) an unguarded rmtree raises FileNotFoundError.
if test_output_path.exists():
    rmtree(test_output_path)

In [7]:
# Build the grouped collections from the input files.
# NOTE(review): this writes to publish_output_path, while the "show collection"
# and "validate collection" cells read from test_output_path -- confirm which
# directory is intended.
build_grouped_collections(
    input_dir=tiff_input_path,
    glob=tiffs_glob,
    collection_config_path=collection_config_path,
    output_dir=publish_output_path,
    overwrite=overwrite,
)

In [9]:
# Load the collection file from the test output directory and display it.
collection_json = test_output_path / "collection.json"
load_collection(collection_file=collection_json)

In [10]:
# Validate the collection file from the test output directory.
collection_json = test_output_path / "collection.json"
validate_collection(collection_file=collection_json)

## Legacy
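The cell below builds a single, ungrouped collection. It is better to use grouped collections (`build_grouped_collections`, above) instead.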

In [None]:
# Legacy: build a single collection into the test output directory.
build_collection(
    input_dir=tiff_input_path,
    glob=tiffs_glob,
    collection_config_path=collection_config_path,
    output_dir=test_output_path,
    overwrite=overwrite,
)