# STAC EuroSAT

In this demo we generate STAC metadata for the [EuroSAT](https://github.com/phelber/EuroSAT) dataset.

In [31]:
# data download

import requests
import os 
from pathlib import Path
import zipfile

url = 'https://madm.dfki.de/files/sentinel/EuroSATallBands.zip'
path = Path('data')

if not os.path.exists(path / 'ds'):
	r = requests.get(url, allow_redirects=True)
	open('data/EuroSATallBands.zip', 'wb').write(r.content)
	with zipfile.ZipFile(path / 'EuroSATallBands.zip', 'r') as zip_ref:
		zip_ref.extractall(path)

The EuroSAT dataset consists of 2700 Sentinel 2 images with one label per image for scene classification. There are 10 different categories in total. We use 100 samples for fast prototyping.

In [32]:
import pandas 
from glob import glob 
from random import sample

images = glob(str(path) + '/ds/**/*.tif', recursive=True)
images = sample(images, 100)
labels = [x.split('/')[-1].split('_')[0] for x in images]
cats = sorted(os.listdir(path / 'ds/images/remote_sensing/otherDatasets/sentinel_2/tif'))
ixs = [cats.index(x) for x in labels]

df = pandas.DataFrame({'image': images, 'label': labels, 'ix': ixs})
df

Unnamed: 0,image,label,ix
0,data/ds/images/remote_sensing/otherDatasets/se...,Pasture,5
1,data/ds/images/remote_sensing/otherDatasets/se...,Highway,3
2,data/ds/images/remote_sensing/otherDatasets/se...,PermanentCrop,6
3,data/ds/images/remote_sensing/otherDatasets/se...,Residential,7
4,data/ds/images/remote_sensing/otherDatasets/se...,Residential,7
...,...,...,...
95,data/ds/images/remote_sensing/otherDatasets/se...,Forest,1
96,data/ds/images/remote_sensing/otherDatasets/se...,Residential,7
97,data/ds/images/remote_sensing/otherDatasets/se...,SeaLake,9
98,data/ds/images/remote_sensing/otherDatasets/se...,River,8


In [33]:
df.ix.unique()

array([5, 3, 6, 7, 2, 1, 9, 4, 0, 8])

We start by generating STAC metadata following the core STAC specification. 

- We generate a STAC item for every image in the datasets
- a STAC collection to represent the images collection
- a STAC catalog to represent the final dataset (which will include also the annotations).

https://pystac.readthedocs.io/en/stable/

In [34]:
import pystac
from datetime import datetime
import rasterio as rio
import uuid
from shapely.geometry import GeometryCollection, Polygon, box, shape, mapping
from tqdm import tqdm

In [35]:
# create empty catalog

eurosat = pystac.Catalog(id="eurosat", description="EuroSAT dataset")
eurosat

0
ID: eurosat
Description: EuroSAT dataset

0
Rel: root
Target:
Media Type: application/json


In [36]:
# create collection

# # spatial extent (should compute from images)
sp_extent = pystac.SpatialExtent([None,None,None,None])

# temporal extentn (should compute from images or given by authors)
from_date = datetime.strptime('2015-10-22', '%Y-%m-%d') # unknown
to_date = datetime.strptime('2019-10-22', '%Y-%m-%d') # unknown
tmp_extent = pystac.TemporalExtent([(from_date, to_date)])

extent = pystac.Extent(sp_extent, tmp_extent)

sentinel = pystac.Collection(id='sentinel2', description = 'EuroSAT Sentinel 2 dataset', extent = extent)
eurosat.add_child(sentinel)

eurosat

0
ID: eurosat
Description: EuroSAT dataset

0
ID: sentinel2
Description: EuroSAT Sentinel 2 dataset

0
Rel: root
Target:
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: child
Target:
Media Type: application/json


In [37]:
# creating items

dst_path = path / 'eurosat'
def create_item(image):
    params = {}
    params['id'] = image.split('/')[-1].split('.')[0] # use original name
    params['datetime'] = from_date # unknown
    params['properties'] = {}
    with rio.open(image) as src:
        params['bbox'] = list(src.bounds)
        params['geometry'] = mapping(box(*params['bbox']))
        i = pystac.Item(**params)
        image_dst_path = dst_path / f"{params['id']}.tif"
        for band in src.indexes:
            image_dst_path = dst_path / f"{params['id']}_B{band}.tif"
            out_meta = src.meta.copy()
            out_meta.update({"count": 1})
            with rio.open(image_dst_path, "w", **out_meta) as dest:
                dest.write(src.read(band), 1)
            i.add_asset(key=f'B{band}', asset=pystac.Asset(href=str(image_dst_path), title='Geotiff', media_type=pystac.MediaType.GEOTIFF))
    return i

In [38]:
import multiprocessing
from concurrent.futures import ProcessPoolExecutor

num_cores = multiprocessing.cpu_count()
with ProcessPoolExecutor(max_workers=num_cores) as pool:
    with tqdm(total=len(images)) as progress:
        futures = []
        for image in df.image:
            future = pool.submit(create_item, image) 
            future.add_done_callback(lambda p: progress.update())
            futures.append(future)
        items = []
        for future in futures:
            result = future.result()
            items.append(result)
            
for item in tqdm(items):
  sentinel.add_item(item)

100%|██████████| 100/100 [00:00<00:00, 471.99it/s]
100%|██████████| 100/100 [00:00<00:00, 39900.15it/s]


In [39]:
# reset spatial extent

bounds = [list(GeometryCollection([shape(s.geometry) for s in eurosat.get_all_items()]).bounds)]
sentinel.extent.spatial = pystac.SpatialExtent(bounds)

In [40]:
eurosat.normalize_hrefs('eurosat-stac')

In [41]:
eurosat.validate_all()

In [42]:
eurosat.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

We have created a STAC Catalog for our dataset !

In [43]:
eurosat = pystac.Catalog.from_file('eurosat-stac/catalog.json')
eurosat

0
ID: eurosat
Description: EuroSAT dataset
type: Catalog

0
ID: sentinel2
Description: EuroSAT Sentinel 2 dataset
type: Collection
stac_extensions: []

0
ID: Pasture_354
"Bounding Box: [557139.766364, 5882832.541223793, 557778.4111188718, 5883473.46161]"
Datetime: 2015-10-22 00:00:00+00:00
datetime: 2015-10-22T00:00:00Z
stac_extensions: []

0
href: ./data/eurosat/Pasture_354_B1.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B2.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B3.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B4.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B5.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B6.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B7.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B8.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B9.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B10.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B11.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B12.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/Pasture_354_B13.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
Rel: root
Target:
Media Type: application/json

0
Rel: collection
Target: ../collection.json
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/sentinel2/Pasture_354/Pasture_354.json
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: item
Target:
Media Type: application/json

0
Rel: item
Target: ./Highway_2338/Highway_2338.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_737/PermanentCrop_737.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1316/Residential_1316.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2610/Residential_2610.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2976/Residential_2976.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2999/HerbaceousVegetation_2999.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_2342/PermanentCrop_2342.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1602/Forest_1602.json
Media Type: application/json

0
Rel: item
Target: ./Highway_624/Highway_624.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2207/Highway_2207.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1308/SeaLake_1308.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_2826/SeaLake_2826.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1550/Residential_1550.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1244/Residential_1244.json
Media Type: application/json

0
Rel: item
Target: ./Highway_517/Highway_517.json
Media Type: application/json

0
Rel: item
Target: ./Forest_7/Forest_7.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_691/HerbaceousVegetation_691.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_1237/PermanentCrop_1237.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_240/HerbaceousVegetation_240.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_688/Industrial_688.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1092/Residential_1092.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2437/Highway_2437.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_121/HerbaceousVegetation_121.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1224/SeaLake_1224.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2982/AnnualCrop_2982.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1932/Pasture_1932.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2698/Residential_2698.json
Media Type: application/json

0
Rel: item
Target: ./Highway_424/Highway_424.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1471/HerbaceousVegetation_1471.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1003/Forest_1003.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2950/AnnualCrop_2950.json
Media Type: application/json

0
Rel: item
Target: ./River_2160/River_2160.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_520/PermanentCrop_520.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_281/PermanentCrop_281.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1919/HerbaceousVegetation_1919.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1553/Residential_1553.json
Media Type: application/json

0
Rel: item
Target: ./Residential_67/Residential_67.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_890/AnnualCrop_890.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1440/Industrial_1440.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1062/Forest_1062.json
Media Type: application/json

0
Rel: item
Target: ./Residential_305/Residential_305.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_244/SeaLake_244.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_2407/PermanentCrop_2407.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_2009/PermanentCrop_2009.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_465/HerbaceousVegetation_465.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2733/AnnualCrop_2733.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1038/Forest_1038.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_624/AnnualCrop_624.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1184/Industrial_1184.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2267/AnnualCrop_2267.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2147/AnnualCrop_2147.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1053/Industrial_1053.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1467/Forest_1467.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1597/HerbaceousVegetation_1597.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1124/HerbaceousVegetation_1124.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1410/Pasture_1410.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_915/Industrial_915.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_702/PermanentCrop_702.json
Media Type: application/json

0
Rel: item
Target: ./Highway_777/Highway_777.json
Media Type: application/json

0
Rel: item
Target: ./Forest_2583/Forest_2583.json
Media Type: application/json

0
Rel: item
Target: ./Forest_2814/Forest_2814.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1849/Highway_1849.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1612/Pasture_1612.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2277/Highway_2277.json
Media Type: application/json

0
Rel: item
Target: ./Highway_194/Highway_194.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1192/HerbaceousVegetation_1192.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1033/Forest_1033.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_149/PermanentCrop_149.json
Media Type: application/json

0
Rel: item
Target: ./River_1435/River_1435.json
Media Type: application/json

0
Rel: item
Target: ./River_27/River_27.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_898/Industrial_898.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1579/HerbaceousVegetation_1579.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1921/SeaLake_1921.json
Media Type: application/json

0
Rel: item
Target: ./River_1468/River_1468.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1403/Highway_1403.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1767/Pasture_1767.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1356/HerbaceousVegetation_1356.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2332/HerbaceousVegetation_2332.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_314/PermanentCrop_314.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1740/Highway_1740.json
Media Type: application/json

0
Rel: item
Target: ./Residential_494/Residential_494.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1618/Industrial_1618.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2919/Residential_2919.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_73/AnnualCrop_73.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1966/Highway_1966.json
Media Type: application/json

0
Rel: item
Target: ./River_1575/River_1575.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_566/PermanentCrop_566.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2448/Residential_2448.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2459/HerbaceousVegetation_2459.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_346/HerbaceousVegetation_346.json
Media Type: application/json

0
Rel: item
Target: ./Highway_201/Highway_201.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2453/Highway_2453.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_988/PermanentCrop_988.json
Media Type: application/json

0
Rel: item
Target: ./Residential_902/Residential_902.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1343/Forest_1343.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1986/Residential_1986.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1645/SeaLake_1645.json
Media Type: application/json

0
Rel: item
Target: ./River_2327/River_2327.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_759/Pasture_759.json
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/sentinel2/collection.json
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/catalog.json
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: child
Target:
Media Type: application/json
