# STAC EuroSAT

In this demo we generate STAC metadata for the [EuroSAT](https://github.com/phelber/EuroSAT) dataset.

In [1]:
import os

os.makedirs('data', exist_ok=True)
if not 'EuroSAT.zip' in os.listdir('data'):
	!eotdl-cli datasets get EuroSAT --path data
if not 'ds' in os.listdir('data'):
	!unzip data/EuroSAT.zip -d data

The EuroSAT dataset consists of 2700 Sentinel 2 images with one label per image for scene classification. There are 10 different categories in total. We use 100 samples for fast prototyping.

In [2]:
import pandas 
from glob import glob 
from random import sample
from pathlib import Path

path = Path('data')
images = glob(str(path) + '/ds/**/*.tif', recursive=True)
images = sample(images, 100)
labels = [x.split('/')[-1].split('_')[0] for x in images]
cats = sorted(os.listdir(path / 'ds/images/remote_sensing/otherDatasets/sentinel_2/tif'))
ixs = [cats.index(x) for x in labels]

df = pandas.DataFrame({'image': images, 'label': labels, 'ix': ixs})
df

Unnamed: 0,image,label,ix
0,data/ds/images/remote_sensing/otherDatasets/se...,HerbaceousVegetation,2
1,data/ds/images/remote_sensing/otherDatasets/se...,Industrial,4
2,data/ds/images/remote_sensing/otherDatasets/se...,Forest,1
3,data/ds/images/remote_sensing/otherDatasets/se...,Highway,3
4,data/ds/images/remote_sensing/otherDatasets/se...,Highway,3
...,...,...,...
95,data/ds/images/remote_sensing/otherDatasets/se...,SeaLake,9
96,data/ds/images/remote_sensing/otherDatasets/se...,Residential,7
97,data/ds/images/remote_sensing/otherDatasets/se...,AnnualCrop,0
98,data/ds/images/remote_sensing/otherDatasets/se...,Forest,1


In [3]:
df.ix.unique()

array([2, 4, 1, 3, 5, 8, 0, 7, 6, 9])

We start by generating STAC metadata following the core STAC specification. 

- We generate a STAC item for every image in the datasets
- a STAC collection to represent the images collection
- a STAC catalog to represent the final dataset (which will include also the annotations).

https://pystac.readthedocs.io/en/stable/

In [4]:
import pystac
from datetime import datetime
import rasterio as rio
import uuid
from shapely.geometry import GeometryCollection, Polygon, box, shape, mapping
from tqdm import tqdm

In [5]:
# create empty catalog

eurosat = pystac.Catalog(id="eurosat", description="EuroSAT dataset")
eurosat

0
ID: eurosat
Description: EuroSAT dataset

0
Rel: root
Target:
Media Type: application/json


In [6]:
# create collection

# # spatial extent (should compute from images)
sp_extent = pystac.SpatialExtent([None,None,None,None])

# temporal extent (should compute from images or given by authors)
from_date = datetime.strptime('2015-10-22', '%Y-%m-%d') # unknown
to_date = datetime.strptime('2019-10-22', '%Y-%m-%d') # unknown
tmp_extent = pystac.TemporalExtent([(from_date, to_date)])

extent = pystac.Extent(sp_extent, tmp_extent)

sentinel = pystac.Collection(id='sentinel2', description = 'EuroSAT Sentinel 2 dataset', extent = extent)
eurosat.add_child(sentinel)

eurosat

0
ID: eurosat
Description: EuroSAT dataset

0
ID: sentinel2
Description: EuroSAT Sentinel 2 dataset

0
Rel: root
Target:
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: child
Target:
Media Type: application/json


In [7]:
# creating items

dst_path = path / 'eurosat'
os.makedirs(dst_path, exist_ok=True)

def create_item(image):
    params = {}
    params['id'] = image.split('/')[-1].split('.')[0] # use original name
    params['datetime'] = from_date # unknown
    params['properties'] = {}
    with rio.open(image) as src:
        params['bbox'] = list(src.bounds)
        params['geometry'] = mapping(box(*params['bbox']))
        i = pystac.Item(**params)
        image_dst_path = dst_path / f"{params['id']}.tif"
        for band in src.indexes:
            image_dst_path = dst_path / f"{params['id']}_B{band}.tif"
            out_meta = src.meta.copy()
            out_meta.update({"count": 1})
            with rio.open(image_dst_path, "w", **out_meta) as dest:
                dest.write(src.read(band), 1)
            i.add_asset(key=f'B{band}', asset=pystac.Asset(href=str(image_dst_path), title='Geotiff', media_type=pystac.MediaType.GEOTIFF))
    return i

In [8]:
# import multiprocessing
# from concurrent.futures import ProcessPoolExecutor

# num_cores = multiprocessing.cpu_count()
# with ProcessPoolExecutor(max_workers=num_cores) as pool:
#     with tqdm(total=len(images)) as rm-rprogress:
#         futures = []
#         for image in df.image:
#             future = pool.submit(create_item, image) 
#             future.add_done_callback(lambda p: progress.update())
#             futures.append(future)
#         items = []
#         for future in futures:
#             result = future.result()
#             items.append(result)

items = [create_item(image) for image in tqdm(df.image)]
            
for item in tqdm(items):
  sentinel.add_item(item)

100%|██████████| 100/100 [00:01<00:00, 88.91it/s]
100%|██████████| 100/100 [00:00<00:00, 62424.53it/s]


In [9]:
# reset spatial extent

bounds = [list(GeometryCollection([shape(s.geometry) for s in eurosat.get_all_items()]).bounds)]
sentinel.extent.spatial = pystac.SpatialExtent(bounds)

In [10]:
eurosat.normalize_hrefs('eurosat-stac')

In [11]:
# eurosat.validate_all()

In [12]:
eurosat.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

We have created a STAC Catalog for our dataset !

In [13]:
eurosat = pystac.Catalog.from_file('eurosat-stac/catalog.json')
eurosat

0
ID: eurosat
Description: EuroSAT dataset
type: Catalog

0
ID: sentinel2
Description: EuroSAT Sentinel 2 dataset
type: Collection
stac_extensions: []

0
ID: HerbaceousVegetation_647
"Bounding Box: [698825.1223874632, 4608211.654485944, 699465.1392659532, 4608851.723517504]"
Datetime: 2015-10-22 00:00:00+00:00
datetime: 2015-10-22T00:00:00Z
stac_extensions: []

0
href: ./data/eurosat/HerbaceousVegetation_647_B1.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B2.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B3.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B4.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B5.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B6.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B7.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B8.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B9.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B10.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B11.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B12.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
href: ./data/eurosat/HerbaceousVegetation_647_B13.tif
Title: Geotiff
Media type: image/tiff; application=geotiff
Owner:

0
Rel: root
Target:
Media Type: application/json

0
Rel: collection
Target: ../collection.json
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/sentinel2/HerbaceousVegetation_647/HerbaceousVegetation_647.json
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: item
Target:
Media Type: application/json

0
Rel: item
Target: ./Industrial_542/Industrial_542.json
Media Type: application/json

0
Rel: item
Target: ./Forest_2902/Forest_2902.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1296/Highway_1296.json
Media Type: application/json

0
Rel: item
Target: ./Highway_304/Highway_304.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1267/Pasture_1267.json
Media Type: application/json

0
Rel: item
Target: ./Forest_426/Forest_426.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1565/Industrial_1565.json
Media Type: application/json

0
Rel: item
Target: ./River_281/River_281.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_2040/Industrial_2040.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1052/Pasture_1052.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2737/AnnualCrop_2737.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1668/Residential_1668.json
Media Type: application/json

0
Rel: item
Target: ./Highway_40/Highway_40.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2514/Residential_2514.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_1858/PermanentCrop_1858.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_154/SeaLake_154.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_1849/PermanentCrop_1849.json
Media Type: application/json

0
Rel: item
Target: ./Highway_37/Highway_37.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2171/Residential_2171.json
Media Type: application/json

0
Rel: item
Target: ./Highway_251/Highway_251.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_1405/AnnualCrop_1405.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2358/Residential_2358.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1155/Residential_1155.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2312/Residential_2312.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_833/PermanentCrop_833.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_895/PermanentCrop_895.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2361/HerbaceousVegetation_2361.json
Media Type: application/json

0
Rel: item
Target: ./River_475/River_475.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_825/HerbaceousVegetation_825.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_2/SeaLake_2.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1220/Highway_1220.json
Media Type: application/json

0
Rel: item
Target: ./Residential_985/Residential_985.json
Media Type: application/json

0
Rel: item
Target: ./Highway_90/Highway_90.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2541/HerbaceousVegetation_2541.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2392/Highway_2392.json
Media Type: application/json

0
Rel: item
Target: ./Forest_507/Forest_507.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_465/SeaLake_465.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_838/PermanentCrop_838.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_2731/SeaLake_2731.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1644/Forest_1644.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1383/Highway_1383.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1264/Highway_1264.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2388/Highway_2388.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1534/Pasture_1534.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_857/HerbaceousVegetation_857.json
Media Type: application/json

0
Rel: item
Target: ./River_284/River_284.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1177/Pasture_1177.json
Media Type: application/json

0
Rel: item
Target: ./River_1195/River_1195.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1001/SeaLake_1001.json
Media Type: application/json

0
Rel: item
Target: ./Forest_2252/Forest_2252.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2148/Residential_2148.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2270/Highway_2270.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1741/SeaLake_1741.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_493/Industrial_493.json
Media Type: application/json

0
Rel: item
Target: ./Residential_745/Residential_745.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_223/SeaLake_223.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_2078/Industrial_2078.json
Media Type: application/json

0
Rel: item
Target: ./Residential_697/Residential_697.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_698/HerbaceousVegetation_698.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2840/Residential_2840.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_620/SeaLake_620.json
Media Type: application/json

0
Rel: item
Target: ./Highway_147/Highway_147.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2163/HerbaceousVegetation_2163.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_1533/HerbaceousVegetation_1533.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1545/Pasture_1545.json
Media Type: application/json

0
Rel: item
Target: ./Residential_870/Residential_870.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_334/AnnualCrop_334.json
Media Type: application/json

0
Rel: item
Target: ./River_1543/River_1543.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_464/Pasture_464.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1103/Residential_1103.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2769/Residential_2769.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_1889/PermanentCrop_1889.json
Media Type: application/json

0
Rel: item
Target: ./River_2151/River_2151.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2292/HerbaceousVegetation_2292.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_531/Industrial_531.json
Media Type: application/json

0
Rel: item
Target: ./Forest_1993/Forest_1993.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1254/Residential_1254.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_1780/Pasture_1780.json
Media Type: application/json

0
Rel: item
Target: ./River_894/River_894.json
Media Type: application/json

0
Rel: item
Target: ./River_1534/River_1534.json
Media Type: application/json

0
Rel: item
Target: ./Forest_290/Forest_290.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_1993/AnnualCrop_1993.json
Media Type: application/json

0
Rel: item
Target: ./Residential_721/Residential_721.json
Media Type: application/json

0
Rel: item
Target: ./Pasture_503/Pasture_503.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_195/AnnualCrop_195.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1478/Residential_1478.json
Media Type: application/json

0
Rel: item
Target: ./Highway_908/Highway_908.json
Media Type: application/json

0
Rel: item
Target: ./Industrial_1369/Industrial_1369.json
Media Type: application/json

0
Rel: item
Target: ./Highway_198/Highway_198.json
Media Type: application/json

0
Rel: item
Target: ./Residential_1127/Residential_1127.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_785/PermanentCrop_785.json
Media Type: application/json

0
Rel: item
Target: ./HerbaceousVegetation_2798/HerbaceousVegetation_2798.json
Media Type: application/json

0
Rel: item
Target: ./PermanentCrop_253/PermanentCrop_253.json
Media Type: application/json

0
Rel: item
Target: ./Highway_2030/Highway_2030.json
Media Type: application/json

0
Rel: item
Target: ./SeaLake_1090/SeaLake_1090.json
Media Type: application/json

0
Rel: item
Target: ./Residential_2604/Residential_2604.json
Media Type: application/json

0
Rel: item
Target: ./AnnualCrop_2633/AnnualCrop_2633.json
Media Type: application/json

0
Rel: item
Target: ./Forest_2780/Forest_2780.json
Media Type: application/json

0
Rel: item
Target: ./Highway_1707/Highway_1707.json
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/sentinel2/collection.json
Media Type: application/json

0
Rel: parent
Target:
Media Type: application/json

0
Rel: self
Target: /home/juan/Desktop/eotdl/demos/STAC/eurosat-stac/catalog.json
Media Type: application/json

0
Rel: root
Target:
Media Type: application/json

0
Rel: child
Target:
Media Type: application/json
