This code will create a STAC catalog and add several items from a local folder

In [1]:
%pip install rasterio shapely pystac piexif

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Additional Metadata to add to items
platform = 'DJI Phantom 4 RTK'
# NOTE: this name shadows Python's built-in `license`; it is kept because later cells read it.
license = 'CC-BY-SA-4.0'

# top-level catalog definitions
catalog_id = 'Cyverse Remotely Sensed Imagery STAC Catalog'
catalog_description = 'This catalog includes all of the imagery assets the exist in Cyverse Data Store'

# collection definitions
collection_id = 'Drone_Imagery_Collection' # needs to be folder-name compatible
collection_description = 'Drone_Imagery'

# source images folder
#source_image_folder = '/data-store/iplant/home/jgillan/STAC_drone'
source_image_folder = '/data-store/iplant/home/shared/bighorn_fire/Bighorn_Fire_Imagery/Catalina_Mnts_Drone_2021'

#commons_repo/curated/Gillan_Ecosphere_2021/raster_products/May_2019/15_g2_products'


# output folder (will overwrite existing files with matching output names)
stac_output_directory = '/data-store/iplant/home/jgillan/STAC_drone/stac_bighorn_fire'

# email variables - DON'T CHANGE these unless you know what you're doing
contact_email = 'jgillan@arizona.edu'
smtp_host = '128.196.254.80'

# input the date and time of original imagery acquisition
# dating assets will be a manual process.
from datetime import datetime
year = 2019
month = 5
day = 25
hour = 12

# create default datetime object for the collection - used when files don't have the EXIF creation timestamp.
# Built from the variables above so that editing year/month/day/hour actually changes the item timestamps.
default_datetime = datetime(year=year, month=month, day=day, hour=hour)

#data_type = 'uint8'
# NOTE(review): this text mentions the 2019 Santa Rita project while source_image_folder points at
# Catalina Mountains 2021 imagery - confirm the description and date match the data being catalogued.
items_mission_description = 'The imagery was part of the Ecostate Mapping project of 2019 at Santa Rita Experimental Range'


In [3]:
from datetime import datetime
from pathlib import Path
import pystac
import os

# Root STAC catalog; the collection built later in the notebook is attached to it.
# Extension schemas declared on the catalog: projection and processing.
catalog_extension_schemas = [
    'https://stac-extensions.github.io/projection/v1.0.0/schema.json',
    'https://stac-extensions.github.io/processing/v1.1.0/schema.json',
]
catalog = pystac.Catalog(
    id=catalog_id,
    description=catalog_description,
    stac_extensions=catalog_extension_schemas,
)

In [4]:
import rasterio

# function to truncate a number to n decimal places (called with n=3 for the spatial resolution)
def trun_n_d(num,n):
    """Format ``num`` as a string with ``n`` digits after the decimal point.

    Digits beyond ``n`` are cut off (not rounded) and missing digits are
    padded with zeros. When ``str(num)`` is in scientific notation the value
    is formatted with fixed-point formatting instead, which rounds.

    Returns the formatted number as a ``str``.
    """
    text = str(num)

    # scientific notation cannot be split on '.', so fall back to fixed-point formatting
    if 'e' in text.lower():
        return format(num, '.{}f'.format(n))

    whole, _, fraction = text.partition('.')
    # pad with zeros to at least n digits, then keep exactly the first n
    return whole + '.' + fraction.ljust(n, '0')[:n]

# function to get the spatial resolution of a raster. The imagery product has to have a projection, or else the gsd will be 0.00
def spatial_resolution(raster):
    """Return the X and Y pixel sizes of ``raster`` as 3-decimal strings.

    Reads ``raster.transform``: element 0 is taken as the pixel width and
    the negated element 4 as the pixel height. Both values are passed
    through ``trun_n_d(..., 3)``, so the result is a tuple of two strings
    ``(x, y)``.
    """
    affine = raster.transform
    pixel_width = affine[0]
    pixel_height = -affine[4]  # sign flipped so a negative row step yields a positive size
    return tuple(trun_n_d(size, 3) for size in (pixel_width, pixel_height))

In [5]:
from shapely.geometry import Polygon, mapping
import rasterio
import rasterio.warp
import rasterio.features

#This creates a function called 'get_bbox_and_footprint' 
def get_bbox_and_footprint(dataset):
    """Return the EPSG:4326 bounding box and rectangular footprint of a raster.

    Parameters
    ----------
    dataset : object exposing ``.crs`` and ``.bounds`` (with ``left``,
        ``bottom``, ``right`` and ``top`` attributes), e.g. an open
        rasterio dataset.

    Returns
    -------
    bbox : list of float
        ``[left, bottom, right, top]`` reprojected to EPSG:4326.
    footprint : dict
        GeoJSON-like mapping of the rectangle described by ``bbox``.

    Raises
    ------
    ValueError
        If the dataset has no coordinate reference system, because its
        bounds cannot be reprojected.
    """
    if not dataset.crs:
        raise ValueError('dataset has no CRS; cannot reproject its bounds to EPSG:4326')

    native = dataset.bounds

    # reproject the native bounds to geographic coordinates (lon/lat)
    left, bottom, right, top = rasterio.warp.transform_bounds(
        dataset.crs, 'EPSG:4326',
        native.left, native.bottom, native.right, native.top)
    bbox = [left, bottom, right, top]

    # The four corners of the footprint, listed counter-clockwise as RFC 7946
    # asks for exterior rings; shapely closes the ring automatically.
    footprint = Polygon([
        [left, bottom],   # left bottom
        [right, bottom],  # right bottom
        [right, top],     # right top
        [left, top],      # left top
    ])

    return bbox, mapping(footprint)

In [6]:
#import piexif

#def exif_to_timestamp(file_path):
    # EXIF tags to look for
   # EXIF_ORIGIN_TIMESTAMP = 36867         # Capture timestamp
   # EXIF_TIMESTAMP_OFFSET = 36881         # Timestamp UTC offset (general)
   # EXIF_ORIGIN_TIMESTAMP_OFFSET = 36881  # Capture timestamp UTC offset

   # cur_stamp, cur_offset = (None, None)

    # try to load EXIF data
   # exif_tags = piexif.load(file_path)
   # if not exif_tags or "Exif" not in exif_tags:
   #     return None
    
   # def convert_and_clean_tag(value):
        # internal helper function for handling EXIF tag values
      #  if not value:
      #      return None

        # Convert bytes to string
       # if isinstance(value, bytes):
        #    value = value.decode('UTF-8').strip()
       # else:
        #    value = value.strip()

        # Check for an empty string after stripping colons
       # if value:
          #  if not value.replace(":", "").replace("+:", "").replace("-", "").strip():
          #      value = None

       # return None if not value else value

    # Process the EXIF data
   # if EXIF_ORIGIN_TIMESTAMP in exif_tags:
     #   cur_stamp = convert_and_clean_tag(exif_tags[EXIF_ORIGIN_TIMESTAMP])
    #if not cur_stamp:
     #   return None

    #if EXIF_ORIGIN_TIMESTAMP_OFFSET in exif_tags:
       # cur_offset = convert_and_clean_tag(exif_tags[EXIF_ORIGIN_TIMESTAMP_OFFSET])
   # if not cur_offset and EXIF_TIMESTAMP_OFFSET in exif_tags:
       # cur_offset = convert_and_clean_tag(exif_tags[EXIF_TIMESTAMP_OFFSET])

    # Format the string to a timestamp and return the result
    #try:
     #   if not cur_offset:
      #      cur_ts = datetime.datetime.fromisoformat(cur_stamp)
      #  else:
       #     cur_offset = cur_offset.replace(":", "")
        #    cur_ts = datetime.datetime.fromisoformat(cur_stamp + cur_offset)
  #  except Exception as ex:
      #  cur_ts = None
       # print("Exception caught converting EXIF tag to timestamp: %s", str(ex))

    #return cur_ts


In [7]:
from pystac.extensions.projection import ProjectionExtension
from pystac.extensions.raster import RasterExtension

# get a list of geospatial files (in this case located in Cyverse data store)
folder = Path(source_image_folder)
# find files that end with .tif; sorted() makes the item order reproducible,
# because the order returned by rglob depends on the file system
files = sorted(folder.rglob('*.tif'))

# loop through each .tif item in the folder and build one STAC item per file
all_items = []
for file in files:

    # Open the individual file with rasterio. The context manager closes the
    # dataset handle as soon as the metadata has been read, so handles do not
    # pile up while looping over many files.
    with rasterio.open(file) as ds:

        # bounding box (left, bottom, right, top) and footprint rectangle in EPSG:4326
        bbox, footprint = get_bbox_and_footprint(ds)

        # spatial resolution (gsd) of the image product, returned as strings
        x_res, y_res = spatial_resolution(ds)

        # Values for the projection extension. proj:bbox and proj:geometry
        # describe the asset in its own CRS, so they are taken from the native
        # bounds rather than from the EPSG:4326 bbox/footprint above.
        native = ds.bounds
        proj_epsg = ds.crs.to_epsg()
        proj_shape = list(ds.shape)
        proj_bbox = [native.left, native.bottom, native.right, native.top]
        proj_geometry = {
            'type': 'Polygon',
            'coordinates': [[
                [native.left, native.bottom],
                [native.right, native.bottom],
                [native.right, native.top],
                [native.left, native.top],
                [native.left, native.bottom],
            ]],
        }

    # the ID (name) for each individual file
    idx = file.stem

    # get the timestamp to use - try to load capture date from file, otherwise use the start timestamp
    #file_datetime = exif_to_timestamp(str(file))
    #if not file_datetime:
    #    print(f"Using the default timestamp for image {file}")
    file_datetime = default_datetime

    # create a STAC item for each individual file
    item = pystac.Item(id=idx,
                 geometry=footprint,
                 bbox=bbox,
                 datetime=file_datetime,
                 stac_extensions=['https://stac-extensions.github.io/projection/v1.0.0/schema.json',
                                 'https://stac-extensions.github.io/processing/v1.1.0/schema.json'],
                 # STAC defines gsd as a number, so convert the truncated string back to float
                 properties={'gsd': float(x_res),
                            'platform': platform,
                            'license': license,
                            'mission': items_mission_description})

    # adding the projection extension to each item (epsg passed by keyword for
    # compatibility across pystac versions)
    ProjectionExtension.add_to(item)
    ProjectionExtension.ext(item).apply(epsg = proj_epsg,
                                        shape = proj_shape,
                                        bbox = proj_bbox,
                                        geometry = proj_geometry)
                                        #transform = [float(getattr(ds.transform, letter)) for letter in 'abcdef']

    #raster_ext = RasterExtension.add_to(item)
    #ItemRasterExtension.ext(item).apply(datatype = data_type)

    # add the asset to each STAC item
    item.add_asset(
        key='geotiff',
        asset=pystac.Asset(
            href=file.as_posix(),
            media_type=pystac.MediaType.GEOTIFF
        )
    )

    # add each STAC item to the list
    all_items.append(item)


print("Finished collecting items")

Finished collecting items


In [8]:
# extent computed from every item gathered in all_items
item_extents = pystac.Extent.from_items(all_items)

# the collection that groups the drone imagery items
collection = pystac.Collection(
    id=collection_id,
    description=collection_description,
    extent=item_extents,
    license=license,
)

# attach all items to the collection, then hang the collection off the catalog
collection.add_items(all_items)
catalog.add_child(collection)

In [9]:
# report how many STAC items the collection now holds
print(sum(1 for _item in collection.get_items()))

# print the catalog / collection / item tree
catalog.describe()

10
* <Catalog id=Cyverse Remotely Sensed Imagery STAC Catalog>
    * <Collection id=Drone_Imagery_Collection>
      * <Item id=bighorn_c12_DTM>
      * <Item id=bighorn_c22_DSM>
      * <Item id=bighorn_c22_DTM>
      * <Item id=bighorn_c23_DSM>
      * <Item id=bighorn_c23_DTM>
      * <Item id=bighorn_m13_DSM>
      * <Item id=bighorn_m13_DTM>
      * <Item id=bighorn_m13_ortho>
      * <Item id=bighorn_p3_DSM>
      * <Item id=bighorn_p3_DTM>


In [10]:
# write the STAC catalog out
# The three calls below are order-dependent: hrefs are first laid out under the
# output directory, asset hrefs are then made relative, and only then is the
# catalog written to disk.
catalog.normalize_hrefs(stac_output_directory)

# asset hrefs were created from absolute source-file paths (file.as_posix());
# convert them to relative hrefs before saving
catalog.make_all_asset_hrefs_relative()
# save as a self-contained catalog (existing files with matching names in
# stac_output_directory are overwritten, per the note in the configuration cell)
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

To make the saved catalog available, you will need to:
- share the output folder with the "public" user using the [Discovery Environment](https://de.cyverse.org/) Data tab
- add your catalog to the CyVerse [master catalog](/iplant/home/jgillan/stac.cyverse.org/cyverse_stac_catalog/catalog.json)


In [11]:
#%pip install python-irodsclient

The following will share the folder containing your catalog with the CyVerse `public` user using read-only permissions

You will be prompted to enter your CyVerse username and password. Be sure to use the *Enter* key for each prompt

In [12]:
#import os
#from getpass import getpass
#from irods.access import iRODSAccess
#from irods.session import iRODSSession

#irods_username = input('Enter your CyVerse user name:')
#irods_password = getpass('Enter your CyVerse password:')

#sess = iRODSSession(host='data.cyverse.org', port=1247, user=irods_username, password=irods_password, zone='iplant')

#user = sess.users.get(sess.username, sess.zone)

# add the needed users for linking
#acl_path = '/' + os.path.join(*(stac_output_directory.split(os.path.sep)[2:]))
#acl = iRODSAccess('read', acl_path, 'anonymous', user.zone)
#sess.acls.set(acl, recursive=True)

#sess.cleanup()

#print("Folder updated")

**Almost Done!**

To have a *newly created* catalog added to the global CyVerse catalog, update `your_email` using your email address
and send an email using the following:

In [13]:
#your_email = '<someone>@arizona.edu'

# Import smtplib for the actual sending function
#import smtplib

# Import the email modules we'll need
#from email.mime.text import MIMEText

#msg = MIMEText("Please add the following STAC catalog to the main CyVerse catalog: " + 
           #    "https://data.cyverse.org/dav-anon/" + os.path.join(*(stac_output_directory.split(os.path.sep)[2:])) +
             #  "/catalog.json")
#msg['Subject'] = 'Add new STAC catalog to main CyVerse catalog'
#msg['From'] = your_email
#msg['To'] = contact_email

# Send the message via our own SMTP server, but don't include the
# envelope header.
#smtp = smtplib.SMTP(smtp_host)
#smtp.sendmail(your_email, [contact_email], msg.as_string())
#smtp.quit()


The following is experimental: it tries to create multiple polygons (one per item footprint) to display on the STAC browser collection page

In [None]:
import pystac
import json

# load the catalog JSON from disk
catalog = pystac.Catalog.from_file('/data-store/iplant/home/jgillan/STAC_drone/stac_bighorn_fire/catalog.json')

# geometry of every item found anywhere in the catalog
footprints = [item.geometry for item in catalog.get_all_items()]

# wrap each footprint in a GeoJSON Feature with empty properties
features = []
for footprint in footprints:
    features.append({
        "type": "Feature",
        "geometry": footprint,
        "properties": {}
    })

geojson = {
    "type": "FeatureCollection",
    "features": features
}


In [15]:
# collection document that receives the extra footprint FeatureCollection
collection_json_path = '/data-store/iplant/home/jgillan/STAC_drone/stac_bighorn_fire/Drone_Imagery_Collection/collection.json'

# read the existing collection JSON
with open(collection_json_path, 'r') as src:
    collection_data = json.loads(src.read())

# store the FeatureCollection under a custom top-level key
collection_data['individual_footprints'] = geojson

# write the modified document back to the same file
with open(collection_json_path, 'w') as dst:
    dst.write(json.dumps(collection_data, indent=2))
