##### Source 
https://registry.opendata.aws/umbra-open-data/

##### Description
Umbra Spotlight collects including GEC, SICD, SIDD, CPHD data and metadata

In [37]:
import s3fs
import os
import json
import pandas as pd

In [9]:
bucket_name = 's3://umbra-open-data-catalog/'

In [13]:
# list data from Umbra S3 bucket
def listFilesInBucket(bucket):
    fs = s3fs.S3FileSystem(anon = True)
    
    files = fs.ls(bucket)
    return list(files)

In [14]:
print(listFilesInBucket(bucket_name))

['umbra-open-data-catalog/', 'umbra-open-data-catalog/error.html', 'umbra-open-data-catalog/index.html', 'umbra-open-data-catalog/list.js', 'umbra-open-data-catalog/sar-data', 'umbra-open-data-catalog/stac', 'umbra-open-data-catalog/verified.txt']


In [24]:
# list sar_data
for i in listFilesInBucket(f"{bucket_name}/sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08"):
    print(i)

umbra-open-data-catalog/sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08/2025-06-16-14-58-04_UMBRA-08.stac.v2.json
umbra-open-data-catalog/sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08/2025-06-16-14-58-04_UMBRA-08_GEC.tif
umbra-open-data-catalog/sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08/2025-06-16-14-58-04_UMBRA-08_SICD.nitf
umbra-open-data-catalog/sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08/2025-06-16-14-58-04_UMBRA-08_SIDD.nitf


In [28]:
my_path = 'sar-data/tasks/Kalgoorlie Super Pit, Australia/0015903b-453a-4d06-a91f-554408a5f4b7/2025-06-16-14-58-04_UMBRA-08'

def createFolder(path):
    # get current directory
    cwd = os.getcwd()
    
    # create directory if it does not exists
    try:
        os.makedirs(f"{cwd}/{path}")
    except OSError:
        pass

In [29]:
createFolder(my_path)

In [31]:
s3_prefix = "s3://umbra-open-data-catalog/sar-data/tasks/Kalgoorlie Super Pit, Australia/"
fs = s3fs.S3FileSystem(anon=True) 

In [38]:
# get stac keys
def getSARStacKeys(s3_prefix):
    stac_keys = [p for p in fs.glob(f"{s3_prefix}**/*.stac.v2.json")]
    
    return stac_keys

In [39]:
# extract timestamp and file path
def umbraSARToDataframe(keys):
    rows = []

    for key in stac_keys:
        with fs.open(key, "rb") as f:
            item = json.load(f)
        
        dt = item.get("properties", {}).get("datetime")
        print(dt)
    
        if dt:
            ts = pd.to_datetime(dt)
        else:
            # fallback: .../2025-06-16-14-58-04_UMBRA-08/...
            m = re.search(r"/(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})_", key)
            ts = pd.to_datetime(m.group(1).replace("-", ":"), format="%Y:%m:%d:%H:%M:%S") if m else None
        
        assets = item.get("assets", {})
        # Sometimes assets are not listed completely - then we will restore them from neighbors by name
        base_dir = "/".join(key.split("/")[:-1])
        gec = assets.get("geotiff") or assets.get("GEC") or f"{base_dir}/{base_dir.split('/')[-1]}_GEC.tif"
        sicd = assets.get("SICD") or f"{base_dir}/{base_dir.split('/')[-1]}_SICD.nitf"
        sidd = assets.get("SIDD") or f"{base_dir}/{base_dir.split('/')[-1]}_SIDD.nitf"

        rows.append({"datetime": ts, "stac": f"s3://{key}", "gec": gec if str(gec).startswith("s3://") else f"s3://{gec}",
                 "sicd": sicd if str(sicd).startswith("s3://") else f"s3://{sidd}"})

    dataframe = pd.DataFrame(rows).dropna(subset=["datetime"]).sort_values("datetime").reset_index(drop=True)
    
    return dataframe

In [40]:
sar_keys = getSARStacKeys(s3_prefix)
df = umbraSARToDataframe(sar_keys)

df.head(5)

2025-06-16T14:58:06.300000Z
2025-04-16T14:54:10.600000Z
2025-01-23T00:41:45.299998Z
2025-08-16T02:13:43.900000Z
2025-05-25T15:06:46.400000Z
2025-06-10T14:55:15.200000Z
2025-07-29T00:45:21.400000Z
2025-02-11T01:58:00.600000Z
2025-05-20T02:06:42.500000Z
2025-05-15T02:16:56.200000Z
2025-02-07T02:18:20.600000Z
2025-02-20T00:57:09.199999Z
2025-05-15T02:16:43.400000Z
2025-07-24T01:57:11.300000Z
2025-02-16T00:59:11.299999Z
2025-05-07T14:31:35.200000Z
2025-07-22T14:30:57.500000Z
2025-06-27T00:47:07.199999Z
2025-02-04T00:56:35.199999Z
2025-05-02T02:18:02.400000Z
2025-03-28T02:17:09.400000Z
2025-01-29T02:13:48.300000Z
2025-03-05T02:17:09.500000Z
2025-03-09T02:17:57.500000Z
2025-07-04T01:08:35.100000Z
2025-01-09T14:33:39.800000Z
2025-07-09T02:19:44.300000Z
2025-05-10T02:39:37.600000Z
2025-01-21T01:50:30.300000Z
2025-06-06T02:20:37.400000Z
2025-04-10T00:57:11.500000Z


Unnamed: 0,datetime,stac,gec,sicd
0,2025-01-09 14:33:39.800000+00:00,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...
1,2025-01-21 01:50:30.300000+00:00,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...
2,2025-01-23 00:41:45.299998+00:00,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...
3,2025-01-29 02:13:48.300000+00:00,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...
4,2025-02-04 00:56:35.199999+00:00,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...,s3://umbra-open-data-catalog/sar-data/tasks/Ka...
