In [1]:
# Usage
# To run this notebook, you need to set up your AWS credentials in the terminal.
# You can do this by exporting the required environment variables as follows:
#
# export AWS_ACCESS_KEY_ID='your-access-key-id'
# export AWS_SECRET_ACCESS_KEY='your-secret-access-key'
# export AWS_DEFAULT_REGION='your-aws-region'
#
# These environment variables are necessary for accessing AWS services.
# Export them in the same terminal session from which you start Jupyter, so the
# notebook kernel inherits them; then run the notebook cells as usual.

In [2]:
!pip install boto3 requests pystac rasterio pandas tabulate IPython


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [3]:
# Test Capella

In [4]:
# One off test of file:
# test_url = "https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif"

import boto3
import requests
import rasterio
from rasterio.session import AWSSession
from rasterio.env import Env
import os

# Read the AWS settings from the process environment (each is None when unset)
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
aws_default_region = os.environ.get('AWS_DEFAULT_REGION')

# Build the boto3 session from those settings
session = boto3.Session(
    region_name=aws_default_region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Function to check if a given URL is accessible
def check_url(url, timeout=10):
    """Return True when a HEAD request to ``url`` answers with HTTP 200.

    Parameters
    ----------
    url : str
        Address to probe.
    timeout : float or tuple, optional
        Seconds to wait for the server; forwarded to ``requests.head``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    bool
        True only for a 200 status. False for any other status, or when the
        request itself fails (connection error, timeout, invalid URL, ...).
    """
    try:
        response = requests.head(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # A failed request means "not accessible"; report it instead of crashing.
        print(f"Error checking URL: {e}")
        return False
    return response.status_code == 200

# Function to check if a given GeoTIFF is readable
def is_readable(asset_url):
    """Try to open ``asset_url`` with rasterio and report whether it worked.

    The open happens inside a rasterio ``Env`` built from the notebook's
    boto3 ``session``. On success the URL and the dataset profile are
    printed; on a rasterio I/O error the error is printed instead.

    Returns
    -------
    bool
        True when the dataset opened, False when rasterio raised
        ``RasterioIOError``.
    """
    try:
        with Env(AWSSession(session)), rasterio.open(asset_url) as dataset:
            print(f"Successfully opened asset: {asset_url}")
            print(f"Asset profile: {dataset.profile}")
    except rasterio.errors.RasterioIOError as err:
        print(f"Error opening asset: {err}")
        return False
    return True

# Known accessible URL for testing
test_url = "https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif"

print("Testing known accessible URL:")
# Reachability is checked first; readability is only attempted when the URL answers.
if not check_url(test_url):
    print("The test URL is not accessible.")
elif is_readable(test_url):
    print("The test URL is readable.")
else:
    print("The test URL is NOT readable.")


Testing known accessible URL:
Successfully opened asset: https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif
Asset profile: {'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 0.0, 'width': 27332, 'height': 22237, 'count': 1, 'crs': CRS.from_epsg(32760), 'transform': Affine(0.7940040041547718, 0.0, 291678.28181944776,
       0.0, -0.7940040041547718, 5926175.3821243895), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}
The test URL is readable.


In [5]:
# Test Capella STAC catalog URL (adjacent literals are joined into one string)
capella_stac_url = (
    "https://capella-open-data.s3.us-west-2.amazonaws.com/stac/"
    "capella-open-data-by-datetime/capella-open-data-2023/"
    "capella-open-data-2023-12/capella-open-data-2023-12-24/"
    "CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/"
    "CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.json"
)

import boto3
import requests
import pystac
import rasterio
from rasterio.session import AWSSession
from rasterio.env import Env
import json
import os
import pandas as pd
from IPython.display import display, HTML

# Read the AWS settings from the process environment (each is None when unset)
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
aws_default_region = os.environ.get('AWS_DEFAULT_REGION')

# Build the boto3 session from those settings
session = boto3.Session(
    region_name=aws_default_region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Function to check if a given URL is accessible
def check_url(url, timeout=10):
    """Return True when a HEAD request to ``url`` answers with HTTP 200.

    Parameters
    ----------
    url : str
        Address to probe.
    timeout : float or tuple, optional
        Seconds to wait for the server; forwarded to ``requests.head``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    bool
        True only for a 200 status. False for any other status, or when the
        request itself fails (connection error, timeout, invalid URL, ...).
    """
    try:
        response = requests.head(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # A failed request means "not accessible"; report it instead of crashing.
        print(f"Error checking URL: {e}")
        return False
    return response.status_code == 200

# Function to check if a given asset is a readable GeoTIFF or PNG
def is_readable_raster(asset_url):
    """Try to open ``asset_url`` with rasterio and hand back its profile.

    The open happens inside a rasterio ``Env`` built from the notebook's
    boto3 ``session``. On success the URL and the dataset profile are
    printed; on a rasterio I/O error the error is printed instead.

    Returns
    -------
    tuple
        ``(True, profile)`` when the dataset opened, ``(False, None)`` when
        rasterio raised ``RasterioIOError``.
    """
    try:
        with Env(AWSSession(session)), rasterio.open(asset_url) as dataset:
            print(f"Successfully opened asset: {asset_url}")
            raster_profile = dataset.profile
            print(f"Asset profile: {raster_profile}")
    except rasterio.errors.RasterioIOError as err:
        print(f"Error opening asset: {err}")
        return False, None
    return True, raster_profile

# Function to check if a given asset is a readable JSON
def is_readable_json(asset_url, timeout=30):
    """Download ``asset_url`` and check that its body parses as JSON.

    Parameters
    ----------
    asset_url : str
        Address of the JSON document.
    timeout : float or tuple, optional
        Seconds to wait for the server; forwarded to ``requests.get`` so a
        stalled connection cannot hang the notebook.

    Returns
    -------
    tuple
        ``(True, parsed_json)`` on success. ``(False, None)`` when the
        request fails, the server answers with an error status, or the body
        is not valid JSON; the error is printed in each of those cases.
    """
    try:
        response = requests.get(asset_url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses
        json_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures, which are not guaranteed to
        # be RequestException subclasses on every requests release.
        print(f"Error opening JSON asset: {e}")
        return False, None
    print(f"Successfully opened JSON asset: {asset_url}")
    return True, json_data

# Function to test a STAC item
def test_stac(stac_url):
    """Check every asset of a STAC item and collect a log plus an HTML summary.

    The item is loaded with ``pystac.Item.from_file``. Each asset href is
    probed with ``check_url``; reachable ``.tif``/``.png`` assets are opened
    with ``is_readable_raster`` and reachable ``.json`` assets with
    ``is_readable_json``. Anything else is reported as an unsupported format.

    Parameters
    ----------
    stac_url : str
        Location of the STAC item JSON.

    Returns
    -------
    tuple
        ``(log, summary)``. ``log`` is a list of message strings. ``summary``
        is a list of 5-tuples ``(asset name, link markup, status, format,
        thumbnail markup)`` whose first, second and last fields are HTML
        fragments intended to be rendered without further escaping.
    """
    import html  # standard library; only needed for the markup built here

    log = []
    summary = []
    stac_item = pystac.Item.from_file(stac_url)
    for asset_key, asset in stac_item.assets.items():
        asset_href = asset.href  # Use the absolute URL directly from the JSON
        print(f"Checking asset: {asset_key} -> {asset_href}")

        # Keys and hrefs come from the remote JSON and end up in raw HTML,
        # so escape them before interpolating into markup.
        safe_key = html.escape(asset_key)
        safe_href = html.escape(asset_href, quote=True)
        link = f'<a href="{safe_href}" target="_blank">{safe_href}</a>'

        if not check_url(asset_href):
            log.append(f"Asset {asset_key} is not accessible.")
            summary.append((safe_key, link, "Not Accessible", "Unknown", ""))
        elif asset_href.endswith(('.tif', '.png')):
            readable, profile = is_readable_raster(asset_href)
            log.append(f"Asset profile: {profile}" if readable else "Asset is NOT readable.")
            if readable:
                # Only the asset named "thumbnail" gets an inline preview image.
                thumbnail = f'<img src="{safe_href}" width="100"/>' if asset_key == "thumbnail" else ""
                summary.append((safe_key, link, "Readable", profile['driver'], thumbnail))
            else:
                summary.append((safe_key, link, "Not Readable", "Unknown", ""))
        elif asset_href.endswith('.json'):
            readable, json_data = is_readable_json(asset_href)
            log.append(f"JSON content: {json_data}" if readable else "JSON is NOT readable.")
            summary.append((safe_key, link, "Readable" if readable else "Not Readable", "JSON", ""))
        else:
            log.append(f"Asset {asset_key} is of an unsupported format.")
            summary.append((safe_key, link, "Unsupported Format", "Unknown", ""))
    return log, summary

# Define the Capella STAC catalog URL (adjacent literals are joined into one string)
capella_stac_url = (
    "https://capella-open-data.s3.us-west-2.amazonaws.com/stac/"
    "capella-open-data-by-datetime/capella-open-data-2023/"
    "capella-open-data-2023-12/capella-open-data-2023-12-24/"
    "CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/"
    "CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.json"
)

# Run the checks against the Capella STAC item
print("Testing Capella STAC:")
log, summary = test_stac(capella_stac_url)

# Dump the collected log, one message per line
print("\nLog:")
for entry in log:
    print(entry)

# One summary row per asset
summary_df = pd.DataFrame(
    data=summary,
    columns=["Asset Name", "URL", "Status", "Format", "Thumbnail"],
)

# Render the table as raw HTML so the links and thumbnail markup stay live
html_table = summary_df.to_html(escape=False)
display(HTML(html_table))


Testing Capella STAC:
Checking asset: HH -> https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif
Successfully opened asset: https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif
Asset profile: {'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 0.0, 'width': 27332, 'height': 22237, 'count': 1, 'crs': CRS.from_epsg(32760), 'transform': Affine(0.7940040041547718, 0.0, 291678.28181944776,
       0.0, -0.7940040041547718, 5926175.3821243895), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}
Checking asset: metadata -> https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359_extended.json
Successfully opened JSON asset

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


Unnamed: 0,Asset Name,URL,Status,Format,Thumbnail
0,HH,https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359.tif,Readable,GTiff,
1,metadata,https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359_extended.json,Readable,JSON,
2,preview,https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359_preview.tif,Readable,GTiff,
3,thumbnail,https://capella-open-data.s3.amazonaws.com/data/2023/12/24/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359/CAPELLA_C09_SM_GEO_HH_20231224140355_20231224140359_thumb.png,Readable,PNG,


In [6]:
# Test Umbra - TBA
# https://registry.opendata.aws/umbra-open-data/