# An Examination of the Completeness of the Sentinel-1A (S1A) GRD Collection on Planetary Computer
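In response to [TODO: cite the question or issue that prompted this], this notebook checks whether every file listed in a Sentinel-1 GRD product's SAFE manifest is exposed as an asset on the corresponding Planetary Computer STAC item.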

In [23]:
import os
import xml.etree.ElementTree as ET

import planetary_computer
import pystac
import requests
import rioxarray

# STAC API URL of the single Sentinel-1 GRD item examined in this notebook,
# written as API root + collection + item id for readability.
item_url = (
    "https://planetarycomputer.microsoft.com/api/stac/v1"
    "/collections/sentinel-1-grd"
    "/items/S1A_IW_GRDH_1SDV_20240903T142512_20240903T142537_055502_06C59B"
)



In [3]:

# Load the metadata of the single STAC item at `item_url`, then sign it with
# planetary_computer. The unsigned `item` is kept; `signed_item` is the copy
# whose asset hrefs are used for every download in the rest of the notebook.
item = pystac.Item.from_file(item_url)
signed_item = planetary_computer.sign(item)


In [27]:
# Fetch productInfo.json for one GRD product straight from blob storage and list
# the entries of its `filenameMap`.
# NOTE: this product (acquired 2024-12-12) is a different acquisition from the
# STAC item loaded from `item_url` (2024-09-03).
# The URL is held in `product_url` rather than `id` so that the built-in `id()`
# is not shadowed for the rest of the kernel session.
product_url = 'https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2024/12/12/IW/DV/S1A_IW_GRDH_1SDV_20241212T211455_20241212T211520_056964_06FF7C_FFEB'
product_info_response = requests.get(
    planetary_computer.sign(product_url + '/productInfo.json'),
    timeout=60,  # do not hang the kernel if the storage endpoint stalls
)
# Surface an HTTP failure (e.g. a rejected or expired token) as an HTTPError
# here, instead of as a JSON decoding error on the error body.
product_info_response.raise_for_status()
product_info = product_info_response.json()

# Print the filenameMap as a two-column table; an absent map prints only the header.
filename_map = product_info.get('filenameMap', {})
print(f"{'Key':<50} {'Value':<50}")
print("-" * 100)
for key, value in filename_map.items():
    print(f"{key:<50} {value:<50}")

Key                                                Value                                             
----------------------------------------------------------------------------------------------------
manifest.safe                                      manifest.safe                                     
preview/icons/logo.png                             preview/icons/logo.png                            
preview/map-overlay.kml                            preview/map-overlay.kml                           
preview/product-preview.html                       preview/product-preview.html                      
preview/quick-look.png                             preview/quick-look.png                            
support/s1-level-1-calibration.xsd                 support/s1-level-1-calibration.xsd                
support/s1-level-1-measurement.xsd                 support/s1-level-1-measurement.xsd                
support/s1-level-1-noise.xsd                       support/s1-level-1-noise.xsd    

In [18]:
def _relative_href(href, base_url, query_suffix):
    """Return `href` without a leading `base_url` and a trailing `query_suffix`.

    Only an exact prefix / suffix is removed; hrefs that do not start with
    `base_url` or do not end with `query_suffix` keep that part unchanged.
    Empty `base_url` / `query_suffix` strip nothing.
    """
    if base_url and href.startswith(base_url):
        href = href[len(base_url):]
    if query_suffix and href.endswith(query_suffix):
        href = href[:-len(query_suffix)]
    return href


# List every asset key on the signed item together with its full (signed) href.
assets = signed_item.assets.keys()
print(assets)
asset_table = [(asset_key, signed_item.assets[asset_key].href) for asset_key in assets]

# Use the 'vh', 'vv' and 'thumbnail' assets as the reference set for working out
# the base URL and the query string that the hrefs have in common.
filtered_asset_table = [(asset_key, href) for asset_key, href in asset_table if asset_key in ['vh', 'vv', 'thumbnail']]

# os.path.commonprefix compares character by character, so its raw result can
# stop in the middle of a path segment (e.g. ".../measurement/iw-v" if only
# 'vh' and 'vv' were compared). Cut it back to the last "/" so that common_url
# is always a folder URL that a relative file name can be appended to.
common_url = os.path.commonprefix([href for _, href in filtered_asset_table])
common_url = common_url[:common_url.rfind('/') + 1]
print(f"Common URL: {common_url}")

# The common suffix is found the same way on the reversed hrefs. Keep only the
# query string (from "?" onwards) so that a file extension shared by all the
# reference assets is never treated as part of the suffix.
common_suffix = os.path.commonprefix([href[::-1] for _, href in filtered_asset_table])[::-1]
query_start = common_suffix.find('?')
common_suffix = common_suffix[query_start:] if query_start != -1 else ''
print(f"Common Suffix: {common_suffix}")

# Replace the full length HREF with the unique relative URL for each asset
asset_table = [(asset_key, _relative_href(href, common_url, common_suffix)) for asset_key, href in asset_table]

for asset_name, href in asset_table:
    print(f"Asset Name: {asset_name}, HREF: {href}")



dict_keys(['vh', 'vv', 'thumbnail', 'safe-manifest', 'schema-noise-vh', 'schema-noise-vv', 'schema-product-vh', 'schema-product-vv', 'schema-calibration-vh', 'schema-calibration-vv', 'tilejson', 'rendered_preview'])
Common URL: https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2024/9/3/IW/DV/S1A_IW_GRDH_1SDV_20240903T142512_20240903T142537_055502_06C59B_9EC1/
Common Suffix: ?st=2025-01-05T16%3A44%3A40Z&se=2025-01-06T17%3A29%3A40Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-01-06T16%3A36%3A02Z&ske=2025-01-13T16%3A36%3A02Z&sks=b&skv=2024-05-04&sig=FcngNnAMCs0744cvtXkoQGRzaIKos9oigALYw5FFgCg%3D
Asset Name: vh, HREF: measurement/iw-vh.tiff
Asset Name: vv, HREF: measurement/iw-vv.tiff
Asset Name: thumbnail, HREF: preview/quick-look.png
Asset Name: safe-manifest, HREF: manifest.safe
Asset Name: schema-noise-vh, HREF: annotation/calibration/noise-iw-vh.xml
Asset Name: schema-noise-vv, HREF: annotation/calibrati

In [28]:
# Fetch productInfo.json from the examined item's own blob folder (`common_url`
# is expected to end with "/") and list the entries of its `filenameMap`.
product_info_response = requests.get(
    planetary_computer.sign(common_url + 'productInfo.json'),
    timeout=60,  # do not hang the kernel if the storage endpoint stalls
)
# Surface an HTTP failure (e.g. a rejected or expired token) as an HTTPError
# here, instead of as a JSON decoding error on the error body.
product_info_response.raise_for_status()
product_info = product_info_response.json()

# Print the filenameMap as a two-column table; an absent map prints only the header.
filename_map = product_info.get('filenameMap', {})
print(f"{'Key':<50} {'Value':<50}")
print("-" * 100)
for key, value in filename_map.items():
    print(f"{key:<50} {value:<50}")

Key                                                Value                                             
----------------------------------------------------------------------------------------------------
manifest.safe                                      manifest.safe                                     
preview/icons/logo.png                             preview/icons/logo.png                            
preview/map-overlay.kml                            preview/map-overlay.kml                           
preview/product-preview.html                       preview/product-preview.html                      
preview/quick-look.png                             preview/quick-look.png                            
support/s1-level-1-calibration.xsd                 support/s1-level-1-calibration.xsd                
support/s1-level-1-measurement.xsd                 support/s1-level-1-measurement.xsd                
support/s1-level-1-noise.xsd                       support/s1-level-1-noise.xsd    

In [12]:


# Get the href for the 'safe-manifest' asset
safe_manifest_href = signed_item.assets['safe-manifest'].href

# Download the manifest XML. Stop on an HTTP error rather than passing an error
# page on to the XML parser, and do not wait indefinitely on a stalled request.
response = requests.get(safe_manifest_href, timeout=60)
response.raise_for_status()
xml_content = response.content

# Parse the XML; `root` is the manifest's root element, used by later cells.
root = ET.fromstring(xml_content)


In [29]:
def _data_object_row(node):
    """Return (repID, fileLocation href, byteStream size) for one dataObject element."""
    stream = node.find("byteStream")
    return (
        node.get("repID"),
        stream.find("fileLocation").get("href"),
        stream.get("size"),
    )


# Locate the manifest section that lists the product's files.
data_object_section = root.find("dataObjectSection")

# One row per dataObject, in document order.
data_objects = [
    _data_object_row(node)
    for node in data_object_section.findall("dataObject")
]

# Show the rows as a fixed-width table.
print(f"{'repID':<30} {'href':<80} {'size':<10}")
print("-" * 120)
for rep_id, href, size in data_objects:
    print(f"{rep_id:<30} {href:<80} {size:<10}")

repID                          href                                                                             size      
------------------------------------------------------------------------------------------------------------------------
s1Level1ProductSchema          ./annotation/s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 1758469   
s1Level1NoiseSchema            ./annotation/calibration/noise-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 426270    
s1Level1RfiSchema              ./annotation/rfi/rfi-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 43718     
s1Level1CalibrationSchema      ./annotation/calibration/calibration-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 1014835   
s1Level1ProductSchema          ./annotation/s1a-iw-grd-vv-20240903t142512-20240903t142537-055502-06c59b-001.xml 1758492   
s1Level1NoiseSchema            ./annotation/calibration/noise-s1a-iw-grd-vv-20240903t142512

In [30]:
# Augment the data_objects table with values from the product_info JSON:
# each manifest path is looked up in `filename_map`, with 'N/A' recorded when
# the path has no entry there.
augmented_data_objects = []
for rep_id, href, size in data_objects:
    # Manifest hrefs are relative ("./annotation/..."). Drop only the leading
    # "./" to form the lookup key; str.replace would also remove any "./"
    # occurring later in the path.
    key = href[2:] if href.startswith('./') else href
    value = filename_map.get(key, 'N/A')
    augmented_data_objects.append((rep_id, href, size, value))

# Size the href column to the longest href (never narrower than 80) so that the
# size and value columns stay aligned on every row.
href_width = max([80] + [len(href) for _, href, _, _ in augmented_data_objects])

# Print the augmented data in tabular format
print(f"{'repID':<30} {'href':<{href_width}} {'size':<10} {'value':<50}")
# Rule length = the four column widths (30 + href_width + 10 + 50) plus 3 separating spaces.
print("-" * (href_width + 93))
for rep_id, href, size, value in augmented_data_objects:
    print(f"{rep_id:<30} {href:<{href_width}} {size:<10} {value:<50}")

repID                          href                                                                             size       value                                             
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
s1Level1ProductSchema          ./annotation/s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 1758469    annotation/iw-vh.xml                              
s1Level1NoiseSchema            ./annotation/calibration/noise-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 426270     annotation/calibration/noise-iw-vh.xml            
s1Level1RfiSchema              ./annotation/rfi/rfi-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml 43718      annotation/rfi/rfi-iw-vh.xml                      
s1Level1CalibrationSchema      ./annotation/calibration/calibration-s1a-iw-grd-vh-20240903t142512-20240903t

In [37]:
# Relative hrefs of all STAC item assets, held in a set for fast membership tests.
asset_hrefs = {relative_href for _, relative_href in asset_table}

# Tag every manifest entry with whether its mapped name appears among the
# STAC item's asset hrefs.
cross_reference_table = [
    (rep_id, href, size, value, 'Included' if value in asset_hrefs else 'Not Included')
    for rep_id, href, size, value in augmented_data_objects
]

# Show the cross-referenced rows as a fixed-width table.
print(f"{'safe-manifest repID':<30} {'safe-manifest href':<110} {'filesize':<10} {'pc href':<50} {'stac item asset':<15}")
print("-" * 225)
for rep_id, href, size, value, exists in cross_reference_table:
    print(f"{rep_id:<30} {href:<110} {size:<10} {value:<50} {exists:<15}")

safe-manifest repID            safe-manifest href                                                                                             filesize   pc href                                            stac item asset
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
s1Level1ProductSchema          ./annotation/s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml                               1758469    annotation/iw-vh.xml                               Not Included   
s1Level1NoiseSchema            ./annotation/calibration/noise-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-002.xml             426270     annotation/calibration/noise-iw-vh.xml             Included       
s1Level1RfiSchema              ./annotation/rfi/rfi-s1a-iw-grd-vh-20240903t142512-20240903t142537-055502-06c59b-00

In [None]:

# Open one of the data assets with rioxarray and display it as the cell output.
# Set `polarization` to "vv" to open the other data asset instead.
polarization = "vh"
asset_href = signed_item.assets[polarization].href
ds = rioxarray.open_rasterio(asset_href)
ds
