Source: https://github.com/bopen/xarray-sentinel/tree/main

In [1]:
import geopandas as gpd 
import numpy as np 
import os
import matplotlib.pyplot as plt 
import rasterio as rio 
import rioxarray as rioxr
import glob 
import xarray as xr
import folium
from shapely.geometry import box

from pathlib import Path
from typing import Tuple, Dict, Any, List, Optional
import fsspec
from xml.etree import ElementTree
import re

In [10]:
import src.config as cfg

In [17]:
# Path (as a plain string) of the Sentinel-1 SAFE product directory inspected in this notebook.
raster_path = str(
    Path(cfg.ConfigS1.s1_dir).joinpath(
        "S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE"
    )
)

In [18]:
# Prefix -> URI mapping passed as ``namespaces`` to the ElementTree lookups below,
# so that queries can be written with the short "safe:", "s1:" and "s1sarl1:" prefixes.
SENTINEL1_NAMESPACES = dict(
    safe="http://www.esa.int/safe/sentinel-1.0",
    s1="http://www.esa.int/safe/sentinel-1.0/sentinel-1",
    s1sarl1="http://www.esa.int/safe/sentinel-1.0/sentinel-1/sar/level-1",
)
def findtext(
    tree: ElementTree.Element,
    query: str,
    namespaces: Dict[str, str] = SENTINEL1_NAMESPACES,
) -> str:
    """Return the text found by ``tree.findtext(query)``, refusing a missing result.

    Args:
        tree: Element to search from.
        query: Path expression handed to ``tree.findtext``.
        namespaces: Prefix -> URI mapping used to resolve prefixes in ``query``.

    Returns:
        The text returned by the lookup.

    Raises:
        ValueError: If the lookup returns ``None``.
    """
    text = tree.findtext(query, namespaces=namespaces)
    if text is not None:
        return text
    raise ValueError(f"{query=} returned None")


def findall(
    tree: ElementTree.Element,
    query: str,
    namespaces: Dict[str, str] = SENTINEL1_NAMESPACES,
) -> List[str]:
    """Return the text of every element matched by ``tree.findall(query)``.

    Args:
        tree: Element to search from.
        query: Path expression handed to ``tree.findall``.
        namespaces: Prefix -> URI mapping used to resolve prefixes in ``query``.

    Returns:
        The ``text`` of each matched element, in match order (empty list when
        nothing matches).

    Raises:
        ValueError: If any matched element has ``text`` equal to ``None``.
    """
    texts = [element.text for element in tree.findall(query, namespaces=namespaces)]
    # A single element without text invalidates the whole result.
    if any(text is None for text in texts):
        raise ValueError(f"{query=} returned None")
    return texts  # type: ignore

def parse_annotation_filename(name: str) -> Tuple[str, str, str, str]:
    """Extract the leading dash-separated fields from a Sentinel-1 product file name.

    Only the base name of ``name`` is inspected. The platform token may be any of
    ``s1a`` .. ``s1d`` so that files from later Sentinel-1 units are recognised
    as well as the original A/B pair.

    Args:
        name: File name or path, e.g.
            ``"./annotation/calibration/noise-s1b-iw-grd-vh-20170425t214234-...-002.xml"``.

    Returns:
        A 4-tuple of the regex groups, in order: the text preceding the platform
        token (``"noise-"`` in the example, ``""`` when absent), the field after
        the platform token (``"iw"``), the field two positions further
        (``"vh"``), and the following digits/``t`` field (``"20170425t214234"``).

    Raises:
        ValueError: If the base name does not follow this pattern.
    """
    match = re.match(
        # prefix, platform (s1a-s1d), field 1, skipped field, field 3, timestamp
        r"([a-z-]*)s1[a-d]-([^-]*)-[^-]*-([^-]*)-([\dt]*)-",
        os.path.basename(name),
    )
    if match is None:
        raise ValueError(f"cannot parse name {name!r}")
    return tuple(match.groups())  # type: ignore

In [19]:
def get_fs_path(
    urlpath_or_path,
    fs: Optional[fsspec.AbstractFileSystem] = None,
    storage_options: Optional[Dict[str, Any]] = None,
) -> Tuple[fsspec.AbstractFileSystem, str]:
    """Resolve a product location to a filesystem object and a concrete path.

    Args:
        urlpath_or_path: URL or path of the product (or of a file inside it).
        fs: Filesystem to use as-is. When omitted, it is inferred from
            ``urlpath_or_path`` through ``fsspec.get_fs_token_paths``.
        storage_options: Options forwarded to ``fsspec.get_fs_token_paths``;
            cannot be combined with ``fs``.

    Returns:
        ``(fs, path)``. When ``path`` is a directory on ``fs``, the returned
        path points at ``manifest.safe`` inside it.

    Raises:
        TypeError: If both ``fs`` and ``storage_options`` are given.
        ValueError: If the location expands to zero or to several paths.
    """
    if not (fs is None or storage_options is None):
        raise TypeError("only one of 'fs' and 'storage_options' can be not None")

    if fs is not None:
        # Caller supplied the filesystem: take the path verbatim.
        path = str(urlpath_or_path)
    else:
        fs, _, candidates = fsspec.get_fs_token_paths(
            urlpath_or_path, storage_options=storage_options
        )
        found = len(candidates)
        if found == 0:
            raise ValueError(f"file or object not found {urlpath_or_path!r}")
        if found > 1:
            raise ValueError(f"multiple files or objects found {urlpath_or_path!r}")
        path = candidates[0]

    # A directory is treated as a SAFE product folder; its manifest is the entry point.
    if fs.isdir(path):
        path = os.path.join(path, "manifest.safe")

    return fs, path

In [20]:
# Resolve the product location. With neither a filesystem nor storage options given,
# get_fs_path infers the filesystem from the path itself.
product_urlpath = raster_path[:]
fs = storage_options = None
fs, manifest_path = get_fs_path(product_urlpath, fs, storage_options)


In [21]:
# Show the filesystem object returned by get_fs_path.
fs

<fsspec.implementations.local.LocalFileSystem at 0x7f6db044e370>

In [22]:
# Show the manifest path returned by get_fs_path.
manifest_path

'/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/raw/S1/zip/S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE/manifest.safe'

In [23]:
def parse_manifest_sentinel1(
    manifest_path,
) -> Tuple[Dict[str, Any], Dict[str, Tuple[str, str, str, str, str]]]:
    """Read a Sentinel-1 ``manifest.safe`` and summarise the product it describes.

    Args:
        manifest_path: Manifest location, passed straight to ``ElementTree.parse``.

    Returns:
        A pair ``(attributes, files)``:

        * ``attributes`` maps names such as ``"family_name"``, ``"mode"``,
          ``"orbit_number"``, ``"pass"`` and ``"start_time"`` to the values read
          from the manifest (orbit numbers are converted to ``int``).
        * ``files`` maps each data-object ``href`` whose base name
          ``parse_annotation_filename`` accepts to a tuple made of the object's
          ``repID`` followed by the parsed file-name fields. Other data objects
          are skipped.

    Raises:
        ValueError: If a looked-up element is missing, the platform family is not
            ``"SENTINEL-1"``, the orbit numbers (absolute or relative) are not
            exactly two equal values, or the pass is neither ``"ASCENDING"`` nor
            ``"DESCENDING"``.
    """
    # ElementTree is used directly because no XSD definition was found for the manifest.
    root = ElementTree.parse(manifest_path).getroot()

    family_name = findtext(root, ".//safe:platform/safe:familyName")
    if family_name != "SENTINEL-1":
        raise ValueError(f"{family_name=} not supported")

    number = findtext(root, ".//safe:platform/safe:number")
    mode = findtext(root, ".//s1sarl1:instrumentMode/s1sarl1:mode")
    swaths = findall(root, ".//s1sarl1:instrumentMode/s1sarl1:swath")

    # Exactly two orbit numbers are expected, and they must agree.
    orbit_number = findall(root, ".//safe:orbitNumber")
    if not (len(orbit_number) == 2 and orbit_number[0] == orbit_number[1]):
        raise ValueError(f"{orbit_number=} not supported")

    # Same constraint for the relative orbit numbers.
    relative_orbit_number = findall(root, ".//safe:relativeOrbitNumber")
    if not (
        len(relative_orbit_number) == 2
        and relative_orbit_number[0] == relative_orbit_number[1]
    ):
        raise ValueError(f"{relative_orbit_number=} not supported")

    orbit_pass = findtext(root, ".//s1:pass")
    if orbit_pass != "ASCENDING" and orbit_pass != "DESCENDING":
        raise ValueError(f"pass={orbit_pass} not supported")

    ascending_node_time = findtext(root, ".//s1:ascendingNodeTime")

    transmitter_receiver_polarisations = findall(
        root, ".//s1sarl1:transmitterReceiverPolarisation"
    )
    product_type = findtext(root, ".//s1sarl1:productType")

    start_time = findtext(root, ".//safe:startTime")
    stop_time = findtext(root, ".//safe:stopTime")

    attributes = {
        "family_name": family_name,
        "number": number,
        "mode": mode,
        "swaths": swaths,
        "orbit_number": int(orbit_number[0]),
        "relative_orbit_number": int(relative_orbit_number[0]),
        "pass": orbit_pass,
        "ascending_node_time": ascending_node_time,
        "transmitter_receiver_polarisations": transmitter_receiver_polarisations,
        "product_type": product_type,
        "start_time": start_time,
        "stop_time": stop_time,
    }

    described_files = {}

    for data_object in root.findall(".//dataObjectSection/dataObject"):
        location = data_object.find(".//fileLocation")
        if location is None:
            continue

        href = location.attrib["href"]
        try:
            name_fields = parse_annotation_filename(os.path.basename(href))
        except ValueError:
            # Not an annotation/measurement-style file name: leave it out.
            continue
        print(href)

        described_files[href] = (data_object.attrib["repID"],) + name_fields

    return attributes, described_files


In [24]:
# Parse the manifest into product-level attributes and per-file descriptions.
common_attrs, product_files = parse_manifest_sentinel1(manifest_path)

./annotation/s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml
./annotation/calibration/noise-s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml
./annotation/calibration/calibration-s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml
./annotation/s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml
./annotation/calibration/noise-s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml
./annotation/calibration/calibration-s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml
./measurement/s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.tiff
./measurement/s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.tiff


In [25]:
# Show the product-level attributes parsed from the manifest.
common_attrs

{'family_name': 'SENTINEL-1',
 'number': 'B',
 'mode': 'IW',
 'swaths': ['IW'],
 'orbit_number': 5323,
 'relative_orbit_number': 47,
 'pass': 'ASCENDING',
 'ascending_node_time': '2017-04-25T20:02:53.956661',
 'transmitter_receiver_polarisations': ['VV', 'VH'],
 'product_type': 'GRD',
 'start_time': '2017-04-25T21:42:34.373420',
 'stop_time': '2017-04-25T21:43:02.748930'}

In [26]:
 # Show the per-file descriptions parsed from the manifest.
 product_files

{'./annotation/s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml': ('s1Level1ProductSchema',
  '',
  'iw',
  'vh',
  '20170425t214234'),
 './annotation/calibration/noise-s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml': ('s1Level1NoiseSchema',
  'noise-',
  'iw',
  'vh',
  '20170425t214234'),
 './annotation/calibration/calibration-s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.xml': ('s1Level1CalibrationSchema',
  'calibration-',
  'iw',
  'vh',
  '20170425t214234'),
 './annotation/s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml': ('s1Level1ProductSchema',
  '',
  'iw',
  'vv',
  '20170425t214234'),
 './annotation/calibration/noise-s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml': ('s1Level1NoiseSchema',
  'noise-',
  'iw',
  'vv',
  '20170425t214234'),
 './annotation/calibration/calibration-s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.xml': ('s1Level1CalibrationSchema',
  

Extract footprints: see `open_gcp_dataset`.

In [27]:
# Show the SAFE product directory path.
raster_path

'/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/raw/S1/zip/S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE'

In [35]:
# Walk the SAFE directory tree and print the name of every file ending in ".tiff".
for root, dirs, files in os.walk(raster_path):
    for file in files:
        if not file.endswith(".tiff"):
            continue
        print(file)

s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.tiff
s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.tiff


In [40]:
# Print the full path of every ".tiff" file found recursively under the SAFE directory.
# (Path is already imported in the notebook's first cell.)
for file in Path(raster_path).glob("**/*.tiff"):
    print(file)

/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/raw/S1/zip/S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE/measurement/s1b-iw-grd-vh-20170425t214234-20170425t214302-005323-00953e-002.tiff
/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/raw/S1/zip/S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE/measurement/s1b-iw-grd-vv-20170425t214234-20170425t214302-005323-00953e-001.tiff


In [41]:
# Show the SAFE product directory path.
raster_path

'/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/raw/S1/zip/S1B_IW_GRDH_1SDV_20170425T214234_20170425T214302_005323_00953E_F02D.SAFE'