# Sentinel-1 Data Audit MSPC

Starting from the GeoParquet listing of all Sentinel-1 GRD scenes, filter to a bounding box (BBOX) over Australia, keeping only IW-mode scenes acquired on descending orbits, and then export counts of available scenes per year.

The [Sentinel-1 GRD metadata is here](https://planetarycomputer.microsoft.com/dataset/sentinel-1-grd).

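There's a parquet file available at `abfs://items/sentinel-1-grd.parquet`; the code below looks up its location and storage options from the collection's `geoparquet-items` asset.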

In [1]:
import dask.dataframe as dd
import geopandas as gpd
from planetary_computer import sign_inplace
from pystac_client import Client
from shapely.geometry import box

from utils import bbox

In [2]:
# Open the Planetary Computer STAC API, passing `sign_inplace` as the client's
# modifier (presumably so asset hrefs come back signed -- confirm against the
# planetary_computer docs).
catalog = Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1/",
    modifier=sign_inplace,
)

# The collection exposes its item listing as a GeoParquet asset; the storage
# options needed to read it are carried in the asset's extra fields.
collection = catalog.get_collection("sentinel-1-grd")
asset = collection.assets["geoparquet-items"]
storage_options = asset.extra_fields["table:storage_options"]

# Lazily open the listing as a dask dataframe and preview the first rows.
s1grd = dd.read_parquet(asset.href, storage_options=storage_options)
s1grd.head()

Unnamed: 0,type,stac_version,stac_extensions,id,geometry,bbox,links,assets,collection,datetime,...,sar:center_frequency,sar:resolution_range,s1:product_timeliness,sar:resolution_azimuth,sar:pixel_spacing_range,sar:observation_direction,sar:pixel_spacing_azimuth,sar:looks_equivalent_number,s1:instrument_configuration_ID,sat:platform_international_designator
0,Feature,1.0.0,[https://stac-extensions.github.io/sar/v1.0.0/...,S1A_EW_GRDH_1SDH_20141010T002821_20141010T0029...,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00U\x00\x0...,"[96.16291342, 78.38726451, 127.61342505, 83.41...",[{'href': 'https://planetarycomputer.microsoft...,{'hh': {'description': 'Amplitude of signal tr...,sentinel-1-grd,2014-10-10 00:28:53.899620+00:00,...,5.405,50,Fast-24h,50,25,right,25,2.7,3,0000-000A
1,Feature,1.0.0,[https://stac-extensions.github.io/sar/v1.0.0/...,S1A_EW_GRDH_1SDH_20141010T002925_20141010T0030...,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00Q\x00\x0...,"[92.01536772, 75.2044615, 114.16649847, 79.909...",[{'href': 'https://planetarycomputer.microsoft...,{'hh': {'description': 'Amplitude of signal tr...,sentinel-1-grd,2014-10-10 00:29:55.942789+00:00,...,5.405,50,Fast-24h,50,25,right,25,2.7,3,0000-000A
2,Feature,1.0.0,[https://stac-extensions.github.io/sar/v1.0.0/...,S1A_EW_GRDH_1SDH_20141010T003025_20141010T0031...,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00@\x00\x0...,"[90.39374409, 73.31512626, 106.296735, 76.4099...",[{'href': 'https://planetarycomputer.microsoft...,{'hh': {'description': 'Amplitude of signal tr...,sentinel-1-grd,2014-10-10 00:30:44.271743+00:00,...,5.405,50,Fast-24h,50,25,right,25,2.7,3,0000-000A
3,Feature,1.0.0,[https://stac-extensions.github.io/sar/v1.0.0/...,S1A_EW_GRDH_1SDH_20141010T034548_20141010T0346...,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00U\x00\x0...,"[47.02955941, 78.51491623, 78.97114922, 83.550...",[{'href': 'https://planetarycomputer.microsoft...,{'hh': {'description': 'Amplitude of signal tr...,sentinel-1-grd,2014-10-10 03:46:20.591628+00:00,...,5.405,50,Fast-24h,50,25,right,25,2.7,3,0000-000A
4,Feature,1.0.0,[https://stac-extensions.github.io/sar/v1.0.0/...,S1A_EW_GRDH_1SDH_20141010T034652_20141010T0347...,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00P\x00\x0...,"[42.78269906, 75.33913766, 65.22075705, 80.055...",[{'href': 'https://planetarycomputer.microsoft...,{'hh': {'description': 'Amplitude of signal tr...,sentinel-1-grd,2014-10-10 03:47:22.631839+00:00,...,5.405,50,Fast-24h,50,25,right,25,2.7,3,0000-000A


In [None]:
# Sanity-check the area of interest: build a polygon from the bounding box
# values and show it on an interactive map.
# Relies on `gpd` (geopandas) and `box` (shapely.geometry) from the notebook's
# import cell; without them this cell raises NameError on a fresh kernel.
aoi_polygon = box(*bbox)
gdf = gpd.GeoDataFrame(geometry=[aoi_polygon], crs="EPSG:4326")
gdf.explore()

In [7]:
# Select scenes whose bounding box overlaps the area of interest, restricted to
# IW instrument mode and descending orbits.
# The overlap test is "intersects" logic: a scene is kept when its max corner
# lies beyond the AOI's min corner and its min corner lies before the AOI's
# max corner, so partial overlaps in either direction are included.
aoi_min_x, aoi_min_y, aoi_max_x, aoi_max_y = bbox[0], bbox[1], bbox[2], bbox[3]
scene_bbox = s1grd.bbox

overlaps_aoi = (
    (scene_bbox.str[2] > aoi_min_x)
    & (scene_bbox.str[3] > aoi_min_y)
    & (scene_bbox.str[0] < aoi_max_x)
    & (scene_bbox.str[1] < aoi_max_y)
)
is_iw_mode = s1grd["sar:instrument_mode"] == "IW"
is_descending = s1grd["sat:orbit_state"] == "descending"

filtered = s1grd.loc[overlaps_aoi & is_iw_mode & is_descending]

filtered

Unnamed: 0_level_0,type,stac_version,stac_extensions,id,geometry,bbox,links,assets,collection,datetime,platform,s1:shape,end_datetime,constellation,s1:resolution,s1:datatake_id,start_datetime,s1:orbit_source,s1:slice_number,s1:total_slices,sar:looks_range,sat:orbit_state,sar:product_type,sar:looks_azimuth,sar:polarizations,sar:frequency_band,sat:absolute_orbit,sat:relative_orbit,s1:processing_level,sar:instrument_mode,sar:center_frequency,sar:resolution_range,s1:product_timeliness,sar:resolution_azimuth,sar:pixel_spacing_range,sar:observation_direction,sar:pixel_spacing_azimuth,sar:looks_equivalent_number,s1:instrument_configuration_ID,sat:platform_international_designator
npartitions=114,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
,string,string,object,string,object,object,object,object,string,"datetime64[us, UTC]",string,object,"datetime64[us, UTC]",string,string,string,"datetime64[us, UTC]",string,string,string,int64,string,string,int64,object,string,int64,int64,string,string,float64,int64,string,int64,int64,string,int64,float64,string,string
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [8]:
# Count the filtered scenes per calendar year of their `datetime` value.
# Caveat: scenes were selected by bounding-box overlap only, not by testing
# their actual footprint geometry.
scene_year = filtered.datetime.dt.year
count_per_year = filtered.groupby(scene_year).size().compute()
count_per_year

datetime
2014     116
2015    1701
2016    3657
2017    8520
2018    8634
2019    9015
2020    8819
2021    8859
2022    5962
2023    3398
2024     113
dtype: int64