In [12]:
# Utilities
import matplotlib.pyplot as plt
import pandas as pd
import getpass
import requests

from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubStatistical,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

from utils import plot_image

In [16]:
# Build the Sentinel Hub configuration object used by the catalog client in later cells.
# NOTE(review): presumably this loads a locally saved profile — confirm where the credentials come from.
config = SHConfig()
# Echo the configuration as the cell output to check the active endpoints.
# The repr includes the client id/secret with all but their last characters masked;
# review or clear this output before sharing the notebook.
config

SHConfig(
  instance_id='',
  sh_client_id='***********************************a932',
  sh_client_secret='****************************nN8E',
  sh_base_url='https://sh.dataspace.copernicus.eu',
  sh_auth_base_url=None,
  sh_token_url='https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token',
  geopedia_wms_url='https://service.geopedia.world',
  geopedia_rest_url='https://www.geopedia.world/rest',
  aws_access_key_id='',
  aws_secret_access_key='',
  aws_session_token='',
  aws_metadata_url='https://roda.sentinel-hub.com',
  aws_s3_l1c_bucket='sentinel-s2-l1c',
  aws_s3_l2a_bucket='sentinel-s2-l2a',
  opensearch_url='http://opensearch.sentinel-hub.com/resto/api/collections/Sentinel2',
  max_wfs_records_per_query=100,
  max_opensearch_records_per_query=500,
  max_download_attempts=4,
  download_sleep_time=5.0,
  download_timeout_seconds=120.0,
  number_of_download_processes=1,
  max_retries=None,
)

In [13]:
# --- AOI and time interval setup ---
# Search window handed to the catalog search as a (start, end) pair of date strings.
time_interval = ('2024-06-01', '2024-12-31')
# Corner coordinates of the area of interest; a later cell wraps them in a BBox with CRS.WGS84.
# NOTE(review): looks like [min_lon, min_lat, max_lon, max_lat] — confirm against the BBox documentation.
aoi_coords_wgs84 = [-117.272555, 32.392761, -117.083041, 32.678348]

In [14]:
# --- resolution setup ---
# Target pixel size; the message printed below labels this value as metres.
resolution = 10
# Bounding box of the area of interest, declared in WGS84.
aoi_bbox = BBox(bbox=aoi_coords_wgs84, crs=CRS.WGS84)
# Pixel dimensions of the bounding box at the chosen resolution.
# NOTE(review): the tuple is presumably (width, height) — confirm against the bbox_to_dimensions docs.
aoi_size = bbox_to_dimensions(aoi_bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {aoi_size} pixels")

Image shape at 10 m resolution: (1785, 3163) pixels


In [17]:
# --- Catalog search ---
catalog = SentinelHubCatalog(config=config)

# Only the product id and the acquisition timestamp are requested from the catalog.
s1_iterator = catalog.search(
    DataCollection.SENTINEL1_IW,
    bbox=aoi_bbox,
    time=time_interval,
    fields={"include": ["id", "properties.datetime"], "exclude": []},
)
s1_results = list(s1_iterator)
print("Sentinel-1 results:", len(s1_results))

s2_iterator = catalog.search(
    DataCollection.SENTINEL2_L1C,
    bbox=aoi_bbox,
    time=time_interval,
    fields={"include": ["id", "properties.datetime"], "exclude": []},
)
s2_results = list(s2_iterator)
print("Sentinel-2 results:", len(s2_results))


# --- Convert results to DataFrames ---
def _results_to_frame(results):
    """Convert catalog search items into a DataFrame with id / datetime / date columns.

    Parameters
    ----------
    results : iterable of dict
        Items carrying an "id" key and a "properties" mapping with a "datetime" entry.

    Returns
    -------
    pandas.DataFrame
        One row per item. The columns are declared explicitly so that an empty
        search still produces a frame with a "date" column; without that, the
        date-matching step fails with a KeyError.
    """
    records = []
    for item in results:
        # Parse the timestamp once and derive the calendar date from it.
        acquired = pd.to_datetime(item["properties"]["datetime"])
        records.append({"id": item["id"], "datetime": acquired, "date": acquired.date()})
    return pd.DataFrame(records, columns=["id", "datetime", "date"])


df_s1 = _results_to_frame(s1_results)
df_s2 = _results_to_frame(s2_results)

Sentinel-1 results: 57
Sentinel-2 results: 165


In [18]:
# --- Find overlapping dates ---
# Dates on which both sensors have at least one catalog entry.
s1_dates, s2_dates = set(df_s1["date"]), set(df_s2["date"])
common_dates = s1_dates & s2_dates
print("Number of matching dates:", len(common_dates))

# --- Filter and group by date ---
# Keep only the rows that fall on a shared date (copied so the source frames stay untouched).
s1_on_common = df_s1["date"].isin(common_dates)
s2_on_common = df_s2["date"].isin(common_dates)
df_s1_common = df_s1.loc[s1_on_common].copy()
df_s2_common = df_s2.loc[s2_on_common].copy()

# Collapse each date to the list of product ids acquired on it.
grouped_s1 = df_s1_common.groupby("date")["id"].apply(list)
grouped_s1 = grouped_s1.rename("s1_ids")
grouped_s2 = df_s2_common.groupby("date")["id"].apply(list)
grouped_s2 = grouped_s2.rename("s2_ids")

# Put both id lists side by side, one row per date.
matched_df = pd.concat([grouped_s1, grouped_s2], axis=1).reset_index()
print(matched_df.head())

Number of matching dates: 13
         date                                             s1_ids  \
0  2024-06-26  [S1A_IW_GRDH_1SDV_20240626T134456_20240626T134...   
1  2024-07-01  [S1A_IW_GRDH_1SDV_20240701T135318_20240701T135...   
2  2024-07-13  [S1A_IW_GRDH_1SDV_20240713T135318_20240713T135...   
3  2024-08-25  [S1A_IW_GRDH_1SDV_20240825T134455_20240825T134...   
4  2024-08-30  [S1A_IW_GRDH_1SDV_20240830T135318_20240830T135...   

                                              s2_ids  
0  [S2A_MSIL1C_20240626T182921_N0510_R027_T11SMR_...  
1  [S2B_MSIL1C_20240701T182919_N0510_R027_T11SMR_...  
2  [S2A_MSIL1C_20240713T181921_N0510_R127_T11SMR_...  
3  [S2A_MSIL1C_20240825T182921_N0511_R027_T11SMR_...  
4  [S2B_MSIL1C_20240830T182919_N0511_R027_T11SMR_...  


In [20]:
# Structural summary of the matched table.
print("matched_df columns:", list(matched_df.columns))
print("matched_df shape:", matched_df.shape)
print("\nSample rows:")
print(matched_df.head(3))

# Optional: print the first row field by field so the id lists are not truncated.
print("\nExample row content:")
example_row = matched_df.iloc[0]
for label, column in (("Date:", "date"), ("S1 IDs:", "s1_ids"), ("S2 IDs:", "s2_ids")):
    print(label, example_row[column])

# Persist the matches; the index is omitted from the file.
matched_df.to_csv("matched_s1_s2_ids.csv", index=False)

matched_df columns: ['date', 's1_ids', 's2_ids']
matched_df shape: (13, 3)

Sample rows:
         date                                             s1_ids  \
0  2024-06-26  [S1A_IW_GRDH_1SDV_20240626T134456_20240626T134...   
1  2024-07-01  [S1A_IW_GRDH_1SDV_20240701T135318_20240701T135...   
2  2024-07-13  [S1A_IW_GRDH_1SDV_20240713T135318_20240713T135...   

                                              s2_ids  
0  [S2A_MSIL1C_20240626T182921_N0510_R027_T11SMR_...  
1  [S2B_MSIL1C_20240701T182919_N0510_R027_T11SMR_...  
2  [S2A_MSIL1C_20240713T181921_N0510_R127_T11SMR_...  

Example row content:
Date: 2024-06-26
S1 IDs: ['S1A_IW_GRDH_1SDV_20240626T134456_20240626T134521_054495_06A1B9_E2D5_COG.SAFE']
S2 IDs: ['S2A_MSIL1C_20240626T182921_N0510_R027_T11SMR_20240626T231652.SAFE', 'S2A_MSIL1C_20240626T182921_N0510_R027_T11SMS_20240626T231652.SAFE']


In [10]:
# Example OData catalogue query: up to 100 non-Sentinel-2 products whose content
# starts within a ten-minute window on 2022-05-03, ordered by start time.
odata_query_url = (
    "https://catalogue.dataspace.copernicus.eu/odata/v1/Products"
    "?$filter=not (Collection/Name eq 'SENTINEL-2')"
    " and ContentDate/Start gt 2022-05-03T00:00:00.000Z"
    " and ContentDate/Start lt 2022-05-03T00:10:00.000Z"
    "&$orderby=ContentDate/Start&$top=100"
)

# A timeout keeps the cell from hanging forever, and raise_for_status() surfaces
# HTTP errors directly instead of a confusing KeyError on 'value' further down.
odata_response = requests.get(odata_query_url, timeout=60)
odata_response.raise_for_status()

# Bound to a descriptive name rather than `json`, which would shadow the stdlib module name.
odata_payload = odata_response.json()
df = pd.DataFrame.from_dict(odata_payload['value'])

# Print only specific columns
columns_to_print = ['Id', 'Name','S3Path','GeoFootprint']
df[columns_to_print].head(3)

Unnamed: 0,Id,Name,S3Path,GeoFootprint
0,2a92387c-d802-4ac7-9b24-187e0e6d8ab4,c_gls_LIE250_202205030000_Baltic_MODIS_V1.2.2_nc,/eodata/CLMS/bio-geophysical/river_and_lake_ic...,"{'type': 'Polygon', 'coordinates': [[[4.99625,..."
1,1d42f2d3-2456-485f-a93e-92f08bdd5c51,S1A_OPER_AUX_GNSSRD_POD__20220510T020122_V2022...,/eodata/Sentinel-1/AUX/AUX_GNSSRD/2022/05/03/S...,
2,5c744d5c-c082-4a34-a181-81cde73cd25d,S1B_OPER_AUX_GNSSRD_POD__20220510T023113_V2022...,/eodata/Sentinel-1/AUX/AUX_GNSSRD/2022/05/03/S...,


In [None]:
import pandas as pd
import getpass
import requests
import os
import ast
from time import sleep
from datetime import datetime, timedelta

from sentinelhub import (
    SHConfig, DataCollection, SentinelHubCatalog,
    BBox, bbox_to_dimensions, CRS
)

# --- Config ---
config = SHConfig()
time_interval = ('2024-06-01', '2025-06-30')
aoi_coords_wgs84 = [-117.272555, 32.392761, -117.083041, 32.678348]
resolution = 10
utc_offset_hours = -7

# --- AOI Setup ---
aoi_bbox = BBox(bbox=aoi_coords_wgs84, crs=CRS.WGS84)
aoi_size = bbox_to_dimensions(aoi_bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {aoi_size} pixels")

# --- Catalog Search ---
catalog = SentinelHubCatalog(config=config)


def _search_ids_and_times(collection):
    """Return every catalog item of `collection` over the AOI and time window.

    Only the item id and its acquisition datetime are requested.
    """
    hits = catalog.search(
        collection,
        bbox=aoi_bbox,
        time=time_interval,
        fields={"include": ["id", "properties.datetime"], "exclude": []},
    )
    return list(hits)


s1_results = _search_ids_and_times(DataCollection.SENTINEL1_IW)
print("Sentinel-1 results:", len(s1_results))

s2_results = _search_ids_and_times(DataCollection.SENTINEL2_L1C)
print("Sentinel-2 L1C results:", len(s2_results))

# --- Authentication ---
# Credentials are prompted for interactively; the password is read without echo.
auth_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
username = input("CDSE username: ")
password = getpass.getpass("CDSE password: ")
auth_data = dict(
    grant_type='password',
    client_id='cdse-public',
    username=username,
    password=password,
)

def refresh_token():
    """Request a new access token and update the module-level auth state.

    Posts the password-grant form in `auth_data` to `auth_url` and then sets
    these globals:

    - `access_token`: the token string from the response
    - `headers`: an Authorization header carrying that token
    - `token_acquired`: the local time at which the token was obtained
    - `token_valid_for_minutes`: taken from the response's `expires_in`
      (seconds) minus a one-minute safety margin, when the server reports it;
      otherwise left unchanged

    Raises
    ------
    requests.HTTPError
        If the token endpoint answers with an error status.
    """
    global access_token, headers, token_acquired, token_valid_for_minutes
    # Bounded wait so a stalled identity server cannot hang the notebook.
    auth_response = requests.post(auth_url, data=auth_data, timeout=60)
    auth_response.raise_for_status()
    token_payload = auth_response.json()
    access_token = token_payload["access_token"]
    headers = {"Authorization": f"Bearer {access_token}"}
    token_acquired = datetime.now()
    # Trust the lifetime the server reports rather than a hard-coded guess, so
    # the proactive refresh checks fire before the token actually expires.
    expires_in = token_payload.get("expires_in")
    if expires_in is not None:
        token_valid_for_minutes = max(float(expires_in) - 60.0, 0.0) / 60.0
    print("🔁 Refreshed access token.")

# Fallback lifetime, used only if the token response carries no `expires_in`.
# It must be assigned before the first refresh so the refresh can override it.
token_valid_for_minutes = 55
refresh_token()

# --- Helper functions ---
def get_product_name_by_id(sh_id):
    """Look up a catalog item in the CDSE OData catalogue and return its product name.

    Despite the function name, the lookup filters on the OData `Name` field,
    using `sh_id` as the name to match.

    Parameters
    ----------
    sh_id : str
        Identifier of the item as returned by the Sentinel Hub catalog search.

    Returns
    -------
    str or None
        `Name` of the first matching product; None when nothing matches or the
        request is still forbidden (403) after one token refresh.

    Raises
    ------
    requests.HTTPError
        For any other error status.
    """
    def make_request():
        # `headers` is read at call time, so a retry picks up a refreshed token.
        return requests.get(url, headers=headers, timeout=60)

    if (datetime.now() - token_acquired) > timedelta(minutes=token_valid_for_minutes):
        refresh_token()
    url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Name eq '{sh_id}'&$format=json"
    r = make_request()
    if r.status_code in (401, 403):
        # An expired or rejected token: refresh once and retry, as get_product_id does.
        refresh_token()
        r = make_request()
    if r.status_code == 403:
        print(f"🚫 Forbidden for SH ID: {sh_id}")
        return None
    r.raise_for_status()
    items = r.json().get("value", [])
    return items[0]["Name"] if items else None

# --- Keep only COG products for Sentinel-1 and only non-COG products for Sentinel-2 ---
filtered_s1, filtered_s2 = [], []

for item in s1_results:
    name = get_product_name_by_id(item["id"])
    if name and name.endswith("_COG.SAFE"):
        # Parse the timestamp once and derive the calendar date from it.
        acquired = pd.to_datetime(item["properties"]["datetime"])
        filtered_s1.append({"id": name, "datetime": acquired, "date": acquired.date()})

for item in s2_results:
    name = get_product_name_by_id(item["id"])
    if name and not name.endswith("_COG.SAFE"):
        acquired = pd.to_datetime(item["properties"]["datetime"])
        filtered_s2.append({"id": name, "datetime": acquired, "date": acquired.date()})

# --- Create DataFrames ---
# Columns are declared explicitly so an empty filter result still yields a
# "date" column instead of raising KeyError below.
df_s1 = pd.DataFrame(filtered_s1, columns=["id", "datetime", "date"])
df_s2 = pd.DataFrame(filtered_s2, columns=["id", "datetime", "date"])

# Dates on which both sensors have at least one retained product.
common_dates = set(df_s1["date"]).intersection(df_s2["date"])
print("Number of matching dates:", len(common_dates))

df_s1_common = df_s1[df_s1["date"].isin(common_dates)]
df_s2_common = df_s2[df_s2["date"].isin(common_dates)]

# One list of product names per date and sensor.
grouped_s1 = df_s1_common.groupby("date")["id"].apply(list).rename("s1_ids")
grouped_s2 = df_s2_common.groupby("date")["id"].apply(list).rename("s2_ids")

matched_df = pd.concat([grouped_s1, grouped_s2], axis=1).reset_index()

# This cell searches the Sentinel-2 L1C collection, so the export is labelled
# L1C (it was previously written to a file named "..._L2A.csv").
matched_df.to_csv("matched_s1_s2_L1C.csv", index=False)
print("✅ Matched product list saved to matched_s1_s2_L1C.csv")

# --- Download functions ---
def get_product_id(product_name):
    """Resolve a product name to its CDSE OData catalogue `Id`.

    The token is refreshed up front when it is older than
    `token_valid_for_minutes`. If the catalogue still answers 401 or 403, the
    token is refreshed once more and the request retried.

    Parameters
    ----------
    product_name : str
        Product name matched against the OData `Name` field.

    Returns
    -------
    str or None
        `Id` of the first matching product; None when nothing matches or the
        request is still forbidden (403) after the retry.

    Raises
    ------
    requests.HTTPError
        For any other error status, including a repeated 401.
    """
    def make_request():
        # `headers` is read at call time, so the retry uses the refreshed token.
        return requests.get(
            f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Name eq '{product_name}'&$format=json",
            headers=headers,
            timeout=60,
        )

    if (datetime.now() - token_acquired) > timedelta(minutes=token_valid_for_minutes):
        refresh_token()

    r = make_request()
    # Expired bearer tokens are normally rejected with 401, not only 403.
    if r.status_code in (401, 403):
        print(f"⚠️ {r.status_code} for {product_name}, refreshing token and retrying...")
        refresh_token()
        r = make_request()
        if r.status_code == 403:
            print(f"🚫 Still forbidden after refresh: {product_name}")
            return None

    r.raise_for_status()
    items = r.json().get("value", [])
    return items[0]["Id"] if items else None

def download_product(product_name, folder):
    """Download one product archive into `folder` as `<product_name>.zip`.

    The archive is streamed into a temporary `.part` file and only renamed to
    its final name once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated `.zip` that a later run would
    report as "Already downloaded".

    Files that already exist under the final name are trusted as complete and
    skipped before any network request is made. Archives truncated by an
    earlier run of the old code must be deleted by hand.

    Parameters
    ----------
    product_name : str
        Product name, resolved to a catalogue id with `get_product_id`.
    folder : str
        Target directory; created if missing.

    Returns
    -------
    None
        Also when the product is skipped or cannot be resolved.

    Raises
    ------
    requests.HTTPError
        If the download endpoint answers with an error status.
    """
    out_path = os.path.join(folder, f"{product_name}.zip")
    # Check the local file first: no catalogue lookup is needed for a skip.
    if os.path.exists(out_path):
        print(f"⏩ Already downloaded: {product_name}")
        return
    product_id = get_product_id(product_name)
    if not product_id:
        return
    if (datetime.now() - token_acquired) > timedelta(minutes=token_valid_for_minutes):
        refresh_token()
    url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
    os.makedirs(folder, exist_ok=True)
    part_path = out_path + ".part"
    print(f"⬇️ Downloading: {product_name}")
    try:
        # (connect, read) timeouts: a stalled transfer fails instead of hanging.
        with requests.get(url, headers=headers, stream=True, timeout=(30, 300)) as r:
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
    except BaseException:
        # BaseException also covers KeyboardInterrupt (kernel interrupt):
        # drop the partial file, then let the error propagate.
        if os.path.exists(part_path):
            os.remove(part_path)
        raise
    # Publish the archive under its final name only after a complete transfer.
    os.replace(part_path, out_path)
    print(f"✅ Saved: {out_path}")

# --- Download loop ---
# Walk the matched table date by date; Sentinel-1 products go first, then Sentinel-2.
download_plan = zip(matched_df["date"], matched_df["s1_ids"], matched_df["s2_ids"])
for acquisition_date, s1_names, s2_names in download_plan:
    print(f"\n📅 Processing date: {acquisition_date}")
    for target_folder, product_names in (("sentinel1", s1_names), ("sentinel2", s2_names)):
        for product_name in product_names:
            download_product(product_name, folder=target_folder)
            # Short pause between consecutive requests.
            sleep(1)


Image shape at 10 m resolution: (1785, 3163) pixels
Sentinel-1 results: 127
Sentinel-2 L1C results: 315
🔁 Refreshed access token.


In [27]:
import pandas as pd
import getpass
import requests
import os
import ast
from time import sleep
from datetime import datetime, timedelta

from sentinelhub import (
    SHConfig, DataCollection, SentinelHubCatalog,
    BBox, bbox_to_dimensions, CRS
)

# --- Config ---
config = SHConfig()
time_interval = ('2024-06-01', '2024-12-31')
aoi_coords_wgs84 = [-117.272555, 32.392761, -117.083041, 32.678348]
resolution = 10
utc_offset_hours = -7

# --- AOI Setup ---
aoi_bbox = BBox(bbox=aoi_coords_wgs84, crs=CRS.WGS84)
aoi_size = bbox_to_dimensions(aoi_bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {aoi_size} pixels")

# --- Catalog Search ---
catalog = SentinelHubCatalog(config=config)


def _search_ids_and_times(collection):
    """Return every catalog item of `collection` over the AOI and time window.

    Only the item id and its acquisition datetime are requested.
    """
    hits = catalog.search(
        collection,
        bbox=aoi_bbox,
        time=time_interval,
        fields={"include": ["id", "properties.datetime"], "exclude": []},
    )
    return list(hits)


s1_results = _search_ids_and_times(DataCollection.SENTINEL1_IW)
print("Sentinel-1 results:", len(s1_results))

s2_results = _search_ids_and_times(DataCollection.SENTINEL2_L2A)
print("Sentinel-2 L2A results:", len(s2_results))

# --- Authentication ---
# Credentials are prompted for interactively; the password is read without echo.
auth_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
username = input("CDSE username: ")
password = getpass.getpass("CDSE password: ")
auth_data = dict(
    grant_type='password',
    client_id='cdse-public',
    username=username,
    password=password,
)

def refresh_token():
    """Request a new access token and update the module-level auth state.

    Posts the password-grant form in `auth_data` to `auth_url` and then sets
    these globals:

    - `access_token`: the token string from the response
    - `headers`: an Authorization header carrying that token
    - `token_acquired`: the local time at which the token was obtained
    - `token_valid_for_minutes`: taken from the response's `expires_in`
      (seconds) minus a one-minute safety margin, when the server reports it;
      otherwise left unchanged

    Raises
    ------
    requests.HTTPError
        If the token endpoint answers with an error status.
    """
    global access_token, headers, token_acquired, token_valid_for_minutes
    # Bounded wait so a stalled identity server cannot hang the notebook.
    auth_response = requests.post(auth_url, data=auth_data, timeout=60)
    auth_response.raise_for_status()
    token_payload = auth_response.json()
    access_token = token_payload["access_token"]
    headers = {"Authorization": f"Bearer {access_token}"}
    token_acquired = datetime.now()
    # Trust the lifetime the server reports rather than a hard-coded guess, so
    # the proactive refresh checks fire before the token actually expires.
    expires_in = token_payload.get("expires_in")
    if expires_in is not None:
        token_valid_for_minutes = max(float(expires_in) - 60.0, 0.0) / 60.0
    print("🔁 Refreshed access token.")

# Fallback lifetime, used only if the token response carries no `expires_in`.
# It must be assigned before the first refresh so the refresh can override it.
token_valid_for_minutes = 55
refresh_token()

# --- Keep only COG products for Sentinel-1 and only non-COG products for Sentinel-2 ---
filtered_s1, filtered_s2 = [], []

for item in s1_results:
    name = item["id"]
    if name and name.endswith("_COG.SAFE"):
        # Parse the timestamp once and derive the calendar date from it.
        acquired = pd.to_datetime(item["properties"]["datetime"])
        filtered_s1.append({"id": name, "datetime": acquired, "date": acquired.date()})

for item in s2_results:
    name = item["id"]
    if name and not name.endswith("_COG.SAFE"):
        acquired = pd.to_datetime(item["properties"]["datetime"])
        filtered_s2.append({"id": name, "datetime": acquired, "date": acquired.date()})

# --- Create DataFrames ---
# Columns are declared explicitly so an empty filter result still yields a
# "date" column instead of raising KeyError below.
df_s1 = pd.DataFrame(filtered_s1, columns=["id", "datetime", "date"])
df_s2 = pd.DataFrame(filtered_s2, columns=["id", "datetime", "date"])

# Dates on which both sensors have at least one retained product.
common_dates = set(df_s1["date"]).intersection(df_s2["date"])
print("Number of matching dates:", len(common_dates))

df_s1_common = df_s1[df_s1["date"].isin(common_dates)]
df_s2_common = df_s2[df_s2["date"].isin(common_dates)]

# One list of product names per date and sensor.
grouped_s1 = df_s1_common.groupby("date")["id"].apply(list).rename("s1_ids")
grouped_s2 = df_s2_common.groupby("date")["id"].apply(list).rename("s2_ids")

matched_df = pd.concat([grouped_s1, grouped_s2], axis=1).reset_index()

matched_df.to_csv("matched_s1_s2_L2A.csv", index=False)
print("✅ Matched product list saved to matched_s1_s2_L2A.csv")

# --- Download functions ---
def get_product_id(product_name):
    """Resolve a product name to its CDSE OData catalogue `Id`.

    The token is refreshed up front when it is older than
    `token_valid_for_minutes`. If the catalogue still answers 401 or 403, the
    token is refreshed once more and the request retried.

    Parameters
    ----------
    product_name : str
        Product name matched against the OData `Name` field.

    Returns
    -------
    str or None
        `Id` of the first matching product; None when nothing matches or the
        request is still forbidden (403) after the retry.

    Raises
    ------
    requests.HTTPError
        For any other error status, including a repeated 401.
    """
    def make_request():
        # `headers` is read at call time, so the retry uses the refreshed token.
        return requests.get(
            f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Name eq '{product_name}'&$format=json",
            headers=headers,
            timeout=60,
        )

    if (datetime.now() - token_acquired) > timedelta(minutes=token_valid_for_minutes):
        refresh_token()

    r = make_request()
    # Expired bearer tokens are normally rejected with 401, not only 403.
    if r.status_code in (401, 403):
        print(f"⚠️ {r.status_code} for {product_name}, refreshing token and retrying...")
        refresh_token()
        r = make_request()
        if r.status_code == 403:
            print(f"🚫 Still forbidden after refresh: {product_name}")
            return None

    r.raise_for_status()
    items = r.json().get("value", [])
    return items[0]["Id"] if items else None

def download_product(product_name, folder):
    """Download one product archive into `folder` as `<product_name>.zip`.

    The archive is streamed into a temporary `.part` file and only renamed to
    its final name once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated `.zip` that a later run would
    report as "Already downloaded".

    Files that already exist under the final name are trusted as complete and
    skipped before any network request is made. Archives truncated by an
    earlier run of the old code must be deleted by hand.

    Parameters
    ----------
    product_name : str
        Product name, resolved to a catalogue id with `get_product_id`.
    folder : str
        Target directory; created if missing.

    Returns
    -------
    None
        Also when the product is skipped or cannot be resolved.

    Raises
    ------
    requests.HTTPError
        If the download endpoint answers with an error status.
    """
    out_path = os.path.join(folder, f"{product_name}.zip")
    # Check the local file first: no catalogue lookup is needed for a skip.
    if os.path.exists(out_path):
        print(f"⏩ Already downloaded: {product_name}")
        return
    product_id = get_product_id(product_name)
    if not product_id:
        return
    if (datetime.now() - token_acquired) > timedelta(minutes=token_valid_for_minutes):
        refresh_token()
    url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
    os.makedirs(folder, exist_ok=True)
    part_path = out_path + ".part"
    print(f"⬇️ Downloading: {product_name}")
    try:
        # (connect, read) timeouts: a stalled transfer fails instead of hanging.
        with requests.get(url, headers=headers, stream=True, timeout=(30, 300)) as r:
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
    except BaseException:
        # BaseException also covers KeyboardInterrupt (kernel interrupt):
        # drop the partial file, then let the error propagate.
        if os.path.exists(part_path):
            os.remove(part_path)
        raise
    # Publish the archive under its final name only after a complete transfer.
    os.replace(part_path, out_path)
    print(f"✅ Saved: {out_path}")

# --- Download loop ---
# Walk the matched table date by date; Sentinel-1 products go first, then Sentinel-2.
download_plan = zip(matched_df["date"], matched_df["s1_ids"], matched_df["s2_ids"])
for acquisition_date, s1_names, s2_names in download_plan:
    print(f"\n📅 Processing date: {acquisition_date}")
    for target_folder, product_names in (("sentinel1", s1_names), ("sentinel2", s2_names)):
        for product_name in product_names:
            download_product(product_name, folder=target_folder)
            # Short pause between consecutive requests.
            sleep(1)



Image shape at 10 m resolution: (1785, 3163) pixels
Sentinel-1 results: 57
Sentinel-2 L2A results: 165
🔁 Refreshed access token.
Number of matching dates: 13
✅ Matched product list saved to matched_s1_s2_L2A.csv

📅 Processing date: 2024-06-26
⏩ Already downloaded: S1A_IW_GRDH_1SDV_20240626T134456_20240626T134521_054495_06A1B9_E2D5_COG.SAFE
⏩ Already downloaded: S2A_MSIL2A_20240626T182921_N0510_R027_T11SMR_20240627T003249.SAFE
⏩ Already downloaded: S2A_MSIL2A_20240626T182921_N0510_R027_T11SMS_20240627T003249.SAFE

📅 Processing date: 2024-07-01
⏩ Already downloaded: S1A_IW_GRDH_1SDV_20240701T135318_20240701T135343_054568_06A44B_DBD2_COG.SAFE
⏩ Already downloaded: S1A_IW_GRDH_1SDV_20240701T015016_20240701T015045_054561_06A407_2F9B_COG.SAFE
⏩ Already downloaded: S2B_MSIL2A_20240701T182919_N0510_R027_T11SMR_20240701T223927.SAFE
⏩ Already downloaded: S2B_MSIL2A_20240701T182919_N0510_R027_T11SMS_20240701T223927.SAFE

📅 Processing date: 2024-07-13
⏩ Already downloaded: S1A_IW_GRDH_1SDV_2024071

KeyboardInterrupt: 