In [None]:
!pip install sentinelhub
!pip install requests_oauthlib
!pip install dotenv
!pip install rasterio
!pip install netCDF4

Collecting sentinelhub
  Downloading sentinelhub-3.11.1-py3-none-any.whl.metadata (10 kB)
Collecting aenum>=2.1.4 (from sentinelhub)
  Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)
Collecting dataclasses-json (from sentinelhub)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting tomli (from sentinelhub)
  Downloading tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting tomli-w (from sentinelhub)
  Downloading tomli_w-1.2.0-py3-none-any.whl.metadata (5.7 kB)
Collecting utm (from sentinelhub)
  Downloading utm-0.8.1-py3-none-any.whl.metadata (5.2 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->sentinelhub)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json->sentinelhub)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dat

In [None]:
import os
from sentinelhub import (SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubStatistical,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)
from dotenv import load_dotenv
import requests_oauthlib as requests
import matplotlib.pyplot as plt
import numpy as np
import datetime
import pandas as pd
from PIL import Image

In [None]:
# Placeholder credentials and service endpoints exposed as environment variables.
# NOTE(review): no cell visible in this notebook reads these variables; the SHConfig
# profile "cdse" supplies the credentials that are actually used. Keep real secrets
# out of the notebook (e.g. in a .env file) instead of hardcoding them here.
os.environ['CLIENT_ID'] = 'cdse'
os.environ['CLIENT_SECRET'] = 'cdse'
# Endpoints of the Copernicus Data Space Ecosystem deployment, i.e. the same service
# that the "cdse" profile and the data requests in this notebook talk to.
os.environ['TOKEN_URL'] = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'
os.environ['BASE_URL'] = 'https://sh.dataspace.copernicus.eu'

In [None]:
# Build the Sentinel Hub configuration from the locally stored profile named "cdse".
config = SHConfig("cdse")

# Show the loaded settings (the repr masks the credentials). `SHConfig.load` is a
# classmethod that returns a *new* object, so calling it on `config` never changed
# `config`; displaying `config` itself shows exactly what the following cells use.
config

SHConfig(
  instance_id='',
  sh_client_id='***********************************d738',
  sh_client_secret='****************************kzbY',
  sh_base_url='https://sh.dataspace.copernicus.eu',
  sh_auth_base_url=None,
  sh_token_url='https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token',
  geopedia_wms_url='https://service.geopedia.world',
  geopedia_rest_url='https://www.geopedia.world/rest',
  aws_access_key_id='',
  aws_secret_access_key='',
  aws_session_token='',
  aws_metadata_url='https://roda.sentinel-hub.com',
  aws_s3_l1c_bucket='sentinel-s2-l1c',
  aws_s3_l2a_bucket='sentinel-s2-l2a',
  opensearch_url='http://opensearch.sentinel-hub.com/resto/api/collections/Sentinel2',
  max_wfs_records_per_query=100,
  max_opensearch_records_per_query=500,
  max_download_attempts=4,
  download_sleep_time=5.0,
  download_timeout_seconds=120.0,
  number_of_download_processes=1,
  max_retries=None,
)

In [None]:
# Check that the credentials are set correctly.
if not config.sh_client_id or not config.sh_client_secret:
    # Raising stops this cell (and a "Run All") with a clear error message;
    # calling exit() inside a notebook acts on the kernel instead.
    raise RuntimeError("Please provide your Sentinel Hub credentials in the .env file.")

# Check that the endpoints are what you expect (i.e. output is the same & not None).
# NOTE: you can also set the credentials directly in the code,
# but this is not recommended for security reasons.
# Do not print the secret credentials 'id' or 'secret' in a public notebook either.
print(config.sh_token_url)
print(config.sh_base_url)

https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token
https://sh.dataspace.copernicus.eu


In [None]:
# Areas of interest, keyed by name.
# Every value is a bounding box given as [min_lon, min_lat, max_lon, max_lat].
AOI_BBOXES = {
    # Northern Adriatic, near the Po River delta
    'Po_River_Plume': [12.5, 44.8, 13.2, 45.5],
    # North of Corsica island
    'Northern_Corsica': [8.5, 42.9, 9.2, 43.6],
    # South-eastern Calabria
    'South_East_Calabria': [16.5, 38.35, 16.755, 38.555],
    # Ligurian Sea, near major coastal cities
    'Gulf_of_Genova': [8.5, 43.7, 9.2, 44.4],
}

In [None]:
# Po River: bounding box of the AOI and its size in pixels at the working resolution.
resolution = 10
AOI1 = 'Po_River_Plume'
# Look the bounds up through AOI1 so the name and the box cannot drift apart.
aoi1_bbox = BBox(bbox=AOI_BBOXES[AOI1], crs=CRS.WGS84)
aoi1_size = bbox_to_dimensions(aoi1_bbox, resolution=resolution)
# Report the AOI by name, like the other AOI cells (the previous f-string
# interpolated a one-element tuple holding the raw coordinate list).
print(f"Image shape for {AOI1} at {resolution} m resolution: {aoi1_size} pixels")


Image shape for ([12.5, 44.8, 13.2, 45.5],) at 10 m resolution: (5710, 7630) pixels


In [None]:
# Corsica: bounding box of the AOI and its size in pixels at the working resolution.
AOI2 = 'Northern_Corsica'
resolution = 10
aoi2_bbox = BBox(AOI_BBOXES[AOI2], CRS.WGS84)
aoi2_size = bbox_to_dimensions(aoi2_bbox, resolution)
print(f"Image shape for {AOI2} at {resolution} m resolution: {aoi2_size} pixels")

Image shape for Northern_Corsica at 10 m resolution: (5696, 7764) pixels


In [None]:
# Calabria: bounding box of the AOI and its size in pixels at the working resolution.
AOI3 = 'South_East_Calabria'
resolution = 10
aoi3_bbox = BBox(AOI_BBOXES[AOI3], CRS.WGS84)
aoi3_size = bbox_to_dimensions(aoi3_bbox, resolution)
print(f"Image shape for {AOI3} at {resolution}m resolution: {aoi3_size} pixels")

Image shape for South_East_Calabria at 10m resolution: (2185, 2314) pixels


In [None]:
# Genova: bounding box of the AOI and its size in pixels at the working resolution.
# `resolution` is set here as in the other AOI cells, so this cell does not depend
# on a value left behind by a previously executed cell.
resolution = 10
AOI4 = 'Gulf_of_Genova'
aoi4_bbox = BBox(bbox=AOI_BBOXES[AOI4], crs=CRS.WGS84)
aoi4_size = bbox_to_dimensions(aoi4_bbox, resolution=resolution)
print(f"Image shape for {AOI4} at {resolution}m resolution: {aoi4_size} pixels")

Image shape for Gulf_of_Genova at 10m resolution: (5622, 7765) pixels


In [None]:
# Catalog client built from the loaded configuration; the search cells below query it.
catalog = SentinelHubCatalog(config=config)

In [None]:
# Search period as a (start, end) pair of dates; adjust as needed.
# NOTE(review): the bounds match the 201506-202109 span in the annotation file name —
# confirm that this is the intended maximum period.
time_interval =  "2015-06-01", "2021-09-18"


In [None]:
# Query the catalog for Sentinel-2 L1C items that intersect the first AOI (aoi1_bbox)
# within `time_interval`, requesting only the item id and the acquisition datetime.
search_iterator = catalog.search(
    DataCollection.SENTINEL2_L1C,
    time=time_interval,
    bbox=aoi1_bbox,
    fields=dict(include=["id", "properties.datetime"], exclude=[]),
)

# Exhaust the iterator so that the hits can be counted and reused.
results_prp = [item for item in search_iterator]
print("Total number of results:", len(results_prp))


Total number of results: 3077


In [None]:

# Same catalog query for the second AOI (aoi2_bbox): Sentinel-2 L1C items within
# `time_interval`, requesting only the item id and the acquisition datetime.
search_iterator = catalog.search(
    DataCollection.SENTINEL2_L1C,
    time=time_interval,
    bbox=aoi2_bbox,
    fields=dict(include=["id", "properties.datetime"], exclude=[]),
)

# Exhaust the iterator so that the hits can be counted and reused.
results_nec = [item for item in search_iterator]
print("Total number of results:", len(results_nec))

Total number of results: 2668


In [None]:

# Same catalog query for the third AOI (aoi3_bbox): Sentinel-2 L1C items within
# `time_interval`, requesting only the item id and the acquisition datetime.
search_iterator = catalog.search(
    DataCollection.SENTINEL2_L1C,
    time=time_interval,
    bbox=aoi3_bbox,
    fields=dict(include=["id", "properties.datetime"], exclude=[]),
)

# Exhaust the iterator so that the hits can be counted and reused.
results_sec = [item for item in search_iterator]
print("Total number of results:", len(results_sec))

Total number of results: 382


In [None]:
# Same catalog query for the fourth AOI (aoi4_bbox): Sentinel-2 L1C items within
# `time_interval`, requesting only the item id and the acquisition datetime.
search_iterator = catalog.search(
    DataCollection.SENTINEL2_L1C,
    time=time_interval,
    bbox=aoi4_bbox,
    fields=dict(include=["id", "properties.datetime"], exclude=[]),
)

# Exhaust the iterator so that the hits can be counted and reused.
results_gen = [item for item in search_iterator]
print("Total number of results:", len(results_gen))

Total number of results: 1802


In [None]:
# Mount Google Drive (Colab only) so that the annotation file stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

from netCDF4 import Dataset

# Open the filament annotation NetCDF read-only. `ds` stays open because the
# following cells read their variables from it.
file_path = '/content/drive/MyDrive/WASP_LW_SENT2_MED_L1C_B_201506_202109_10m_6y_NRT_v1.0.nc'
ds = Dataset(file_path, mode='r')

# List the names of the variables stored in the file.
print("Available variables:")
print(ds.variables.keys())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Available variables:
dict_keys(['s2_product', 'dec_time', 'x_centroid', 'y_centroid', 'lat_centroid', 'lon_centroid', 'n_pixels_fil', 'limits', 'pixel_x', 'pixel_y', 'pixel_spec'])


In [None]:
import numpy as np

# Load all centroids
lat = ds.variables['lat_centroid'][:]
lon = ds.variables['lon_centroid'][:]

# For every AOI, collect the indices of the filaments whose centroid lies inside its
# bounding box (edges included). One vectorized comparison per AOI replaces the
# per-filament Python loop, and the keys come straight from AOI_BBOXES.
filtered_filaments = {}
for aoi, (min_lon, min_lat, max_lon, max_lat) in AOI_BBOXES.items():
    inside = (lon >= min_lon) & (lon <= max_lon) & (lat >= min_lat) & (lat <= max_lat)
    # Masked (missing) centroids are treated as lying outside every AOI.
    filtered_filaments[aoi] = np.flatnonzero(np.ma.filled(inside, False)).tolist()

# Print summary
for aoi, indices in filtered_filaments.items():
    print(f"{aoi}: {len(indices)} filaments found")

Po_River_Plume: 1512 filaments found
Northern_Corsica: 39 filaments found
South_East_Calabria: 27 filaments found
Gulf_of_Genova: 31 filaments found


In [None]:
# Handles to the per-filament variables; they are indexed one filament at a time below.
pixel_x_all = ds.variables['pixel_x']
pixel_y_all = ds.variables['pixel_y']
limits_all = ds.variables['limits']
n_pixels_all = ds.variables['n_pixels_fil']

# Filaments whose centroid falls inside the first AOI.
filament_indices = filtered_filaments[AOI1]

# One record per filament: its index, pixel count, valid pixel coordinates and limits.
aoi_filaments_data = []
for idx in filament_indices:
    n_pixels = n_pixels_all[idx]
    record = {
        'index': idx,
        'n_pixels': n_pixels,
        # Only the first n_pixels entries of each coordinate row are kept.
        'pixels_x': pixel_x_all[idx][:n_pixels],
        'pixels_y': pixel_y_all[idx][:n_pixels],
        # NOTE(review): in the printed sample the 1st/3rd values of `limits` bracket
        # the pixel_y values and the 2nd/4th bracket pixel_x — confirm the axis order.
        'bbox': limits_all[idx],
    }
    aoi_filaments_data.append(record)

# Print summary of first 3 filaments
for f in aoi_filaments_data[:3]:
    print(f"\nFilament #{f['index']}")
    print(f"Number of pixels: {f['n_pixels']}")
    print(f"Bounding box: {f['bbox']}")
    print(f"First 5 pixel coords: {list(zip(f['pixels_x'][:5], f['pixels_y'][:5]))}")


Filament #3298
Number of pixels: 16
Bounding box: [8534 3246 8578 3322]
First 5 pixel coords: [(np.int16(3262), np.int16(8554)), (np.int16(3265), np.int16(8553)), (np.int16(3266), np.int16(8553)), (np.int16(3275), np.int16(8551)), (np.int16(3276), np.int16(8551))]

Filament #3299
Number of pixels: 101
Bounding box: [9050 2582 9280 2714]
First 5 pixel coords: [(np.int16(2605), np.int16(9067)), (np.int16(2605), np.int16(9069)), (np.int16(2606), np.int16(9069)), (np.int16(2606), np.int16(9070)), (np.int16(2607), np.int16(9086))]

Filament #3300
Number of pixels: 11
Bounding box: [9305 2681 9346 2710]
First 5 pixel coords: [(np.int16(2692), np.int16(9315)), (np.int16(2692), np.int16(9320)), (np.int16(2692), np.int16(9321)), (np.int16(2693), np.int16(9321)), (np.int16(2693), np.int16(9322))]


In [None]:
# --- Config ---
# Path of the filament annotation NetCDF on the mounted Drive.
netcdf_path = "/content/drive/MyDrive/WASP_LW_SENT2_MED_L1C_B_201506_202109_10m_6y_NRT_v1.0.nc"
# Drive folder for the generated patches; it is created if it does not exist yet.
output_dir = "/content/drive/MyDrive/data/window_litter/patches/"
os.makedirs(output_dir, exist_ok=True)


In [None]:
# NetCDF-Driven Pipeline: Build 256x256 Image-Mask Pairs from Filament Annotations

from netCDF4 import Dataset
import pandas as pd
import numpy as np
from shapely.geometry import Point, box
import re
from sentinelhub import SHConfig, BBox, CRS, SentinelHubRequest, MimeType, DataCollection, bbox_to_dimensions
import rasterio
from rasterio.transform import from_origin
import os

from shapely.geometry import Point, box

# Decode the product name of every filament: the elements of each row are joined into
# one string, surrounding whitespace is stripped and everything from ".SAFE" on is dropped.
# NOTE(review): presumably `s2_product` is a 2-D character array with one row per
# filament — confirm against the file.
raw_products = ds.variables["s2_product"][:]
s2_products = ["".join(row.astype(str)).strip().split(".SAFE")[0] for row in raw_products]

def extract_str_time(s):
    """Return the first timestamp-like token found in ``s``, or None.

    A token is "20" followed by six digits, the letter "T" and six more digits
    (for example "20170723T101031").
    """
    found = re.search(r"20\d{6}T\d{6}", s)
    if found is None:
        return None
    return found.group(0)

# Timestamp token parsed from every product name (None when the name has none).
str_times = list(map(extract_str_time, s2_products))

# One row per filament: product name, parsed timestamp and the per-filament
# variables copied from the NetCDF file under their own names.
per_filament_variables = (
    "lat_centroid",
    "lon_centroid",
    "dec_time",
    "n_pixels_fil",
    "x_centroid",
    "y_centroid",
)
filament_df = pd.DataFrame({
    "s2_product": s2_products,
    "Str_time": str_times,
    **{name: ds.variables[name][:] for name in per_filament_variables},
})
# The positional index is the filament id used to address the NetCDF arrays.
filament_df.index.name = "filament_id"

# --- Step 2: Filter by AOI ---
def assign_aoi(row, aoi_bboxes=None):
    """Return the name of the first AOI whose bounding box contains the row's centroid.

    Args:
        row: Mapping (e.g. a DataFrame row) with "lon_centroid" and "lat_centroid".
        aoi_bboxes: Optional mapping of AOI name to [min_lon, min_lat, max_lon, max_lat].
            Defaults to the notebook-level AOI_BBOXES.

    Returns:
        The AOI name, or None when the centroid lies in no AOI (including NaN centroids).
    """
    if aoi_bboxes is None:
        aoi_bboxes = AOI_BBOXES
    lon, lat = row["lon_centroid"], row["lat_centroid"]
    for name, (min_lon, min_lat, max_lon, max_lat) in aoi_bboxes.items():
        # Edges are inclusive, matching the centroid filter used earlier in the notebook;
        # plain comparisons also avoid building a geometry per row and per AOI.
        if min_lon <= lon <= max_lon and min_lat <= lat <= max_lat:
            return name
    return None

# Label every filament with its AOI, expose the filament id as a regular column and
# keep only the rows that have both a parsed timestamp and an AOI.
filament_df = (
    filament_df
    .assign(AOI=filament_df.apply(assign_aoi, axis=1))
    .reset_index()
    .dropna(subset=["Str_time", "AOI"])
)
print(f"✅ Filtered to {len(filament_df)} filaments inside AOIs.")

# --- Pixel lookup: filament id -> valid pixel coordinates and limits ---
pixel_x_all = ds.variables['pixel_x']
pixel_y_all = ds.variables['pixel_y']
limits_all = ds.variables['limits']
n_pixels_all = ds.variables['n_pixels_fil']

pixel_map = {}
for idx in filament_df['filament_id']:
    # Only the first n_pixels entries of each coordinate row belong to the filament.
    n_pixels = n_pixels_all[idx]
    pixel_map[idx] = dict(
        px=pixel_x_all[idx][:n_pixels],
        py=pixel_y_all[idx][:n_pixels],
        bbox=limits_all[idx],
    )

In [None]:
# NetCDF-Driven Pipeline: Build 256x256 Image-Mask Pairs from Filament Annotations

from netCDF4 import Dataset
import pandas as pd
import numpy as np
from shapely.geometry import Point, box
import re
from sentinelhub import SHConfig, BBox, CRS, SentinelHubRequest, MimeType, DataCollection, bbox_to_dimensions
import rasterio
from rasterio.transform import from_origin
import os

def _write_geotiff(path, bands, transform):
    """Write ``bands`` (shape: band, row, column) to ``path`` as an EPSG:4326 GeoTIFF."""
    count, height, width = bands.shape
    with rasterio.open(
        path,
        "w",
        driver="GTiff",
        height=height,
        width=width,
        count=count,
        dtype=bands.dtype,
        crs="EPSG:4326",
        transform=transform,
    ) as dst:
        dst.write(bands)


def download_patch_and_mask(row, patch_size=256, resolution=10, output_base_dir='patches'):
    """Download a Sentinel-2 patch centred on a filament and write its binary mask.

    The patch and the mask are written as GeoTIFFs into
    ``<output_base_dir>/<filament_id>_<Str_time>/``; the mask file carries the
    ``_cl`` suffix. Nothing is written for the mask when the image request fails
    or returns only zeros, so every mask on disk has a matching image.

    Args:
        row: Mapping with "lat_centroid", "lon_centroid", "Str_time", "filament_id",
            "x_centroid" and "y_centroid".
        patch_size: Width and height of the patch in pixels.
        resolution: Ground size of one pixel in metres.
        output_base_dir: Folder that receives one sub-folder per filament.

    Returns:
        None. Progress and skip reasons are printed.
    """
    lat, lon = row["lat_centroid"], row["lon_centroid"]
    str_time = row["Str_time"]
    filament_id = row["filament_id"]
    x_centroid, y_centroid = int(row["x_centroid"]), int(row["y_centroid"])

    folder_name = f"{filament_id}_{str_time}"
    filament_folder = os.path.join(output_base_dir, folder_name)
    os.makedirs(filament_folder, exist_ok=True)

    # Define a bbox around the centroid that is square on the ground: a degree of
    # longitude shrinks with cos(latitude), so its half-width in degrees must grow
    # accordingly. Otherwise the patch is squeezed east-west relative to the mask,
    # which is laid out in pixel units.
    half_size = (patch_size * resolution) / 2.0
    meters_per_deg_lat = 111320  # approx
    delta_lat = half_size / meters_per_deg_lat
    delta_lon = delta_lat / np.cos(np.radians(lat))
    west, south = lon - delta_lon, lat - delta_lat
    east, north = lon + delta_lon, lat + delta_lat
    bbox = BBox(bbox=[west, south, east, north], crs=CRS.WGS84)
    size = (patch_size, patch_size)
    # Computed up front (not inside the try block) so that it always exists when the
    # mask is written; pixel sizes follow from the bbox actually requested.
    transform = from_origin(west, north, (east - west) / patch_size, (north - south) / patch_size)

    # units "DN" makes the bands integer digital numbers. With the default
    # reflectance units (0-1 floats) a UINT16 output collapses to the values 0 and 1.
    evalscript = """
    //VERSION=3
    function setup() {
        return {
            input: [{
                bands: ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B11", "B12"],
                units: "DN"
            }],
            output: {bands: 11, sampleType: "UINT16"}
        };
    }
    function evaluatePixel(sample) {
        return [sample.B01, sample.B02, sample.B03, sample.B04, sample.B05, sample.B06, sample.B07,
                sample.B08, sample.B8A, sample.B11, sample.B12];
    }
    """
    # Search one day either side of the annotated acquisition time.
    start = pd.to_datetime(str_time) - pd.Timedelta(days=1)
    end = pd.to_datetime(str_time) + pd.Timedelta(days=1)

    # NOTE(review): the annotation file name and the catalog searches in this notebook
    # refer to L1C products, while this request reads L2A — confirm the intended level.
    request = SentinelHubRequest(
        evalscript=evalscript,
        input_data=[
            SentinelHubRequest.input_data(
                data_collection=DataCollection.SENTINEL2_L2A.define_from(
                    name="s2l2a", service_url="https://sh.dataspace.copernicus.eu"
                ),
                time_interval=(start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")),
                other_args={"dataFilter": {"mosaickingOrder": "leastCC"},  "processing": {"previewInput": True}},
            )
        ],
        responses=[SentinelHubRequest.output_response("default", MimeType.TIFF)],
        bbox=bbox,
        size=size,
        config=config,
    )

    try:
        img = request.get_data()[0]
        if np.all(img == 0):
            print(f"Empty image (all zeros) for {folder_name}, skipping.")
            return
        print(f"📊 Image stats for {folder_name} → min: {img.min()}, max: {img.max()}")  # shape: (256, 256, 11)
        img_path = os.path.join(filament_folder, f"{folder_name}.tif")
        # The service returns (row, column, band); GeoTIFF bands are written band-first.
        _write_geotiff(img_path, np.ascontiguousarray(np.moveaxis(img, -1, 0)), transform)
        print(f"Image saved for {folder_name}")
    except Exception as e:
        print(f"Failed to fetch image for {folder_name}: {e}")
        # Without an image there is nothing to pair a mask with.
        return

    # Build binary mask using centroid and offset
    if filament_id not in pixel_map:
        print(f"No pixels found for filament {filament_id}")
        return

    # Shift the filament pixels so that the centroid lands in the middle of the patch.
    # int64 avoids overflow of the narrow integer type the coordinates are stored in.
    # NOTE(review): this assumes pixel_x/pixel_y are column/row offsets on a grid with
    # `resolution`-metre pixels aligned with the patch — confirm against the data set.
    cols = np.asarray(pixel_map[filament_id]["px"], dtype=np.int64) - x_centroid + patch_size // 2
    rows = np.asarray(pixel_map[filament_id]["py"], dtype=np.int64) - y_centroid + patch_size // 2
    inside = (cols >= 0) & (cols < patch_size) & (rows >= 0) & (rows < patch_size)

    mask = np.zeros((patch_size, patch_size), dtype=np.uint8)
    mask[rows[inside], cols[inside]] = 255

    if not mask.any():
        print(f"Skipping mask for {folder_name} — no overlapping pixels in patch.")
        return

    mask_path = os.path.join(filament_folder, f"{folder_name}_cl.tif")
    _write_geotiff(mask_path, mask[np.newaxis, :, :], transform)
    print(f"✅ Mask saved for {folder_name}")

# Download patches for all filtered filaments into the Drive folder configured in
# `output_dir` (the function's own default is a relative 'patches' folder).
try:
    for _, row in filament_df.iterrows():
        download_patch_and_mask(row, output_base_dir=output_dir)
finally:
    # Drop the NetCDF handle at the end, also when the long-running loop is interrupted.
    # The isopen() guard keeps a re-run of this cell from closing the file twice.
    if ds.isopen():
        ds.close()

# Optional preview (only rendered when it is the last expression of a cell):
filament_df.head()

# --- Visualization Utility ---
import matplotlib.pyplot as plt
import glob

def visualize_patch(patch_dir, band_indices=(4, 3, 2)):  # B04 (R), B03 (G), B02 (B)
    """Show a patch next to the same patch overlaid with its mask.

    Args:
        patch_dir: Folder holding one image GeoTIFF and one mask GeoTIFF whose
            name ends in "_cl.tif".
        band_indices: 1-based band numbers used as red, green and blue.

    Raises:
        FileNotFoundError: If the folder lacks the image or the mask file.
    """
    # "*.tif" also matches the "*_cl.tif" mask, so the two kinds are separated
    # explicitly; sorting makes the choice independent of directory order.
    tif_paths = sorted(glob.glob(os.path.join(patch_dir, "*.tif")))
    mask_paths = [path for path in tif_paths if path.endswith("_cl.tif")]
    image_paths = [path for path in tif_paths if not path.endswith("_cl.tif")]
    if not image_paths:
        raise FileNotFoundError(f"No image GeoTIFF found in {patch_dir}")
    if not mask_paths:
        raise FileNotFoundError(f"No mask GeoTIFF (*_cl.tif) found in {patch_dir}")
    image_path, mask_path = image_paths[0], mask_paths[0]

    with rasterio.open(image_path) as src:
        img = src.read()
        rgb = np.stack([img[band_indices[0]-1], img[band_indices[1]-1], img[band_indices[2]-1]], axis=-1)
        # Stretch to the 98th percentile; fall back to 1 for an all-dark patch so the
        # division below cannot produce NaN or inf.
        norm = np.percentile(rgb, 98)
        if norm <= 0:
            norm = 1
        rgb = np.clip(rgb / norm, 0, 1)

    with rasterio.open(mask_path) as msk_src:
        mask = msk_src.read(1)

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(rgb)
    plt.title("Sentinel-2 Image")
    plt.axis("off")

    plt.subplot(1, 2, 2)
    plt.imshow(rgb)
    plt.imshow(mask, cmap="Reds", alpha=0.4)
    plt.title("Overlay with Mask")
    plt.axis("off")

    plt.tight_layout()
    plt.show()

# Example usage:
# visualize_patch("/content/drive/MyDrive/patches/2798_20170703T083011")


📊 Image stats for 2798_20170723T101031 → min: 0, max: 1
✅ Image saved for 2798_20170723T101031
✅ Mask saved for 2798_20170723T101031
⚠️ Empty image (all zeros) for 2799_20170921T101021, skipping.
⚠️ Empty image (all zeros) for 2802_20180509T101031, skipping.
⚠️ Empty image (all zeros) for 2807_20180323T102021, skipping.
📊 Image stats for 2808_20180323T102021 → min: 0, max: 1
✅ Image saved for 2808_20180323T102021
✅ Mask saved for 2808_20180323T102021
⚠️ Empty image (all zeros) for 2809_20180323T102021, skipping.
⚠️ Empty image (all zeros) for 2829_20200521T102031, skipping.
⚠️ Empty image (all zeros) for 2830_20200521T102031, skipping.
⚠️ Empty image (all zeros) for 2831_20200521T102031, skipping.
⚠️ Empty image (all zeros) for 2856_20201018T102041, skipping.
⚠️ Empty image (all zeros) for 2861_20170517T102031, skipping.
⚠️ Empty image (all zeros) for 2862_20170517T102031, skipping.
⚠️ Empty image (all zeros) for 2863_20170517T102031, skipping.
⚠️ Empty image (all zeros) for 2865_20170

KeyboardInterrupt: 