# Iceland Snow and Ice Monitoring

This notebook implements a workflow for monitoring snow and ice in Iceland using Sentinel-2 L2A data read from the EOPF Zarr store: it searches a STAC catalog, builds a lazy data cube, and classifies snow with the NDSI.

In [None]:
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch
from shapely.geometry import box
import shapely.geometry
import pystac_client
from pystac import Item
import xarray as xr
import os
import requests
from datetime import datetime
from pyproj import Transformer
import dask
import warnings

# Silence DeprecationWarning messages attributed to pystac modules. Deprecated
# STAC items are not ignored: the search cell below detects them via the
# "deprecated" property and tries to resolve their replacements explicitly.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="pystac")

## Seeds
Load the glacier seeds (points) and define the Area of Interest.

In [None]:
# Load the glacier seed points.
seeds_gdf = gpd.read_file("data/Iceland_Seeds.geojson")

# Reproject to WGS84 (lon/lat): the STAC search below takes its bbox in EPSG:4326.
seeds_gdf = seeds_gdf.to_crs("EPSG:4326")

# Bounding box of all seeds in WGS84, ordered [minx, miny, maxx, maxy].
# .tolist() yields plain Python floats (clean printing / JSON serialisation).
total_bounds = seeds_gdf.total_bounds
bbox_4326 = total_bounds.tolist()

# AOI expressed with named edges, used as input for the UTM transformation.
spatial_extent = {
    "west": bbox_4326[0],
    "south": bbox_4326[1],
    "east": bbox_4326[2],
    "north": bbox_4326[3],
}

print(f"Bbox (EPSG:4326): {bbox_4326}")

# Convert the AOI to UTM 27N (EPSG:32627).
# The example this notebook is based on used EPSG:32631 (Belgium).
# NOTE(review): this assumes the scenes being sliced are gridded in EPSG:32627;
# scenes delivered in a neighbouring UTM zone would not match these bounds - confirm.
target_crs = "EPSG:32627"
transformer = Transformer.from_crs("EPSG:4326", target_crs, always_xy=True)

# A lon/lat rectangle is not axis-aligned in UTM (its edges become curved/rotated),
# so transforming only the SW and NE corners can under-cover the AOI.
# transform_bounds densifies the rectangle edges and returns the enclosing box.
west_utm, south_utm, east_utm, north_utm = transformer.transform_bounds(
    spatial_extent["west"],
    spatial_extent["south"],
    spatial_extent["east"],
    spatial_extent["north"],
)

# Spatial slice parameters for label-based selection on the x/y coordinates.
# y is sliced north -> south, which assumes a descending y coordinate (as in the
# reference example); if y turns out to be ascending the slice must be reversed.
x_slice = slice(west_utm, east_utm)
y_slice = slice(north_utm, south_utm)

print(f"UTM Bounds ({target_crs}): West={west_utm}, South={south_utm}, East={east_utm}, North={north_utm}")

## STAC Search and Data Loading

This algorithm simulates a file-based workflow by processing whole Sentinel-2 L2A scenes retrieved from the EOPF Zarr store.
It avoids tile-based optimization: each scene is opened lazily as a whole, clipped to the AOI bounding box, and then used to compute the NDSI and classify snow.

In [None]:
# STAC Search
catalog = pystac_client.Client.open("https://stac.core.eopf.eodc.eu")

# Search for Sentinel-2 L2A items
# Adjust date range as needed.
time_range_str = "2025-06-01/2025-06-30"

print(f"Searching STAC for {time_range_str} over AOI...")
search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=bbox_4326,
    datetime=time_range_str,
)

items = list(search.items())
print(f"Found {len(items)} total items.")

# Separate deprecated and valid items based on the "deprecated" property.
deprecated_items = [item for item in items if item.properties.get("deprecated", False)]
valid_items = [item for item in items if not item.properties.get("deprecated", False)]

print(f"  - Valid items: {len(valid_items)}")
print(f"  - Deprecated items: {len(deprecated_items)}")

# Try to find newer versions for deprecated items via their 'superseded-by' link.
replacement_items = []
for dep_item in deprecated_items:
    superseded_by_link = next(
        (link for link in dep_item.links if link.rel == "superseded-by"),
        None,
    )
    if superseded_by_link is None:
        print(f"  No 'superseded-by' link for deprecated item: {dep_item.id}")
        continue

    try:
        replacement = None
        target = superseded_by_link.target
        if isinstance(target, Item):
            # The link has already been resolved to an Item object.
            replacement = target
        elif isinstance(target, str):
            # Unresolved link: the last path segment of the href is taken as the item id
            # (trailing "/" stripped so the segment is never empty).
            replacement_id = target.rstrip("/").split("/")[-1]
            replacement = catalog.get_collection(dep_item.collection_id).get_item(
                replacement_id
            )

        # Alternative: fetch the item JSON directly via the link href.
        if replacement is None and superseded_by_link.href:
            # A timeout keeps an unresponsive server from hanging the notebook.
            resp = requests.get(superseded_by_link.href, timeout=30)
            if resp.status_code == 200:
                replacement = Item.from_dict(resp.json())

        if replacement and not replacement.properties.get("deprecated", False):
            replacement_items.append(replacement)
            print(f"  Found replacement for {dep_item.id}: {replacement.id}")
        else:
            print(f"  No valid replacement found for {dep_item.id}")
    except Exception as e:
        # Best-effort lookup: report the failure and carry on with the other items.
        print(f"  Could not fetch replacement for {dep_item.id}: {e}")

# Combine valid items with replacements, keeping each item id only once.
# A replacement may already be among the valid search results; loading it twice
# would duplicate that scene in the data cube.
final_items = []
seen_ids = set()
for item in valid_items + replacement_items:
    if item.id not in seen_ids:
        seen_ids.add(item.id)
        final_items.append(item)
print(f"\nUsing {len(final_items)} items after resolving deprecations.")

# Keep only items that have the 'product' asset to avoid KeyErrors.
hrefs = [item.assets["product"].href for item in final_items if "product" in item.assets]

if final_items:
    print(f"First item: {final_items[0].id}")

# Function to extract time from filename (as per example)
def extract_time(ds):
    """Add a length-1 ``time`` dimension parsed from the dataset's source name.

    The source path/URL is read from ``ds.encoding["source"]``. Its last path
    component is split on "_" and the first part that starts with "20" and
    contains "T" (e.g. ``20200131T105251`` in ``S2A_MSIL2A_20200131T105251_...``)
    is parsed with the format ``%Y%m%dT%H%M%S``.

    Args:
        ds: Dataset-like object exposing ``encoding`` and ``expand_dims``
            (used as the ``preprocess`` callback of ``xr.open_mfdataset``).

    Returns:
        ``ds.expand_dims(time=[<parsed datetime>])`` on success; otherwise
        ``ds`` unchanged, after emitting a warning.
    """
    date_format = "%Y%m%dT%H%M%S"
    source = None
    try:
        source = ds.encoding["source"]
        # Zarr stores are directory-like, so hrefs may end with "/"; without
        # stripping it, basename would be "" and no date could be found.
        basename = os.path.basename(os.fspath(source).rstrip("/"))
        parts = basename.split("_")
        # Look for the date string part (starts with 20... and contains "T").
        date_str = next((p for p in parts if p.startswith("20") and "T" in p), None)

        if date_str:
            # Drop anything after the first "." (fractional seconds, ".zarr" suffix).
            timestamp = datetime.strptime(date_str.split(".")[0], date_format)
            # expand_dims makes time a real dimension, not just a scalar coordinate.
            return ds.expand_dims(time=[timestamp])
    except (KeyError, TypeError, ValueError) as err:
        reason = f"{type(err).__name__}: {err}"
    else:
        reason = "no date-like part in the file name"

    # Best-effort fallback: keep the dataset usable but make the failure visible.
    warnings.warn(
        f"extract_time: could not derive a time from source {source!r} ({reason}); "
        "dataset returned without a time dimension",
        stacklevel=2,
    )
    return ds

def _open_cropped(group, variable):
    """Lazily open `variable` from Zarr `group` across all hrefs, cropped to the AOI.

    Every store is stacked along "time" (the time value is attached by
    extract_time) and the result is selected with x_slice / y_slice.
    """
    stacked = xr.open_mfdataset(
        hrefs,
        engine="zarr",
        chunks={},
        group=group,
        concat_dim="time",
        combine="nested",
        preprocess=extract_time,
        mask_and_scale=True,
    )
    return stacked[[variable]].sel(x=x_slice, y=y_slice)


if hrefs:
    print("Loading data via xarray (Lazy)...")

    # NDSI needs the green band (B03, read from the r10m group) and the
    # SWIR band (B11, read from the r20m group); SCL provides the cloud mask.
    try:
        # Green band (B03) - 10m
        ds_b03 = _open_cropped("/measurements/reflectance/r10m", "b03")

        # SWIR band (B11) - 20m
        ds_b11 = _open_cropped("/measurements/reflectance/r20m", "b11")

        # Scene classification layer (SCL) - 20m
        ds_scl = _open_cropped("/conditions/mask/l2a_classification/r20m", "scl")

        # Bring the 20m layers onto the B03 (10m) grid with nearest-neighbour lookup.
        print("Resampling to common 10m grid...")
        ds_b11_interp = ds_b11.interp_like(ds_b03, method="nearest")
        ds_scl_interp = ds_scl.interp_like(ds_b03, method="nearest")

        # One cube holding all three variables, ordered chronologically.
        datacube = xr.merge([ds_b03, ds_b11_interp, ds_scl_interp]).sortby("time")

        print("DataCube created:")
        print(datacube)

        # Everything below stays lazy until .compute() is called.
        green = datacube["b03"]
        swir = datacube["b11"]
        scl = datacube["scl"]

        # NDSI = (green - swir) / (green + swir); a zero denominator is masked
        # out so the division does not produce infinities.
        denom = green + swir
        ndsi = (green - swir) / denom.where(denom != 0)

        # SCL classes treated as invalid:
        # 3: Cloud Shadows, 8: Cloud Medium, 9: Cloud High
        valid_mask = ~scl.isin([3, 8, 9])

        # Simple snow map: NDSI above the 0.42 threshold on valid pixels.
        snow_map = (ndsi > 0.42) & valid_mask

        # Evaluate only the first time step as a sanity check.
        if datacube.time.size > 0:
            print("Computing first time step...")
            snow_map_first = snow_map.isel(time=0).compute()
            print(f"Snow pixels in first scene: {snow_map_first.sum().item()}")

    except Exception as e:
        print(f"Error creating datacube: {e}")
else:
    print("No items found. Skipping data loading.")