# Iceland Snow and Ice Monitoring

This notebook implements a workflow for monitoring snow and ice in Iceland using Sentinel-2 data via the EOPF Zarr format.

In [None]:
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch
from xcube.core.store import new_data_store
from xcube_eopf.utils import reproject_bbox
from shapely.geometry import box
import shapely.geometry

# Create the xcube data store registered under the "eopf-zarr" identifier.
# This is the `store` object handed to the processing functions in the
# example usage further down.
store = new_data_store("eopf-zarr")

## Seeds
Load the glacier seeds (points) and define the Area of Interest.

In [None]:
# Read the glacier seed points and reproject them to WGS84 (EPSG:4326),
# the CRS used for the scene search.
seeds_gdf = gpd.read_file("data/Iceland_Seeds.geojson").to_crs("EPSG:4326")

# No buffer is applied around the seeds: their overall extent is used as the
# area of interest, since the native algorithm wants scenes covering them.
total_bounds = seeds_gdf.total_bounds  # [minx, miny, maxx, maxy]
print(f"Total Bounds (EPSG:4326): {total_bounds}")

## Sentinel Native Algorithm

This algorithm simulates a file-based workflow by processing full Sentinel-2 L2A scenes retrieved from the EOPF Zarr store.
It avoids tile-based optimization and instead loads the full scene extent to compute the NDSI (Normalized Difference Snow Index) and classify snow.

In [None]:
def process_scene_native(scene_id, store, threshold=0.42):
    """
    Process a single Sentinel-2 scene using the Native approach.

    Opens the scene from the store without a bounding box, masks cloudy
    pixels with the SCL band when it is present, loads the green (b03) and
    SWIR (b11) bands fully into memory, and classifies a pixel as snow when
    its NDSI exceeds ``threshold``.

    Args:
        scene_id: Identifier passed to ``store.open_data`` as ``data_id``.
            NOTE(review): assumes the store accepts a scene/product ID as
            ``data_id`` -- confirm against the store's documentation.
        store: Data store object providing ``open_data``.
        threshold: NDSI value above which a pixel is classified as snow.

    Returns:
        ``None`` when the scene cannot be opened or lacks b03/b11.
        Otherwise a dict with:
          - ``scene_id``: the identifier that was processed,
          - ``snow_pct``: snow pixels as a percentage of valid pixels (float),
          - ``snow_area_px``: number of snow pixels (int),
          - ``snow_map``: boolean snow classification array.
    """
    print(f"Processing scene: {scene_id}", flush=True)

    # No bbox is passed to open_data so that the full scene extent available
    # in the store is requested.
    try:
        ds = store.open_data(data_id=scene_id)
    except Exception as e:  # best-effort: a scene that fails to open is skipped
        print(f"Could not open {scene_id} directly: {e}", flush=True)
        return None

    # Sentinel-2 SCL classes treated as invalid:
    # 3 = cloud shadows, 8 = cloud medium probability, 9 = cloud high probability.
    if "scl" in ds:
        cloud_free = ~ds["scl"].isin([3, 8, 9])
    else:
        print("SCL layer not found, skipping cloud masking.", flush=True)
        cloud_free = None

    # NDSI needs Green (B03) and SWIR (B11).
    if "b03" not in ds or "b11" not in ds:
        print("  Required bands (B03, B11) not found.", flush=True)
        return None

    green = ds["b03"]
    swir = ds["b11"]
    if cloud_free is not None:
        # Masked pixels become NaN, which propagates into the NDSI below.
        green = green.where(cloud_free)
        swir = swir.where(cloud_free)

    # Load data into memory to simulate "downloading the whole file";
    # this forces retrieval of all chunks for these two bands.
    green = green.load()
    swir = swir.load()

    # NDSI = (Green - SWIR) / (Green + SWIR)
    ndsi = (green - swir) / (green + swir)
    # Comparisons against NaN are False, so masked pixels are never snow.
    snow_map = ndsi > threshold

    # A pixel is valid when its NDSI is defined. This covers cloud-masked
    # pixels (NaN after the masking above) and also works when no SCL band
    # exists, where previously an undefined mask was referenced.
    valid_count = int(ndsi.notnull().sum())
    snow_count = int(snow_map.sum())

    snow_pct = 100.0 * snow_count / valid_count if valid_count > 0 else 0.0

    print(f"  Snow Cover: {snow_pct:.2f}%", flush=True)
    return {
        "scene_id": scene_id,
        "snow_pct": snow_pct,
        "snow_area_px": snow_count,
        "snow_map": snow_map,  # kept in memory; callers may save it if needed
    }

def sentinel_native_algorithm(seeds_gdf, time_range, store, *, snow_pct_limit=30):
    """
    Main function for the Native Algorithm.

    1. Search the store for scenes covering the bounding box of the seeds.
    2. Process each scene with ``process_scene_native`` (full load, NDSI).
    3. Report scenes whose snow cover exceeds ``snow_pct_limit``; the
       expanded search itself is not implemented.

    Args:
        seeds_gdf: Object exposing ``total_bounds`` as
            ``[minx, miny, maxx, maxy]``; used as the search bbox.
        time_range: Time range forwarded unchanged to ``store.search_data``.
        store: Data store providing ``search_data`` (and ``open_data`` for
            the per-scene processing).
            NOTE(review): assumes the store implements ``search_data`` with
            ``data_id``/``bbox``/``time_range`` keywords -- confirm; a failing
            search is reported and yields an empty result.
        snow_pct_limit: Snow-cover percentage above which a scene is flagged
            for an expanded search.

    Returns:
        List of result dicts from ``process_scene_native``, one per scene
        that was processed successfully (possibly empty).
    """
    # Plain Python floats keep the printed bbox readable and avoid handing
    # NumPy scalar types to the store.
    bbox = [float(v) for v in seeds_gdf.total_bounds]

    print(f"Searching for scenes in {time_range} over {bbox}...", flush=True)

    scene_ids = []
    try:
        print("  Querying store for scenes...", flush=True)
        # search_data is expected to yield descriptors carrying a data_id;
        # the collection is addressed via the "sentinel-2-l2a" data_id.
        search_result = store.search_data(data_id="sentinel-2-l2a", bbox=bbox, time_range=time_range)

        # Explicit loop (not a comprehension) so that IDs collected before a
        # mid-iteration failure are still processed below.
        for descriptor in search_result:
            if hasattr(descriptor, "data_id"):
                scene_ids.append(descriptor.data_id)

        print(f"  Found {len(scene_ids)} scenes via store search.", flush=True)

    except Exception as e:  # best-effort: a failed search is reported, not raised
        print(f"  Store search failed: {e}", flush=True)

    if not scene_ids:
        print("  WARNING: No scenes found. Please ensure the store supports search or provide manual IDs.", flush=True)

    results = []
    for scene_id in scene_ids:
        res = process_scene_native(scene_id, store)
        if res is None:
            # Scene could not be opened or lacks the required bands.
            continue
        results.append(res)

        # Check the snow-cover condition that would trigger an expanded search.
        if res["snow_pct"] > snow_pct_limit:
            print(
                f"  Snow cover > {snow_pct_limit:g}%. Marking for expanded search (logic not implemented).",
                flush=True,
            )

    return results

# Example Usage
# time_range = ["2025-06-01", "2025-06-30"]
# results = sentinel_native_algorithm(seeds_gdf, time_range, store)

## Comparison
Compare the native results with the tile-aware Zarr approach.