# indices

> Extract continuous time series (NDVI, EVI, etc.) from satellite imagery.

In [None]:
#| default_exp indices

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import ee
import pandas as pd
from fastcore.basics import patch
from typing import Literal

from gee_polygons.layers import ContinuousLayer
from gee_polygons.site import Site

## The Continuous Extraction Primitive

While `extract_categorical` handles discrete class data, `extract_continuous` handles floating-point time series like vegetation indices.

```python
site.extract_continuous(
    layer=SENTINEL2_NDVI,
    start_date='2020-01-01',
    end_date='2024-12-31',
    reducer='mean'
)
```

Returns a tidy DataFrame with one row per image, or one row per year or month when `frequency` is `'yearly'` or `'monthly'`.

In [None]:
#| export
@patch
def extract_continuous(
    self: Site,
    layer: ContinuousLayer,
    start_date: str,
    end_date: str,
    reducer: Literal['mean', 'median', 'min', 'max'] = 'mean',
    frequency: Literal['all', 'yearly', 'monthly'] = 'all',
    cloud_pct: int = 20,
    max_pixels: int = int(1e9)
) -> pd.DataFrame:
    """Summarise a continuous raster layer over this site through time.

    The layer's image collection is prepared once (date/bounds filtering
    plus any layer-specific preprocessing) and then handed to the
    extractor matching ``frequency``.

    Args:
        layer: ContinuousLayer describing the data source
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        reducer: Name of the ``ee.Reducer`` factory used for the spatial
            aggregation - 'mean', 'median', 'min' or 'max'
        frequency: Temporal output - 'all' (one row per image),
            'yearly' or 'monthly'
        cloud_pct: Max cloud cover % passed to the collection preparation
            step (default 20)
        max_pixels: Maximum pixels for the region reduction

    Returns:
        DataFrame with columns: site_id, date (or year/month), value

    Raises:
        ValueError: If ``frequency`` is not one of the supported values.
    """
    # Resolve the reducer by name, e.g. 'mean' -> ee.Reducer.mean()
    spatial_reducer = getattr(ee.Reducer, reducer)()

    prepared = _prepare_collection(
        layer, start_date, end_date, self.geometry, cloud_pct
    )

    # Guard clause: reject unsupported frequencies before dispatching
    if frequency not in ('all', 'yearly', 'monthly'):
        raise ValueError(f"Unknown frequency: {frequency}")

    if frequency == 'all':
        return _extract_all_images(self, prepared, layer, spatial_reducer, max_pixels)

    # The two period-based extractors share one call signature
    period_extractor = _extract_yearly if frequency == 'yearly' else _extract_monthly
    return period_extractor(
        self, prepared, layer, spatial_reducer, start_date, end_date, max_pixels
    )

In [None]:
#| export
def _prepare_collection(layer, start_date, end_date, geometry, cloud_pct):
    """Build the filtered, single-band ImageCollection for a layer.

    The collection named by ``layer.collection_id`` is restricted to the
    requested date range and to images intersecting ``geometry``.
    Sentinel-2 collections additionally get a scene-level cloud filter,
    optional QA60 cloud masking (when ``layer.mask_clouds`` is truthy) and
    derived NDVI/EVI bands. Finally only ``layer.band`` is kept.

    Args:
        layer: Layer description providing ``collection_id``, ``band`` and
            ``mask_clouds``
        start_date: Start date passed to ``filterDate``
        end_date: End date passed to ``filterDate``
        geometry: Geometry passed to ``filterBounds``
        cloud_pct: Upper bound (exclusive) on ``CLOUDY_PIXEL_PERCENTAGE``
            for Sentinel-2 scenes

    Returns:
        The prepared collection, containing only ``layer.band``.
    """
    collection = (
        ee.ImageCollection(layer.collection_id)
        .filterDate(start_date, end_date)
        .filterBounds(geometry)
    )

    # Sentinel-2 collection IDs contain 'S2' (e.g. COPERNICUS/S2_SR_HARMONIZED).
    # Do not match on 'COPERNICUS' alone: that prefix is shared by other
    # Copernicus datasets (Sentinel-1, Sentinel-3, ...) which have neither the
    # CLOUDY_PIXEL_PERCENTAGE property nor the B2/B4/B8 bands used below.
    if 'S2' in layer.collection_id:
        collection = collection.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_pct))

        if layer.mask_clouds:
            collection = collection.map(_mask_s2_clouds)

        # Append NDVI and EVI bands; the native bands are left unscaled
        collection = collection.map(_add_s2_indices)

    # Select the requested band
    collection = collection.select(layer.band)

    return collection


def _mask_s2_clouds(image):
    """Return ``image`` with cloudy pixels masked out via its QA60 band.

    A pixel is kept only when both the cloud flag (bit 10) and the cirrus
    flag (bit 11) of QA60 are zero.
    """
    qa_band = image.select('QA60')

    # Each test is true where the corresponding QA60 flag bit is unset
    cloud_free = qa_band.bitwiseAnd(1 << 10).eq(0)
    cirrus_free = qa_band.bitwiseAnd(1 << 11).eq(0)

    return image.updateMask(cloud_free.And(cirrus_free))


def _add_s2_indices(image):
    """Append 'NDVI' and 'EVI' bands to a Sentinel-2 image.

    The indices are computed from a copy of the image divided by 10000;
    the bands of the returned image other than NDVI/EVI are the original,
    undivided ones.
    """
    reflectance = image.divide(10000)

    ndvi_band = reflectance.normalizedDifference(['B8', 'B4']).rename('NDVI')

    # Band lookup used by the EVI expression below
    evi_inputs = {
        'NIR': reflectance.select('B8'),
        'RED': reflectance.select('B4'),
        'BLUE': reflectance.select('B2')
    }
    evi_band = reflectance.expression(
        '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
        evi_inputs
    ).rename('EVI')

    return image.addBands([ndvi_band, evi_band])

In [None]:
#| export
def _extract_all_images(site, collection, layer, reducer_fn, max_pixels):
    """Extract one value per image in the collection.

    Every image is reduced over ``site.geometry`` with ``reducer_fn`` at
    ``layer.scale``; the results are fetched in a single ``getInfo`` call.
    Images whose reduction produced no value are dropped.

    Args:
        site: Site providing ``geometry`` and ``site_id``
        collection: Prepared image collection containing ``layer.band``
        layer: Layer description providing ``band`` and ``scale``
        reducer_fn: Reducer applied over the site geometry
        max_pixels: ``maxPixels`` passed to ``reduceRegion``

    Returns:
        DataFrame with columns site_id, date, value. The columns are
        present even when no image yields a value.
    """
    columns = ['site_id', 'date', 'value']

    def reduce_image(img):
        stat = img.reduceRegion(
            reducer=reducer_fn,
            geometry=site.geometry,
            scale=layer.scale,
            maxPixels=max_pixels
        )
        return ee.Feature(None, {
            'date': img.date().format('YYYY-MM-dd'),
            'value': stat.get(layer.band)
        })

    fc = ee.FeatureCollection(collection.map(reduce_image)).getInfo()

    # .get() tolerates features whose 'value' property is absent as well as
    # those where it is explicitly null.
    rows = [
        {
            'site_id': site.site_id,
            'date': props['date'],
            'value': props['value']
        }
        for props in (feature['properties'] for feature in fc['features'])
        if props.get('value') is not None
    ]

    # Explicit columns keep the schema stable for an empty result
    return pd.DataFrame(rows, columns=columns)

In [None]:
#| export
def _extract_yearly(site, collection, layer, reducer_fn, start_date, end_date, max_pixels):
    """Aggregate by calendar year.

    For each year from the year of ``start_date`` through the year of
    ``end_date``, the images of that year are collapsed into a per-pixel
    median composite, which is then reduced over ``site.geometry`` with
    ``reducer_fn``. Years without a value are dropped.

    Args:
        site: Site providing ``geometry`` and ``site_id``
        collection: Prepared image collection containing ``layer.band``
        layer: Layer description providing ``band`` and ``scale``
        reducer_fn: Reducer applied over the site geometry
        start_date: Start date; its first four characters are the first year
        end_date: End date; its first four characters are the last year
        max_pixels: ``maxPixels`` passed to ``reduceRegion``

    Returns:
        DataFrame with columns site_id, year, value. The columns are
        present even when no year yields a value.
    """
    columns = ['site_id', 'year', 'value']
    start_year = int(start_date[:4])
    end_year = int(end_date[:4])

    records = []
    for year in range(start_year, end_year + 1):
        # filterDate treats its end as exclusive, so the window must close on
        # 1 January of the following year or images from 31 December are lost.
        yearly = collection.filterDate(f'{year}-01-01', f'{year + 1}-01-01')
        # Temporal median, then spatial reduce
        composite = yearly.median()

        stat = composite.reduceRegion(
            reducer=reducer_fn,
            geometry=site.geometry,
            scale=layer.scale,
            maxPixels=max_pixels
        )

        # NOTE(review): a year with no images gives a band-less composite;
        # confirm stat.get() does not raise server-side for the missing key.
        records.append(ee.Feature(None, {
            'year': year,
            'value': stat.get(layer.band)
        }))

    # Nothing to ask the server for when the range covers no years
    if not records:
        return pd.DataFrame(columns=columns)

    fc = ee.FeatureCollection(records).getInfo()

    # .get() tolerates features whose 'value' property is absent as well as
    # those where it is explicitly null.
    rows = [
        {
            'site_id': site.site_id,
            'year': props['year'],
            'value': props['value']
        }
        for props in (feature['properties'] for feature in fc['features'])
        if props.get('value') is not None
    ]

    # Explicit columns keep the schema stable for an empty result
    return pd.DataFrame(rows, columns=columns)

In [None]:
#| export
def _extract_monthly(site, collection, layer, reducer_fn, start_date, end_date, max_pixels):
    """Aggregate by calendar month.

    Walks month by month from the month containing ``start_date`` while the
    first day of the month is not after ``end_date``. The images of each
    month are collapsed into a per-pixel median composite, which is then
    reduced over ``site.geometry`` with ``reducer_fn``. Months without a
    value are dropped.

    Args:
        site: Site providing ``geometry`` and ``site_id``
        collection: Prepared image collection containing ``layer.band``
        layer: Layer description providing ``band`` and ``scale``
        reducer_fn: Reducer applied over the site geometry
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        max_pixels: ``maxPixels`` passed to ``reduceRegion``

    Returns:
        DataFrame with columns site_id, year, month, value. The columns
        are present even when no month yields a value.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not an ISO date
            (raised by ``datetime.date.fromisoformat``).
    """
    import datetime

    columns = ['site_id', 'year', 'month', 'value']

    start = datetime.date.fromisoformat(start_date)
    end = datetime.date.fromisoformat(end_date)

    records = []
    current = start.replace(day=1)

    while current <= end:
        year, month = current.year, current.month
        # First day of the following month; used as the exclusive window end
        if month == 12:
            next_month = datetime.date(year + 1, 1, 1)
        else:
            next_month = datetime.date(year, month + 1, 1)

        monthly = collection.filterDate(
            current.isoformat(),
            next_month.isoformat()
        )
        # Temporal median, then spatial reduce
        composite = monthly.median()

        stat = composite.reduceRegion(
            reducer=reducer_fn,
            geometry=site.geometry,
            scale=layer.scale,
            maxPixels=max_pixels
        )

        # NOTE(review): a month with no images gives a band-less composite;
        # confirm stat.get() does not raise server-side for the missing key.
        records.append(ee.Feature(None, {
            'year': year,
            'month': month,
            'value': stat.get(layer.band)
        }))

        current = next_month

    # Nothing to ask the server for when start_date lies after end_date
    if not records:
        return pd.DataFrame(columns=columns)

    fc = ee.FeatureCollection(records).getInfo()

    # .get() tolerates features whose 'value' property is absent as well as
    # those where it is explicitly null.
    rows = [
        {
            'site_id': site.site_id,
            'year': props['year'],
            'month': props['month'],
            'value': props['value']
        }
        for props in (feature['properties'] for feature in fc['features'])
        if props.get('value') is not None
    ]

    # Explicit columns keep the schema stable for an empty result
    return pd.DataFrame(rows, columns=columns)

## Example Usage

In [None]:
# # Initialize Earth Engine
# ee.Authenticate()
# ee.Initialize(project='your-project')

In [None]:
# from gee_polygons.site import load_sites
# from gee_polygons.datasets.sentinel2 import SENTINEL2_NDVI
#
# sites = load_sites('../data/restoration_sites_subset.geojson')
# site = sites[0]
#
# # Dense time series (all images)
# df = site.extract_continuous(
#     SENTINEL2_NDVI,
#     start_date='2020-01-01',
#     end_date='2023-12-31',
#     reducer='mean'
# )
# df.head()

In [None]:
# # Yearly summaries
# df_yearly = site.extract_continuous(
#     SENTINEL2_NDVI,
#     start_date='2018-01-01',
#     end_date='2024-12-31',
#     reducer='median',
#     frequency='yearly'
# )
# df_yearly

In [None]:
#| hide
# Run the nbdev export step for this project.
import nbdev; nbdev.nbdev_export()