# GEE Colab Template (Livestock View)

Colab-first template for reproducible Earth Engine workflows.

Rules:
- Validate dataset IDs against `data/catalog/gee_catalog.csv`.
- Print selected bands and filtered collection sizes before analysis.
- Keep AOI, dates, scale, and CRS explicit.


## 1) Install

In [None]:
import os
import subprocess
import sys

# Packages this notebook needs on a fresh Colab runtime.
COLAB_PACKAGES = ("earthengine-api", "geemap", "pandas")

# Colab sets COLAB_RELEASE_TAG; anywhere else the environment is assumed to be
# managed by the user, so nothing is installed.
if os.environ.get("COLAB_RELEASE_TAG"):
    # Invoke pip through the running interpreter so the packages land in the
    # kernel's own environment. Unlike the `!pip` shell escape this is plain
    # Python, and check_call raises CalledProcessError if the install fails
    # instead of letting a later import fail with a less obvious message.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-q", *COLAB_PACKAGES]
    )
else:
    print("Not running in Colab; dependency install skipped.")


## 2) Auth (`ee.Authenticate` + `ee.Initialize(project=...)`)

In [None]:
import ee

# Google Cloud project that Earth Engine requests are billed/attributed to.
PROJECT_ID = "TODO_YOUR_GCP_PROJECT_ID"

# Fail fast while the template placeholder is still in place: otherwise the
# user completes the interactive auth flow only for ee.Initialize to reject
# the project afterwards.
if not PROJECT_ID or PROJECT_ID.startswith("TODO"):
    raise ValueError(
        "Set PROJECT_ID to your Google Cloud project ID before running this cell."
    )

# Authenticate first (interactive when no cached credentials exist), then bind
# the session to the project.
ee.Authenticate()
ee.Initialize(project=PROJECT_ID)
print(f"Earth Engine initialized with project: {PROJECT_ID}")


## 3) Constants (AOI, dates, scale, CRS)

In [None]:
# TODO: set AOI/date windows for your run
AOI_CENTER = (-63.927778, -13.700278)  # (lon, lat)
AOI_BUFFER_KM = 30

# Date windows are passed to `filterDate`, whose end date is EXCLUSIVE.
# Adjacent windows therefore share a boundary date without overlapping, and a
# window meant to cover all of December must end on the following 1 January.
PRE_START = "2025-01-01"
PRE_END = "2025-06-01"
POST_START = "2025-06-01"
POST_END = "2026-01-01"  # exclusive end, so 2025-12-31 is included

# Window for the JRC monthly water history query.
# NOTE(review): the JRC GSW v1.4 monthly history is understood to end in 2021;
# confirm this window overlaps the dataset, otherwise the JRC queries are empty.
JRC_START = "2025-01-01"
JRC_END = "2026-01-01"

SCALE_M = 30  # pixel size in metres for reductions and exports
CRS = "EPSG:4326"  # output CRS for exports
FLOOD_THRESHOLD_DB = -1.5  # post-minus-pre VV change below this is flagged as water

# Square AOI: buffer the centre point by the radius (metres), then take the
# bounding box of the resulting circle.
lon, lat = AOI_CENTER
aoi = ee.Geometry.Point([lon, lat]).buffer(AOI_BUFFER_KM * 1000).bounds()
print("AOI bounds ready")


## 4) Dataset Discovery (print bands + size)

In [None]:
from pathlib import Path
import pandas as pd

S1_ID = "COPERNICUS/S1_GRD"
JRC_MONTHLY_ID = "JRC/GSW1_4/MonthlyHistory"

# Validate the dataset IDs against the local catalog when it is available in
# this runtime; the catalog is optional, so its absence is only reported.
catalog_path = Path("data/catalog/gee_catalog.csv")
if catalog_path.exists():
    catalog = pd.read_csv(catalog_path)
    print("Catalog rows:", len(catalog))
    wanted_ids = [S1_ID, JRC_MONTHLY_ID]
    print(catalog[catalog["dataset_id"].isin(wanted_ids)][["dataset_id", "kind"]])
    # Printing only the matching rows hides typos; name the IDs that are absent.
    missing_ids = sorted(set(wanted_ids) - set(catalog["dataset_id"]))
    if missing_ids:
        print("WARNING: dataset IDs not found in catalog:", missing_ids)
else:
    print("Catalog file not found in this runtime.")


def _print_collection_summary(label: str, col: ee.ImageCollection) -> None:
    """Print the size of *col* and the band names of its first image.

    An empty collection has no first image, so asking for its band names
    raises a server-side error; in that case a placeholder line is printed
    instead of crashing the discovery cell.
    """
    size = col.size().getInfo()
    print(f"{label} filtered size:", size)
    if size:
        print(f"{label} bands:", ee.Image(col.first()).bandNames().getInfo())
    else:
        print(f"{label} bands: n/a (no images match the AOI/date filters)")


# Sentinel-1 scenes over the AOI for the whole pre+post period, restricted to
# IW-mode acquisitions that carry a VV band.
s1_col = (
    ee.ImageCollection(S1_ID)
    .filterBounds(aoi)
    .filterDate(PRE_START, POST_END)
    .filter(ee.Filter.eq("instrumentMode", "IW"))
    .filter(ee.Filter.listContains("transmitterReceiverPolarisation", "VV"))
    .select("VV")
)
_print_collection_summary("S1", s1_col)

jrc_col = ee.ImageCollection(JRC_MONTHLY_ID).filterBounds(aoi).filterDate(JRC_START, JRC_END)
_print_collection_summary("JRC Monthly", jrc_col)


## 5) Example: Sentinel-1 pre/post composite + simple water mask

In [None]:
import geemap

def s1_composite(start_date: str, end_date: str) -> ee.Image:
    """Build a median Sentinel-1 VV composite over the AOI.

    Scenes are limited to the AOI, to the window from ``start_date`` up to
    ``end_date``, to IW instrument mode, and to acquisitions whose
    polarisation list contains VV. The number of matching scenes is printed
    before compositing.

    Args:
        start_date: Start of the date window passed to ``filterDate``.
        end_date: End of the date window passed to ``filterDate``.

    Returns:
        The per-pixel median of the selected VV band, renamed to ``vv``.
    """
    iw_only = ee.Filter.eq("instrumentMode", "IW")
    has_vv = ee.Filter.listContains("transmitterReceiverPolarisation", "VV")

    scenes = ee.ImageCollection(S1_ID).filterBounds(aoi).filterDate(start_date, end_date)
    vv_scenes = scenes.filter(iw_only).filter(has_vv).select("VV")

    # One round-trip to the server, purely to log how many scenes feed the median.
    scene_count = vv_scenes.size().getInfo()
    print(f"S1 {start_date} -> {end_date} images:", scene_count)

    median_vv = vv_scenes.median()
    return median_vv.rename("vv")

# Median VV composites for the two windows, built in the same order as before
# so the scene-count log lines appear pre first, post second.
pre_img = s1_composite(PRE_START, PRE_END)
post_img = s1_composite(POST_START, POST_END)

# Post-minus-pre backscatter change; pixels whose change falls below the
# threshold form the simple water mask.
_vv_change = post_img.subtract(pre_img)
flood_diff = _vv_change.rename("flood_diff")
_below_threshold = flood_diff.lt(FLOOD_THRESHOLD_DB)
water_mask = _below_threshold.rename("water_mask")

# (image, visualisation parameters, layer name), in drawing order.
_map_layers = [
    (pre_img, {"min": -25, "max": 0}, "S1 pre (VV)"),
    (post_img, {"min": -25, "max": 0}, "S1 post (VV)"),
    (flood_diff, {"min": -5, "max": 5, "palette": ["blue", "white", "red"]}, "S1 flood diff"),
    (water_mask.selfMask(), {"palette": ["00BFFF"]}, "Simple water mask"),
]

m = geemap.Map()
m.centerObject(aoi, 10)
for _image, _vis, _title in _map_layers:
    m.addLayer(_image, _vis, _title)
# Last expression of the cell: renders the interactive map in the notebook.
m


## 6) Example: JRC GSW Monthly History query

In [None]:
# JRC monthly history images touching the AOI inside the JRC date window.
jrc_monthly = (
    ee.ImageCollection(JRC_MONTHLY_ID)
    .filterBounds(aoi)
    .filterDate(JRC_START, JRC_END)
)


def monthly_water_feature(img: ee.Image) -> ee.Feature:
    """Summarise one monthly image as a geometry-less feature.

    Args:
        img: One image of the monthly history collection; its ``water`` band
            and ``system:time_start`` property are read.

    Returns:
        A feature with ``month`` (``YYYY-MM`` derived from
        ``system:time_start``) and ``water_pixels`` (the sum over the AOI of
        the 0/1 mask of pixels whose ``water`` band equals 2).
    """
    # NOTE(review): value 2 is presumed to be the "water" class - confirm
    # against the dataset's band documentation.
    is_water = img.select("water").eq(2)
    totals = is_water.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=aoi,
        scale=SCALE_M,
        maxPixels=1e12,
    )
    month_label = ee.Date(img.get("system:time_start")).format("YYYY-MM")
    properties = {"month": month_label, "water_pixels": totals.get("water")}
    return ee.Feature(None, properties)


monthly_fc = ee.FeatureCollection(jrc_monthly.map(monthly_water_feature))
feature_count = monthly_fc.size().getInfo()
print("JRC monthly features:", feature_count)

# Pull at most five features client-side as a quick sanity check.
print("Sample rows:")
sample_features = monthly_fc.limit(5).getInfo()["features"]
for sample in sample_features:
    print(sample["properties"])


## 7) Export Example (Drive + optional GCS)

In [None]:
EXPORT_TAG = "TODO_run_tag"
DRIVE_FOLDER = "gee_livestock_exports"
GCS_BUCKET = ""  # TODO optional: e.g., 'my-gee-bucket'

# Region and grid settings shared by both export destinations.
_grid_kwargs = {
    "region": aoi,
    "scale": SCALE_M,
    "crs": CRS,
    "maxPixels": 1e13,
}

# The Drive export always runs; the task description doubles as the file prefix.
_drive_name = f"s1_flood_diff_{EXPORT_TAG}"
drive_task = ee.batch.Export.image.toDrive(
    image=flood_diff.toFloat(),
    description=_drive_name,
    folder=DRIVE_FOLDER,
    fileNamePrefix=_drive_name,
    **_grid_kwargs,
)
drive_task.start()
print("Drive task started:", drive_task.id)

# The Cloud Storage export is opt-in: an empty bucket name disables it.
if not GCS_BUCKET:
    print("GCS export skipped (set GCS_BUCKET to enable).")
else:
    gcs_task = ee.batch.Export.image.toCloudStorage(
        image=flood_diff.toFloat(),
        description=f"s1_flood_diff_{EXPORT_TAG}_gcs",
        bucket=GCS_BUCKET,
        fileNamePrefix=f"gee/exports/s1_flood_diff_{EXPORT_TAG}",
        **_grid_kwargs,
    )
    gcs_task.start()
    print("GCS task started:", gcs_task.id)

# For large temporal arrays, export task metadata and build Zarr in cloud workflow.


## 8) Logging: print full config

In [None]:
import json
from datetime import datetime, timezone

# `datetime.UTC` exists only on Python 3.11+. `timezone.utc` is the same object
# and is available on every supported Python 3, so the cell also runs on older
# local kernels. The `UTC` name is kept as an alias for any code relying on it.
UTC = timezone.utc

# Snapshot of every setting that influences the run, printed as JSON so it can
# be copied into a run log. The timestamp is timezone-aware UTC.
run_config = {
    "timestamp_utc": datetime.now(UTC).isoformat(),
    "project_id": PROJECT_ID,
    "aoi_center": AOI_CENTER,  # tuple; serialised as a JSON array
    "aoi_buffer_km": AOI_BUFFER_KM,
    "pre_start": PRE_START,
    "pre_end": PRE_END,
    "post_start": POST_START,
    "post_end": POST_END,
    "jrc_start": JRC_START,
    "jrc_end": JRC_END,
    "scale_m": SCALE_M,
    "crs": CRS,
    "flood_threshold_db": FLOOD_THRESHOLD_DB,
    "datasets": {
        "s1": S1_ID,
        "jrc_monthly": JRC_MONTHLY_ID,
    },
}

print(json.dumps(run_config, indent=2))
