# In [None]:
# ==============================================================================
# notebooks/03_gee_srtm_smap_export.ipynb
# ==============================================================================

# # 03 - GEE SRTM and SMAP Export
# This notebook exports static (SRTM DEM) and dynamic (SMAP soil moisture) data
# from Google Earth Engine (GEE) to Google Cloud Storage (GCS).
# It covers:
# 1.  Loading the necessary setup (`ee_woredas`, `BUCKET_NAME`, etc.)
# 2.  Defining the SRTM DEM and SMAP image collections.
# 3.  Exporting SRTM (elevation) as a static feature per woreda.
# 4.  Exporting monthly aggregated SMAP data for each woreda to GCS.

# ## 1. Load Project Setup and Libraries
# Import `ee` and custom modules, and ensure common variables are available.

import json
import os
import sys

import ee
import geopandas as gpd
from google.colab import auth # Needed for re-authentication if session expires

# Make the directory above the working directory importable so that the
# project's `src` package can be resolved from inside `notebooks/`.
_cwd = os.getcwd()
project_root = os.path.abspath(os.path.join(_cwd, '../'))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# Project helpers: per-feature export and task monitoring.
from src.gee_data_exporter import export_image_collection_by_feature, monitor_tasks

print("Libraries and custom modules loaded.")

# Colab sessions can expire between notebooks, so authenticate and
# initialize Earth Engine again. This is best-effort: a failure is reported
# but does not stop the notebook.
try:
    auth.authenticate_user()
    ee.Initialize(project='bensa-coffee-yield') # Use your GEE Project ID here
except Exception as init_error:
    print(f"Error re-initializing GEE: {init_error}. Please ensure you have authenticated.")
else:
    print("GEE re-initialized.")

# Common variables shared with 00_setup_and_common_data_loading.ipynb.
# They are defined here as well so that this notebook can run independently.
BUCKET_NAME = 'bensa-coffee-yield' # Make sure this matches your GCS bucket
START_DATE = '2017-01-01'
END_DATE = '2025-12-31'
processed_data_dir = '../data/processed/'
ee_woredas_path = os.path.join(processed_data_dir, 'sidama_woredas.geojson')

# Stays None when the woreda file is missing; later sections skip on None.
ee_woredas = None
# The file's existence is checked explicitly instead of relying on the
# exception type raised by the geopandas reader for a missing path.
if os.path.isfile(ee_woredas_path):
    gdf_woredas = gpd.read_file(ee_woredas_path)
    geojson_str = gdf_woredas.to_json()
    # ee.FeatureCollection treats a plain string as a table asset ID, so the
    # GeoJSON text must be parsed and passed as a list of GeoJSON features.
    woredas_geojson = json.loads(geojson_str)
    ee_woredas = ee.FeatureCollection(woredas_geojson['features'])
    print(f"Loaded ee_woredas from {ee_woredas_path}")
else:
    print(f"Error: '{ee_woredas_path}' not found. Please run '00_setup_and_common_data_loading.ipynb' first.")

# ## 2. Export SRTM DEM Data to GCS
# SRTM DEM (Digital Elevation Model) provides elevation data. Since it's a static
# dataset, we'll export the mean elevation for each woreda once.

if ee_woredas is not None:
    # SRTM Digital Elevation Model
    srtm = ee.Image('USGS/SRTMGL1_003')
    SRTM_BAND = ['elevation']

    def export_srtm_for_feature(feature, feature_id=None, feature_name=None):
        """Build a geometry-less feature holding the mean SRTM value of a woreda.

        Args:
            feature: ee.Feature whose geometry is reduced over the SRTM image.
            feature_id: Value of the 'Woreda_ID' property. When None it is
                fetched from `feature` with a blocking `getInfo()` call.
            feature_name: Value of the 'Woreda Name' property. When None it
                is fetched from `feature` with a blocking `getInfo()` call.

        Returns:
            ee.Feature with no geometry whose properties are the
            `reduceRegion` result plus 'Woreda_ID' and 'Woreda Name'.
        """
        # Only hit the server when the caller did not already have the values.
        if feature_id is None:
            feature_id = feature.get('Woreda_ID').getInfo()
        if feature_name is None:
            feature_name = feature.get('Woreda Name').getInfo()
        print(f"Processing SRTM for Woreda: {feature_name} (ID: {feature_id})")

        # Reduce the SRTM image over the feature's geometry
        stats = srtm.reduceRegion(
            reducer=ee.Reducer.mean(), # Get the mean elevation
            geometry=feature.geometry(),
            scale=30, # SRTM 1 Arc-Second Global is 30m
            tileScale=4,
            maxPixels=1e13
        )

        # Attach the identifying properties to the reduced statistics
        return ee.Feature(None, stats).set({
            'Woreda_ID': feature_id,
            'Woreda Name': feature_name
        })

    GCS_FOLDER_SRTM = 'gee_exports/srtm/'
    PREFIX_SRTM = 'srtm_elevation_'
    # GCS_FOLDER_SRTM already ends with '/'; strip it so that the object
    # path built below does not contain an empty '//' segment.
    srtm_folder = GCS_FOLDER_SRTM.rstrip('/')

    print(f"\nStarting SRTM elevation export to GCS bucket '{BUCKET_NAME}' folder '{GCS_FOLDER_SRTM}'...")

    srtm_export_tasks = []
    # One round trip fetches every woreda as a client-side GeoJSON dict.
    feature_list = ee_woredas.toList(ee_woredas.size()).getInfo()
    for f_info in feature_list:
        # Properties are already available locally, so no per-feature
        # getInfo() calls are needed for the ID and name.
        properties = f_info.get('properties') or {}
        woreda_id = properties.get('Woreda_ID')
        woreda_name = properties.get('Woreda Name')

        f = ee.Feature(f_info)
        # We export each feature as a separate table for SRTM since it's static
        task = ee.batch.Export.table.toCloudStorage(
            # Wrap the single reduced feature in a FeatureCollection
            collection=ee.FeatureCollection([export_srtm_for_feature(f, woreda_id, woreda_name)]),
            description=f'{PREFIX_SRTM}{woreda_id}_export',
            bucket=BUCKET_NAME,
            fileNamePrefix=f'{srtm_folder}/{PREFIX_SRTM}{woreda_id}',
            fileFormat='CSV'
        )
        task.start()
        print(f"  Export task for SRTM {woreda_name} ({woreda_id}) started.")
        srtm_export_tasks.append(task)

    print("\nMonitoring SRTM elevation export tasks...")
    monitor_tasks(srtm_export_tasks)
    print("✅ SRTM elevation export tasks initiated. Check GEE Tasks tab for progress.")
else:
    print("Skipping SRTM export due to missing woreda data.")

# ## 3. Define SMAP Soil Moisture Collection and Processing
# SMAP (Soil Moisture Active Passive) provides global soil moisture data.
# We'll export monthly aggregated soil moisture data.

if ee_woredas is not None:
    # SMAP soil moisture collection, restricted to the study period.
    # NOTE(review): 'sm_surface' / 'sm_rootzone' look like the band names of
    # the SMAP L4 product (SPL4SMGP) rather than SPL3SMP_E, and the '/003'
    # version should be checked against the Earth Engine Data Catalog.
    # Confirm the dataset ID and band names before running the export.
    smap_collection = ee.ImageCollection('NASA/SMAP/SPL3SMP_E/003') \
        .filterDate(START_DATE, END_DATE)

    SMAP_BANDS = [
        'sm_surface',  # Surface soil moisture (0-5 cm)
        'sm_rootzone'  # Rootzone soil moisture (0-100 cm)
    ]

    def create_monthly_smap_composite(year, month):
        """Return the mean SMAP image for one calendar month.

        Args:
            year: Calendar year of the composite.
            month: Calendar month (1-12) of the composite.

        Returns:
            ee.Image: mean of SMAP_BANDS over the images in
            [first day of month, first day of next month), tagged with
            'year', 'month', 'system:time_start' (start of the month) and
            'image_count' (number of source images in that month).
        """
        start = ee.Date.fromYMD(year, month, 1)
        end = start.advance(1, 'month')
        monthly_images = smap_collection.filterDate(start, end).select(SMAP_BANDS)
        return (
            monthly_images.mean()
            .set('year', year).set('month', month)
            # Timestamp the composite so date-based filters can act on it.
            .set('system:time_start', start.millis())
            # Recorded so that months without any source image can be dropped.
            .set('image_count', monthly_images.size())
        )

    # Years and months are plain integers: build them client-side instead of
    # passing year strings to ee.List.sequence and fetching the result back.
    first_year = int(START_DATE.split('-')[0])
    last_year = int(END_DATE.split('-')[0])
    years = list(range(first_year, last_year + 1))
    months = list(range(1, 13))

    # Create monthly composites for all years/months, keeping only months
    # that actually contain source images (an empty month yields a band-less
    # mean image).
    monthly_smap_composites = ee.ImageCollection([
        create_monthly_smap_composite(year, month)
        for year in years
        for month in months
    ]).filter(ee.Filter.gt('image_count', 0))

    print(f"\nCreated {monthly_smap_composites.size().getInfo()} monthly SMAP composites.")
else:
    print("Skipping SMAP collection definition as woreda data is not loaded.")

# ## 4. Export SMAP Data to GCS
# Export the aggregated monthly SMAP data for each woreda.

# The export needs both the woreda features and the monthly composites built
# in the previous section; skip cleanly when either is unavailable.
if ee_woredas is None or 'monthly_smap_composites' not in locals() or monthly_smap_composites is None:
    print("Skipping SMAP export due to missing data.")
else:
    GCS_FOLDER_SMAP = 'gee_exports/smap/'
    PREFIX_SMAP = 'smap_'

    print(f"\nStarting SMAP export to GCS bucket '{BUCKET_NAME}' folder '{GCS_FOLDER_SMAP}'...")

    # Arguments for the per-woreda export helper, gathered in one place.
    smap_export_kwargs = {
        'collection': monthly_smap_composites,
        'feature_collection': ee_woredas,
        'reducer': ee.Reducer.mean(),  # Mean of each composite over the woreda geometry
        'scale': 36000,  # Reduction scale passed to the helper
        'bucket_name': BUCKET_NAME,
        'folder_name': GCS_FOLDER_SMAP,
        'prefix': PREFIX_SMAP,
        'start_date': START_DATE,
        'end_date': END_DATE,
        'band_names': SMAP_BANDS,  # Bands to export
    }
    smap_export_tasks = export_image_collection_by_feature(**smap_export_kwargs)

    # Hand the returned tasks to the monitoring helper
    print("\nMonitoring SMAP export tasks...")
    monitor_tasks(smap_export_tasks)
    print("✅ SMAP export tasks initiated. Check GEE Tasks tab for progress.")
