# In [None]:
# ==============================================================================
# notebooks/02_gee_era5_export.ipynb
# ==============================================================================

# # 02 - GEE ERA5-Land Export
# This notebook exports ERA5-Land climate data from Google Earth Engine (GEE) to Google Cloud Storage (GCS).
# It covers:
# 1.  Loading the necessary setup (GEE initialization, `ee_woredas`, `BUCKET_NAME`, etc.)
# 2.  Defining the ERA5-Land image collection and selecting relevant bands.
# 3.  Exporting monthly aggregated ERA5-Land data for each woreda to GCS.

# ## 1. Load Project Setup and Libraries
# Import `ee` and custom modules, and ensure common variables from `00_setup_and_common_data_loading.ipynb` are available.

import ee
from google.colab import auth # Needed for re-authentication if session expires
import os
import sys

# Add src to path to import custom modules
project_root = os.path.abspath(os.path.join(os.getcwd(), '../'))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.gee_data_exporter import export_image_collection_by_feature, monitor_tasks

print("Libraries and custom modules loaded.")

# A Colab session can outlive its Earth Engine credentials, so authenticate
# and initialize again before touching any ee objects. This is best-effort:
# a failure is reported but does not stop the notebook.
try:
    auth.authenticate_user()
    ee.Initialize(project='bensa-coffee-yield')  # GEE Cloud project ID
except Exception as init_error:
    print(f"Error re-initializing GEE: {init_error}. Please ensure you have authenticated.")
else:
    print("GEE re-initialized.")

# Common variables mirrored from 00_setup_and_common_data_loading.ipynb so this
# notebook can also be run on its own.
import json  # stdlib; used to read the woreda GeoJSON without extra dependencies

BUCKET_NAME = 'bensa-coffee-yield'  # Must match the target GCS bucket
START_DATE = '2017-01-01'
END_DATE = '2025-12-31'
processed_data_dir = '../data/processed/'
ee_woredas_path = os.path.join(processed_data_dir, 'sidama_woredas.geojson')

# Load the woreda boundaries as an ee.FeatureCollection.
# The GeoJSON is parsed into a dict first: ee.FeatureCollection() interprets a
# *string* argument as a table asset ID, so passing raw GeoJSON text would not
# build a collection from the file contents.
try:
    with open(ee_woredas_path, encoding='utf-8') as geojson_file:
        woredas_geojson = json.load(geojson_file)
except FileNotFoundError:
    print(f"Error: '{ee_woredas_path}' not found. Please run '00_setup_and_common_data_loading.ipynb' first.")
    ee_woredas = None  # Later sections check for None and skip their work
else:
    ee_woredas = ee.FeatureCollection(woredas_geojson)
    print(f"Loaded ee_woredas from {ee_woredas_path}")

# ## 2. Define ERA5-Land Image Collection and Processing
# Select the ERA5-Land daily aggregate collection and define which bands (variables) to export.
# The bands are provided in physical units (e.g. K for temperature, m for precipitation,
# Pa for pressure), so no scale factor is applied here.
# Be mindful of these units when using the exported values (e.g. convert K to °C, m to mm).

if ee_woredas is not None:
    # ERA5-Land Daily Aggregates (hourly data aggregated to daily means/sums).
    # filterDate() treats its end as exclusive, so END_DATE is advanced by one
    # day to keep the final day of the requested period in the collection.
    era5_collection = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR').filterDate(
        START_DATE, ee.Date(END_DATE).advance(1, 'day')
    )

    # Source bands read from the collection (names as listed in the GEE data
    # catalog for ECMWF/ERA5_LAND/DAILY_AGGR).
    ERA5_BANDS = [
        'total_precipitation_sum',        # Daily precipitation total (m)
        'temperature_2m',                 # Mean temperature at 2m (K)
        'surface_pressure',               # Mean surface pressure (Pa)
        'soil_temperature_level_1',       # Mean soil temperature (K)
        'volumetric_soil_water_layer_1',  # Mean soil water content (m^3 m^-3)
    ]

    # Source band -> exported band name. Output names are kept stable so that
    # the export step and downstream consumers see the same columns as before.
    ERA5_MEAN_BAND_NAMES = {
        'temperature_2m': 'era5_temperature_2m',
        'surface_pressure': 'era5_surface_pressure',
        'soil_temperature_level_1': 'era5_soil_temperature_level_1',
        'volumetric_soil_water_layer_1': 'era5_soil_volume_water_content_level_1',
    }
    ERA5_SUM_BAND_NAMES = {
        'total_precipitation_sum': 'era5_total_precipitation',
    }

    def create_monthly_era5_composite(year, month):
        """Build one monthly ERA5-Land composite image.

        State variables are averaged over the month; precipitation is summed.

        Args:
            year: Calendar year of the composite.
            month: Calendar month (1-12) of the composite.

        Returns:
            An ee.Image with the four mean bands followed by the precipitation
            band, carrying the properties 'year', 'month', 'n_images' (number
            of daily images that fell in the month) and 'system:time_start'
            (first instant of the month).
        """
        start = ee.Date.fromYMD(year, month, 1)
        end = start.advance(1, 'month')
        daily_images = era5_collection.filterDate(start, end)

        # Bands are renamed while selecting (before reducing) so that a month
        # with no daily images simply yields a band-less image instead of a
        # rename() call whose name count does not match the band count.
        mean_image = daily_images.select(
            list(ERA5_MEAN_BAND_NAMES), list(ERA5_MEAN_BAND_NAMES.values())
        ).mean()
        precipitation_total = daily_images.select(
            list(ERA5_SUM_BAND_NAMES), list(ERA5_SUM_BAND_NAMES.values())
        ).sum()

        return (
            mean_image.addBands(precipitation_total)
            .set('year', year)
            .set('month', month)
            # Lets empty months be filtered out server-side.
            .set('n_images', daily_images.size())
            # Reduced images lose their timestamp; restore one so that
            # date-based filters can be applied to the composites.
            .set('system:time_start', start.millis())
        )

    # Years and months are plain Python integers: the dates are already known
    # client-side, so no server round-trip is needed to enumerate them.
    first_year = int(START_DATE.split('-')[0])
    last_year = int(END_DATE.split('-')[0])
    years = list(range(first_year, last_year + 1))
    months = list(range(1, 13))

    # Create monthly composites for all years/months, then drop the months for
    # which the collection holds no daily images (e.g. dates not yet available).
    # NOTE: a month that is only partially covered is kept, and its
    # precipitation total then reflects only the available days.
    monthly_era5_composites = ee.ImageCollection([
        create_monthly_era5_composite(year, month)
        for year in years
        for month in months
    ]).filter(ee.Filter.gt('n_images', 0))

    print(f"Created {monthly_era5_composites.size().getInfo()} monthly ERA5-Land composites.")
else:
    print("Skipping ERA5-Land collection definition as woreda data is not loaded.")

# ## 3. Export ERA5-Land Data to GCS
# Export the aggregated monthly ERA5-Land data for each woreda.
# The reducer here is `mean()` because the composites themselves are already monthly means/sums.

# The export needs both the woreda features and the monthly composites; if
# either is unavailable (missing input file, skipped section) nothing is queued.
if (
    ee_woredas is None
    or 'monthly_era5_composites' not in locals()
    or monthly_era5_composites is None
):
    print("Skipping ERA5-Land export due to missing data.")
else:
    GCS_FOLDER = 'gee_exports/era5/'
    PREFIX = 'era5_'
    # Must stay in sync with the band names produced by
    # create_monthly_era5_composite.
    ERA5_EXPORT_BANDS = [
        'era5_temperature_2m',
        'era5_surface_pressure',
        'era5_soil_temperature_level_1',
        'era5_soil_volume_water_content_level_1',
        'era5_total_precipitation',
    ]

    print(f"\nStarting ERA5-Land export to GCS bucket '{BUCKET_NAME}' folder '{GCS_FOLDER}'...")

    era5_export_tasks = export_image_collection_by_feature(
        collection=monthly_era5_composites,
        feature_collection=ee_woredas,
        band_names=ERA5_EXPORT_BANDS,
        # Spatial mean of each composite band over every woreda geometry.
        reducer=ee.Reducer.mean(),
        # Sampling scale in metres. ERA5-Land's native grid is about 0.1 degree
        # (roughly 11 km), so 1000 m samples the grid more finely than its
        # native resolution.
        scale=1000,
        bucket_name=BUCKET_NAME,
        folder_name=GCS_FOLDER,
        prefix=PREFIX,
        start_date=START_DATE,
        end_date=END_DATE,
    )

    # Optional progress feedback for the queued tasks.
    print("\nMonitoring ERA5-Land export tasks...")
    monitor_tasks(era5_export_tasks)
    print("✅ ERA5-Land export tasks initiated. Check GEE Tasks tab for progress.")
