# In [None]:
# ==============================================================================
# notebooks/01_gee_sentinel2_export.ipynb
# ==============================================================================

# # 01 - GEE Sentinel-2 Export
# This notebook exports Sentinel-2 data from Google Earth Engine (GEE) to Google Cloud Storage (GCS).
# It focuses on:
# 1.  Loading the necessary setup (GEE initialization, `ee_woredas`, `BUCKET_NAME`, etc.)
# 2.  Defining the Sentinel-2 image collection and processing steps (cloud masking, adding VIs).
# 3.  Exporting monthly aggregated Sentinel-2 data for each woreda to GCS.

# ## 1. Load Project Setup and Libraries
# Import `ee` and custom modules, and ensure common variables from `00_setup_and_common_data_loading.ipynb` are available.

import ee
from google.colab import auth # Needed for re-authentication if session expires
import os
import sys

# Make the repository root importable so the `src.*` modules resolve.
# Assumes the working directory is one level below the root (e.g. `notebooks/`).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from src.gee_data_exporter import cloud_mask_s2, add_NDVI_SAVI, export_image_collection_by_feature, monitor_tasks

print("Libraries and custom modules loaded.")

# The Colab session (and with it the Earth Engine credentials) may have lapsed,
# so authenticate and initialize again before issuing any Earth Engine request.
try:
    auth.authenticate_user()
    ee.Initialize(project='bensa-coffee-yield')  # Use your GEE Project ID here
except Exception as init_error:
    # Best-effort: report the failure and let the notebook continue, so the
    # user can authenticate manually and re-run the cell.
    print(f"Error re-initializing GEE: {init_error}. Please ensure you have authenticated.")
else:
    print("GEE re-initialized.")

# Shared settings, mirroring 00_setup_and_common_data_loading.ipynb.
# In a full pipeline these would ideally be passed in or loaded from a config;
# they are declared here so that this notebook can also be run on its own.
import json  # only needed here, to parse the woreda GeoJSON file

BUCKET_NAME = 'bensa-coffee-yield'  # Make sure this matches your GCS bucket
START_DATE = '2017-01-01'
END_DATE = '2025-12-31'
processed_data_dir = '../data/processed/'
ee_woredas_path = os.path.join(processed_data_dir, 'sidama_woredas.geojson')

# Load the woreda boundaries and hand them to Earth Engine as client-side
# features. The GeoJSON must be parsed first: a plain string passed to
# ee.FeatureCollection is interpreted as a table asset ID, not as GeoJSON.
try:
    with open(ee_woredas_path, encoding='utf-8') as woreda_file:
        woreda_geojson = json.load(woreda_file)
except FileNotFoundError:
    print(f"Error: '{ee_woredas_path}' not found. Please run '00_setup_and_common_data_loading.ipynb' first.")
    ee_woredas = None  # Later cells check for None and skip their work
else:
    # A list of GeoJSON Feature dicts is wrapped into ee.Feature objects by the
    # ee.FeatureCollection constructor.
    ee_woredas = ee.FeatureCollection(woreda_geojson['features'])
    print(f"Loaded ee_woredas from {ee_woredas_path}")

# ## 2. Define Sentinel-2 Image Collection and Processing
# Select the Sentinel-2 SR (Surface Reflectance) collection, apply cloud masking,
# and compute NDVI and SAVI. Then, aggregate to monthly composites.

if ee_woredas is not None:
    # Sentinel-2 surface-reflectance scenes over the woredas for the study period.
    # filterBounds keeps only scenes that intersect the study area, so the
    # per-month image counts below reflect local data availability.
    # NOTE: filterDate treats its end date as exclusive, so scenes acquired on
    # END_DATE itself are not included.
    s2_collection = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterBounds(ee_woredas)
        .filterDate(START_DATE, END_DATE)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))  # Drop scenes with high cloud cover
    )

    # Apply cloud mask and add NDVI/SAVI
    s2_processed = s2_collection.map(cloud_mask_s2).map(add_NDVI_SAVI)

    def create_monthly_composite(year, month):
        """Return the mean composite of `s2_processed` for one calendar month.

        Args:
            year: Calendar year of the composite.
            month: Calendar month (1-12) of the composite.

        Returns:
            An ee.Image holding the per-pixel mean of every processed scene in
            that month, tagged with these properties:
              - 'year', 'month': the requested period;
              - 'image_count': number of scenes that went into the mean
                (0 means the composite has no bands);
              - 'system:time_start': first instant of the month, so the
                composite can be date-filtered like a regular image.
        """
        start = ee.Date.fromYMD(year, month, 1)
        end = start.advance(1, 'month')
        month_images = s2_processed.filterDate(start, end)
        return (
            month_images.mean()
            .set('year', year)
            .set('month', month)
            .set('image_count', month_images.size())
            .set('system:time_start', start.millis())
        )

    # The year/month grid is built client-side: the bounds are plain strings,
    # so no Earth Engine round trip is needed (both end years are inclusive).
    years = list(range(int(START_DATE.split('-')[0]), int(END_DATE.split('-')[0]) + 1))
    months = list(range(1, 13))

    # Create monthly composites for all years/months, then drop the months for
    # which no scene was available (their mean would be an image without bands).
    monthly_composites = ee.ImageCollection([
        create_monthly_composite(year, month)
        for year in years
        for month in months
    ]).filter(ee.Filter.gt('image_count', 0))

    print(f"Created {monthly_composites.size().getInfo()} monthly Sentinel-2 composites.")
else:
    print("Skipping Sentinel-2 collection definition as woreda data is not loaded.")

# ## 3. Export Sentinel-2 Data to GCS
# Export the mean of 'NDVI' and 'SAVI' bands for each woreda, for each monthly composite.
# This will export a CSV for each woreda, containing monthly aggregated VI values.

if ee_woredas is None or 'monthly_composites' not in locals() or monthly_composites is None:
    # Either the woreda boundaries or the composites were not produced above.
    print("Skipping Sentinel-2 export due to missing data.")
else:
    BAND_NAMES = ['NDVI', 'SAVI']  # Vegetation-index bands to export
    PREFIX = 'sentinel2_'
    GCS_FOLDER = 'gee_exports/sentinel2_woredas/'

    print(f"\nStarting Sentinel-2 export to GCS bucket '{BUCKET_NAME}' folder '{GCS_FOLDER}'...")

    # Reduce every monthly composite over every woreda and queue the exports.
    s2_export_tasks = export_image_collection_by_feature(
        collection=monthly_composites,
        feature_collection=ee_woredas,
        band_names=BAND_NAMES,
        reducer=ee.Reducer.mean(),  # Mean vegetation index per woreda
        # Reduction scale; note that Sentinel-2's red/NIR bands are natively
        # 10 m, so 30 is coarser than the sensor resolution.
        scale=30,
        start_date=START_DATE,
        end_date=END_DATE,
        bucket_name=BUCKET_NAME,
        folder_name=GCS_FOLDER,
        prefix=PREFIX,
    )

    # Monitoring is optional feedback. It can take a very long time depending on
    # the number of woredas and years, so consider watching the GEE Tasks tab
    # instead or running this in the background.
    print("\nMonitoring Sentinel-2 export tasks...")
    monitor_tasks(s2_export_tasks)
    print("✅ Sentinel-2 export tasks initiated. Check GEE Tasks tab for progress.")
