In [None]:
# ==============================================================================
# notebooks/04_mapspam_coffee_extent_processing.ipynb
# ==============================================================================

# # 04 - MAPSPAM Coffee Extent Processing
# This notebook processes the MAPSPAM coffee extent raster data. The goal is to:
# 1.  Load the raw MAPSPAM coffee area raster.
# 2.  Clip the global/regional MAPSPAM raster to the study area (Sidama region).
# 3.  Create an individual coffee extent mask raster for each woreda,
#     which will be used later to extract statistics only from coffee-growing areas.

# ## 1. Load Project Setup and Libraries
# Import `rasterio`, `geopandas`, `numpy`, and custom geospatial processing modules.

import rasterio
import rasterio.mask
import numpy as np
import geopandas as gpd
import os
import sys

# Make the project's `src` package importable.
# The project root is taken to be the parent of the current working directory
# (presumably the notebook is launched from `notebooks/` -- confirm if run elsewhere).
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '../')
)
# Only register the root once so re-running this cell does not grow sys.path.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# Project-local helper used later to write the per-woreda mask rasters.
from src.geospatial_processing import mask_raster_with_polygon

print("Libraries and custom modules loaded.")

# Directory layout (all paths are relative to the current working directory).
data_dir = '../data/'
# Raw inputs and processed outputs live side by side under the data directory.
input_data_dir, processed_data_dir = (
    os.path.join(data_dir, subdir) for subdir in ('input/', 'processed/')
)
# Per-woreda coffee extent rasters are written here.
coffee_extents_dir = os.path.join(processed_data_dir, 'coffee_extents/')

# Create the output directory (and any missing parents); no error if it already exists.
os.makedirs(coffee_extents_dir, exist_ok=True)

# Load woreda boundaries (processed in 00_setup_and_common_data_loading.ipynb)
#
# `gdf_woredas` stays None unless the file was read AND passed validation, so the
# later cells (which test `gdf_woredas is not None`) are skipped on any failure.
PROCESSED_WOREDAS_GEOJSON_PATH = os.path.join(processed_data_dir, 'sidama_woredas.geojson')
gdf_woredas = None
try:
    # gpd.read_file may report a missing file with an engine-specific exception
    # rather than FileNotFoundError, so check explicitly to reach the helpful
    # "run notebook 00 first" message below.
    if not os.path.isfile(PROCESSED_WOREDAS_GEOJSON_PATH):
        raise FileNotFoundError(PROCESSED_WOREDAS_GEOJSON_PATH)

    _loaded_woredas = gpd.read_file(PROCESSED_WOREDAS_GEOJSON_PATH)
    if 'Woreda_ID' not in _loaded_woredas.columns:
        raise ValueError("GeoDataFrame must contain a 'Woreda_ID' column.")

    # Publish the frame only after validation succeeded; assigning earlier would
    # leave an invalid, non-None frame behind when the check above raises.
    gdf_woredas = _loaded_woredas
    print(f"Loaded GeoDataFrame with {len(gdf_woredas)} woredas.")
except FileNotFoundError:
    print(f"Error: '{PROCESSED_WOREDAS_GEOJSON_PATH}' not found. Please run '00_setup_and_common_data_loading.ipynb' first.")
except ValueError as e:
    print(f"Data error: {e}")
except Exception as e:
    # Notebook-level boundary: report and continue so later cells can skip cleanly.
    print(f"An unexpected error occurred loading woreda data: {e}")

# ## 2. Define MAPSPAM Input and Output Paths
# Locations of the raw MAPSPAM raster and of the clipped outputs produced below.

# Raw MAPSPAM coffee harvested-area raster.
# Download it from the MAPSPAM website (e.g., SPAM2017 Global Data > Harvested Area > Coffee)
# and place the .tif file in `data/input/`.
RAW_MAPSPAM_RASTER_PATH = os.path.join(
    input_data_dir,
    'spam2017V2r1_SSA_H_COFF_A.tif',  # <<< ADJUST FILENAME IF NEEDED
)

# Study-area-wide clipped raster (an intermediate product reused by the per-woreda step).
CLIPPED_MAPSPAM_RASTER_PATH = os.path.join(
    processed_data_dir,
    'sidama_coffee_extent_clipped.tif',
)

# Echo the resolved locations, one per line.
print(
    f"Raw MAPSPAM raster expected at: {RAW_MAPSPAM_RASTER_PATH}",
    f"Clipped MAPSPAM raster will be saved to: {CLIPPED_MAPSPAM_RASTER_PATH}",
    f"Individual woreda coffee extents will be saved in: {coffee_extents_dir}",
    sep="\n",
)

# ## 3. Clip MAPSPAM to Study Area
# Crop the large MAPSPAM raster to the extent of the study area and mask out every
# pixel that falls outside the union of the Sidama woreda polygons.
# This reduces the data size for faster processing.

if gdf_woredas is None:
    print("Skipping MAPSPAM clipping as woreda data is not loaded.")
elif not os.path.exists(RAW_MAPSPAM_RASTER_PATH):
    print(f"Error: Raw MAPSPAM raster not found at {RAW_MAPSPAM_RASTER_PATH}.")
    print("Please download 'spam2017V2r1_SSA_H_COFF_A.tif' (or equivalent) and place it in 'data/input/'.")
else:
    print("\nClipping MAPSPAM raster to study area bounding box...")
    try:
        with rasterio.open(RAW_MAPSPAM_RASTER_PATH) as src:
            # rasterio.mask.mask interprets the shapes in the raster's CRS, so
            # reproject the woredas first whenever both CRSs are known.
            woredas_for_clip = gdf_woredas
            if src.crs is not None and gdf_woredas.crs is not None:
                woredas_for_clip = gdf_woredas.to_crs(src.crs)

            # Dissolve all woredas into one study-area geometry. `union_all()` is
            # the current geopandas API; fall back to the older `unary_union`
            # property on versions that do not provide it.
            woreda_geoms = woredas_for_clip.geometry
            if hasattr(woreda_geoms, "union_all"):
                study_area = woreda_geoms.union_all()
            else:
                study_area = woreda_geoms.unary_union

            # The output declares 0 as NoData (assuming 0 means "no coffee").
            # Fill masked-out pixels with that same value; without `nodata=0`,
            # mask() would fill with the source's own NoData value instead.
            out_nodata = 0
            out_image, out_transform = rasterio.mask.mask(
                src, [study_area], crop=True, nodata=out_nodata
            )

            # Pixels inside the study area that carry the source's NoData value
            # must also become 0, otherwise they would be read back as valid data.
            src_nodata = src.nodata
            if src_nodata is not None and src_nodata != out_nodata:
                if np.isnan(src_nodata):
                    out_image[np.isnan(out_image)] = out_nodata
                else:
                    out_image[out_image == src_nodata] = out_nodata

            out_meta = src.meta.copy()
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width": out_image.shape[2],
                             "transform": out_transform,
                             "nodata": out_nodata
                            })

            with rasterio.open(CLIPPED_MAPSPAM_RASTER_PATH, "w", **out_meta) as dest:
                dest.write(out_image)
        print(f"✅ Clipped MAPSPAM raster saved to {CLIPPED_MAPSPAM_RASTER_PATH}")

    except Exception as e:
        # Notebook-level boundary: report the failure and let later cells decide
        # whether they can proceed.
        print(f"Error clipping MAPSPAM raster: {e}")

# ## 4. Create Individual Woreda Coffee Extent Masks
# For each woreda, create a separate raster mask representing the coffee-growing areas within that woreda.
# These individual masks will be used later to extract statistics only from coffee areas.
# Masks that already exist on disk are left untouched and counted as skipped.

if gdf_woredas is not None and os.path.exists(CLIPPED_MAPSPAM_RASTER_PATH):
    print("\nCreating individual woreda coffee extent masks...")

    masked_count = 0
    skipped_count = 0
    failed_count = 0

    for position, (_, row) in enumerate(gdf_woredas.iterrows()):
        woreda_id = row['Woreda_ID']
        # The name is only used in log messages; fall back to the ID rather than
        # raising KeyError when the column is absent.
        woreda_name = row.get('Woreda Name', woreda_id)
        # Positional single-row slice: keeps the GeoDataFrame type, CRS, dtypes and
        # active geometry column instead of rebuilding a frame from a Series.
        woreda_geometry = gdf_woredas.iloc[[position]]

        output_woreda_mask_path = os.path.join(coffee_extents_dir, f'{woreda_id}_coffee_extent.tif')

        if os.path.exists(output_woreda_mask_path):
            skipped_count += 1
            continue

        # Mask the clipped MAPSPAM raster with the current woreda's geometry
        success = mask_raster_with_polygon(CLIPPED_MAPSPAM_RASTER_PATH, woreda_geometry, output_woreda_mask_path)
        if success:
            print(f"  Created mask for {woreda_name} (ID: {woreda_id})")
            masked_count += 1
        else:
            print(f"  Failed to create mask for {woreda_name} (ID: {woreda_id})")
            failed_count += 1

    print(f"\n✅ Finished creating individual woreda coffee extent masks. {masked_count} new masks created, {skipped_count} skipped (already existed).")
    if failed_count:
        # Surface failures in the summary so they are not lost in the per-woreda log.
        print(f"  {failed_count} woreda mask(s) could not be created; see the messages above.")
else:
    print("Skipping individual woreda mask creation due to missing woreda data or clipped MAPSPAM raster.")
