# NDVI EVI Statistics Processing Script

**Authors:** Gabriel Ortega & Michela Perrone

## Libraries

In [11]:
import ee
import geemap as gm
import pandas as pd
import geopandas as gpd
import time
import os
from datetime import datetime
# Authenticate with Earth Engine, then initialise the client library against
# the Cloud project named below. Both calls must run before any other `ee.*`
# object in this notebook is created.
ee.Authenticate()
ee.Initialize(project='gortega-research')

## 1. Settings

In [12]:
# --- Local inputs -----------------------------------------------------------
# Folder holding the ID list and one GeoPackage per ID
# (wdpa_wdpaid_<id>.gpkg, see the execution cell).
input_folder = "edge_bck"
uniqueID_file = os.path.join(input_folder, "wdpaid.csv")
# Column name assigned to the (header-less) ID list when it is read.
uniqueID_col = "wdpaid"

# --- Image collection -------------------------------------------------------
collection = 'MODIS/061/MOD13Q1'
bands = ['NDVI', 'EVI']
# Only pixels whose `qualityBand` equals `qualityValue` are kept
# (mask applied in per_year_wrapper).
qualityBand = 'SummaryQA'
qualityValue = 0
# Multiplier applied to the yearly composite in processData.
scaleFactor = 0.0001

# --- Time span (both years inclusive) ---------------------------------------
startYear = 2020
endYear = 2024
years = list(range(startYear, endYear + 1))
# Server-side copy of the year list, mapped over in the execution cell.
years_ee = ee.List(years)

# --- Export -----------------------------------------------------------------
# The Drive folder name carries the date on which the notebook is run.
current_date = datetime.now().strftime('%Y-%m-%d')
export_folder = 'GEEMAP_NDVI_EVI_wdpa_' + current_date

## 2. Function(s)

In [13]:
def processData(year, features, imageCollection):
    """
    Summarise a per-pixel 90th-percentile composite over each feature.

    Args:
        year: Value stored in the "year" property of every returned feature.
        features: Collection whose features supply the reduction geometries.
        imageCollection: Images collapsed into the composite.

    Returns:
        The result of mapping over `features`: each feature with the
        mean / standard-deviation statistics and "year" set as properties.
    """
    # Collapse the collection to its 90th percentile per pixel, then apply
    # the module-level scale factor.
    percentile_reducer = ee.Reducer.percentile([90])
    composite = imageCollection.reduce(percentile_reducer).multiply(scaleFactor)

    # Mean and standard deviation evaluated together on the same inputs.
    mean_and_sd = ee.Reducer.mean().combine(
        reducer2=ee.Reducer.stdDev(),
        outputPrefix=None,
        sharedInputs=True,
    )

    def attach_stats(feature):
        # Reduce the composite over this feature's geometry.
        region_stats = composite.reduceRegion(
            reducer=mean_and_sd,
            geometry=feature.geometry(),
            scale=250,
            maxPixels=1e13,
            bestEffort=True,
            tileScale=16,
        )
        # Existing properties are kept; the statistics and the year are added.
        with_stats = feature.set(region_stats)
        return with_stats.set("year", year)

    return features.map(attach_stats)

def per_year_wrapper(y, eepolygon):
    """
    Build the quality-masked collection for one year and hand it to processData.

    Args:
        y: The year; wrapped in `ee.Number` before use.
        eepolygon: Feature collection used both as the spatial filter and as
            the features passed on to processData.

    Returns:
        Whatever processData returns for this year.
    """
    y = ee.Number(y)

    # Date window: from 1 January of `y` to 1 January of the following year.
    window_start = ee.Date.fromYMD(y, 1, 1)
    window_end = ee.Date.fromYMD(y.add(1), 1, 1)

    def keep_good_pixels(image):
        # Mask out pixels whose quality band differs from `qualityValue`,
        # then drop the quality band itself.
        good = image.select(qualityBand).eq(qualityValue)
        return image.updateMask(good).select(bands)

    selected = ee.ImageCollection(collection).select(bands + [qualityBand])
    in_bounds = selected.filterBounds(eepolygon)
    in_year = in_bounds.filterDate(window_start, window_end)
    ic_y = in_year.map(keep_good_pixels)

    return processData(y, eepolygon, ic_y)


## 3. Execution

In [None]:
def _wait_for_queue_slot(max_active=3000):
    """Block until fewer than `max_active` tasks are READY or RUNNING.

    Polls `ee.batch.Task.list()`. Sleeps 2 minutes while the queue is full
    and 60 seconds after a failed poll, then tries again.
    """
    while True:
        try:
            active_count = sum(
                1 for t in ee.batch.Task.list()
                if t.state in ('READY', 'RUNNING')
            )
        except Exception as e:
            # Best effort: a failed status poll must not abort the whole run.
            print(f"Warning: Failed to check task queue ({e}). Retrying in 60s...")
            time.sleep(60)
            continue

        if active_count < max_active:
            return  # Room in the queue: the caller may submit its task

        print(f"Queue full ({active_count} active tasks). Waiting 2 minutes...")
        time.sleep(120)


if not os.path.exists(uniqueID_file):
    print(f"CRITICAL ERROR: Input file not found: {uniqueID_file}")
else:
    # wdpaid.csv has a single column with no header
    uniqueID = pd.read_csv(uniqueID_file, header=None, names=[uniqueID_col])

    # One export task per WDPA ID
    for wdpaid in uniqueID[uniqueID_col]:
        gpkg_path = os.path.join(input_folder, f"wdpa_wdpaid_{wdpaid}.gpkg")

        if not os.path.exists(gpkg_path):
            print(f"Skipping {wdpaid}: File not found at {gpkg_path}")
            continue

        # Broad catch on purpose: one bad polygon must not stop the batch.
        try:
            # Load the local vector layer
            polygon = gpd.read_file(gpkg_path, layer="sql_statement")

            # The exported collection holds one feature per (input feature,
            # year), so with a non-empty year list it is empty exactly when
            # the layer has no features. Checking locally avoids a blocking
            # `size().getInfo()` request on the full mapped collection, which
            # can fail for large polygons that a batch export would handle.
            if polygon.empty:
                print(f"Skipping {wdpaid}: No valid data generated for this polygon.")
                continue

            # Convert to an Earth Engine feature collection
            eepolygon = gm.geopandas_to_ee(polygon)

            # Map the processing function over the years and merge the
            # per-year collections into one
            features_all_years = ee.FeatureCollection(
                years_ee.map(lambda y: per_year_wrapper(y, eepolygon))
            ).flatten()

            fileName = f"mean_sd_NDVI_EVI_WDPA_{wdpaid}"

            task = ee.batch.Export.table.toDrive(
                collection=features_all_years,
                description=fileName,
                fileFormat="CSV",
                folder=export_folder,
            )

            # Hold the submission while the task queue is full
            _wait_for_queue_slot()

            task.start()
            print(f"Task started: {fileName}")

            # Sleep to avoid potential rate limits
            time.sleep(5)

        except Exception as e:
            print(f"Error processing {wdpaid}: {e}")

    print("All tasks submitted.")

ERROR 1: PROJ: proj_create_from_database: Open of /opt/miniconda3/envs/geostatistics/share/proj failed


Task started: mean_sd_NDVI_EVI_WDPA_10
Task started: mean_sd_NDVI_EVI_WDPA_12
Task started: mean_sd_NDVI_EVI_WDPA_16
Task started: mean_sd_NDVI_EVI_WDPA_18
Task started: mean_sd_NDVI_EVI_WDPA_20
Task started: mean_sd_NDVI_EVI_WDPA_21
Task started: mean_sd_NDVI_EVI_WDPA_23
Task started: mean_sd_NDVI_EVI_WDPA_35
Task started: mean_sd_NDVI_EVI_WDPA_36
Task started: mean_sd_NDVI_EVI_WDPA_51
Task started: mean_sd_NDVI_EVI_WDPA_64
Task started: mean_sd_NDVI_EVI_WDPA_65
Task started: mean_sd_NDVI_EVI_WDPA_66
Task started: mean_sd_NDVI_EVI_WDPA_74
Task started: mean_sd_NDVI_EVI_WDPA_77
Task started: mean_sd_NDVI_EVI_WDPA_78
Task started: mean_sd_NDVI_EVI_WDPA_86
Task started: mean_sd_NDVI_EVI_WDPA_89
Task started: mean_sd_NDVI_EVI_WDPA_99
Task started: mean_sd_NDVI_EVI_WDPA_101
Task started: mean_sd_NDVI_EVI_WDPA_104
Task started: mean_sd_NDVI_EVI_WDPA_105
Task started: mean_sd_NDVI_EVI_WDPA_107
Task started: mean_sd_NDVI_EVI_WDPA_110
Task started: mean_sd_NDVI_EVI_WDPA_111
Task started: mean_

Before running the Earth Engine code, you need to authenticate with your Google account to grant Colab access to Earth Engine. Run the next cell and follow the instructions.