In [1]:
# Core analysis functions - modular and efficient
import ee
from utils import *
# Process geometries
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import pyproj

# Initialize GEE
ee.Authenticate()
ee.Initialize(project='dse-staff')

# Global datasets
PROTECTED_AREAS = ee.FeatureCollection('WCMC/WDPA/current/polygons')
ECOREGIONS = ee.FeatureCollection("RESOLVE/ECOREGIONS/2017")
WATER_MASK = ee.Image("JRC/GSW1_0/GlobalSurfaceWater").select('max_extent').eq(0)
HM_IMAGE = ee.ImageCollection('CSP/HM/GlobalHumanModification').mean()
MODIS = ee.ImageCollection('MODIS/006/MOD09A1')

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_7TDKVSyKvBdmMqW?ref=4i2o6

Attention required for JRC/GSW1_0/GlobalSurfaceWater! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/JRC_GSW1_0_GlobalSurfaceWater


Attention required for MODIS/006/MOD09A1! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/MODIS_006_MOD09A1



In [2]:
from config import * 
sites = ee.FeatureCollection("projects/dse-staff/assets/movement_metadata").geometry().buffer(50000) #50km
wdpaids = pas_at_movement_sites(sites)

In [4]:
gdf = gpd.read_file('../data/global_wdpa_June2021/Global_wdpa_wInfo_June2021.shp')
#wdpa_ids = list(gdf['WDPA_PID'].dropna().unique())
parks_subset = gdf[gdf['WDPA_PID'].isin(wdpaids)]
zones_list = []
print(f"Running with {len(wdpaids)}")

Running with 851


In [5]:
for idx, park in parks_subset.iterrows():
    geom = park.geometry
    
    # Fast local buffer operations
    small_ring = geom.buffer(1000).difference(geom.buffer(-1000))
    large_buffer = geom.buffer(5000).difference(geom.buffer(-5000))
    large_ring = large_buffer.difference(small_ring)
    
    base_props = {
        'WDPA_PID': park['WDPA_PID'],
        'ORIG_NAME': park['ORIG_NAME'],
        'GOV_TYPE': park['GOV_TYPE'],
        'OWN_TYPE': park['OWN_TYPE'],
        'STATUS_YR': park['STATUS_YR'],
        'IUCN_CAT': park['IUCN_CAT'],
        'GIS_AREA': park['GIS_AREA'],
        'PA_RATIO': park['PA_RATIO'],
        'BIOME_NAME': park['BIOME_NAME']
    }
    
    zones_list.extend([
        {**base_props, 'zone': '1_km', 'geometry': small_ring},
        {**base_props, 'zone': '5_km', 'geometry': large_ring}
    ])

zones_gdf = gpd.GeoDataFrame(zones_list, crs=gdf.crs)
zones_gdf.to_file('/workspace/data/zones/zones.shp', driver='ESRI Shapefile')
#ADD EXPORT TO ASSETS

In [6]:
zones = ee.FeatureCollection('projects/dse-staff/assets/zones')

In [7]:
hm_masked = HM_IMAGE #.updateMask(WATER_MASK)

hm_results = hm_masked.reduceRegions(
    collection=zones,
    reducer=ee.Reducer.mean().combine(ee.Reducer.stdDev(), '', True)
        .setOutputs(['hm_mean', 'hm_stddev']),
    scale=500,
    tileScale=8
)

In [8]:
# Process multiple years of MODIS gradient data with task queue management
import time

years = list(range(2001, 2024))  # 2001 to 2023
max_concurrent_tasks = 10
submitted_tasks = []

def check_task_status():
    """Check status of submitted tasks and remove completed ones"""
    global submitted_tasks
    active_tasks = []
    for task_obj, year in submitted_tasks:
        task_status = task_obj.status()
        if task_status['state'] in ['COMPLETED', 'FAILED', 'CANCELLED']:
            print(f"Task {year} {task_status['state']}")
        else:
            active_tasks.append((task_obj, year))
    submitted_tasks = active_tasks
    return len(submitted_tasks)

for i, year in enumerate(years):
    # Wait if we have too many active tasks
    while check_task_status() >= max_concurrent_tasks:
        print(f"Waiting... {len(submitted_tasks)} tasks active")
        time.sleep(30)  # Check every 30 seconds
    
    print(f"Processing year {year} ({i+1}/{len(years)})...")
    
    # Get MODIS and calculate NDVI
    modis = MODIS.filterDate(f'{year}-01-01', f'{year}-12-31') \
        .median() \
        .select(['sur_refl_b01', 'sur_refl_b02'])  # Red and NIR bands
    
    # Calculate NDVI
    ndvi = modis.normalizedDifference(['sur_refl_b02', 'sur_refl_b01']).rename('ndvi').select('ndvi')
    
    # Calculate gradient of NDVI
    grad = ndvi.gradient()
    magnitude = grad.expression('sqrt(x*x + y*y)', {'x': grad.select('x'), 'y': grad.select('y')}).rename('gradient_magnitude')
    magnitude_masked = magnitude #.updateMask(WATER_MASK)

    # Reduce with explicit CRS and scale matching MODIS
    final_results = magnitude_masked.reduceRegions(
        collection=hm_results,
        reducer=ee.Reducer.mean().combine(ee.Reducer.stdDev(), '', True)
            .setOutputs(['gradient_mean', 'gradient_stddev']),
        scale=500,  
        tileScale=8
    )
    
    # Export results and track task
    export_task = ee.batch.Export.table.toCloudStorage(
        collection=final_results,
        description=f'results_{year}',
        bucket='dse-staff',
        fileNamePrefix=f'protected_areas/results/results_{year}',
        fileFormat='CSV',
        selectors=['WDPA_PID', 'ORIG_NAME', 'GOV_TYPE', 'OWN_TYPE',
                   'STATUS_YR', 'IUCN_CAT', 'GIS_AREA', 'PA_RATIO', 'BIOME_NAME',
                   'zone', 'hm_mean', 'hm_stddev', 'gradient_mean', 'gradient_stddev']
    )
    export_task.start()
    submitted_tasks.append((export_task, year))
    print(f"Export task started: {export_task.id} for {year}")

# Wait for remaining tasks to complete
print("Waiting for remaining tasks to complete...")
while check_task_status() > 0:
    print(f"Still waiting for {len(submitted_tasks)} tasks...")
    time.sleep(30)

print("All export tasks completed!")

Processing year 2001 (1/23)...
Export task started: SPTFDFI5UQHPXEE3L245NNUD for 2001
Processing year 2002 (2/23)...
Export task started: GEVR4OVNN5RQQUT5BXPICC5D for 2002
Processing year 2003 (3/23)...
Export task started: KYLYYES3EXBVHD6LVIQH26IW for 2003
Processing year 2004 (4/23)...
Export task started: 4P6VJLISAZZWYWHBGZDE63GG for 2004
Processing year 2005 (5/23)...
Export task started: ZYIAKMIZ42HPJE4MUM77PZP2 for 2005
Processing year 2006 (6/23)...
Export task started: Z6UCPVHMS377UHXBZII6RXHC for 2006
Processing year 2007 (7/23)...
Export task started: WE5GVZOST3GTAZWSUCLWBQV7 for 2007
Processing year 2008 (8/23)...
Export task started: EVLMZSDHAMDKZBFYQUTM75PO for 2008
Processing year 2009 (9/23)...
Export task started: DOIPZ66CH4DZVV7MAP7LBSSI for 2009
Processing year 2010 (10/23)...
Export task started: CHUVGEHPLPOPVNOYWFFPFOGB for 2010
Waiting... 10 tasks active
Waiting... 10 tasks active
Waiting... 10 tasks active
Task 2002 COMPLETED
Task 2003 COMPLETED
Processing year 20

In [None]:
# Simplified version - just use gsutil and handle errors better
import subprocess
import tempfile
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt

# Simple download with error handling
temp_dir = tempfile.mkdtemp()
try:
    result = subprocess.run(['gsutil', 'ls', 'gs://dse-staff/protected_areas/results/*.csv'], 
                          capture_output=True, text=True, check=True)
    files = result.stdout.strip().split('\n')
    print(f"Found {len(files)} files")
    
    # Download all files
    subprocess.run(['gsutil', '-m', 'cp'] + files + [temp_dir], check=True)
    
    # Load and combine
    all_data = []
    for file in glob.glob(os.path.join(temp_dir, '*.csv')):
        year = int(os.path.basename(file).split('_')[-1].split('.')[0])
        df = pd.read_csv(file)
        df['year'] = year
        all_data.append(df)
    
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df['pid_zone'] = combined_df['WDPA_PID'].astype(str) + '_' + combined_df['zone']
    
    # Plot
    plt.figure(figsize=(15, 8))
    for pid_zone in combined_df['pid_zone'].unique():
        subset = combined_df[combined_df['pid_zone'] == pid_zone].sort_values('year')
        plt.plot(subset['year'], subset['gradient_mean'], alpha=0.7, linewidth=1)
    
    plt.xlabel('Year')
    plt.ylabel('NDVI Gradient Mean')
    plt.title('Timeline of NDVI Gradient by Protected Area and Buffer Zone')
    plt.grid(True, alpha=0.3)
    plt.show()
    
except subprocess.CalledProcessError as e:
    print(f"Error: {e}")
    print("Maybe no files exist yet or gsutil auth issue")