In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.transform import pyramid_gaussian, resize

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
# --- 1. SYNTHETIC DATA GENERATION ---
# Shape of the synthetic world: a 100x100 grid.
# Each pixel is one "grid cell" as described in the paper.
# NOTE(review): presumably ordered (rows, cols) and passed as `size` to
# create_synthetic_data -- confirm in the calling cell.
MAP_SIZE = (100, 100)

In [6]:
def create_synthetic_data(size, seed=None):
    """
    Generate synthetic geospatial maps for the model features and target.

    This simulates the data that would be sourced from GEE, WDPA, etc.
    All rectangles below are hard-coded for a 100x100 grid; on a smaller
    grid NumPy slicing silently clips (or drops) any region that falls
    outside the array bounds.

    Parameters
    ----------
    size : tuple of int
        Two-element grid shape (axis 0, axis 1), e.g. ``(100, 100)``.
    seed : int or None, optional
        Seed for the random noise added to the GDP and population maps.
        When ``None`` (default) the noise is drawn from NumPy's global
        legacy random state, exactly as before, so results depend on any
        prior ``np.random.seed`` call. When given, a local
        ``np.random.default_rng(seed)`` is used so repeated calls with the
        same seed return identical maps and the global state is untouched.

    Returns
    -------
    feature_maps : dict
        Maps the keys ``'env_value'``, ``'gdp'`` and ``'population'`` to
        float arrays of shape ``size``.
    existing_pas : numpy.ndarray
        Float array of shape ``size``; 1 where a cell is protected, else 0.
    """
    print(f"Creating synthetic {size} maps...")

    # A local generator keeps seeded runs reproducible without mutating
    # the global NumPy random state.
    rng = None if seed is None else np.random.default_rng(seed)

    def _uniform_noise():
        # Uniform samples in [0, 1) with the grid's shape.
        if rng is None:
            # Legacy path: identical draw order to the original code.
            return np.random.rand(*size)
        return rng.random(tuple(size))

    # --- Target Variable (y) ---
    # This is our "World Database on Protected Areas (WDPA)" layer
    # 0 = Not Protected, 1 = Protected
    existing_pas = np.zeros(size)
    # Create a 30x30 "National Park" in a high-value area
    existing_pas[10:40, 10:40] = 1
    # Create a smaller 15x15 "Reserve"
    existing_pas[70:85, 10:25] = 1

    # --- Feature Variables (X) ---

    # Feature 1: Environmental Value (like NDVI)
    # Hypothesis: PAs are MORE likely in high-value areas.
    env_value_map = np.zeros(size)
    # The "National Park" area is a high-value hotspot
    env_value_map[5:45, 5:45] = 0.9
    # The "Reserve" area is also high-value
    env_value_map[65:90, 5:30] = 0.8
    # Add another "unprotected" hotspot (where transition risk should be high)
    env_value_map[20:40, 70:90] = 0.85

    # Feature 2: Economic Activity (like GDP / Night Lights)
    # Hypothesis: PAs are LESS likely in high-GDP areas (high opportunity cost).
    gdp_map = np.zeros(size)
    # Create a 25x25 "high-GDP city" where protection is unlikely
    gdp_map[50:75, 60:85] = 1.0
    # Add some noise (GDP noise is drawn first, then population noise)
    gdp_map += _uniform_noise() * 0.1

    # Feature 3: Population Density
    # Hypothesis: PAs are LESS likely in high-population areas.
    pop_map = np.zeros(size)
    # Population is high near the city
    pop_map[45:80, 55:90] = 0.8
    # Add some "villages"
    pop_map[15:25, 50:60] = 0.3
    pop_map += _uniform_noise() * 0.05

    # Return a dictionary of feature maps and the single target map
    feature_maps = {
        'env_value': env_value_map,
        'gdp': gdp_map,
        'population': pop_map
    }

    return feature_maps, existing_pas