In [9]:
import geopandas as gpd
import subprocess
import ee
from utils import remove_pa_transects_in_chunks, combine_chunks_to_files

# Authenticate with Google Earth Engine, then bind this session to the Cloud project.
gee_project = 'dse-staff'
ee.Authenticate()
ee.Initialize(project=gee_project)

In [2]:
# Load the pre-filtered WDPA protected-area polygons and keep a handle on their CRS.
wdpa_path = "../data/wdpa_filtered/wdpa_filtered.shp"
wdpa_filtered = gpd.read_file(wdpa_path)
crs = wdpa_filtered.crs
len(wdpa_filtered)  # 4176 protected areas in the recorded run

4176

In [3]:
# Transect sampling configuration (all distances in meters, per the original notes)
sample_dist = 500     # spacing between neighbouring transects
transect_pts = 2      # sample points placed on each side of the boundary point
transect_unit = 2500  # step between consecutive sample points along one transect
buffer_margin = 500   # extra margin added on top of the transect's inward reach

# Inner-buffer size used to evaluate point validity:
# the inward reach of a transect (points per side * step) plus the margin.
buffer_dist = transect_pts * transect_unit + buffer_margin

In [4]:
# Shrink every protected area inward by buffer_dist in one vectorized pass (takes ~2min).
# NOTE(review): buffer() measures distance in the units of the layer's CRS, while
# buffer_dist is documented in meters -- confirm wdpa_filtered uses a metric projected CRS.
print("Creating inner buffers for all protected areas...")
inner_geoms = wdpa_filtered.geometry.buffer(-buffer_dist)

# Lookup table: WDPA_PID -> inward-buffered geometry (paired positionally, row by row).
wdpa_buffer_dict = dict(zip(wdpa_filtered['WDPA_PID'], inner_geoms))
del inner_geoms  # Free memory - only the dictionary is needed from here on

Creating inner buffers for all protected areas...


In [5]:
# Process PAs and write to chunks (~15min)
# Passes the full PA layer, the WDPA_PID -> inner-buffer lookup and the sampling
# parameters to the project helper, which writes its results under output_dir.
# NOTE(review): the run log shows a chunk being written after roughly 420 PAs were
# visited, so chunk_size presumably counts PAs that produced transects -- confirm in utils.
stats = remove_pa_transects_in_chunks(
    wdpa_gdf=wdpa_filtered,
    wdpa_buffer_dict=wdpa_buffer_dict,
    sample_dist=sample_dist,
    transect_unit=transect_unit,
    transect_pts=transect_pts,
    output_dir="../data/transect_chunks",
    chunk_size=400
)

# Print statistics
# The summary prints below are currently disabled; the trailing value on each line is
# the figure recorded by the author from a previous run, not live output.
# NOTE(review): the keys read from `stats` are assumed from these lines -- verify
# against the helper's return value before re-enabling.
#print(f"Total PAs in dataset: {stats['total_pas']:,}") # 4176
#print(f"PAs with empty buffer (too small): {stats['empty_buffer']:,}") # 237
#print(f"PAs with all transects filtered out: {stats['all_filtered']:,}") # 0
#print(f"PAs successfully processed: {stats['pas_processed']:,}") # 3939
#print(f"Transects filtered - inside buffer (bad angle): {stats['bad_inside_buffer']:,}") # 39,475
#print(f"Transects filtered - outside PA (crossed): {stats['bad_outside_pa']:,}") # 403,265
#print(f"Total transect points (filtered): {stats['total_points']:,}") # 8,650,425
#print(f"Total unique transects: {stats['total_transects']:,}") # 1,730,085
#print(f"Average transects per PA: {stats['total_transects'] / stats['pas_processed']:.1f}") # 439.2 from ~500 per PA
#print(f"Average points per transect: {stats['total_points'] / stats['total_transects']:.1f}") # 5
#print(f"Created {len(stats['chunk_files'])} chunk files") # 8 files
# NOTE(review): the "8 files" figure above looks stale -- the logged run of this cell
# wrote 10 chunks and the combine step found 10 chunk files.

Processing 4176 protected areas with streaming filter...
  Processed 421/4176 PAs, wrote chunk 1 | Total points: 852,750
  Processed 848/4176 PAs, wrote chunk 2 | Total points: 1,621,780
  Processed 1277/4176 PAs, wrote chunk 3 | Total points: 2,540,875
  Processed 1701/4176 PAs, wrote chunk 4 | Total points: 3,331,240
  Processed 2131/4176 PAs, wrote chunk 5 | Total points: 4,096,590
  Processed 2554/4176 PAs, wrote chunk 6 | Total points: 4,995,060
  Processed 2972/4176 PAs, wrote chunk 7 | Total points: 6,029,330
  Processed 3389/4176 PAs, wrote chunk 8 | Total points: 6,855,570
  Processed 3823/4176 PAs, wrote chunk 9 | Total points: 7,838,660
  Wrote final chunk 10


In [6]:
# Merge the chunk shapefiles into two CSVs: transect points and per-PA attributes (metadata)
transect_columns = ['WDPA_PID', 'transectID', 'pointID', 'x', 'y']
combine_chunks_to_files(
    chunk_pattern='../data/transect_chunks/chunk_*.shp',
    transect_output='../data/transects_final.csv',
    attributes_output='../data/attributes_final.csv',
    transect_cols=transect_columns,
)
# The chunk shapefiles themselves can be uploaded to GEE as-is (already in EPSG:4326)

Found 10 chunk files
Combining into:
  - ../data/transects_final.csv
  - ../data/attributes_final.csv

Transect columns: ['WDPA_PID', 'transectID', 'pointID', 'x', 'y']
Attribute columns: ['WDPA_PID']

Writing transects file...
  Wrote chunk 1/10
  Wrote chunk 2/10
  Wrote chunk 3/10
  Wrote chunk 4/10
  Wrote chunk 5/10
  Wrote chunk 6/10
  Wrote chunk 7/10
  Wrote chunk 8/10
  Wrote chunk 9/10
  Wrote chunk 10/10
Transects saved: 439.1 MB

Extracting unique attributes by WDPA_PID...
  Processed 3/10 chunks
  Processed 6/10 chunks
  Processed 9/10 chunks
  Processed 10/10 chunks

Attributes saved: 0.0 MB
Unique WDPA_PIDs: 3939

Combining complete!
