<a href="https://colab.research.google.com/github/boothmanrylan/nonStandReplacingDisturbances/blob/main/nonStandReplacingDisturbances.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the project repository and make it the working directory for the
# cells that follow.
# NOTE(review): running this cell a second time in the same session would
# issue the clone again from inside the checked-out directory — confirm
# whether a guard is wanted.
!git clone https://github.com/boothmanrylan/nonStandReplacingDisturbances.git
%cd nonStandReplacingDisturbances

In [None]:
import os

import google
from google.colab import auth
import ee
import geemap

In [None]:
# Authenticate the Colab session before any credentials are requested below.
auth.authenticate_user()

# Cloud project used for every Earth Engine request, and the asset folder
# under that project from which later cells load their data.
project = 'api-project-269347469410'
asset_path = f"projects/{project}/assets/rylan-nonstandreplacingdisturbances"

# Expose the project through the environment and set it as the gcloud CLI
# default so both pick up the same project.
os.environ['GOOGLE_CLOUD_PROJECT'] = project
!gcloud config set project {project}

# Initialize Earth Engine with the session's default credentials, bound to the
# project chosen above.
credentials, _ = google.auth.default()
ee.Initialize(
    credentials,
    project=project,
    # Optional high-volume endpoint, currently disabled; uncomment to use it.
    # opt_url='https://earthengine-highvolume.googleapis.com',
)

In [None]:
# Number of random points drawn for each of the train/test/val groups.
NUM_POINTS = 500

# Disturbance polygons loaded from the project's asset folder.
disturbed_regions = ee.FeatureCollection(
    f"{asset_path}/my-data/usfs-nsr-disturbances"
)


def _buffered_bounds(feature):
    """Buffer a feature by 500 and replace it with its bounding box.

    Both calls receive 100 as their error-margin argument.
    NOTE(review): the buffer distance is presumably in meters — confirm
    against the Earth Engine `buffer` documentation.
    """
    buffered = feature.buffer(500, 100)
    return buffered.bounds(100)


buffered_disturbed_regions = disturbed_regions.map(_buffered_bounds)

# Merge all buffered boxes into one geometry so that overlapping boxes become
# a single polygon.
merged_boxes = buffered_disturbed_regions.geometry(100)
buffered_geometry = merged_boxes.dissolve(100)

def split_multipolygon(multipolygon):
    """Turn every polygon of a multipolygon into its own feature.

    Args:
        multipolygon: geometry whose ``coordinates()`` list holds one entry
            per polygon (assumed to be an ee MultiPolygon geometry — TODO
            confirm behavior for single-polygon inputs).

    Returns:
        ee.FeatureCollection with one feature per entry of the coordinate
        list. Each feature has an 'id' property (its index in that list) and
        an 'area' property (the polygon's ``area(100)``).
    """
    # based on: https://gis.stackexchange.com/a/444779
    def grab_polygon(i):
        polygon = ee.Geometry.Polygon(multipolygon.coordinates().get(i))
        properties = {'id': i, 'area': polygon.area(100)}
        return ee.Feature(polygon, properties)

    last_index = multipolygon.coordinates().size().subtract(1)
    features = ee.List.sequence(0, last_index).map(grab_polygon)
    return ee.FeatureCollection(features)

# Break the dissolved buffer geometry into one feature per polygon. Each
# feature carries an 'id' (its index in the multipolygon) and an 'area'.
split_geometry = split_multipolygon(buffered_geometry)

# split into approx. 1/3 area to each of train/test/val by sorting by area and
# then extracting every third geometry
split_geometry = split_geometry.sort('area', False)
num_polygons = split_geometry.size()
N = num_polygons.subtract(1)

# Positions within the area-sorted collection assigned to each group:
# 0, 3, 6, ... -> train; 1, 4, 7, ... -> test; 2, 5, 8, ... -> val.
train_indices = ee.List.sequence(0, N, 3)
test_indices = ee.List.sequence(1, N, 3)
val_indices = ee.List.sequence(2, N, 3)

# The 'id' property is each polygon's index *before* sorting, so filtering on
# it would ignore the area ordering entirely. Materialize the sorted order as
# a list and pick features by their position in that list instead.
sorted_polygons = split_geometry.toList(num_polygons)


def _polygons_at(sorted_positions):
    """Return the features found at the given positions of the sorted list."""
    return ee.FeatureCollection(
        sorted_positions.map(lambda position: sorted_polygons.get(position))
    )


train_geometries = _polygons_at(train_indices)
test_geometries = _polygons_at(test_indices)
val_geometries = _polygons_at(val_indices)

def _random_points_in(region_geometries):
    """Draw NUM_POINTS random points inside the given region with seed 42."""
    return ee.FeatureCollection.randomPoints(
        region=region_geometries,
        points=NUM_POINTS,
        seed=42,
    )


# One point set per group; the seed is shared but each group samples its own
# region.
train_points = _random_points_in(train_geometries)
test_points = _random_points_in(test_geometries)
val_points = _random_points_in(val_geometries)

def _tag_disturbed(feature):
    """Give the feature a constant 'foo' = 1 property to rasterize."""
    return feature.set('foo', 1)


# Rasterize the tagged polygons, fill unpainted pixels via unmask(), and keep a
# boolean image that is true where the rasterized value is greater than 0.
tagged_regions = disturbed_regions.map(_tag_disturbed)
rasterized_regions = tagged_regions.reduceToImage(['foo'], ee.Reducer.first())
disturbance_mask = rasterized_regions.unmask().gt(0)

In [None]:
# Interactive map showing the disturbance mask, the split polygons, and the
# sampled points of each group.
Map = geemap.Map()

# (object, visualization parameters, layer label), added in drawing order.
map_layers = [
    (disturbance_mask, {}, 'Disturbed Regions'),
    (split_geometry, {'color': 'white'}, 'ROI'),
    (train_points, {'color': 'red'}, 'Train Centroids'),
    (test_points, {'color': 'blue'}, 'Test Centroids'),
    (val_points, {'color': 'yellow'}, 'Val Centroids'),
]
for layer_object, vis_params, layer_label in map_layers:
    Map.addLayer(layer_object, vis_params, layer_label)

Map