Many project geometries are big and complicated. This makes it very slow to repeatedly load and reproject data for 100+ projects. This notebook packages geometries we're interested in tracking into nice simplified geometries that are easier to work with. 

In [1]:
import json
from pathlib import Path
import subprocess

import geopandas
import fsspec
import pandas as pd
from tqdm import tqdm

In [7]:
s3 = fsspec.filesystem('s3', anon=False)

s3_buckets = [
    # files from ARB endpoint (raw)
    'carbonplan-forest-offsets/carb-geometries/raw',
    # files from registries (listed)
    'carbonplan-forest-offsets/carb-geometries/listed',
]

# Collect every JSON file directly under each prefix, in the prefix order above.
s3_paths = []
for s3_bucket in s3_buckets:
    s3_paths.extend(s3.glob(s3_bucket + '/*.json'))

In [8]:
# Percentage handed to mapshaper's `-simplify` option for most projects.
_DEFAULT_SIMPLIFY_PCT = '80%'
# ACR361 shapefile is so broken we have to really goose the simplification.
_SIMPLIFY_PCT_OVERRIDES = {'ACR361': '5%'}


def simplify_geometry(fn) -> geopandas.GeoDataFrame:
    """Pass a raw geometry through mapshaper and reproject the result.

    Parameters
    ----------
    fn : str
        S3 path of a JSON geometry file. The file stem is treated as the
        project id when picking the simplification percentage.

    Returns
    -------
    geopandas.GeoDataFrame
        The simplified features, labelled as EPSG:4326 and then reprojected
        to EPSG:5070.

    Raises
    ------
    RuntimeError
        If mapshaper exits with a non-zero status; the message carries
        mapshaper's stderr so the failing project can be diagnosed.
    """
    opr_id = Path(fn).stem
    s3 = fsspec.filesystem('s3', anon=False)
    with s3.open(fn) as f:
        d = json.load(f)

    simplify_pct = _SIMPLIFY_PCT_OVERRIDES.get(opr_id, _DEFAULT_SIMPLIFY_PCT)

    # mapshaper uses `-` to denote stdin/stdout, so read from - and write to -.
    # The command is passed as an argument list, so no shell is involved.
    result = subprocess.run(
        ['mapshaper', '-i', '-', '-simplify', simplify_pct, '-o', '-'],
        text=True,
        capture_output=True,
        input=json.dumps(d),
    )
    # Without this check a mapshaper failure would only surface as a
    # confusing JSON decode error on empty stdout.
    if result.returncode != 0:
        raise RuntimeError(
            f'mapshaper failed for {opr_id} '
            f'(exit code {result.returncode}): {result.stderr.strip()}'
        )

    gdf = geopandas.GeoDataFrame.from_features(json.loads(result.stdout))
    # The parsed features carry no CRS yet, so label them as EPSG:4326
    # before reprojecting.
    gdf = gdf.set_crs('epsg:4326')
    gdf = gdf.to_crs('epsg:5070')
    return gdf

In [9]:
# Simplify each geometry file in turn; tqdm reports progress over the paths.
results = [simplify_geometry(path) for path in tqdm(s3_paths)]

100%|██████████| 169/169 [09:05<00:00,  3.23s/it]


In [10]:
# Stack the per-file frames row-wise into one frame (each keeps its own index).
gdf = pd.concat(objs=results)

In [11]:
# Write the combined geometries to S3 as one gzip-compressed parquet file.
# The path has no placeholders, so a plain string literal is used.
with s3.open('s3://carbonplan-forest-offsets/carb-geometries/all_carb_geoms.parquet', 'wb') as f:
    gdf.to_parquet(f, compression='gzip', compression_level=9)