In [10]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '../src')
import geopandas as gpd
from postprocess_detections import process_mgrs_tile
from joblib import Parallel, delayed
import pandas as pd
from tqdm import tqdm

# Local directory holding the tiles. Pointing this at the Google Cloud bucket
# instead would require changing process_mgrs_tile.
tiles_dir = '/home/christopher.x.ren/embeddings/ra_tea/valid_tiles'

# Tile-classifier dataset, read directly from GCS.
v2_dataset = gpd.read_parquet(
    "gs://demeter-labs/tea/classifier-datasets/tile_classifier_dataset_v2_java-sumatra.parquet"
)
# mgrs_id is the first five characters of tile_id
# (presumably the MGRS tile code -- verify against the tile naming scheme).
v2_dataset['mgrs_id'] = v2_dataset['tile_id'].map(lambda tile_id: tile_id[:5])
# Keep only the rows whose class label is 'ei_pos'; these are the detections.
detections = v2_dataset.loc[v2_dataset['class'] == 'ei_pos']

# Run process_mgrs_tile once per distinct MGRS id, in parallel (n_jobs=-1).
# Per the original note, this pairs the detections with tile geometries
# rather than point geometries; the details live in postprocess_detections.
process_tile = delayed(process_mgrs_tile)
tile_gdfs = Parallel(n_jobs=-1, verbose=10)(
    process_tile(mgrs_id, tiles_dir, detections)
    for mgrs_id in detections['mgrs_id'].unique()
)
# Buffer distance applied to every tile geometry, expressed in the units of
# the tile's projected CRS (metres, assuming the 'utm_zone' column holds a
# UTM CRS -- verify). This value is also embedded in the output file name.
BUFFER_SIZE = 160

# Drop the MGRS tiles for which process_mgrs_tile returned None.
tile_gdfs = [gdf for gdf in tile_gdfs if gdf is not None]

to_process = []
# Buffer each tile by BUFFER_SIZE in its local UTM zone, then go back to WGS84.
for tile_gdf in tqdm(tile_gdfs):
    # The target CRS is taken from the first row; this assumes every row of
    # one MGRS tile frame shares the same 'utm_zone' value.
    utm_zone = tile_gdf['utm_zone'].iloc[0]
    # to_crs returns a new GeoDataFrame, so tile_gdf itself is never mutated
    # and no explicit copy is needed beforehand.
    tile_gdf_copy = tile_gdf.to_crs(utm_zone)
    tile_gdf_copy.geometry = tile_gdf_copy.geometry.buffer(BUFFER_SIZE)
    tile_gdf_copy = tile_gdf_copy.to_crs(epsg=4326)
    to_process.append(tile_gdf_copy)


# Stack the buffered per-tile frames into a single frame.
detection_tiles = pd.concat(to_process)

# Join the buffered tiles onto the detections by tile_id. Columns present on
# both sides keep their name for the detections and gain a '_tile' suffix for
# the tile frame, so the buffered tile geometry arrives as 'geometry_tile'.
detections = detections.merge(detection_tiles, on='tile_id', suffixes=('', '_tile'))
# Make the buffered tile geometry the active geometry column.
detections = detections.set_geometry('geometry_tile')

# Dissolve every buffered tile geometry into one (multi-)geometry ...
unioned = detections['geometry_tile'].union_all()
# ... then split it back into its individual parts, one row per part,
# renumbered 0..n-1.
exploded_gdf = (
    gpd.GeoDataFrame(geometry=[unioned], crs=detection_tiles.crs)
    .explode(index_parts=True)
    .reset_index(drop=True)
)
# The row position doubles as the identifier of each merged polygon.
exploded_gdf['polygon_id'] = exploded_gdf.index

# Tag every detection with the merged polygon(s) its buffered tile intersects.
# how='left' keeps detections even when nothing matches.
detections_with_polygons = gpd.sjoin(
    left_df=detections,
    right_df=exploded_gdf,
    how='left',
    predicate='intersects',
)

# Persist the joined detections to GCS; the buffer size is part of the file
# name so outputs produced with different BUFFER_SIZE values do not collide.
output_uri = f"gs://demeter-labs/tea/ei-datasets/tile_geom_farm_id_pos_gdf_v2_java_sumatra_2024-11-11_buffer{BUFFER_SIZE}.parquet"
detections_with_polygons.to_parquet(output_uri)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  16 | elapsed:    0.6s remaining:    2.7s
[Parallel(n_jobs=-1)]: Done   5 out of  16 | elapsed:    0.7s remaining:    1.5s
[Parallel(n_jobs=-1)]: Done   7 out of  16 | elapsed:    1.0s remaining:    1.3s
[Parallel(n_jobs=-1)]: Done   9 out of  16 | elapsed:    1.1s remaining:    0.8s
[Parallel(n_jobs=-1)]: Done  11 out of  16 | elapsed:    1.3s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done  13 out of  16 | elapsed:    1.6s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  16 out of  16 | elapsed:    2.0s finished
100%|██████████| 16/16 [00:00<00:00, 45.57it/s]
