In [4]:
import geopandas as gpd
import pandas as pd
from joblib import Parallel, delayed
import glob

tiles_dir = "/home/christopher.x.ren/embeddings/ra_tea/tiles"

# Read every tile prediction, then keep only the rows whose classifier
# probability is above 0.9.
detections = gpd.read_parquet(
    "/home/christopher.x.ren/datasets/ra_tea/tile_classifier_predictions_v2_java-sumatra.parquet"
).loc[lambda preds: preds.prediction_probability > 0.9]

# The MGRS ID column holds the first five characters of each tile_id.
detections['mgrs_id'] = detections['tile_id'].map(lambda tile_id: tile_id[:5])

def process_mgrs_tile(mgrs_id, tiles_dir, detections):
    """Load the tile geometries for one MGRS ID, restricted to detected tiles.

    Parameters
    ----------
    mgrs_id : str
        Identifier searched for in the ``.parquet`` file names under
        ``tiles_dir``.
    tiles_dir : str
        Directory that holds the tile ``.parquet`` files.
    detections : DataFrame-like
        Only its ``tile_id`` column is read; rows of the tile file whose
        ``tile_id`` does not appear there are discarded.

    Returns
    -------
    GeoDataFrame or None
        The retained rows reprojected to EPSG:4326, or ``None`` when no file
        name under ``tiles_dir`` contains ``mgrs_id``.
    """
    # glob returns matches in arbitrary order; sorting makes the chosen file
    # reproducible when more than one file name contains the MGRS ID.
    matching_files = sorted(glob.glob(f"{tiles_dir}/*{mgrs_id}*.parquet"))
    if not matching_files:
        return None

    tile_gdf = gpd.read_parquet(matching_files[0])
    # Filter first so that only the rows we keep are reprojected.
    tile_gdf = tile_gdf[tile_gdf['tile_id'].isin(detections['tile_id'])]
    return tile_gdf.to_crs(epsg=4326)

# Process tiles in parallel. The workers only read the tile_id column, so hand
# them just that column rather than shipping the full detections frame.
detection_tile_ids = detections[['tile_id']]
tile_gdfs = Parallel(n_jobs=-1, verbose=10)(
    delayed(process_mgrs_tile)(mgrs_id, tiles_dir, detection_tile_ids)
    for mgrs_id in detections['mgrs_id'].unique()
)

# Filter out None results (MGRS IDs for which no tile file was found)
tile_gdfs = [gdf for gdf in tile_gdfs if gdf is not None]
if not tile_gdfs:
    # Fail here with a clear message: everything after this point needs
    # `detection_tiles` and the `geometry_tile` column created by the merge.
    raise FileNotFoundError(
        f"No tile parquet files found in {tiles_dir} for any detected MGRS ID")

# Concatenate the per-MGRS tile frames into one table of tile footprints
detection_tiles = pd.concat(tile_gdfs)
# Attach each detection's tile footprint. Columns present in both frames keep
# their name on the detections side and get a `_tile` suffix on the tile side;
# the tile footprint (`geometry_tile`) becomes the active geometry.
detections = detections.merge(detection_tiles, on='tile_id', suffixes=('', '_tile')).set_geometry('geometry_tile')
# Create unary union of all tile footprints so that touching or overlapping
# tiles are merged into a single shape
unioned = detections.geometry_tile.unary_union

# Convert to GeoDataFrame and explode into individual polygons
exploded_gdf = gpd.GeoDataFrame(geometry=[unioned], crs=detection_tiles.crs).explode(
    index_parts=True)

# Calculate centroids; they are only used below to pick a UTM zone and
# hemisphere for each polygon
centroids = exploded_gdf.geometry.centroid
exploded_gdf['centroid'] = centroids

# Get UTM zone for each centroid, computed column-wise instead of with a
# row-by-row apply. The x coordinate is treated as a longitude in degrees:
# zones are 6 degrees wide and numbered 1..60 starting at -180.
exploded_gdf['utm_zone'] = (((centroids.x + 180) / 6) % 60).astype(int) + 1
# Non-negative y (latitude) is the northern hemisphere, otherwise southern
exploded_gdf['hemisphere'] = (centroids.y >= 0).map({True: 'N', False: 'S'})
# Project each polygon to its UTM zone and calculate area
def get_area(row):
    """Return the area of ``row.geometry`` once projected to the row's UTM zone.

    ``row`` must provide ``geometry``, ``hemisphere`` ('N' selects the
    EPSG:326xx codes, any other value the EPSG:327xx codes) and an integer
    ``utm_zone``, which is zero-padded to two digits. The geometry is
    interpreted as EPSG:4326 before reprojection, and the area is expressed in
    the units of the target CRS.
    """
    if row.hemisphere == 'N':
        epsg_prefix = '326'
    else:
        epsg_prefix = '327'
    utm_crs = f"EPSG:{epsg_prefix}{row.utm_zone:02d}"

    # Wrap the single geometry in a one-row frame so it can be reprojected
    single_polygon = gpd.GeoDataFrame(geometry=[row.geometry], crs='epsg:4326')
    projected_areas = single_polygon.to_crs(utm_crs).area
    return projected_areas.iloc[0]

# Area of every polygon, measured in its own UTM zone
exploded_gdf['area'] = exploded_gdf.apply(get_area, axis=1)

# exploded_gdf must not be rebuilt between computing the area and exporting:
# doing so would throw away the 'area' column and leave no helper columns for
# the drop below to remove (drop raises KeyError on missing labels).
# Only the polygons and their areas are written; the helper columns, including
# the second geometry column 'centroid', are left out of the file.
exploded_gdf.drop(columns=['centroid', 'utm_zone', 'hemisphere']).to_file("exploded_gdf_v2.geojson")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  38 | elapsed:    3.3s remaining:   38.3s
[Parallel(n_jobs=-1)]: Done   7 out of  38 | elapsed:    3.8s remaining:   16.9s
[Parallel(n_jobs=-1)]: Done  11 out of  38 | elapsed:    4.4s remaining:   10.7s
[Parallel(n_jobs=-1)]: Done  15 out of  38 | elapsed:    4.6s remaining:    7.1s
[Parallel(n_jobs=-1)]: Done  19 out of  38 | elapsed:    5.1s remaining:    5.1s
[Parallel(n_jobs=-1)]: Done  23 out of  38 | elapsed:    5.2s remaining:    3.4s
[Parallel(n_jobs=-1)]: Done  27 out of  38 | elapsed:    5.3s remaining:    2.1s
[Parallel(n_jobs=-1)]: Done  31 out of  38 | elapsed:    5.3s remaining:    1.2s
[Parallel(n_jobs=-1)]: Done  35 out of  38 | elapsed:    5.4s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done  38 out of  38 | elapsed:    5.5s finished


In [20]:
# Inspect the exploded polygons.
# NOTE(review): the output below shows the centroid/utm_zone/hemisphere/area
# columns, and this cell's execution count (20) does not follow the cell
# above (4) -- confirm the output is reproducible with Restart & Run All.
exploded_gdf

Unnamed: 0,Unnamed: 1,geometry,centroid,utm_zone,hemisphere,area
0,0,"POLYGON ((98.84315 2.10759, 98.84315 2.11049, ...",POINT (98.84458 2.10904),47,N,102400.000000
0,1,"POLYGON ((98.79997 2.16404, 98.79997 2.16693, ...",POINT (98.80141 2.16549),47,N,102400.000000
0,2,"POLYGON ((98.62155 2.17124, 98.62155 2.17414, ...",POINT (98.62299 2.17269),47,N,102400.000000
0,3,"POLYGON ((98.72658 2.21903, 98.72658 2.21759, ...",POINT (98.72471 2.22048),47,N,255999.998848
0,4,"POLYGON ((98.68197 2.2335, 98.67909 2.2335, 98...",POINT (98.68053 2.23567),47,N,153599.999568
0,...,...,...,...,...,...
0,1443,"POLYGON ((110.86609 -6.6035, 110.86536 -6.6035...",POINT (110.86609 -6.60206),49,S,153599.990685
0,1444,"POLYGON ((110.92326 -6.59773, 110.92615 -6.597...",POINT (110.92471 -6.59917),49,S,102400.000000
0,1445,"POLYGON ((110.86681 -6.59627, 110.86681 -6.593...",POINT (110.86826 -6.59482),49,S,102400.000000
0,1446,"POLYGON ((110.91385 -6.58904, 110.91602 -6.589...",POINT (110.91367 -6.59103),49,S,204800.023507
