In [2]:
import geopandas as gpd
from shapely.geometry import Point

# Root folders used by this notebook: processed GeoJSON files are written under
# CLEANUP_DIR and the geoBoundaries input files are read from GEOJSON_DIR.
# NOTE: CLEANUP_DIR ends with a slash while GEOJSON_DIR does not.
CLEANUP_DIR = "/Users/tlahtolli/dev/drone_warfare/data/cleanup/"
GEOJSON_DIR = "/Users/tlahtolli/dev/drone_warfare/data/geoBoundaries"

# Three-letter codes of the countries to process; each one is used both as a
# sub-folder name and as part of the geoBoundaries file names.
countries = [
    'AFG',
    'PAK',
    'SOM',
    'YEM',
]

In [3]:
# Maps each three-letter country code to the country's display name.
country_names = dict(
    AFG="Afghanistan",
    PAK="Pakistan",
    SOM="Somalia",
    YEM="Yemen",
)

def verify_child_adm(row, gdf_parent):
    """Find the parent shape that contains the centroid of a child shape.

    Args:
        row: A row whose 'geometry' entry exposes a `.centroid` with `x`/`y`.
        gdf_parent: Frame of candidate parent shapes; must provide a
            `geometry` accessor and a 'shapeName' column.

    Returns:
        The 'shapeName' of the first parent whose geometry contains the
        centroid, or None when no parent contains it.
    """
    centroid = row['geometry'].centroid
    # Probe point built from the centroid coordinates, [longitude, latitude]
    probe = Point(centroid.x, centroid.y)

    containing = gdf_parent[gdf_parent.geometry.contains(probe)]
    if containing.empty:
        return None
    return containing['shapeName'].values[0]

def update_shapeName_and_drop_column(gdf, column_name):
    """Copy `column_name` into 'shapeName' and remove `column_name`.

    The frame is modified in place and also returned. When `column_name` is
    not one of the frame's columns, the frame is returned untouched.

    Args:
        gdf: The frame to update (mutated in place).
        column_name: Name of the column whose values replace 'shapeName'.

    Returns:
        The same `gdf` object that was passed in.
    """
    if column_name not in gdf.columns:
        return gdf

    gdf['shapeName'] = gdf[column_name]
    gdf.drop(column_name, axis=1, inplace=True)
    return gdf

# For every country and every parent/child pair of administrative levels, tag
# each child shape with the name of the parent shape that contains its centroid
# and write the tagged child layer to the cleanup folder.
for country in countries:
    # Each entry of `levels` is a PARENT level; the matching child layer is the
    # next level down (ADM{n} -> ADM{n + 1}). Pakistan is processed one level
    # deeper than the other countries.
    levels = ['ADM0', 'ADM1', 'ADM2'] if country == 'PAK' else ['ADM0', 'ADM1']
    for level_idx, level in enumerate(levels):
        ADM_parent = f'{GEOJSON_DIR}/{country}/geoBoundaries-{country}-ADM{level_idx}_simplified.geojson'
        ADM_child = f'{GEOJSON_DIR}/{country}/geoBoundaries-{country}-ADM{level_idx + 1}_simplified.geojson'

        gdf_parent = gpd.read_file(ADM_parent)
        gdf_child = gpd.read_file(ADM_child)

        # Some layers carry the name in 'PROV_34_NA'; when that column is
        # present, move it into 'shapeName' for both parent and child.
        gdf_parent = update_shapeName_and_drop_column(gdf_parent, 'PROV_34_NA')
        gdf_child = update_shapeName_and_drop_column(gdf_child, 'PROV_34_NA')

        # Name of the containing parent shape, or missing when none contains
        # the child's centroid.
        gdf_child['parentAdm'] = gdf_child.apply(lambda row: verify_child_adm(row, gdf_parent), axis=1)

        # Exceptions: fallbacks for children that matched no parent shape.
        if level == 'ADM0':
            # The parent layer is the country itself, so use the country name.
            gdf_child['parentAdm'] = gdf_child['parentAdm'].fillna(country_names[country])

        # Fixed: the level names were misspelt as 'AMD1'/'AMD2', so this
        # fallback never matched below ADM0. At ADM0 it stays a no-op because
        # the country-name fill above has already replaced every missing value.
        if country == 'PAK' and level in ('ADM0', 'ADM1', 'ADM2'):
            gdf_child['parentAdm'] = gdf_child['parentAdm'].fillna('Azad Kashmir')

        if level == 'ADM1' and country == 'YEM':
            gdf_child['parentAdm'] = gdf_child['parentAdm'].fillna('Al Hudaydah Governorate')

        gdf_child.to_file(f'{CLEANUP_DIR}/8_geojson_adm_levels/geoBoundaries-{country}-ADM{level_idx + 1}_simplified.geojson', driver='GeoJSON')

        # Use the block below to list the child rows that would be dropped, i.e. the child rows for which no containing parent shape was found (their parent column is null)
        # # Save the filter condition
        # mask = gdf_child[f'parent_{levels[level_idx - 1]}'].notna()

        # # Filter the dataframe using the mask
        # gdf_child_filtered = gdf_child[mask]

        # # Check which rows were dropped
        # dropped_rows = gdf_child[~mask]
        # print("Dropped rows:")
        # print(country, level, '\n', dropped_rows, '\n')