In [1]:
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import shape

map_country = "Ukraine"
adm1_name = "oblast"
adm2_name = "raion"
adm3_name = "hromada"

# ───── 1. Load and Fix Geometry ────────────────────────────────

adm1 = gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/ukr_admbnd_sspe_20250131_ab_shp/ukr_admbnda_adm1_sspe_20240416.shp").to_crs(3857)
adm2 = gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/ukr_admbnd_sspe_20250131_ab_shp/ukr_admbnda_adm2_sspe_20240416.shp").to_crs(3857)
adm3 = gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/ukr_admbnd_sspe_20250131_ab_shp/ukr_admbnda_adm3_sspe_20240416.shp").to_crs(3857)

for gdf in (adm1, adm2, adm3):
    gdf["geometry"] = gdf["geometry"].buffer(0)  # fix invalid polygons

# Standardize column names
adm1 = adm1.rename(columns={adm1.columns[0]: "adm1_name"})  # first col assumed name
adm2 = adm2.rename(columns={adm2.columns[0]: "adm2_name"})
adm3 = adm3.rename(columns={adm3.columns[0]: "adm3_name"})

# ───── 2. Match ADM3 to ADM2 by Largest Area Overlap ───────────

inter32 = gpd.overlay(
    adm3[["adm3_name", "geometry"]],
    adm2[["adm2_name", "geometry"]],
    how="intersection",
    keep_geom_type=False
)
inter32["area"] = inter32.area

# Total ADM3 areas for percentages
adm3["area_tot"] = adm3.geometry.area
inter32 = inter32.merge(adm3[["adm3_name", "area_tot"]], on="adm3_name")
inter32["area_pct"] = inter32["area"] / inter32["area_tot"]

# Keep best match (largest overlap)
best_adm2 = (
    inter32.sort_values(["adm3_name", "area_pct"], ascending=[True, False])
           .drop_duplicates("adm3_name")[["adm3_name", "adm2_name"]]
)

# ───── 3. Match ADM2 to ADM1 by Largest Area Overlap ───────────

inter21 = gpd.overlay(
    adm2[["adm2_name", "geometry"]],
    adm1[["adm1_name", "geometry"]],
    how="intersection",
    keep_geom_type=False
)
inter21["area"] = inter21.area

adm2["area_tot"] = adm2.geometry.area
inter21 = inter21.merge(adm2[["adm2_name", "area_tot"]], on="adm2_name")
inter21["area_pct"] = inter21["area"] / inter21["area_tot"]

# Keep best match
best_adm1 = (
    inter21.sort_values(["adm2_name", "area_pct"], ascending=[True, False])
           .drop_duplicates("adm2_name")[["adm2_name", "adm1_name"]]
)

# ───── 4. Merge All into ADM3 ─────────────────────────────────

adm3_enriched = (
    adm3.merge(best_adm2, on="adm3_name", how="left")
        .merge(best_adm1, on="adm2_name", how="left")
        .assign(country=map_country)  # replace with actual name
        [["country", "adm1_name", "adm2_name", "adm3_name", "geometry"]]
)

# ───── 5. Save or Inspect ─────────────────────────────────────

# adm3_enriched.to_file("adm3_enriched.geojson", driver="GeoJSON")

adm3_enriched.head(1000)

Unnamed: 0,country,adm1_name,adm2_name,adm3_name,geometry
0,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Aromatnenska,"POLYGON ((3770100.474 5597912.272, 3770206.963..."
1,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Bakhchysaraiska,"MULTIPOLYGON (((3786956.664 5578118.303, 37868..."
2,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Verkhorichenska,"POLYGON ((3816115.3 5584298.353, 3815740.265 5..."
3,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Vilinska,"POLYGON ((3753474.598 5600633.048, 3753506.324..."
4,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Holubynska,"POLYGON ((3782731.249 5560434.93, 3782753.962 ..."
...,...,...,...,...,...
995,Ukraine,Lvivska,Lvivskyi,Hlynianska,"POLYGON ((2739716.1 6416341.168, 2739527.123 6..."
996,Ukraine,Khmelnytska,Khmelnytskyi,Horodotska,"POLYGON ((2633509.181 6419348.315, 2633561.691..."
997,Ukraine,Lvivska,Lvivskyi,Davydivska,"POLYGON ((2689170.453 6412220.409, 2689458.328..."
998,Ukraine,Lvivska,Lvivskyi,Dobrosynsko-Maherivska,"POLYGON ((2661577.358 6484811.164, 2661542.997..."


In [2]:
# 1. Strip " Oblast" from adm1_name if there is one
adm3_enriched["adm1_name"] = adm3_enriched["adm1_name"].str.replace(" Oblast", "", regex=False)

adm3_enriched.head(-1)

Unnamed: 0,country,adm1_name,adm2_name,adm3_name,geometry
0,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Aromatnenska,"POLYGON ((3770100.474 5597912.272, 3770206.963..."
1,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Bakhchysaraiska,"MULTIPOLYGON (((3786956.664 5578118.303, 37868..."
2,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Verkhorichenska,"POLYGON ((3816115.3 5584298.353, 3815740.265 5..."
3,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Vilinska,"POLYGON ((3753474.598 5600633.048, 3753506.324..."
4,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Holubynska,"POLYGON ((3782731.249 5560434.93, 3782753.962 ..."
...,...,...,...,...,...
1763,Ukraine,Chernihivska,Chernihivskyi,Ripkynska,"POLYGON ((3437042.483 6794121.482, 3437032.47 ..."
1764,Ukraine,Chernihivska,Chernihivskyi,Sednivska,"POLYGON ((3536505.967 6748145.26, 3536333.392 ..."
1765,Ukraine,Chernihivska,Chernihivskyi,Tupychivska,"POLYGON ((3511092.883 6749854.244, 3511098.281..."
1766,Ukraine,Zaporizka,Berdianskyi,Chernihivska,"MULTIPOLYGON (((3482199.99 6718536.097, 348219..."


In [3]:
# 2. Rename columns

adm3_enriched = adm3_enriched.rename(columns={
    "adm1_name": adm1_name,
    "adm2_name": adm2_name,
    "adm3_name": adm3_name
})

# 3. Optional: reorder columns
adm3_enriched = adm3_enriched[["country", "oblast", "raion", "hromada", "geometry"]]

adm3_enriched.head(-1)

Unnamed: 0,country,oblast,raion,hromada,geometry
0,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Aromatnenska,"POLYGON ((3770100.474 5597912.272, 3770206.963..."
1,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Bakhchysaraiska,"MULTIPOLYGON (((3786956.664 5578118.303, 37868..."
2,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Verkhorichenska,"POLYGON ((3816115.3 5584298.353, 3815740.265 5..."
3,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Vilinska,"POLYGON ((3753474.598 5600633.048, 3753506.324..."
4,Ukraine,Autonomous Republic of Crimea,Bakhchysaraiskyi,Holubynska,"POLYGON ((3782731.249 5560434.93, 3782753.962 ..."
...,...,...,...,...,...
1763,Ukraine,Chernihivska,Chernihivskyi,Ripkynska,"POLYGON ((3437042.483 6794121.482, 3437032.47 ..."
1764,Ukraine,Chernihivska,Chernihivskyi,Sednivska,"POLYGON ((3536505.967 6748145.26, 3536333.392 ..."
1765,Ukraine,Chernihivska,Chernihivskyi,Tupychivska,"POLYGON ((3511092.883 6749854.244, 3511098.281..."
1766,Ukraine,Zaporizka,Berdianskyi,Chernihivska,"MULTIPOLYGON (((3482199.99 6718536.097, 348219..."


In [4]:
# Optional: fix invalid geometries first using buffer(0)
adm3_enriched["geometry"] = adm3_enriched["geometry"].apply(
    lambda g: g if g.is_valid else g.buffer(0)
)

# Validate and convert to WKT with checks
records = []
for _, row in adm3_enriched.iterrows():
    hromada = row["hromada"]
    raion = row["raion"]
    oblast = row["oblast"]
    country = row["country"]
    geom = row["geometry"]

    geometry_wkt = geom.wkt if geom else None
    is_valid_geom = geom.is_valid if geom else False

    # Simulate serialization & deserialization
    try:
        _ = wkt.loads(geometry_wkt) if geometry_wkt else None
        is_valid_wkt = True
    except Exception:
        is_valid_wkt = False
        print(f"❗ Invalid WKT geometry for: {hromada}")

    records.append({
        "country": country,
        "oblast": oblast,
        "raion": raion,
        "hromada": hromada,
        "geometry_wkt": geometry_wkt,
        "is_valid_geom": is_valid_geom,
        "is_valid_wkt": is_valid_wkt
    })

# Create final DataFrame for export
df_geo_check = pd.DataFrame(records)

# Save full output
df_geo_check.to_csv("UA-HROMADA-geometry-checked.csv", index=False)

# # Save invalid WKT cases only
# df_geo_check[~df_geo_check["is_valid_wkt"]].to_csv("UA-HROMADA-invalid-wkt.csv", index=False)

print("✅ Export complete:\n- UA-HROMADA-geometry-checked.csv\n- UA-HROMADA-invalid-wkt.csv")


✅ Export complete:
- UA-HROMADA-geometry-checked.csv
- UA-HROMADA-invalid-wkt.csv


In [5]:
len(adm3)

1769