In [8]:
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import shape


# ───── 1. Load and Fix Geometry ────────────────────────────────

# Single place to repoint when the geoBoundaries files live somewhere else.
GEOJSON_DIR = "/Users/ekowaddai/Documents/GitHub/Personal/Geojson"


def load_admin_level(level, name_col):
    """Load one geoBoundaries UKR layer, repair it and label its name column.

    Parameters
    ----------
    level : int
        Administrative level (1, 2 or 3); selects
        ``geoBoundaries-UKR-ADM{level}.geojson`` inside ``GEOJSON_DIR``.
    name_col : str
        Name to give the unit-name column (e.g. ``"adm3_name"``).

    Returns
    -------
    geopandas.GeoDataFrame
        The layer reprojected to EPSG:3857, with every geometry passed
        through ``buffer(0)`` and the unit-name column renamed to ``name_col``.
    """
    layer = gpd.read_file(
        f"{GEOJSON_DIR}/geoBoundaries-UKR-ADM{level}.geojson"
    ).to_crs(3857)
    layer["geometry"] = layer["geometry"].buffer(0)  # fix invalid polygons

    # NOTE(review): geoBoundaries files normally carry the unit name in
    # "shapeName" -- confirm against your download. Selecting it by name avoids
    # renaming the wrong column when the property order differs; the first
    # column remains the fallback, as before.
    source_col = "shapeName" if "shapeName" in layer.columns else layer.columns[0]
    return layer.rename(columns={source_col: name_col})


# Standardize column names while loading
adm1 = load_admin_level(1, "adm1_name")
adm2 = load_admin_level(2, "adm2_name")
adm3 = load_admin_level(3, "adm3_name")

# ───── 2. Match ADM3 to ADM2 by Largest Area Overlap ───────────

# Unit names repeat across the country, so they cannot be used as join keys:
# merging or de-duplicating on a name lumps every same-named unit together and
# hands all of them one shared parent. Surrogate row ids identify each polygon
# unambiguously instead.
adm2["adm2_id"] = range(len(adm2))
adm3["adm3_id"] = range(len(adm3))

# Full polygon areas; they ride through the overlay on the child side so each
# intersection piece can be expressed as a share of the unit it came from.
adm2["area_tot"] = adm2.geometry.area
adm3["area_tot"] = adm3.geometry.area


def overlap_pieces(children, parents):
    """Intersect two layers and measure every resulting piece.

    Parameters
    ----------
    children : geopandas.GeoDataFrame
        Layer being assigned; must contain an ``area_tot`` column holding the
        full area of each child polygon.
    parents : geopandas.GeoDataFrame
        Layer providing the candidate parents.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per child/parent intersection with the columns of both inputs
        plus ``area`` (piece area) and ``area_pct`` (``area / area_tot``).
    """
    pieces = gpd.overlay(children, parents, how="intersection")
    pieces["area"] = pieces.geometry.area
    pieces["area_pct"] = pieces["area"] / pieces["area_tot"]
    return pieces


def largest_overlap(pieces, child_id):
    """Keep, for every ``child_id``, the single piece with the highest ``area_pct``."""
    return (
        pieces.sort_values([child_id, "area_pct"], ascending=[True, False])
              .drop_duplicates(child_id)
    )


inter32 = overlap_pieces(
    adm3[["adm3_id", "adm3_name", "area_tot", "geometry"]],
    adm2[["adm2_id", "adm2_name", "geometry"]],
)

# Keep best match (largest overlap) per ADM3 polygon
best_adm2 = largest_overlap(inter32, "adm3_id")[
    ["adm3_id", "adm3_name", "adm2_id", "adm2_name"]
]

# ───── 3. Match ADM2 to ADM1 by Largest Area Overlap ───────────

inter21 = overlap_pieces(
    adm2[["adm2_id", "adm2_name", "area_tot", "geometry"]],
    adm1[["adm1_name", "geometry"]],
)

# Keep best match per ADM2 polygon
best_adm1 = largest_overlap(inter21, "adm2_id")[
    ["adm2_id", "adm2_name", "adm1_name"]
]

# ───── 4. Merge All into ADM3 ─────────────────────────────────

# Resolve ADM3 -> ADM2 -> ADM1 on ids first (both sides are integer keys),
# then attach the resulting names to the ADM3 polygons.
lineage = best_adm2.merge(
    best_adm1[["adm2_id", "adm1_name"]], on="adm2_id", how="left"
)

adm3_enriched = (
    adm3.merge(lineage[["adm3_id", "adm2_name", "adm1_name"]], on="adm3_id", how="left")
        .assign(country="Ukraine")  # replace with actual name
        [["country", "adm1_name", "adm2_name", "adm3_name", "geometry"]]
)

# Each ADM3 polygon has at most one lineage row, so the left merge must not
# add or drop rows.
assert len(adm3_enriched) == len(adm3), "ADM3 rows were duplicated or lost during enrichment"

# ───── 5. Save or Inspect ─────────────────────────────────────

# adm3_enriched.to_file("adm3_enriched.geojson", driver="GeoJSON")

# Show every row except the last one (the notebook truncates the rendering).
adm3_enriched.iloc[:-1]

Unnamed: 0,country,adm1_name,adm2_name,adm3_name,geometry
0,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Tinystivska,"POLYGON ((3753702.669 5576974.276, 3753611.203..."
1,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Uiutnenska,"POLYGON ((3709480.121 5656443.356, 3709424.489..."
2,Ukraine,Autonomous Republic of Crimea,Lenine,Marfivska,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
3,Ukraine,Autonomous Republic of Crimea,Chornomorske,Medvedivska,"MULTIPOLYGON (((3846641.236 5756316.915, 38455..."
4,Ukraine,Autonomous Republic of Crimea,Pervomaiske,Oleksiivska,"POLYGON ((3757345.399 5713903.01, 3753549.516 ..."
...,...,...,...,...,...
10369,Ukraine,Sumy Oblast,Sumy,Verkhnosyrovatska,"POLYGON ((3902269.684 6587039.159, 3901821.623..."
10370,Ukraine,Kherson Oblast,Holo Prystan,Sadivska,"POLYGON ((3858972.191 6591772.333, 3859077.61 ..."
10371,Ukraine,Ivano-Frankivsk Oblast,Halych,Mezhyhoretska,"POLYGON ((2758698.932 6293899.485, 2758398.607..."
10372,Ukraine,Sumy Oblast,Sumy,Sumska,"POLYGON ((3881262.724 6611222.766, 3881246.096..."


In [10]:
# 1. Strip " Oblast" from adm1_name
# Literal (non-regex) removal of every " Oblast" occurrence in the label.
oblast_labels = adm3_enriched["adm1_name"]
adm3_enriched["adm1_name"] = oblast_labels.str.replace(" Oblast", "", regex=False)

# Show every row except the last one.
adm3_enriched.iloc[:-1]

Unnamed: 0,country,adm1_name,adm2_name,adm3_name,geometry
0,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Tinystivska,"POLYGON ((3753702.669 5576974.276, 3753611.203..."
1,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Uiutnenska,"POLYGON ((3709480.121 5656443.356, 3709424.489..."
2,Ukraine,Autonomous Republic of Crimea,Lenine,Marfivska,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
3,Ukraine,Autonomous Republic of Crimea,Chornomorske,Medvedivska,"MULTIPOLYGON (((3846641.236 5756316.915, 38455..."
4,Ukraine,Autonomous Republic of Crimea,Pervomaiske,Oleksiivska,"POLYGON ((3757345.399 5713903.01, 3753549.516 ..."
...,...,...,...,...,...
10369,Ukraine,Sumy,Sumy,Verkhnosyrovatska,"POLYGON ((3902269.684 6587039.159, 3901821.623..."
10370,Ukraine,Kherson,Holo Prystan,Sadivska,"POLYGON ((3858972.191 6591772.333, 3859077.61 ..."
10371,Ukraine,Ivano-Frankivsk,Halych,Mezhyhoretska,"POLYGON ((2758698.932 6293899.485, 2758398.607..."
10372,Ukraine,Sumy,Sumy,Sumska,"POLYGON ((3881262.724 6611222.766, 3881246.096..."


In [11]:
# 2. Rename columns
# Generic admin-level names -> the Ukrainian terms for each level.
level_names = {"adm1_name": "oblast", "adm2_name": "raion", "adm3_name": "hromada"}
adm3_enriched = adm3_enriched.rename(columns=level_names)

# 3. Optional: reorder columns
column_order = ["country", "oblast", "raion", "hromada", "geometry"]
adm3_enriched = adm3_enriched[column_order]

# Show every row except the last one.
adm3_enriched.iloc[:-1]

Unnamed: 0,country,oblast,raion,hromada,geometry
0,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Tinystivska,"POLYGON ((3753702.669 5576974.276, 3753611.203..."
1,Ukraine,Autonomous Republic of Crimea,Nakhimovskyi,Uiutnenska,"POLYGON ((3709480.121 5656443.356, 3709424.489..."
2,Ukraine,Autonomous Republic of Crimea,Lenine,Marfivska,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
3,Ukraine,Autonomous Republic of Crimea,Chornomorske,Medvedivska,"MULTIPOLYGON (((3846641.236 5756316.915, 38455..."
4,Ukraine,Autonomous Republic of Crimea,Pervomaiske,Oleksiivska,"POLYGON ((3757345.399 5713903.01, 3753549.516 ..."
...,...,...,...,...,...
10369,Ukraine,Sumy,Sumy,Verkhnosyrovatska,"POLYGON ((3902269.684 6587039.159, 3901821.623..."
10370,Ukraine,Kherson,Holo Prystan,Sadivska,"POLYGON ((3858972.191 6591772.333, 3859077.61 ..."
10371,Ukraine,Ivano-Frankivsk,Halych,Mezhyhoretska,"POLYGON ((2758698.932 6293899.485, 2758398.607..."
10372,Ukraine,Sumy,Sumy,Sumska,"POLYGON ((3881262.724 6611222.766, 3881246.096..."


In [12]:
# Optional: fix invalid geometries first using buffer(0).
# Missing geometries (None) are passed through untouched so this step does not
# crash on rows that the validation loop below is written to tolerate.
adm3_enriched["geometry"] = adm3_enriched["geometry"].apply(
    lambda g: g if g is None or g.is_valid else g.buffer(0)
)

# Validate and convert to WKT with checks
records = []
for country, oblast, raion, hromada, geom in zip(
    adm3_enriched["country"],
    adm3_enriched["oblast"],
    adm3_enriched["raion"],
    adm3_enriched["hromada"],
    adm3_enriched["geometry"],
):
    # Missing and empty geometries are both treated as "no geometry".
    has_geom = geom is not None and not geom.is_empty

    geometry_wkt = geom.wkt if has_geom else None
    is_valid_geom = geom.is_valid if has_geom else False

    # Simulate serialization & deserialization
    if geometry_wkt is None:
        # Nothing was serialized, so there is no WKT that could be valid;
        # keeps this flag consistent with is_valid_geom for such rows.
        is_valid_wkt = False
    else:
        try:
            wkt.loads(geometry_wkt)
            is_valid_wkt = True
        except Exception:
            is_valid_wkt = False
            print(f"❗ Invalid WKT geometry for: {hromada}")

    records.append({
        "country": country,
        "oblast": oblast,
        "raion": raion,
        "hromada": hromada,
        "geometry_wkt": geometry_wkt,
        "is_valid_geom": is_valid_geom,
        "is_valid_wkt": is_valid_wkt
    })

# Create final DataFrame for export
df_geo_check = pd.DataFrame(records)

# Save full output
df_geo_check.to_csv("UA-HROMADA-geometry-checked.csv", index=False)

# # Save invalid WKT cases only
# df_geo_check[~df_geo_check["is_valid_wkt"]].to_csv("UA-HROMADA-invalid-wkt.csv", index=False)

# Report only the file that was actually written; the invalid-only export
# above is disabled, so it must not be announced.
n_invalid_wkt = sum(not rec["is_valid_wkt"] for rec in records)
print(
    "✅ Export complete:\n"
    f"- UA-HROMADA-geometry-checked.csv ({len(records)} rows, {n_invalid_wkt} without valid WKT)"
)


✅ Export complete:
- UA-HROMADA-geometry-checked.csv
- UA-HROMADA-invalid-wkt.csv
