In [16]:
import geopandas as gpd
from shapely.geometry import Point      # Shapely ≥ 1.8 or 2.0 both fine
import pandas as pd
from shapely.ops import unary_union

In [18]:
# ───── 0.  Load & pre-clean  ─────────────────────────────────────

adm1 = (
    gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/geoBoundaries-UKR-ADM1.geojson")
       .to_crs(3857)
       .rename(columns={"shapeName": "oblast_name"})
       [["oblast_name", "shapeISO", "shapeID", "geometry"]]
       .assign(oblast_id=lambda df: df["shapeID"].str[-2:])   # e.g. UKR-01
)

adm2 = (
    gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/geoBoundaries-UKR-ADM2.geojson")
       .to_crs(3857)
       .rename(columns={"shapeName": "raion_name"})
       [["raion_name", "shapeISO", "shapeID", "geometry"]]
       .assign(raion_id=lambda df: df["shapeID"].str[-4:])    # e.g. 0101
)

adm3 = (
    gpd.read_file("/Users/ekowaddai/Documents/GitHub/Personal/Geojson/geoBoundaries-UKR-ADM3.geojson")
       .to_crs(3857)
       .rename(columns={"shapeName": "hromada_name"})
       [["hromada_name", "shapeISO", "shapeID", "geometry"]]
       .assign(hromada_id=lambda df: df["shapeID"].str[-6:])  # e.g. 010101
)

# optional: fix invalid multipolygons quickly
for gdf in (adm1, adm2, adm3):
    gdf["geometry"] = gdf["geometry"].buffer(0)

display(adm1.head(5), adm2.head(5), adm3.head(5))

Unnamed: 0,oblast_name,shapeISO,shapeID,geometry,oblast_id
0,Kherson Oblast,UA-65,14850775B65901307765467,"POLYGON ((3922166.418 5747036.375, 3893408.452...",67
1,Volyn Oblast,UA-07,14850775B13681962240800,"POLYGON ((2795539.555 6496175.226, 2795145.017...",0
2,Rivne Oblast,UA-56,14850775B83802928232754,"POLYGON ((2795539.555 6496175.226, 2795396.31 ...",54
3,Zhytomyr Oblast,UA-18,14850775B79197734087513,"POLYGON ((3027438.983 6544220.848, 3027504.928...",13
4,Kyiv Oblast,UA-32,14850775B37539954297462,"MULTIPOLYGON (((3284533.996 6385755.09, 328550...",62


Unnamed: 0,raion_name,shapeISO,shapeID,geometry,raion_id
0,Bakhchysarai,,74538382B84610439401970,"POLYGON ((3779775.96 5544069.488, 3776591.314 ...",1970
1,Simferopol,,74538382B84040377374615,"POLYGON ((3806857.192 5576742.122, 3805651.203...",4615
2,Bilohirsk,,74538382B31597126471541,"POLYGON ((3853022.803 5600995.024, 3843070.786...",1541
3,Kirovske,,74538382B85800934600856,"POLYGON ((3883276.934 5623685.622, 3883675.014...",856
4,Lenine,,74538382B91806639169097,"POLYGON ((3938037.612 5646035.008, 3939543.885...",9097


Unnamed: 0,hromada_name,shapeISO,shapeID,geometry,hromada_id
0,Tinystivska,,10664576B81501417156730,"POLYGON ((3753702.669 5576974.276, 3753611.203...",156730
1,Uiutnenska,,10664576B47223079839563,"POLYGON ((3709480.121 5656443.356, 3709424.489...",839563
2,Marfivska,,10664576B49290266382661,"POLYGON ((4026057.96 5650496.962, 4021900.956 ...",382661
3,Medvedivska,,10664576B91614316888436,"MULTIPOLYGON (((3846641.236 5756316.915, 38455...",888436
4,Oleksiivska,,10664576B29409252235488,"POLYGON ((3757345.399 5713903.01, 3753549.516 ...",235488


In [19]:
# ───── 1.  Intersect ADM-3 ↔ ADM-2 and keep area %  ────────────

inter = gpd.overlay(
    adm3[["hromada_id", "hromada_name", "geometry"]],
    adm2[["raion_id", "raion_name", "geometry"]],
    how="intersection",
).rename(columns={"geometry": "geom_int"})

inter["area_int"] = inter.geom_int.area

# total area per hromada (needed for %)
tot_area = (
    adm3[["hromada_id", "geometry"]]
      .assign(area_tot=lambda df: df.geometry.area)
)

inter = inter.merge(tot_area, on="hromada_id")
inter["area_pct"] = inter["area_int"] / inter["area_tot"]

In [20]:
# ───── 2.  Pick the raion with the *largest* share  ────────────

best_raion = (
    inter.sort_values(["hromada_id", "area_pct"], ascending=[True, False])
         .drop_duplicates("hromada_id")
         [["hromada_id", "raion_id", "raion_name"]]
)

In [21]:
# ───── 3.  Attach oblast the same way (raion already knows it) ─

best_raion = best_raion.merge(
    adm2[["raion_id", "shapeISO"]].rename(columns={"shapeISO": "raion_iso"}),
    on="raion_id",
    how="left",
)

# map raion_iso → oblast by its first 4 chars (UA-__).  If your ADM-2
# layer already stores the parent’s shapeID or ISO, use that instead.
best_raion["oblast_iso"] = best_raion["raion_iso"].str[:-2]

best_raion = best_raion.merge(
    adm1[["oblast_name", "shapeISO"]],
    left_on="oblast_iso",
    right_on="shapeISO",
    how="left",
).drop(columns=["raion_iso", "shapeISO"])

In [24]:
# ───── 4.  Glue everything back on ADM-3  ───────────────────────

adm3_enriched = (
    adm3.merge(best_raion, on="hromada_id", how="left")
        [["hromada_id", "hromada_name", "raion_name", "oblast_name",
          "geometry"]]
)

adm3_enriched.head(-5)

Unnamed: 0,hromada_id,hromada_name,raion_name,oblast_name,geometry
0,156730,Tinystivska,Nakhimovskyi,,"POLYGON ((3753702.669 5576974.276, 3753611.203..."
1,839563,Uiutnenska,Nakhimovskyi,,"POLYGON ((3709480.121 5656443.356, 3709424.489..."
2,382661,Marfivska,Lenine,,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
3,382661,Marfivska,Lenine,,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
4,382661,Marfivska,Lenine,,"POLYGON ((4026057.96 5650496.962, 4021900.956 ..."
...,...,...,...,...,...
11082,557379,Khmilnytska,Khmilnyk,,"POLYGON ((3115515.99 6370431.816, 3115576.873 ..."
11083,247839,Velykochernechchynska,Sumy,,"POLYGON ((3898225.336 6620307.629, 3898003.142..."
11084,397969,Pishchanska,Sumy,,"POLYGON ((3862415.652 6613505.455, 3862520.281..."
11085,085821,Verkhnosyrovatska,Sumy,,"POLYGON ((3902269.684 6587039.159, 3901821.623..."


In [None]:

# ───── 5.  (Optional) flag hromadas that straddle >1 raion  ─────

crossers = (
    inter.groupby("hromada_id")
         .size()
         .reset_index(name="raion_pieces")
)
adm3_enriched = adm3_enriched.merge(crossers, on="hromada_id")
# raion_pieces > 1  → multipolygon spans several raions

In [None]:
# ───── 6.  Duplicate-name audit exactly as before  ──────────────

dupes = (
    adm3_enriched.groupby("hromada_name")
                 .size()
                 .reset_index(name="GeometryCount")
                 .query("GeometryCount > 1")
                 .sort_values("GeometryCount", ascending=False)
)

# adm3_enriched.to_file("UA_adm3_enriched.gpkg", layer="adm3", driver="GPKG")