In [1]:
%pip install geopandas shapely rtree tqdm

Collecting rtree
  Downloading rtree-1.4.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading rtree-1.4.0-py3-none-win_amd64.whl (385 kB)
Installing collected packages: rtree
Successfully installed rtree-1.4.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
 %pip install --upgrade pip

Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 1.8/1.8 MB 19.4 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.0.1
    Uninstalling pip-25.0.1:
      Successfully uninstalled pip-25.0.1
Successfully installed pip-25.1.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import geopandas as gpd
import pandas as pd
import os
from shapely.geometry import Point
from tqdm import tqdm
from scipy.spatial import cKDTree
import numpy as np

# === CONFIG ===
FACILITY_FILE = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\master_dataset_final.csv"
SNAPPED_VILLAGE_DIR = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_villages_by_district"
OUTPUT_DIR = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# === Load facilities
facilities_df = pd.read_csv(FACILITY_FILE)
districts = sorted(facilities_df["Districtname"].dropna().str.lower().unique())

In [2]:
def snap_facilities_to_nearest_road(village_path, facility_df, district_name):
    # Load snapped village file for reference
    village_gdf = gpd.read_file(village_path)
    village_gdf = village_gdf.to_crs(epsg=4326)

    # Prepare KD-Tree for fast nearest neighbor lookup
    village_coords = np.array(list(zip(village_gdf.geometry.x, village_gdf.geometry.y)))
    tree = cKDTree(village_coords)

    # Filter facilities for this district
    df = facility_df[facility_df["Districtname"].str.lower() == district_name]
    df = df.dropna(subset=["longitude", "latitude", "NIN"])

    snapped_rows = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Snapping facilities in {district_name}"):
        point = (row["longitude"], row["latitude"])
        dist, idx = tree.query(point)
        snapped_point = village_coords[idx]
        geom = Point(snapped_point)

        row_data = row.to_dict()
        row_data["snapped_lon"] = snapped_point[0]
        row_data["snapped_lat"] = snapped_point[1]
        row_data["geometry"] = geom
        snapped_rows.append(row_data)

    snapped_gdf = gpd.GeoDataFrame(snapped_rows, geometry="geometry", crs="EPSG:4326")
    return snapped_gdf

In [3]:
for district in tqdm(districts, desc="🔁 Snapping All Districts"):
    village_file = os.path.join(SNAPPED_VILLAGE_DIR, f"{district}_villages_snapped.geojson")
    if not os.path.exists(village_file):
        print(f"❌ Missing: {district}")
        continue

    gdf = snap_facilities_to_nearest_road(village_file, facilities_df, district)
    out_path = os.path.join(OUTPUT_DIR, f"{district}_facilities_snapped.geojson")
    gdf.to_file(out_path, driver="GeoJSON")
    print(f"✅ Saved: {out_path}")

Snapping facilities in anugul: 100%|██████████| 210/210 [00:00<00:00, 1076.25it/s]
🔁 Snapping All Districts:   3%|▎         | 1/30 [00:01<00:43,  1.52s/it]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\anugul_facilities_snapped.geojson


Snapping facilities in balangir: 100%|██████████| 286/286 [00:00<00:00, 1866.03it/s]
🔁 Snapping All Districts:   7%|▋         | 2/30 [00:01<00:22,  1.23it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\balangir_facilities_snapped.geojson


Snapping facilities in baleshwar: 100%|██████████| 363/363 [00:00<00:00, 2213.71it/s]
🔁 Snapping All Districts:  10%|█         | 3/30 [00:02<00:17,  1.58it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\baleshwar_facilities_snapped.geojson


Snapping facilities in bargarh: 100%|██████████| 266/266 [00:00<00:00, 1088.44it/s]
🔁 Snapping All Districts:  13%|█▎        | 4/30 [00:02<00:14,  1.79it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\bargarh_facilities_snapped.geojson


Snapping facilities in bhadrak: 100%|██████████| 237/237 [00:00<00:00, 1025.28it/s]
🔁 Snapping All Districts:  17%|█▋        | 5/30 [00:03<00:12,  1.96it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\bhadrak_facilities_snapped.geojson


Snapping facilities in boudh: 100%|██████████| 85/85 [00:00<00:00, 1402.93it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\boudh_facilities_snapped.geojson


🔁 Snapping All Districts:  20%|██        | 6/30 [00:03<00:09,  2.47it/s]




Snapping facilities in cuttack: 100%|██████████| 412/412 [00:00<00:00, 2102.12it/s]
🔁 Snapping All Districts:  23%|██▎       | 7/30 [00:03<00:09,  2.51it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\cuttack_facilities_snapped.geojson


Snapping facilities in deogarh: 100%|██████████| 54/54 [00:00<00:00, 1236.75it/s]
🔁 Snapping All Districts:  27%|██▋       | 8/30 [00:03<00:07,  3.14it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\deogarh_facilities_snapped.geojson


Snapping facilities in dhenkanal: 100%|██████████| 212/212 [00:00<00:00, 1432.15it/s]
🔁 Snapping All Districts:  30%|███       | 9/30 [00:04<00:06,  3.14it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\dhenkanal_facilities_snapped.geojson


Snapping facilities in gajapati: 100%|██████████| 165/165 [00:00<00:00, 1054.65it/s]
🔁 Snapping All Districts:  33%|███▎      | 10/30 [00:04<00:06,  2.99it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\gajapati_facilities_snapped.geojson


Snapping facilities in ganjam: 100%|██████████| 583/583 [00:00<00:00, 1347.80it/s]
🔁 Snapping All Districts:  37%|███▋      | 11/30 [00:05<00:08,  2.17it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\ganjam_facilities_snapped.geojson


Snapping facilities in jagatsinghapur: 100%|██████████| 236/236 [00:00<00:00, 1618.40it/s]
🔁 Snapping All Districts:  40%|████      | 12/30 [00:05<00:07,  2.44it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\jagatsinghapur_facilities_snapped.geojson


Snapping facilities in jajapur: 100%|██████████| 329/329 [00:00<00:00, 891.33it/s]
🔁 Snapping All Districts:  43%|████▎     | 13/30 [00:06<00:07,  2.19it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\jajapur_facilities_snapped.geojson


Snapping facilities in jharsuguda: 100%|██████████| 89/89 [00:00<00:00, 467.30it/s]
🔁 Snapping All Districts:  47%|████▋     | 14/30 [00:06<00:06,  2.33it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\jharsuguda_facilities_snapped.geojson


Snapping facilities in kalahandi: 100%|██████████| 303/303 [00:00<00:00, 1077.38it/s]
🔁 Snapping All Districts:  50%|█████     | 15/30 [00:07<00:07,  2.06it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\kalahandi_facilities_snapped.geojson


Snapping facilities in kandhamal: 100%|██████████| 224/224 [00:00<00:00, 1036.96it/s]
🔁 Snapping All Districts:  53%|█████▎    | 16/30 [00:07<00:06,  2.11it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\kandhamal_facilities_snapped.geojson


Snapping facilities in kendrapara: 100%|██████████| 282/282 [00:00<00:00, 1083.86it/s]
🔁 Snapping All Districts:  57%|█████▋    | 17/30 [00:08<00:06,  2.15it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\kendrapara_facilities_snapped.geojson


Snapping facilities in kendujhar: 100%|██████████| 431/431 [00:00<00:00, 934.06it/s] 
🔁 Snapping All Districts:  60%|██████    | 18/30 [00:08<00:06,  1.84it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\kendujhar_facilities_snapped.geojson


Snapping facilities in khordha: 100%|██████████| 275/275 [00:00<00:00, 971.79it/s]
🔁 Snapping All Districts:  63%|██████▎   | 19/30 [00:09<00:05,  1.87it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\khordha_facilities_snapped.geojson


Snapping facilities in koraput: 100%|██████████| 370/370 [00:00<00:00, 1324.13it/s]
🔁 Snapping All Districts:  67%|██████▋   | 20/30 [00:09<00:05,  1.89it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\koraput_facilities_snapped.geojson


Snapping facilities in malkangiri: 100%|██████████| 191/191 [00:00<00:00, 950.65it/s]
🔁 Snapping All Districts:  70%|███████   | 21/30 [00:10<00:04,  2.11it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\malkangiri_facilities_snapped.geojson


Snapping facilities in mayurbhanj: 100%|██████████| 703/703 [00:00<00:00, 1222.85it/s]
🔁 Snapping All Districts:  73%|███████▎  | 22/30 [00:11<00:04,  1.67it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\mayurbhanj_facilities_snapped.geojson


Snapping facilities in nabarangpur: 100%|██████████| 340/340 [00:00<00:00, 1917.51it/s]
🔁 Snapping All Districts:  77%|███████▋  | 23/30 [00:11<00:03,  1.95it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\nabarangpur_facilities_snapped.geojson


Snapping facilities in nayagarh: 100%|██████████| 216/216 [00:00<00:00, 1465.79it/s]
🔁 Snapping All Districts:  80%|████████  | 24/30 [00:11<00:02,  2.24it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\nayagarh_facilities_snapped.geojson


Snapping facilities in nuapada: 100%|██████████| 119/119 [00:00<00:00, 965.35it/s] 
🔁 Snapping All Districts:  83%|████████▎ | 25/30 [00:11<00:01,  2.57it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\nuapada_facilities_snapped.geojson


Snapping facilities in puri: 100%|██████████| 304/304 [00:00<00:00, 1878.23it/s]
🔁 Snapping All Districts:  87%|████████▋ | 26/30 [00:12<00:01,  2.64it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\puri_facilities_snapped.geojson


Snapping facilities in rayagada: 100%|██████████| 284/284 [00:00<00:00, 1446.42it/s]
🔁 Snapping All Districts:  90%|█████████ | 27/30 [00:12<00:01,  2.53it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\rayagada_facilities_snapped.geojson


Snapping facilities in sambalpur: 100%|██████████| 212/212 [00:00<00:00, 1201.74it/s]
🔁 Snapping All Districts:  93%|█████████▎| 28/30 [00:12<00:00,  2.73it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\sambalpur_facilities_snapped.geojson


Snapping facilities in sonepur: 100%|██████████| 114/114 [00:00<00:00, 1401.10it/s]


✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\sonepur_facilities_snapped.geojson

🔁 Snapping All Districts:  97%|█████████▋| 29/30 [00:13<00:00,  3.14it/s]




Snapping facilities in sundargarh: 100%|██████████| 471/471 [00:00<00:00, 2953.57it/s]
🔁 Snapping All Districts: 100%|██████████| 30/30 [00:13<00:00,  2.22it/s]

✅ Saved: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\sundargarh_facilities_snapped.geojson





In [4]:
import geopandas as gpd
import pandas as pd

# 📂 Path to the Anugul snapped facilities file
file_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data\snapped_facilities_by_district\anugul_facilities_snapped.geojson"

# ✅ Load the GeoJSON
gdf = gpd.read_file(file_path)

# 📋 Summary checks
print("✅ Total snapped facilities:", len(gdf))

# 🔍 Check for missing or duplicate NINs
missing_nin = gdf['NIN'].isnull().sum()
duplicate_nin = gdf[gdf.duplicated(subset='NIN', keep=False)]

print("❌ Missing NINs:", missing_nin)
print("⚠️ Duplicate NINs:", duplicate_nin['NIN'].nunique())

# 📍 Validate geometry
invalid_geom = gdf[~gdf.geometry.is_valid | gdf.geometry.is_empty]
print("❌ Invalid geometries:", len(invalid_geom))

# 📑 Quick preview
print("\n📌 Sample entries:")
print(gdf[['NIN', 'Facilityname', 'Districtname', 'facility_type_standardized', 'geometry']].head())

✅ Total snapped facilities: 210
❌ Missing NINs: 0
⚠️ Duplicate NINs: 1
❌ Invalid geometries: 0

📌 Sample entries:
          NIN     Facilityname Districtname facility_type_standardized  \
0  6728864528          Bantala       Anugul                        chc   
1  7324817373         MADHAPUR       Anugul                        chc   
2  7352515139         BANARPAL       Anugul                        chc   
3  5687542687  RAJKISHORENAGAR       Anugul                        chc   
4  8282723678           Kosala       Anugul                        chc   

                    geometry  
0  POINT (85.07392 20.73864)  
1  POINT (84.42443 20.78356)  
2  POINT (85.21591 20.84181)  
3   POINT (84.4844 20.95473)  
4   POINT (84.9333 21.01649)  
