In [2]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
from sklearn.neighbors import BallTree
import requests
from tqdm import tqdm

# -------- PATH CONFIG -------- #
DATA_ROOT = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Final_version\Data"
FAC_FOLDER = os.path.join(DATA_ROOT, "snapped_facilities_by_district")
VILLAGE_FOLDER = os.path.join(DATA_ROOT, "snapped_villages_by_district")
OUTPUT_DIR = os.path.join(DATA_ROOT.replace("Data", "routing_dh"))
OSRM_URL = "http://localhost:5000/table/v1/driving"
BYPASS_RATIO = 1.2
FUNC_FLAG = "is_functional"
K = 2
BATCH_SIZE = 100

os.makedirs(OUTPUT_DIR, exist_ok=True)

def build_tree(df):
    coords = np.radians(df[["latitude", "longitude"]].values)
    return BallTree(coords, metric="haversine"), df.reset_index(drop=True)

def osrm_table(origins, destinations):
    if not origins or not destinations:
        return None, None
    all_coords = origins + destinations
    src_idx = ";".join(str(i) for i in range(len(origins)))
    dst_idx = ";".join(str(len(origins)+i) for i in range(len(destinations)))
    coord_str = ";".join([f"{lon},{lat}" for lon,lat in all_coords])
    # ---- FIX: Add annotations=duration,distance ----
    url = f"{OSRM_URL}/{coord_str}?sources={src_idx}&destinations={dst_idx}&annotations=duration,distance"
    try:
        r = requests.get(url)
        j = r.json()
        if "durations" not in j:
            return None, None
        durations = np.array(j["durations"]) / 60.0
        distances = np.array(j["distances"]) / 1000.0 if "distances" in j else np.zeros_like(j["durations"])
        return durations, distances
    except Exception as e:
        print(f"OSRM Table API error: {e}")
        return None, None

for fn in tqdm(os.listdir(VILLAGE_FOLDER), desc="Districts"):
    if not fn.endswith("_villages_snapped.geojson"):
        continue
    district = fn.split("_")[0]
    v_path = os.path.join(VILLAGE_FOLDER, fn)
    f_path = os.path.join(FAC_FOLDER, f"{district}_facilities_snapped.geojson")
    if not os.path.exists(f_path):
        print(f"❌ Skipping {district}: facilities file not found.")
        continue

    # --- Load villages and facilities ---
    villages = gpd.read_file(v_path)
    facilities = gpd.read_file(f_path)
    if villages.empty or facilities.empty:
        continue

    villages["longitude"] = villages.geometry.x
    villages["latitude"] = villages.geometry.y
    facilities["longitude"] = facilities.geometry.x if "geometry" in facilities else facilities["snapped_lon"]
    facilities["latitude"] = facilities.geometry.y if "geometry" in facilities else facilities["snapped_lat"]

    # Use correct LGD code column: always string and integer, no decimals
    if "lgd_villagecode" in villages.columns:
        villages['lgd_villagecode'] = villages['lgd_villagecode'].astype(float).astype(int).astype(str)
        village_id_col = 'lgd_villagecode'
    else:
        raise ValueError("No lgd_villagecode column found in villages file!")

    if "NIN" not in facilities.columns:
        raise ValueError("No NIN column found in facilities file!")
    facilities["NIN"] = facilities["NIN"].astype(str)

    facilities['ftype_lower'] = facilities['facility_type_standardized'].str.lower().str.strip()
    dh_like_types = ['dh', 'sdh', 'district hospital', 'sub district hospital']
    dh_df = facilities[facilities['ftype_lower'].isin(dh_like_types)].copy()
    phc_df = facilities[facilities['ftype_lower'].str.contains("phc")].copy()
    chc_df = facilities[facilities['ftype_lower'].str.contains("chc")].copy()

    phc_tree, phc_df = build_tree(phc_df) if not phc_df.empty else (None, phc_df)
    chc_tree, chc_df = build_tree(chc_df) if not chc_df.empty else (None, chc_df)
    dh_tree, dh_df = build_tree(dh_df) if not dh_df.empty else (None, dh_df)

    v_lats = villages.latitude.values
    v_lons = villages.longitude.values
    v_coords = np.vstack([v_lats, v_lons]).T
    v_rad = np.radians(v_coords)

    if phc_tree:
        phc_dists, phc_idxs = phc_tree.query(v_rad, k=min(K, len(phc_df)))
    else:
        phc_idxs = np.full((len(villages), 1), -1)
    if chc_tree:
        chc_dists, chc_idxs = chc_tree.query(v_rad, k=min(K, len(chc_df)))
    else:
        chc_idxs = np.full((len(villages), 1), -1)
    if dh_tree:
        dh_dists, dh_idxs = dh_tree.query(v_rad, k=min(K, len(dh_df)))
    else:
        dh_idxs = np.full((len(villages), 1), -1)

    candidate_phcs = [[phc_df.iloc[idx] if idx != -1 else None for idx in row] for row in phc_idxs]
    candidate_chcs = [[chc_df.iloc[idx] if idx != -1 else None for idx in row] for row in chc_idxs]
    candidate_dhs = [[dh_df.iloc[idx] if idx != -1 else None for idx in row] for row in dh_idxs]

    n = len(villages)
    rows = []
    for batch_start in range(0, n, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE, n)
        b_idx = range(batch_start, batch_end)
        batch_v = villages.iloc[list(b_idx)]
        origins = [(row.longitude, row.latitude) for row in batch_v.itertuples()]

        def batch_cands(cands):
            uniq = []
            m = []
            for i in b_idx:
                c = cands[i][0]
                if c is None:
                    m.append(-1)
                    continue
                tup = (c.longitude, c.latitude)
                if tup not in uniq:
                    uniq.append(tup)
                m.append(uniq.index(tup))
            return uniq, m

        batch_phcs, batch_phc_map = batch_cands(candidate_phcs)
        batch_chcs, batch_chc_map = batch_cands(candidate_chcs)
        batch_dhs, batch_dh_map = batch_cands(candidate_dhs)

        t_phc_mat, d_phc_mat = osrm_table(origins, batch_phcs) if batch_phcs else (None, None)
        t_chc_mat, d_chc_mat = osrm_table(origins, batch_chcs) if batch_chcs else (None, None)
        t_dh_mat, d_dh_mat = osrm_table(origins, batch_dhs) if batch_dhs else (None, None)

        for j, v in enumerate(batch_v.itertuples()):
            i = batch_start + j
            phc_cand = candidate_phcs[i][0] if candidate_phcs[i][0] is not None else None
            chc_cand = candidate_chcs[i][0] if candidate_chcs[i][0] is not None else None
            dh_cand = candidate_dhs[i][0] if candidate_dhs[i][0] is not None else None

            t_phc = t_phc_mat[j, batch_phc_map[j]] if t_phc_mat is not None and batch_phc_map[j] != -1 else None
            t_chc = t_chc_mat[j, batch_chc_map[j]] if t_chc_mat is not None and batch_chc_map[j] != -1 else None
            t_dh = t_dh_mat[j, batch_dh_map[j]] if t_dh_mat is not None and batch_dh_map[j] != -1 else None
            d_phc = d_phc_mat[j, batch_phc_map[j]] if d_phc_mat is not None and batch_phc_map[j] != -1 else None
            d_chc = d_chc_mat[j, batch_chc_map[j]] if d_chc_mat is not None and batch_chc_map[j] != -1 else None
            d_dh = d_dh_mat[j, batch_dh_map[j]] if d_dh_mat is not None and batch_dh_map[j] != -1 else None

            times = {"PHC": t_phc if t_phc is not None else np.inf,
                     "CHC": t_chc if t_chc is not None else np.inf,
                     "DH/SDH": t_dh if t_dh is not None else np.inf}
            closest_type = min(times, key=times.get)

            phc_func = phc_cand[FUNC_FLAG] if phc_cand is not None and FUNC_FLAG in phc_cand else True
            chc_func = chc_cand[FUNC_FLAG] if chc_cand is not None and FUNC_FLAG in chc_cand else True

            if (t_dh is not None and
                (
                 (t_phc is None or not phc_func or t_dh <= t_phc * BYPASS_RATIO) and
                 (t_chc is None or not chc_func or t_dh <= t_chc * BYPASS_RATIO)
                )):
                first_stop = "Village→DH/SDH"
                final_dh = dh_cand.NIN if dh_cand is not None else None
                comment = "Direct to DH/SDH"
            elif t_phc is not None and t_phc <= (t_chc or np.inf) and phc_func:
                first_stop = phc_cand.NIN if phc_cand is not None else None
                final_dh = dh_cand.NIN if dh_cand is not None else None
                comment = "PHC is closer and functional"
            elif t_chc is not None and chc_func:
                first_stop = chc_cand.NIN if chc_cand is not None else None
                final_dh = dh_cand.NIN if dh_cand is not None else None
                comment = "CHC is closer and functional"
            else:
                first_stop = "Village→DH/SDH"
                final_dh = dh_cand.NIN if dh_cand is not None else None
                comment = "Fallback to DH/SDH"

            rows.append({
                "village_id": v.lgd_villagecode,
                "first_stop": first_stop,
                "assigned_DH_SDH": final_dh,
                "closest_facility_type": closest_type,
                "t_phc_min": t_phc,
                "t_chc_min": t_chc,
                "t_dh_sdh_min": t_dh,
                "d_phc_km": d_phc,
                "d_chc_km": d_chc,
                "d_dh_sdh_km": d_dh,
                "phc_is_functional": phc_func,
                "chc_is_functional": chc_func,
                "comment": comment
            })

    out_df = pd.DataFrame(rows)
    out_path = os.path.join(OUTPUT_DIR, f"{district}_village_to_dh_sdh.csv")
    out_df.to_csv(out_path, index=False)
    print(f"✅ {district} → saved {len(out_df)} records")

Districts:   3%|▎         | 1/30 [00:04<02:05,  4.32s/it]

✅ anugul → saved 1889 records


Districts:   7%|▋         | 2/30 [00:08<01:56,  4.16s/it]

✅ balangir → saved 1792 records


Districts:  10%|█         | 3/30 [00:14<02:22,  5.27s/it]

✅ baleshwar → saved 2936 records


Districts:  13%|█▎        | 4/30 [00:18<01:58,  4.54s/it]

✅ bargarh → saved 1214 records


Districts:  17%|█▋        | 5/30 [00:21<01:42,  4.12s/it]

✅ bhadrak → saved 1321 records


Districts:  20%|██        | 6/30 [00:24<01:26,  3.62s/it]

✅ boudh → saved 1188 records


Districts:  23%|██▎       | 7/30 [00:29<01:37,  4.26s/it]

✅ cuttack → saved 1961 records


Districts:  27%|██▋       | 8/30 [00:31<01:15,  3.43s/it]

✅ deogarh → saved 879 records


Districts:  30%|███       | 9/30 [00:34<01:07,  3.19s/it]

✅ dhenkanal → saved 1213 records
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Ta

Districts:  33%|███▎      | 10/30 [00:38<01:07,  3.38s/it]

OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
✅ gajapati → saved 1618 records
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Tab

Districts:  37%|███▋      | 11/30 [00:47<01:39,  5.22s/it]

✅ ganjam → saved 3233 records


Districts:  40%|████      | 12/30 [00:53<01:36,  5.34s/it]

✅ jagatsinghapur → saved 1296 records


Districts:  43%|████▎     | 13/30 [00:56<01:19,  4.67s/it]

✅ jajapur → saved 1794 records


Districts:  47%|████▋     | 14/30 [00:56<00:55,  3.47s/it]

✅ jharsuguda → saved 356 records


Districts:  50%|█████     | 15/30 [01:00<00:53,  3.59s/it]

✅ kalahandi → saved 2258 records


Districts:  53%|█████▎    | 16/30 [01:06<01:01,  4.37s/it]

✅ kandhamal → saved 2591 records


Districts:  57%|█████▋    | 17/30 [01:12<01:02,  4.80s/it]

✅ kendrapara → saved 1549 records


Districts:  60%|██████    | 18/30 [01:16<00:55,  4.60s/it]

✅ kendujhar → saved 2132 records


Districts:  63%|██████▎   | 19/30 [01:20<00:46,  4.25s/it]

✅ khordha → saved 1562 records


Districts:  67%|██████▋   | 20/30 [01:24<00:41,  4.19s/it]

✅ koraput → saved 2030 records


Districts:  70%|███████   | 21/30 [01:26<00:32,  3.56s/it]

✅ malkangiri → saved 973 records


Districts:  73%|███████▎  | 22/30 [01:34<00:39,  4.92s/it]

✅ mayurbhanj → saved 3970 records


Districts:  77%|███████▋  | 23/30 [01:36<00:28,  4.04s/it]

✅ nabarangpur → saved 892 records


Districts:  80%|████████  | 24/30 [01:40<00:23,  3.95s/it]

✅ nayagarh → saved 1701 records


Districts:  83%|████████▎ | 25/30 [01:41<00:16,  3.25s/it]

✅ nuapada → saved 670 records
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'


Districts:  87%|████████▋ | 26/30 [01:48<00:17,  4.29s/it]

✅ puri → saved 1713 records
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'
OSRM Table API error: unsupported operand type(s) for /: 'NoneType' and 'float'


Districts:  90%|█████████ | 27/30 [01:52<00:12,  4.30s/it]

✅ rayagada → saved 2673 records


Districts:  93%|█████████▎| 28/30 [01:55<00:07,  3.81s/it]

✅ sambalpur → saved 1318 records


Districts:  97%|█████████▋| 29/30 [01:57<00:03,  3.19s/it]

✅ sonepur → saved 966 records


Districts: 100%|██████████| 30/30 [02:00<00:00,  4.02s/it]

✅ sundargarh → saved 1776 records





In [1]:
# -----------------------------------------------------------------------------
# Odisha Village-to-Facility Road-based Routing: Step-by-Step Process
# -----------------------------------------------------------------------------
# 1. Data Preparation
#    - Loaded snapped village centroids (GeoJSON) and snapped health facility points (GeoJSON) for each district.
#    - Ensured consistent, unique identifiers:
#        - Villages: 'lgd_villagecode' (converted to string, no decimals)
#        - Facilities: 'NIN' (converted to string)
#    - Extracted latitude and longitude from geometry for all points.
#
# 2. Facility Type Filtering
#    - Filtered health facilities by standardized type (PHC, CHC, DH, SDH).
#    - Created separate datasets for each tier:
#        - PHC: Includes all Primary Health Centres.
#        - CHC: Includes all Community Health Centres.
#        - DH/SDH: Includes District Hospitals and Sub-District Hospitals.
#
# 3. Nearest Neighbor Shortlisting (BallTree)
#    - For each village, shortlisted up to K=2 nearest PHCs, CHCs, and DH/SDHs using BallTree (haversine distance).
#    - This reduces the number of road-based route queries and ensures computational efficiency.
#
# 4. Road-based Routing (OSRM Table API)
#    - Batched villages (default 100 per batch) and calculated real travel time and distance from each village to its nearest PHC, CHC, DH/SDH.
#    - Used OSRM Table API with 'annotations=duration,distance' for road-based results.
#    - Handled missing routes with error checks; records with no valid route are marked as NaN.
#
# 5. Facility Assignment and Routing Logic
#    - For each village, compared road-based travel time to PHC, CHC, and DH/SDH.
#    - Assignment logic:
#         a) If DH/SDH is not much further than PHC/CHC (<=1.2x) or if PHC/CHC are non-functional, assign village directly to DH/SDH.
#         b) If PHC is closer and functional, assign PHC as first stop and DH/SDH as final referral.
#         c) If CHC is closer and functional, assign CHC as first stop and DH/SDH as final referral.
#         d) Fallback: assign DH/SDH if all else fails.
#    - Output all logic, facility assignments, distances, and times in the final CSV.
#
# 6. Output and Validation
#    - Saved a CSV for each district in 'Final_version/routing_dh/', containing:
#        - Village and facility IDs, all distance/time results, assigned facilities, closest tier, and logic comments.
#    - Outputs are suitable for policy analysis, dashboarding, and research.
#
# 7. Error Handling and Quality Assurance
#    - OSRM errors (e.g., disconnected roads) are logged and non-disruptive.
#    - The pipeline is modular and can be rerun for new data, additional facility types, or routing profiles (e.g., foot, bicycle).
#
# -----------------------------------------------------------------------------
# This workflow enables a scalable, reproducible, and policy-grade assessment of
# healthcare accessibility and referral routing across all villages in Odisha.
# -----------------------------------------------------------------------------
