In [16]:
# ✅ Install required packages (run only once if needed)
# !pip install geopandas requests

import geopandas as gpd
import pandas as pd
import os
import requests
import matplotlib.pyplot as plt

# 🗂️ Define file paths
OSM_GEOJSON_PATH = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Raw\hotosm_ind_health_facilities_points_geojson.geojson"
GOV_CSV_PATH = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Raw\geocode_health_centre.csv"
ESRI_URL = "https://livingatlas.esri.in/server1/rest/services/Health/IN_HealthcareFacility/MapServer/0/query"

# 📁 Output directory
OUTPUT_DIR = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed"
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("✅ Environment ready with fallback method (no arcgis package)")

✅ Environment ready with fallback method (no arcgis package)


In [None]:
from shapely.geometry import Point
import geopandas as gpd
import requests
import pandas as pd

def fetch_and_parse_shape_field(base_url, max_records=1000):
    offset = 0
    all_rows = []

    while True:
        params = {
            'where': '1=1',
            'outFields': '*',
            'f': 'json',
            'returnGeometry': 'true',
            'resultOffset': offset,
            'resultRecordCount': max_records
        }

        print(f"📥 Fetching records {offset} to {offset + max_records}...")
        res = requests.get(base_url, params=params)
        res.raise_for_status()
        data = res.json()

        features = data.get("features", [])
        if not features:
            break

        for feat in features:
            attr = feat.get("attributes", {})
            geom = feat.get("geometry", {})

            if geom and 'x' in geom and 'y' in geom:
                point = Point(geom['x'], geom['y'])
                attr['geometry'] = point
                all_rows.append(attr)

        offset += max_records

    print(f"✅ Parsed {len(all_rows)} records with valid x/y geometry")

    # Create DataFrame and then GeoDataFrame
    df = pd.DataFrame(all_rows)
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')
    return gdf

# 🔄 Fetch and convert
esri_gdf = fetch_and_parse_shape_field(ESRI_URL)
print("🌐 Extracted with geometry:", len(esri_gdf))

# 🧽 Filter Odisha
esri_gdf['state'] = esri_gdf['state'].astype(str).str.strip().str.lower()
odisha_gdf = esri_gdf[esri_gdf['state'] == 'odisha'].copy()
print("✅ Odisha records:", len(odisha_gdf))

# 💾 Save clean file
raw_esri_path = os.path.join(OUTPUT_DIR, "odisha_health_facilities_esri.geojson")
odisha_gdf.to_file(raw_esri_path, driver="GeoJSON")
print(f"💾 Saved Odisha facilities to: {raw_esri_path}")

In [27]:
# ✅ Cell: Process and save Government CSV for Odisha only

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# 📄 Path
gov_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Raw\geocode_health_centre.csv"

# 📥 Load
gov_df = pd.read_csv(gov_path, dtype=str)
gov_df.columns = gov_df.columns.str.lower().str.strip()

# ✅ Rename key columns
gov_df = gov_df.rename(columns={
    'state name': 'state',
    'district name': 'district',
    'facility type': 'facilitytype',
    'facility name': 'facilityname'
})

# 🧽 Convert coordinates
gov_df['latitude'] = pd.to_numeric(gov_df['latitude'], errors='coerce')
gov_df['longitude'] = pd.to_numeric(gov_df['longitude'], errors='coerce')
gov_df = gov_df.dropna(subset=['latitude', 'longitude'])

# 🎯 Filter for Odisha
gov_df['state'] = gov_df['state'].str.strip().str.lower()
gov_odisha = gov_df[gov_df['state'] == 'odisha'].copy()

# 🌍 Create GeoDataFrame
gov_odisha['geometry'] = gov_odisha.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)
gov_gdf = gpd.GeoDataFrame(gov_odisha, geometry='geometry', crs='EPSG:4326')

# 📊 Summary
print("✅ Odisha records with geometry:", len(gov_gdf))
print("🏥 Facility types:", gov_gdf['facilitytype'].dropna().unique())
print("📍 Districts covered:", gov_gdf['district'].str.strip().str.lower().nunique())

# 💾 Save clean output
output_path = os.path.join(OUTPUT_DIR, "odisha_health_facilities_gov.csv")
gov_gdf.to_csv(output_path, index=False)
print(f"💾 Saved to: {output_path}")

✅ Odisha records with geometry: 8432
🏥 Facility types: ['chc' 'dis_h' 'phc' 's_t_h' 'sub_cen']
📍 Districts covered: 30
💾 Saved to: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov.csv


In [28]:
# ✅ Cell: Load and inspect HOTOSM OSM health facilities GeoJSON

import geopandas as gpd

# 📄 File path
osm_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Raw\hotosm_ind_health_facilities_points_geojson.geojson"

# 📥 Load dataset
osm_gdf = gpd.read_file(osm_path)

# 🧾 Show all columns
print("📌 Columns:")
print(list(osm_gdf.columns))

# 🔍 Display sample
print("\n🔍 Sample records:")
display(osm_gdf.head(5))

# 🧮 Value counts of key tags
print("\n📊 Non-null counts for key attributes:")
for col in ['name', 'amenity', 'healthcare', 'healthcare_facility', 'operator', 'addr:district', 'state']:
    if col in osm_gdf.columns:
        print(f"{col}: {osm_gdf[col].notnull().sum()}")

# 📍 Check geometry
print("\n✅ Total records:", len(osm_gdf))
print("🧭 Geometry valid:", osm_gdf.geometry.notnull().sum())

📌 Columns:
['name', 'name:en', 'amenity', 'building', 'healthcare', 'healthcare:speciality', 'operator:type', 'capacity:persons', 'addr:full', 'addr:city', 'source', 'name:hi', 'name:ta', 'osm_id', 'osm_type', 'geometry']

🔍 Sample records:


Unnamed: 0,name,name:en,amenity,building,healthcare,healthcare:speciality,operator:type,capacity:persons,addr:full,addr:city,source,name:hi,name:ta,osm_id,osm_type,geometry
0,Sri Lakshmi Super Speciality Hospitals,,hospital,,,,,,,,,,,5030732983,nodes,POINT (80.44819 16.30289)
1,Chandsi Clinic,,clinic,,,,,,"417/15, -126102",,Local Knowledge,,,7623032009,nodes,POINT (76.31985 29.31949)
2,Nitya Bajaj Dental Clinic & Implant Centre,,dentist,,,,,,"1841, Housing Board Colony, Sector 6",,Local Knowledge,,,7623032016,nodes,POINT (77.01146 29.67773)
3,Saksham Clinic,,clinic,,,,,,"Kalandari Gate, Near Kastoori Dargah",,Local Knowledge,,,7623032017,nodes,POINT (76.99283 29.6821)
4,Dr. Sunil Arora's Psychiatry,,doctors,,,,,,"Sector 13, Karnal",,Local Knowledge,,,7623032019,nodes,POINT (76.99684 29.68817)



📊 Non-null counts for key attributes:
name: 127696
amenity: 89014
healthcare: 70881

✅ Total records: 129968
🧭 Geometry valid: 129968


In [29]:
import geopandas as gpd
from shapely.geometry import Point

# 📄 File paths
osm_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Raw\hotosm_ind_health_facilities_points_geojson.geojson"
gadm_lvl1_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\GIS Data\GADM Data\gadm41_IND_1.dbf"

# ✅ Step 1: Load OSM and GADM State Boundaries
osm_gdf = gpd.read_file(osm_path).to_crs(epsg=4326)
gadm_gdf = gpd.read_file(gadm_lvl1_path).to_crs(epsg=4326)

# ✅ Step 2: Filter for Odisha
odisha_poly = gadm_gdf[gadm_gdf['NAME_1'].str.lower() == 'odisha'].copy()
print(f"✅ Odisha boundary loaded: {len(odisha_poly)} polygon(s)")

# ✅ Step 3: Spatial Join to keep points inside Odisha
osm_in_odisha = gpd.sjoin(osm_gdf, odisha_poly, predicate='within', how='inner')

# ✅ Step 4: Drop join columns and save
osm_in_odisha = osm_in_odisha.drop(columns=['index_right'])

# 🧾 Summary
print("✅ OSM records within Odisha polygon:", len(osm_in_odisha))
print("🏥 Unique facility types (amenity):", osm_in_odisha['amenity'].dropna().unique())

# 💾 Save final filtered GeoJSON
osm_path_out = os.path.join(OUTPUT_DIR, "odisha_health_facilities_osm.geojson")
osm_in_odisha.to_file(osm_path_out, driver="GeoJSON")
print(f"💾 Saved OSM Odisha facilities to: {osm_path_out}")

✅ Odisha boundary loaded: 1 polygon(s)
✅ OSM records within Odisha polygon: 3266
🏥 Unique facility types (amenity): ['clinic' 'hospital' 'dentist' 'doctors' 'pharmacy' 'laboratory']


2025-05-19 23:24:10,421 - INFO - Created 3,266 records


💾 Saved OSM Odisha facilities to: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_osm.geojson


In [31]:
# ✅ Cell: Clean and prepare HMIS data for matching

import pandas as pd

# 📄 File path
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"

# 📥 Load
hmis_df = pd.read_csv(hmis_path)
hmis_df.columns = hmis_df.columns.str.strip().str.lower().str.replace(" ", "_")

# ✅ Rename for consistency
hmis_df = hmis_df.rename(columns={
    'districtname': 'district',
    'facilityname': 'facility_name',
    'healthfacilitytype': 'facility_type'
})

# ✅ Standardize text columns
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['facility_type'] = hmis_df['facility_type'].astype(str).str.strip().str.lower()

# 🧾 Summary
print("✅ HMIS records loaded:", len(hmis_df))
print("🏥 Unique facility types in HMIS:", hmis_df['facility_type'].dropna().unique())
print("📍 Districts covered:", hmis_df['district'].nunique())

# 🧪 Preview cleaned HMIS data
display(hmis_df.head(5))

✅ HMIS records loaded: 8545
🏥 Unique facility types in HMIS: ['district hospital' 'health sub centre' 'primary health centre' 'uphc'
 'community health centre' 'sub district hospital' 'oh' 'idh' 'uchc'
 'medical college']
📍 Districts covered: 30


Unnamed: 0,count,statename,district,subdistrictname,blockname,healthblockname,facility_type,facility_name,nin
0,8545,Odisha,anugul,Anugul,Anugul,Notmapped,district hospital,angul,4588168262
1,8545,Odisha,anugul,Anugul,Anugul,Anugul,health sub centre,bentapur,8816671831
2,8545,Odisha,anugul,Anugul,Anugul,Anugul,health sub centre,gadataras,5245155717
3,8545,Odisha,anugul,Anugul,Anugul,Anugul,health sub centre,ikarabandha,7727871571
4,8545,Odisha,anugul,Anugul,Anugul,Anugul,health sub centre,jagannathpur,4767538178


In [33]:
# 📄 ESRI GeoJSON path
esri_path = os.path.join(OUTPUT_DIR, "odisha_health_facilities_esri.geojson")

# 🧭 Load and check columns
esri_df = gpd.read_file(esri_path)
print("📌 ESRI columns:", list(esri_df.columns))
display(esri_df.head())

📌 ESRI columns: ['objectid', 'facilityname', 'facilitytype', 'landline', 'address', 'street', 'landmark', 'locality', 'subdistrict', 'district', 'state', 'posatalcode', 'country', 'geometry']


Unnamed: 0,objectid,facilityname,facilitytype,landline,address,street,landmark,locality,subdistrict,district,state,posatalcode,country,geometry
0,85568,Moranda,Sub Centre,,Moranda Phc(N),Moranda,"Moronda, Near Moronda Seba Ashram",Jamda,Bahalda,Mayurbhanj,odisha,757046,India,POINT (9577648.4627 2554966.4763)
1,85579,Deogan Sc,Sub Centre,,Deogan Sc,Deogan,Near Deogan UGME School,,Tiring,Mayurbhanj,odisha,757053,India,POINT (9573547.4527 2565998.506)
2,85583,Pandhda Sc,Sub Centre,,Pandhda Sc,Pandhda,Inside Village,,Tiring,Mayurbhanj,odisha,757053,India,POINT (9578130.4761 2561904.8969)
3,85600,Bholagadia,Sub Centre,,Bholagadia Phc,Bholagadia,Infront of Bholagadia Pond,,Tiring,Mayurbhanj,odisha,757053,India,POINT (9578293.0026 2564680.8301)
4,85602,Talsa Sc,Sub Centre,,Talsa Sc,Talsa,Near UGME School,,Tiring,Mayurbhanj,odisha,757053,India,POINT (9576715.6054 2572464.3339)


In [34]:
import pandas as pd
import geopandas as gpd

# 📍 Load ESRI Odisha health facilities
esri_path = os.path.join(OUTPUT_DIR, "odisha_health_facilities_esri.geojson")
esri_df = gpd.read_file(esri_path)

# ✅ Normalize ESRI columns
esri_df['district'] = esri_df['district'].astype(str).str.strip().str.lower()
esri_df['facilityname'] = esri_df['facilityname'].astype(str).str.strip().str.lower()
esri_df['facilitytype'] = esri_df['facilitytype'].astype(str).str.strip().str.lower()

# ✅ Normalize HMIS again for safety
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['facility_type'] = hmis_df['facility_type'].astype(str).str.strip().str.lower()

# ✅ Exact match on facility name + district
matched_df = pd.merge(
    esri_df,
    hmis_df,
    left_on=['facilityname', 'district'],
    right_on=['facility_name', 'district'],
    how='inner',
    suffixes=('_esri', '_hmis')
)

# ❌ Unmatched in ESRI
esri_unmatched = esri_df.merge(
    hmis_df[['facility_name', 'district']],
    left_on=['facilityname', 'district'],
    right_on=['facility_name', 'district'],
    how='left',
    indicator=True
).query('_merge == "left_only"').drop(columns=['_merge', 'facility_name'])

# ❌ Unmatched in HMIS
hmis_unmatched = hmis_df.merge(
    esri_df[['facilityname', 'district']],
    left_on=['facility_name', 'district'],
    right_on=['facilityname', 'district'],
    how='left',
    indicator=True
).query('_merge == "left_only"').drop(columns=['_merge', 'facilityname'])

# 📊 Summary
print(f"✅ Matched facilities: {len(matched_df)}")
print(f"❌ Unmatched in ESRI (extra/outdated): {len(esri_unmatched)}")
print(f"❌ Unmatched in HMIS (missing in ESRI): {len(hmis_unmatched)}")

# 💾 Save audit outputs
matched_df.to_file(os.path.join(OUTPUT_DIR, "matched_esri_hmis.geojson"), driver="GeoJSON")
esri_unmatched.to_file(os.path.join(OUTPUT_DIR, "unmatched_esri.geojson"), driver="GeoJSON")
hmis_unmatched.to_csv(os.path.join(OUTPUT_DIR, "unmatched_hmis.csv"), index=False)

✅ Matched facilities: 5408
❌ Unmatched in ESRI (extra/outdated): 4907
❌ Unmatched in HMIS (missing in ESRI): 4274


2025-05-19 23:32:39,787 - INFO - Created 5,408 records
2025-05-19 23:32:40,302 - INFO - Created 4,907 records


In [35]:
%pip install rapidfuzz


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
# ✅ Import test cell
from rapidfuzz import process, fuzz
print("✅ RapidFuzz is working.")

✅ RapidFuzz is working.


In [37]:
from rapidfuzz import process, fuzz
import pandas as pd
import geopandas as gpd

# Load ESRI and HMIS
esri_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_esri.geojson"
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"

esri_df = gpd.read_file(esri_path)
esri_df['district'] = esri_df['district'].astype(str).str.strip().str.lower()
esri_df['facilityname'] = esri_df['facilityname'].astype(str).str.strip().str.lower()
esri_df['facilitytype'] = esri_df['facilitytype'].astype(str).str.strip().str.lower()

hmis_df = pd.read_csv(hmis_path)
hmis_df.columns = hmis_df.columns.str.strip().str.lower().str.replace(" ", "_")
hmis_df = hmis_df.rename(columns={'districtname': 'district', 'facilityname': 'facility_name', 'healthfacilitytype': 'facility_type'})
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['facility_type'] = hmis_df['facility_type'].astype(str).str.strip().str.lower()

# Get unmatched sets
matched = pd.merge(esri_df, hmis_df, left_on=['facilityname', 'district'], right_on=['facility_name', 'district'], how='inner')
esri_unmatched = esri_df.merge(hmis_df[['facility_name', 'district']], left_on=['facilityname', 'district'], right_on=['facility_name', 'district'], how='left', indicator=True)
esri_unmatched = esri_unmatched[esri_unmatched['_merge'] == 'left_only'].drop(columns=['_merge', 'facility_name'])

hmis_unmatched = hmis_df.merge(esri_df[['facilityname', 'district']], left_on=['facility_name', 'district'], right_on=['facilityname', 'district'], how='left', indicator=True)
hmis_unmatched = hmis_unmatched[hmis_unmatched['_merge'] == 'left_only'].drop(columns=['_merge', 'facilityname'])

# Run fuzzy match
esri_names = esri_unmatched[['facilityname', 'district']].drop_duplicates()
hmis_names = hmis_unmatched[['facility_name', 'district']].drop_duplicates()

fuzzy_matches = []

for idx, hmis_row in hmis_names.iterrows():
    hmis_name = hmis_row['facility_name']
    district = hmis_row['district']
    
    esri_subset = esri_names[esri_names['district'] == district]
    if esri_subset.empty:
        continue

    match, score, _ = process.extractOne(hmis_name, esri_subset['facilityname'], scorer=fuzz.token_sort_ratio)

    if score >= 75:
        fuzzy_matches.append({
            'facility_name_hmis': hmis_name,
            'facility_name_esri': match,
            'district': district,
            'score': score
        })

# Convert to DataFrame and show top matches
fuzzy_matches_df = pd.DataFrame(fuzzy_matches).sort_values(by='score', ascending=False)
print("✅ Fuzzy matches found:", len(fuzzy_matches_df))
fuzzy_matches_df.head(20)

✅ Fuzzy matches found: 3641


Unnamed: 0,facility_name_hmis,facility_name_esri,district,score
241,patnagarh sdh,sdh patnagarh,balangir,100.0
130,balangir dhh,dhh balangir,balangir,100.0
2572,mv 22,mv 22,malkangiri,100.0
3306,bahadajhola (k dakua),bahadajhola (k. dakua),nayagarh,97.674419
935,sriram chandra pur,sriram chandrapur,dhenkanal,97.142857
877,nischintakoili,nischaintakoili,cuttack,96.551724
1329,j dayanidhipur,j. dayanidhipur,ganjam,96.551724
878,purushottampur,purusottampur,cuttack,96.296296
965,kamakhya nagar,kamakhyanagar,dhenkanal,96.296296
3626,jaunrabhauunra,jaunrabhaunra,sonepur,96.296296


In [38]:
import geopandas as gpd
import pandas as pd

# ✅ Paths
esri_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_esri.geojson"
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"
output_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_esri_hmis.geojson"

# ✅ Load original datasets
esri_df = gpd.read_file(esri_path)
hmis_df = pd.read_csv(hmis_path)

# ✅ Standardize columns
esri_df['district'] = esri_df['district'].astype(str).str.strip().str.lower()
esri_df['facilityname'] = esri_df['facilityname'].astype(str).str.strip().str.lower()
esri_df['facilitytype'] = esri_df['facilitytype'].astype(str).str.strip().str.lower()

hmis_df.columns = hmis_df.columns.str.strip().str.lower().str.replace(" ", "_")
hmis_df = hmis_df.rename(columns={'districtname': 'district', 'facilityname': 'facility_name', 'healthfacilitytype': 'facility_type'})
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['facility_type'] = hmis_df['facility_type'].astype(str).str.strip().str.lower()

# ✅ Load fuzzy match results
fuzzy_matches_df = fuzzy_matches_df.drop_duplicates(subset=['facility_name_hmis', 'district'])

# ✅ Merge fuzzy matches from HMIS and ESRI
fuzzy_hmis = pd.merge(fuzzy_matches_df, hmis_df, left_on=['facility_name_hmis', 'district'], right_on=['facility_name', 'district'], how='left')
fuzzy_esri = pd.merge(fuzzy_hmis, esri_df, left_on=['facility_name_esri', 'district'], right_on=['facilityname', 'district'], how='left', suffixes=('_hmis', '_esri'))

# ✅ Check facility type alignment
fuzzy_clean = fuzzy_esri[fuzzy_esri['facility_type'].str.lower().str.strip() == fuzzy_esri['facilitytype'].str.lower().str.strip()].copy()
fuzzy_clean_gdf = gpd.GeoDataFrame(fuzzy_clean, geometry='geometry', crs=esri_df.crs)

# ✅ Create exact matches (where facility name and type match)
exact_df = pd.merge(esri_df, hmis_df, left_on=['facilityname', 'district'], right_on=['facility_name', 'district'], how='inner')
exact_df = exact_df[exact_df['facilitytype'] == exact_df['facility_type']]
exact_gdf = gpd.GeoDataFrame(exact_df, geometry='geometry', crs=esri_df.crs)

# ✅ Combine both matched layers
master_gdf = pd.concat([exact_gdf, fuzzy_clean_gdf], ignore_index=True)
master_gdf = master_gdf.drop_duplicates(subset=['facility_name', 'district'])

# ✅ Save final master file
master_gdf.to_file(output_path, driver="GeoJSON")
print(f"✅ Saved clean matched master file with {len(master_gdf)} facilities")

2025-05-19 23:49:18,039 - INFO - Created 392 records


✅ Saved clean matched master file with 392 facilities


In [39]:
from rapidfuzz import process, fuzz
import pandas as pd
import geopandas as gpd
import os

# Paths
esri_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_esri.geojson"
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"
output_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_esri_hmis.geojson"

# Load
esri_df = gpd.read_file(esri_path)
hmis_df = pd.read_csv(hmis_path)

# Standardize
esri_df['district'] = esri_df['district'].astype(str).str.strip().str.lower()
esri_df['facilityname'] = esri_df['facilityname'].astype(str).str.strip().str.lower()
esri_df['facilitytype'] = esri_df['facilitytype'].astype(str).str.strip().str.lower()
hmis_df.columns = hmis_df.columns.str.strip().str.lower().str.replace(" ", "_")
hmis_df = hmis_df.rename(columns={'districtname': 'district', 'facilityname': 'facility_name', 'healthfacilitytype': 'facility_type'})
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['facility_type'] = hmis_df['facility_type'].astype(str).str.strip().str.lower()

# Unmatched
matched = pd.merge(esri_df, hmis_df, left_on=['facilityname', 'district'], right_on=['facility_name', 'district'], how='inner')
esri_unmatched = esri_df.merge(hmis_df[['facility_name', 'district']], left_on=['facilityname', 'district'], right_on=['facility_name', 'district'], how='left', indicator=True)
esri_unmatched = esri_unmatched[esri_unmatched['_merge'] == 'left_only'].drop(columns=['_merge', 'facility_name'])
hmis_unmatched = hmis_df.merge(esri_df[['facilityname', 'district']], left_on=['facility_name', 'district'], right_on=['facilityname', 'district'], how='left', indicator=True)
hmis_unmatched = hmis_unmatched[hmis_unmatched['_merge'] == 'left_only'].drop(columns=['_merge', 'facilityname'])

# Fuzzy match
esri_names = esri_unmatched[['facilityname', 'district']].drop_duplicates()
hmis_names = hmis_unmatched[['facility_name', 'district']].drop_duplicates()

fuzzy_matches = []
for idx, hmis_row in hmis_names.iterrows():
    hmis_name = hmis_row['facility_name']
    district = hmis_row['district']
    esri_subset = esri_names[esri_names['district'] == district]
    if esri_subset.empty:
        continue
    match, score, _ = process.extractOne(hmis_name, esri_subset['facilityname'], scorer=fuzz.token_sort_ratio)
    if score >= 75:
        fuzzy_matches.append({'facility_name_hmis': hmis_name, 'facility_name_esri': match, 'district': district, 'score': score})

fuzzy_matches_df = pd.DataFrame(fuzzy_matches)

# Merge fuzzy results
fuzzy_hmis = pd.merge(fuzzy_matches_df, hmis_df, left_on=['facility_name_hmis', 'district'], right_on=['facility_name', 'district'], how='left')
fuzzy_esri = pd.merge(fuzzy_hmis, esri_df, left_on=['facility_name_esri', 'district'], right_on=['facilityname', 'district'], how='left', suffixes=('_hmis', '_esri'))

# Standardize types
type_map = {
    'phc': 'primary health centre', 'primary health centre': 'primary health centre', 'uphc': 'primary health centre',
    'chc': 'community health centre', 'community health centre': 'community health centre', 'uchc': 'community health centre',
    'sub centre': 'health sub centre', 'health sub centre': 'health sub centre', 'sc': 'health sub centre',
    'district hospital': 'district hospital', 'dh': 'district hospital', 'dhh': 'district hospital',
    'sub district hospital': 'sub district hospital', 'sdh': 'sub district hospital',
    'medical college': 'medical college', 'mc': 'medical college', 'idh': 'idh', 'oh': 'oh'
}

fuzzy_esri['facility_type_std'] = fuzzy_esri['facility_type'].map(lambda x: type_map.get(str(x).strip().lower(), str(x).strip().lower()))
fuzzy_esri['facilitytype_std'] = fuzzy_esri['facilitytype'].map(lambda x: type_map.get(str(x).strip().lower(), str(x).strip().lower()))
fuzzy_clean = fuzzy_esri[fuzzy_esri['facility_type_std'] == fuzzy_esri['facilitytype_std']]
fuzzy_clean_gdf = gpd.GeoDataFrame(fuzzy_clean, geometry='geometry', crs=esri_df.crs)

# Standardize exact matches
exact_df = matched.copy()
exact_df['facility_type_std'] = exact_df['facility_type'].map(lambda x: type_map.get(str(x).strip().lower(), str(x).strip().lower()))
exact_df['facilitytype_std'] = exact_df['facilitytype'].map(lambda x: type_map.get(str(x).strip().lower(), str(x).strip().lower()))
exact_clean = exact_df[exact_df['facility_type_std'] == exact_df['facilitytype_std']]
exact_gdf = gpd.GeoDataFrame(exact_clean, geometry='geometry', crs=esri_df.crs)

# Combine
master_gdf = pd.concat([exact_gdf, fuzzy_clean_gdf], ignore_index=True)
master_gdf = master_gdf.drop_duplicates(subset=['facility_name', 'district'])

# Save final master
master_gdf.to_file(output_path, driver="GeoJSON")
print(f"✅ Saved master file with {len(master_gdf)} matched facilities:\n{output_path}")


2025-05-19 23:55:31,936 - INFO - Created 6,212 records


✅ Saved master file with 6212 matched facilities:
C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_esri_hmis.geojson


In [40]:
# 📌 Paths
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"
master_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_esri_hmis.geojson"

import pandas as pd
import geopandas as gpd

# 🔁 Load datasets
hmis_df = pd.read_csv(hmis_path)
master_gdf = gpd.read_file(master_path)

# 🔁 Standardize key columns
hmis_df.columns = hmis_df.columns.str.strip().str.lower().str.replace(" ", "_")
hmis_df = hmis_df.rename(columns={'districtname': 'district', 'healthfacilitytype': 'facility_type', 'facilityname': 'facility_name'})
hmis_df['facility_name'] = hmis_df['facility_name'].astype(str).str.strip().str.lower()
hmis_df['district'] = hmis_df['district'].astype(str).str.strip().str.lower()

master_gdf['facility_name'] = master_gdf['facility_name'].astype(str).str.strip().str.lower()
master_gdf['district'] = master_gdf['district'].astype(str).str.strip().str.lower()

# 🔍 Find unmatched
matched = master_gdf[['facility_name', 'district']].drop_duplicates()
unmatched_hmis = hmis_df.merge(matched, on=['facility_name', 'district'], how='left', indicator=True)
unmatched_hmis = unmatched_hmis[unmatched_hmis['_merge'] == 'left_only'].drop(columns=['_merge'])

# 💾 Optional: Save unmatched for review
unmatched_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\unmatched_hmis_facilities.csv"
unmatched_hmis.to_csv(unmatched_path, index=False)

# ✅ Summary
print("🧮 Unmatched HMIS facilities:", len(unmatched_hmis))
unmatched_hmis[['facility_name', 'facility_type', 'district']].head(10)

🧮 Unmatched HMIS facilities: 1076


Unnamed: 0,facility_name,facility_type,district
0,angul,District Hospital,anugul
1,bentapur,Health Sub Centre,anugul
7,kuio,Health Sub Centre,anugul
15,rantalei,Health Sub Centre,anugul
16,saradhapur a,Health Sub Centre,anugul
18,uphc angul,UPHC,anugul
20,aida,Health Sub Centre,anugul
25,lunahandi,Health Sub Centre,anugul
27,mahendrapur,Health Sub Centre,anugul
30,athmallik,Sub District Hospital,anugul


In [46]:
import pandas as pd
from rapidfuzz import process, fuzz

# 📥 Load and standardize
gov_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov.csv"
gov_df = pd.read_csv(gov_path)
gov_df.columns = gov_df.columns.str.strip().str.lower().str.replace(" ", "_")

# ✅ Rename for consistency
gov_df = gov_df.rename(columns={
    'district': 'district',
    'facilitytype': 'facility_type',
    'facilityname': 'facility_name'
})

# ✅ Normalize values
gov_df['facility_name'] = gov_df['facility_name'].astype(str).str.strip().str.lower()
gov_df['district'] = gov_df['district'].astype(str).str.strip().str.lower()
gov_df['facility_type'] = gov_df['facility_type'].astype(str).str.strip().str.lower()

unmatched_hmis['facility_name'] = unmatched_hmis['facility_name'].astype(str).str.strip().str.lower()
unmatched_hmis['district'] = unmatched_hmis['district'].astype(str).str.strip().str.lower()
unmatched_hmis['facility_type'] = unmatched_hmis['facility_type'].astype(str).str.strip().str.lower()

# ✅ Fuzzy match logic
matches = []
for idx, hmis_row in unmatched_hmis.iterrows():
    hmis_name = hmis_row['facility_name']
    hmis_district = hmis_row['district']
    hmis_type = hmis_row['facility_type']
    
    candidates = gov_df[
        (gov_df['district'] == hmis_district) &
        (gov_df['facility_type'] == hmis_type)
    ]
    
    if len(candidates) == 0:
        continue

    match_name, score, _ = process.extractOne(hmis_name, candidates['facility_name'], scorer=fuzz.token_sort_ratio)

    if score >= 90:
        best_match = candidates[candidates['facility_name'] == match_name].iloc[0]
        matches.append({
            'facility_name_hmis': hmis_name,
            'district': hmis_district,
            'facility_type': hmis_type,
            'matched_gov_name': best_match['facility_name'],
            'latitude': best_match.get('latitude', None),
            'longitude': best_match.get('longitude', None),
            'score': score
        })

# ✅ Create match table
matched_gov_df = pd.DataFrame(matches)

# 💾 Save for traceability
matched_gov_df.to_csv(
    r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\gov_matched_unmatched_hmis.csv",
    index=False
)

print("✅ Total matches found from Gov CSV:", len(matched_gov_df))
matched_gov_df.head(10)

✅ Total matches found from Gov CSV: 0


In [47]:
import geopandas as gpd
import pandas as pd
from rapidfuzz import process, fuzz

# 📍 Load OSM data
osm_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_osm.geojson"
osm_gdf = gpd.read_file(osm_path)

# 🧼 Normalize column names
osm_gdf.columns = osm_gdf.columns.str.strip().str.lower().str.replace(":", "_")
osm_gdf['name'] = osm_gdf['name'].astype(str).str.strip().str.lower()
osm_gdf['amenity'] = osm_gdf['amenity'].astype(str).str.strip().str.lower()

# 🧠 Map amenity to HMIS-equivalent facility type
def map_amenity_to_type(amenity):
    return {
        'hospital': 'district hospital',
        'clinic': 'primary health centre',
        'doctors': 'primary health centre',
        'pharmacy': 'health sub centre',
        'laboratory': 'health sub centre',
        'dentist': 'health sub centre'
    }.get(amenity, 'unknown')

osm_gdf['facility_type'] = osm_gdf['amenity'].apply(map_amenity_to_type)
osm_valid = osm_gdf[osm_gdf['facility_type'] != 'unknown'].copy()

# 🧼 Standardize unmatched HMIS names
unmatched_hmis['facility_name'] = unmatched_hmis['facility_name'].astype(str).str.strip().str.lower()
unmatched_hmis['facility_type'] = unmatched_hmis['facility_type'].astype(str).str.strip().str.lower()

# 🔁 Fuzzy match on name + facility_type
matches = []
for idx, hmis_row in unmatched_hmis.iterrows():
    hmis_name = hmis_row['facility_name']
    hmis_type = hmis_row['facility_type']
    
    candidates = osm_valid[osm_valid['facility_type'] == hmis_type]
    if candidates.empty:
        continue
    
    match_name, score, _ = process.extractOne(hmis_name, candidates['name'], scorer=fuzz.token_sort_ratio)
    if score >= 90:
        best_match = candidates[candidates['name'] == match_name].iloc[0]
        matches.append({
            'facility_name_hmis': hmis_name,
            'district': hmis_row['district'],
            'facility_type': hmis_type,
            'matched_osm_name': best_match['name'],
            'geometry': best_match['geometry'],
            'score': score
        })

# 📦 Convert to GeoDataFrame
matched_osm_df = pd.DataFrame(matches)
matched_osm_gdf = gpd.GeoDataFrame(matched_osm_df, geometry='geometry', crs=osm_valid.crs)

# 💾 Save results
output_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\osm_matched_unmatched_hmis.geojson"
matched_osm_gdf.to_file(output_path, driver="GeoJSON")

print(f"✅ OSM Matches: {len(matched_osm_gdf)} saved to:\n{output_path}")

2025-05-20 00:08:21,836 - INFO - Created 2 records


✅ OSM Matches: 2 saved to:
C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\osm_matched_unmatched_hmis.geojson


In [49]:
import geopandas as gpd
import pandas as pd

# 📂 Paths
esri_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_esri_hmis.geojson"
osm_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\osm_matched_unmatched_hmis.geojson"
output_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_odisha.geojson"

# ✅ Load files
esri_gdf = gpd.read_file(esri_path)
osm_gdf = gpd.read_file(osm_path)

# ✅ Rename for consistency
osm_gdf = osm_gdf.rename(columns={'facility_name_hmis': 'facility_name'})

# ✅ Select only key fields
common_cols = ['facility_name', 'district', 'facility_type', 'geometry']
esri_gdf = esri_gdf[common_cols]
osm_gdf = osm_gdf[common_cols]

# ✅ Merge
final_gdf = pd.concat([esri_gdf, osm_gdf], ignore_index=True).drop_duplicates(subset=['facility_name', 'district'])

# ✅ Save
final_gdf.to_file(output_path, driver="GeoJSON")

print("✅ Final master file saved:")
print(f"📍 {output_path}")
print(f"📊 Total facilities in final file: {len(final_gdf)}")

2025-05-20 00:20:20,946 - INFO - Created 6,214 records


✅ Final master file saved:
📍 C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\master_matched_facilities_odisha.geojson
📊 Total facilities in final file: 6214


In [53]:
import pandas as pd

# === Step 1: Load both files ===
gov_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov.csv"
hmis_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\processed\Copy of Monthly Reporting Dashboard Odisha - 2024-25.xlsx - HMIS data (1).csv"

gov_df = pd.read_csv(gov_path)
hmis_df = pd.read_csv(hmis_path)

# === Step 2: Inspect column names ===
print("📑 GOV CSV Columns:")
print(gov_df.columns.tolist())

print("\n📑 HMIS CSV Columns:")
print(hmis_df.columns.tolist())

📑 GOV CSV Columns:
['state', 'district', 'subdistrict name', 'facilitytype', 'facilityname', 'facility address', 'latitude', 'longitude', 'activeflag_c', 'notional_physical', 'location type', 'type of facility', 'nin_n', 'geometry']

📑 HMIS CSV Columns:
['Count', 'Statename', 'Districtname', 'Subdistrictname', 'Blockname', 'Healthblockname', 'Healthfacilitytype', 'Facilityname', 'NIN']


In [54]:
# Rename for consistency
gov_df.rename(columns={
    'facilityname': 'facility_name',
    'district': 'district',
    'facilitytype': 'facility_type'
}, inplace=True)

hmis_df.rename(columns={
    'Facilityname': 'facility_name',
    'Districtname': 'district',
    'Healthfacilitytype': 'facility_type'
}, inplace=True)

# Lowercase and strip whitespaces
for col in ['facility_name', 'district', 'facility_type']:
    gov_df[col] = gov_df[col].astype(str).str.lower().str.strip()
    hmis_df[col] = hmis_df[col].astype(str).str.lower().str.strip()

print("✅ Columns standardized")

✅ Columns standardized


In [55]:
# Facility type mapping
type_map = {
    'district hospital': 'dh',
    'sub-divisional hospital': 'sdh',
    'sub divisional hospital': 'sdh',
    'chc': 'chc',
    'phc': 'phc',
    'uphc': 'phc',
    'uchc': 'chc',
    'hsc': 'hwc',
    'sc': 'hwc',
    'sub centre': 'hwc',
    'health sub centre': 'hwc'
}

gov_df['facility_type_std'] = gov_df['facility_type'].map(type_map).fillna(gov_df['facility_type'])
hmis_df['facility_type_std'] = hmis_df['facility_type'].map(type_map).fillna(hmis_df['facility_type'])

print("🔍 GOV Facility Types")
print(gov_df['facility_type_std'].value_counts())

print("\n🔍 HMIS Facility Types")
print(hmis_df['facility_type_std'].value_counts())

🔍 GOV Facility Types
facility_type_std
sub_cen    6688
phc        1305
chc         377
dis_h        35
s_t_h        27
Name: count, dtype: int64

🔍 HMIS Facility Types
facility_type_std
hwc                        6688
primary health centre      1236
community health centre     375
phc                         110
oh                           55
dh                           32
sub district hospital        32
chc                           7
idh                           5
medical college               5
Name: count, dtype: int64


In [56]:
type_map_gov_to_std = {
    'sub_cen': 'hwc',
    'phc': 'phc',
    'chc': 'chc',
    'dis_h': 'dh',
    's_t_h': 'sdh'
}

# Apply corrected mapping to GOV facility types
gov_df['facility_type_std'] = gov_df['facility_type_std'].map(type_map_gov_to_std).fillna(gov_df['facility_type_std'])

# Re-run match
matched_df = pd.merge(
    gov_df,
    hmis_df,
    on=['district', 'facility_name', 'facility_type_std'],
    how='inner',
    suffixes=('_gov', '_hmis')
)

print(f"✅ Matched after corrected type mapping: {len(matched_df)}")

✅ Matched after corrected type mapping: 6070


In [57]:
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import Point
from tqdm import tqdm

# === Input paths ===
gov_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov.csv"
output_path = gov_path.replace('.csv', '_snapped.geojson')

# === Load the dataset ===
df = pd.read_csv(gov_path)
df = df.dropna(subset=['latitude', 'longitude'])

# Create GeoDataFrame
df['geometry'] = df.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

print(f"✅ Loaded {len(gdf)} facilities for snapping.")

✅ Loaded 8432 facilities for snapping.


In [58]:
def snap_to_osrm(point):
    lon, lat = point.x, point.y
    try:
        url = f"http://localhost:5000/nearest/v1/driving/{lon},{lat}"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            snapped = data['waypoints'][0]['location']
            return Point(snapped[0], snapped[1])
        else:
            return None
    except:
        return None

# Run snapping
tqdm.pandas()
gdf['snapped_geometry'] = gdf['geometry'].progress_apply(snap_to_osrm)

# Drop any that failed snapping
gdf = gdf.dropna(subset=['snapped_geometry'])
print(f"✅ Successfully snapped {len(gdf)} facilities.")

100%|██████████| 8432/8432 [01:30<00:00, 93.08it/s] 

✅ Successfully snapped 8432 facilities.





In [60]:
# Drop original geometry column before saving
gdf = gdf.drop(columns=['geometry'])

# Rename snapped_geometry → geometry and set it as active
gdf = gdf.rename(columns={'snapped_geometry': 'geometry'})
gdf = gdf.set_geometry('geometry')
gdf = gdf.set_crs('EPSG:4326')

# Save as GeoJSON
gdf.to_file(output_path, driver='GeoJSON')
print(f"📁 Saved snapped facilities to: {output_path}")

2025-05-20 00:54:39,497 - INFO - Created 8,432 records


📁 Saved snapped facilities to: C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov_snapped.geojson


In [None]:
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import Point
from tqdm import tqdm
import os

# === Input paths ===
village_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\data\raw\villages\village_population_odisha.geojson"
output_folder = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Snapped_districts"

# Ensure output directory exists
os.makedirs(output_folder, exist_ok=True)

# === Load villages ===
vdf = gpd.read_file(village_path)
vdf = vdf.to_crs(epsg=4326)
vdf = vdf.dropna(subset=['geometry'])

# Convert to centroids (if not already point)
vdf['geometry'] = vdf['geometry'].centroid

# Clean district column
vdf['district'] = vdf['district'].astype(str).str.lower().str.strip()

# Function to snap a single point to OSRM road network
def snap_point_to_osrm(point):
    lon, lat = point.x, point.y
    try:
        url = f"http://localhost:5000/nearest/v1/driving/{lon},{lat}"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            snapped = data['waypoints'][0]['location']
            return Point(snapped[0], snapped[1])
        else:
            return None
    except:
        return None

# === Snap villages per district ===
districts = sorted(vdf['district'].unique())

for district in tqdm(districts, desc="🔁 Snapping districts"):
    subset = vdf[vdf['district'] == district].copy()
    tqdm.pandas(desc=f"🚀 Snapping: {district}")
    subset['snapped_geometry'] = subset['geometry'].progress_apply(snap_point_to_osrm)
    
    # Drop failed
    subset = subset.dropna(subset=['snapped_geometry'])
    
    # Finalize geometry column
    subset = subset.drop(columns=['geometry'])
    subset = subset.rename(columns={'snapped_geometry': 'geometry'})
    subset = subset.set_geometry('geometry')
    subset = subset.set_crs(epsg=4326)

    # Save output
    out_path = os.path.join(output_folder, f"{district}_villages_snapped.geojson")
    subset.to_file(out_path, driver="GeoJSON")
    print(f"✅ Saved {len(subset)} snapped villages for: {district}")

In [63]:
import geopandas as gpd

path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov_snapped.geojson"
gdf = gpd.read_file(path)

print("📑 Columns in snapped facility file:")
print(gdf.columns.tolist())

📑 Columns in snapped facility file:
['state', 'district', 'subdistrict name', 'facilitytype', 'facilityname', 'facility address', 'latitude', 'longitude', 'activeflag_c', 'notional_physical', 'location type', 'type of facility', 'nin_n', 'geometry']


In [64]:
import geopandas as gpd
import pandas as pd

# === Load the snapped facilities file ===
input_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov_snapped.geojson"
gdf = gpd.read_file(input_path)

# === Standardize column names ===
gdf['district'] = gdf['district'].astype(str).str.lower().str.strip()
gdf['facilitytype'] = gdf['facilitytype'].astype(str).str.lower().str.strip()

# === Pivot table to summarize facility counts by type and district ===
summary = pd.pivot_table(
    gdf,
    index='district',
    columns='facilitytype',
    aggfunc='size',
    fill_value=0
).sort_index()

# === Display and optionally save ===
print("📊 Facility Type Counts by District")
print(summary)

# Optional: save to CSV
summary_path = input_path.replace('.geojson', '_facility_type_summary.csv')
summary.to_csv(summary_path)
print(f"✅ Saved summary table: {summary_path}")

📊 Facility Type Counts by District
facilitytype   chc  dis_h  phc  s_t_h  sub_cen
district                                      
anugul           9      1   31      3      166
balangir        15      1   44      2      226
baleshwar       17      1   69      1      275
bargarh         14      1   46      1      204
baudh            5      1   12      0       67
bhadrak          7      1   53      0      178
cuttack         18      2   66      2      332
deogarh          4      1    8      0       42
dhenkanal       10      1   36      2      167
gajapati         8      1   21      0      136
ganjam          30      2   90      2      460
jagatsinghpur    9      1   37      0      189
jajapur         12      1   59      0      260
jharsuguda       6      1   16      0       66
kalahandi       16      1   45      1      242
kandhamal       14      1   40      1      172
kendrapara       9      1   46      0      227
keonjhar        17      1   66      2      351
khordha         13      2

In [65]:
import geopandas as gpd
import os

# === Input: snapped facilities file ===
input_path = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Processed\odisha_health_facilities_gov_snapped.geojson"

# === Output directory ===
output_dir = r"C:\Users\utkar\OneDrive\Desktop\ClimateXTelemedicine Odisha\Odisha_VScode\.venv\Policy note\Snapped_Facilities"
os.makedirs(output_dir, exist_ok=True)

# === Load the snapped facilities ===
gdf = gpd.read_file(input_path)
gdf = gdf.to_crs(epsg=4326)

# === Standardize column names ===
gdf['district'] = gdf['district'].astype(str).str.lower().str.strip()
gdf['facilitytype'] = gdf['facilitytype'].astype(str).str.lower().str.strip()

# === Define accepted types (optional filtering if needed) ===
accepted_types = ['chc', 'phc', 'dis_h', 's_t_h', 'sub_cen']

# Optional: Filter to only accepted types
gdf = gdf[gdf['facilitytype'].isin(accepted_types)]

# === Group and export by district × facilitytype ===
grouped = gdf.groupby(['district', 'facilitytype'])

for (district, ftype), group in grouped:
    # Safe filename
    filename = f"{district}_{ftype}.geojson".replace(" ", "_")
    out_path = os.path.join(output_dir, filename)
    
    # Save
    group.to_file(out_path, driver='GeoJSON')
    print(f"✅ Saved {len(group)} facilities → {filename}")

2025-05-20 01:18:03,924 - INFO - Created 9 records
2025-05-20 01:18:03,938 - INFO - Created 1 records
2025-05-20 01:18:03,956 - INFO - Created 31 records
2025-05-20 01:18:03,969 - INFO - Created 3 records
2025-05-20 01:18:04,009 - INFO - Created 166 records
2025-05-20 01:18:04,025 - INFO - Created 15 records
2025-05-20 01:18:04,038 - INFO - Created 1 records
2025-05-20 01:18:04,059 - INFO - Created 44 records
2025-05-20 01:18:04,074 - INFO - Created 2 records
2025-05-20 01:18:04,113 - INFO - Created 226 records
2025-05-20 01:18:04,127 - INFO - Created 17 records


✅ Saved 9 facilities → anugul_chc.geojson
✅ Saved 1 facilities → anugul_dis_h.geojson
✅ Saved 31 facilities → anugul_phc.geojson
✅ Saved 3 facilities → anugul_s_t_h.geojson
✅ Saved 166 facilities → anugul_sub_cen.geojson
✅ Saved 15 facilities → balangir_chc.geojson
✅ Saved 1 facilities → balangir_dis_h.geojson
✅ Saved 44 facilities → balangir_phc.geojson
✅ Saved 2 facilities → balangir_s_t_h.geojson
✅ Saved 226 facilities → balangir_sub_cen.geojson


2025-05-20 01:18:04,138 - INFO - Created 1 records
2025-05-20 01:18:04,157 - INFO - Created 69 records
2025-05-20 01:18:04,167 - INFO - Created 1 records
2025-05-20 01:18:04,212 - INFO - Created 275 records
2025-05-20 01:18:04,226 - INFO - Created 14 records
2025-05-20 01:18:04,240 - INFO - Created 1 records
2025-05-20 01:18:04,259 - INFO - Created 46 records
2025-05-20 01:18:04,270 - INFO - Created 1 records
2025-05-20 01:18:04,298 - INFO - Created 204 records
2025-05-20 01:18:04,312 - INFO - Created 5 records
2025-05-20 01:18:04,326 - INFO - Created 1 records


✅ Saved 17 facilities → baleshwar_chc.geojson
✅ Saved 1 facilities → baleshwar_dis_h.geojson
✅ Saved 69 facilities → baleshwar_phc.geojson
✅ Saved 1 facilities → baleshwar_s_t_h.geojson
✅ Saved 275 facilities → baleshwar_sub_cen.geojson
✅ Saved 14 facilities → bargarh_chc.geojson
✅ Saved 1 facilities → bargarh_dis_h.geojson
✅ Saved 46 facilities → bargarh_phc.geojson
✅ Saved 1 facilities → bargarh_s_t_h.geojson
✅ Saved 204 facilities → bargarh_sub_cen.geojson
✅ Saved 5 facilities → baudh_chc.geojson
✅ Saved 1 facilities → baudh_dis_h.geojson


2025-05-20 01:18:04,340 - INFO - Created 12 records
2025-05-20 01:18:04,365 - INFO - Created 67 records
2025-05-20 01:18:04,379 - INFO - Created 7 records
2025-05-20 01:18:04,441 - INFO - Created 1 records
2025-05-20 01:18:04,461 - INFO - Created 53 records
2025-05-20 01:18:04,494 - INFO - Created 178 records
2025-05-20 01:18:04,513 - INFO - Created 18 records
2025-05-20 01:18:04,524 - INFO - Created 2 records
2025-05-20 01:18:04,544 - INFO - Created 66 records


✅ Saved 12 facilities → baudh_phc.geojson
✅ Saved 67 facilities → baudh_sub_cen.geojson
✅ Saved 7 facilities → bhadrak_chc.geojson
✅ Saved 1 facilities → bhadrak_dis_h.geojson
✅ Saved 53 facilities → bhadrak_phc.geojson
✅ Saved 178 facilities → bhadrak_sub_cen.geojson
✅ Saved 18 facilities → cuttack_chc.geojson
✅ Saved 2 facilities → cuttack_dis_h.geojson
✅ Saved 66 facilities → cuttack_phc.geojson


2025-05-20 01:18:04,560 - INFO - Created 2 records
2025-05-20 01:18:04,623 - INFO - Created 332 records
2025-05-20 01:18:04,634 - INFO - Created 4 records
2025-05-20 01:18:04,648 - INFO - Created 1 records
2025-05-20 01:18:04,661 - INFO - Created 8 records
2025-05-20 01:18:04,679 - INFO - Created 42 records
2025-05-20 01:18:04,694 - INFO - Created 10 records
2025-05-20 01:18:04,706 - INFO - Created 1 records
2025-05-20 01:18:04,720 - INFO - Created 36 records
2025-05-20 01:18:04,730 - INFO - Created 2 records


✅ Saved 2 facilities → cuttack_s_t_h.geojson
✅ Saved 332 facilities → cuttack_sub_cen.geojson
✅ Saved 4 facilities → deogarh_chc.geojson
✅ Saved 1 facilities → deogarh_dis_h.geojson
✅ Saved 8 facilities → deogarh_phc.geojson
✅ Saved 42 facilities → deogarh_sub_cen.geojson
✅ Saved 10 facilities → dhenkanal_chc.geojson
✅ Saved 1 facilities → dhenkanal_dis_h.geojson
✅ Saved 36 facilities → dhenkanal_phc.geojson
✅ Saved 2 facilities → dhenkanal_s_t_h.geojson


2025-05-20 01:18:04,768 - INFO - Created 167 records
2025-05-20 01:18:04,786 - INFO - Created 8 records
2025-05-20 01:18:04,797 - INFO - Created 1 records
2025-05-20 01:18:04,820 - INFO - Created 21 records
2025-05-20 01:18:04,852 - INFO - Created 136 records
2025-05-20 01:18:04,870 - INFO - Created 30 records
2025-05-20 01:18:04,884 - INFO - Created 2 records
2025-05-20 01:18:04,908 - INFO - Created 90 records
2025-05-20 01:18:04,921 - INFO - Created 2 records


✅ Saved 167 facilities → dhenkanal_sub_cen.geojson
✅ Saved 8 facilities → gajapati_chc.geojson
✅ Saved 1 facilities → gajapati_dis_h.geojson
✅ Saved 21 facilities → gajapati_phc.geojson
✅ Saved 136 facilities → gajapati_sub_cen.geojson
✅ Saved 30 facilities → ganjam_chc.geojson
✅ Saved 2 facilities → ganjam_dis_h.geojson
✅ Saved 90 facilities → ganjam_phc.geojson
✅ Saved 2 facilities → ganjam_s_t_h.geojson


2025-05-20 01:18:04,990 - INFO - Created 460 records
2025-05-20 01:18:05,003 - INFO - Created 9 records
2025-05-20 01:18:05,020 - INFO - Created 1 records
2025-05-20 01:18:05,040 - INFO - Created 37 records
2025-05-20 01:18:05,094 - INFO - Created 189 records
2025-05-20 01:18:05,108 - INFO - Created 12 records
2025-05-20 01:18:05,120 - INFO - Created 1 records
2025-05-20 01:18:05,146 - INFO - Created 59 records
2025-05-20 01:18:05,195 - INFO - Created 260 records


✅ Saved 460 facilities → ganjam_sub_cen.geojson
✅ Saved 9 facilities → jagatsinghpur_chc.geojson
✅ Saved 1 facilities → jagatsinghpur_dis_h.geojson
✅ Saved 37 facilities → jagatsinghpur_phc.geojson
✅ Saved 189 facilities → jagatsinghpur_sub_cen.geojson
✅ Saved 12 facilities → jajapur_chc.geojson
✅ Saved 1 facilities → jajapur_dis_h.geojson
✅ Saved 59 facilities → jajapur_phc.geojson


2025-05-20 01:18:05,211 - INFO - Created 6 records
2025-05-20 01:18:05,229 - INFO - Created 1 records
2025-05-20 01:18:05,251 - INFO - Created 16 records
2025-05-20 01:18:05,283 - INFO - Created 66 records
2025-05-20 01:18:05,299 - INFO - Created 16 records
2025-05-20 01:18:05,319 - INFO - Created 1 records
2025-05-20 01:18:05,345 - INFO - Created 45 records
2025-05-20 01:18:05,371 - INFO - Created 1 records
2025-05-20 01:18:05,415 - INFO - Created 242 records


✅ Saved 260 facilities → jajapur_sub_cen.geojson
✅ Saved 6 facilities → jharsuguda_chc.geojson
✅ Saved 1 facilities → jharsuguda_dis_h.geojson
✅ Saved 16 facilities → jharsuguda_phc.geojson
✅ Saved 66 facilities → jharsuguda_sub_cen.geojson
✅ Saved 16 facilities → kalahandi_chc.geojson
✅ Saved 1 facilities → kalahandi_dis_h.geojson
✅ Saved 45 facilities → kalahandi_phc.geojson
✅ Saved 1 facilities → kalahandi_s_t_h.geojson


2025-05-20 01:18:05,435 - INFO - Created 14 records
2025-05-20 01:18:05,452 - INFO - Created 1 records
2025-05-20 01:18:05,482 - INFO - Created 40 records
2025-05-20 01:18:05,497 - INFO - Created 1 records
2025-05-20 01:18:05,602 - INFO - Created 172 records
2025-05-20 01:18:05,626 - INFO - Created 9 records


✅ Saved 242 facilities → kalahandi_sub_cen.geojson
✅ Saved 14 facilities → kandhamal_chc.geojson
✅ Saved 1 facilities → kandhamal_dis_h.geojson
✅ Saved 40 facilities → kandhamal_phc.geojson
✅ Saved 1 facilities → kandhamal_s_t_h.geojson
✅ Saved 172 facilities → kandhamal_sub_cen.geojson


2025-05-20 01:18:05,641 - INFO - Created 1 records
2025-05-20 01:18:05,661 - INFO - Created 46 records
2025-05-20 01:18:05,707 - INFO - Created 227 records
2025-05-20 01:18:05,721 - INFO - Created 17 records
2025-05-20 01:18:05,735 - INFO - Created 1 records
2025-05-20 01:18:05,762 - INFO - Created 66 records
2025-05-20 01:18:05,778 - INFO - Created 2 records


✅ Saved 9 facilities → kendrapara_chc.geojson
✅ Saved 1 facilities → kendrapara_dis_h.geojson
✅ Saved 46 facilities → kendrapara_phc.geojson
✅ Saved 227 facilities → kendrapara_sub_cen.geojson
✅ Saved 17 facilities → keonjhar_chc.geojson
✅ Saved 1 facilities → keonjhar_dis_h.geojson
✅ Saved 66 facilities → keonjhar_phc.geojson
✅ Saved 2 facilities → keonjhar_s_t_h.geojson


2025-05-20 01:18:05,848 - INFO - Created 351 records
2025-05-20 01:18:05,878 - INFO - Created 13 records
2025-05-20 01:18:05,893 - INFO - Created 2 records
2025-05-20 01:18:05,922 - INFO - Created 65 records
2025-05-20 01:18:05,969 - INFO - Created 202 records
2025-05-20 01:18:05,988 - INFO - Created 16 records
2025-05-20 01:18:06,003 - INFO - Created 1 records
2025-05-20 01:18:06,027 - INFO - Created 48 records
2025-05-20 01:18:06,039 - INFO - Created 1 records


✅ Saved 351 facilities → keonjhar_sub_cen.geojson
✅ Saved 13 facilities → khordha_chc.geojson
✅ Saved 2 facilities → khordha_dis_h.geojson
✅ Saved 65 facilities → khordha_phc.geojson
✅ Saved 202 facilities → khordha_sub_cen.geojson
✅ Saved 16 facilities → koraput_chc.geojson
✅ Saved 1 facilities → koraput_dis_h.geojson
✅ Saved 48 facilities → koraput_phc.geojson
✅ Saved 1 facilities → koraput_s_t_h.geojson


2025-05-20 01:18:06,083 - INFO - Created 307 records
2025-05-20 01:18:06,095 - INFO - Created 8 records
2025-05-20 01:18:06,107 - INFO - Created 1 records
2025-05-20 01:18:06,126 - INFO - Created 28 records
2025-05-20 01:18:06,159 - INFO - Created 158 records
2025-05-20 01:18:06,174 - INFO - Created 28 records
2025-05-20 01:18:06,188 - INFO - Created 1 records
2025-05-20 01:18:06,215 - INFO - Created 86 records
2025-05-20 01:18:06,226 - INFO - Created 3 records


✅ Saved 307 facilities → koraput_sub_cen.geojson
✅ Saved 8 facilities → malkangiri_chc.geojson
✅ Saved 1 facilities → malkangiri_dis_h.geojson
✅ Saved 28 facilities → malkangiri_phc.geojson
✅ Saved 158 facilities → malkangiri_sub_cen.geojson
✅ Saved 28 facilities → mayurbhanj_chc.geojson
✅ Saved 1 facilities → mayurbhanj_dis_h.geojson
✅ Saved 86 facilities → mayurbhanj_phc.geojson
✅ Saved 3 facilities → mayurbhanj_s_t_h.geojson


2025-05-20 01:18:06,288 - INFO - Created 589 records
2025-05-20 01:18:06,301 - INFO - Created 11 records
2025-05-20 01:18:06,317 - INFO - Created 1 records
2025-05-20 01:18:06,349 - INFO - Created 40 records
2025-05-20 01:18:06,401 - INFO - Created 289 records
2025-05-20 01:18:06,426 - INFO - Created 12 records
2025-05-20 01:18:06,443 - INFO - Created 1 records
2025-05-20 01:18:06,463 - INFO - Created 37 records


✅ Saved 589 facilities → mayurbhanj_sub_cen.geojson
✅ Saved 11 facilities → nabarangapur_chc.geojson
✅ Saved 1 facilities → nabarangapur_dis_h.geojson
✅ Saved 40 facilities → nabarangapur_phc.geojson
✅ Saved 289 facilities → nabarangapur_sub_cen.geojson
✅ Saved 12 facilities → nayagarh_chc.geojson
✅ Saved 1 facilities → nayagarh_dis_h.geojson
✅ Saved 37 facilities → nayagarh_phc.geojson


2025-05-20 01:18:06,498 - INFO - Created 166 records
2025-05-20 01:18:06,516 - INFO - Created 6 records
2025-05-20 01:18:06,533 - INFO - Created 1 records
2025-05-20 01:18:06,549 - INFO - Created 17 records
2025-05-20 01:18:06,576 - INFO - Created 95 records
2025-05-20 01:18:06,592 - INFO - Created 16 records
2025-05-20 01:18:06,610 - INFO - Created 1 records
2025-05-20 01:18:06,635 - INFO - Created 50 records
2025-05-20 01:18:06,680 - INFO - Created 241 records
2025-05-20 01:18:06,693 - INFO - Created 11 records


✅ Saved 166 facilities → nayagarh_sub_cen.geojson
✅ Saved 6 facilities → nuapada_chc.geojson
✅ Saved 1 facilities → nuapada_dis_h.geojson
✅ Saved 17 facilities → nuapada_phc.geojson
✅ Saved 95 facilities → nuapada_sub_cen.geojson
✅ Saved 16 facilities → puri_chc.geojson
✅ Saved 1 facilities → puri_dis_h.geojson
✅ Saved 50 facilities → puri_phc.geojson
✅ Saved 241 facilities → puri_sub_cen.geojson
✅ Saved 11 facilities → rayagada_chc.geojson


2025-05-20 01:18:06,709 - INFO - Created 1 records
2025-05-20 01:18:06,737 - INFO - Created 38 records
2025-05-20 01:18:06,801 - INFO - Created 1 records
2025-05-20 01:18:06,899 - INFO - Created 235 records
2025-05-20 01:18:06,916 - INFO - Created 11 records


✅ Saved 1 facilities → rayagada_dis_h.geojson
✅ Saved 38 facilities → rayagada_phc.geojson
✅ Saved 1 facilities → rayagada_s_t_h.geojson
✅ Saved 235 facilities → rayagada_sub_cen.geojson


2025-05-20 01:18:06,932 - INFO - Created 2 records
2025-05-20 01:18:06,955 - INFO - Created 31 records
2025-05-20 01:18:06,972 - INFO - Created 2 records
2025-05-20 01:18:07,003 - INFO - Created 167 records
2025-05-20 01:18:07,015 - INFO - Created 5 records
2025-05-20 01:18:07,028 - INFO - Created 1 records
2025-05-20 01:18:07,047 - INFO - Created 20 records
2025-05-20 01:18:07,059 - INFO - Created 1 records
2025-05-20 01:18:07,091 - INFO - Created 89 records
2025-05-20 01:18:07,105 - INFO - Created 20 records
2025-05-20 01:18:07,118 - INFO - Created 2 records


✅ Saved 11 facilities → sambalpur_chc.geojson
✅ Saved 2 facilities → sambalpur_dis_h.geojson
✅ Saved 31 facilities → sambalpur_phc.geojson
✅ Saved 2 facilities → sambalpur_s_t_h.geojson
✅ Saved 167 facilities → sambalpur_sub_cen.geojson
✅ Saved 5 facilities → sonapur_chc.geojson
✅ Saved 1 facilities → sonapur_dis_h.geojson
✅ Saved 20 facilities → sonapur_phc.geojson
✅ Saved 1 facilities → sonapur_s_t_h.geojson
✅ Saved 89 facilities → sonapur_sub_cen.geojson
✅ Saved 20 facilities → sundargarh_chc.geojson
✅ Saved 2 facilities → sundargarh_dis_h.geojson


2025-05-20 01:18:07,138 - INFO - Created 60 records
2025-05-20 01:18:07,151 - INFO - Created 2 records
2025-05-20 01:18:07,210 - INFO - Created 390 records


✅ Saved 60 facilities → sundargarh_phc.geojson
✅ Saved 2 facilities → sundargarh_s_t_h.geojson
✅ Saved 390 facilities → sundargarh_sub_cen.geojson
