In [2]:
import geopandas as gpd

# Load your GeoJSON file (change the file name to yours)
gdf = gpd.read_file("nepal.geojson")

# View the first few rows
print(gdf.head())


       osm_id osm_type  completeness   amenity healthcare  \
0  8389701858     node         6.250    clinic              
1  9768423697     node         9.375    clinic              
2  5598345161     node        12.500   dentist              
3  9796614384     node         9.375    clinic              
4  5544172880     node        12.500  pharmacy              

                                  name operator     source speciality  \
0                                                                       
1               Basic Health Centre 07                                  
2  Advance Medicine service And Dental                                  
3          Kolhabi Primary Health Post                                  
4                Shakya Medical Concen           2cpokhara              

  operator_type  ... url addr_housenumber addr_street addr_postcode addr_city  \
0                ...                                                            
1                ...            

In [3]:
print(gdf.columns)


Index(['osm_id', 'osm_type', 'completeness', 'amenity', 'healthcare', 'name',
       'operator', 'source', 'speciality', 'operator_type',
       'operational_status', 'opening_hours', 'beds', 'staff_doctors',
       'staff_nurses', 'health_amenity_type', 'dispensing', 'wheelchair',
       'emergency', 'insurance', 'water_source', 'electricity',
       'is_in_health_area', 'is_in_health_zone', 'url', 'addr_housenumber',
       'addr_street', 'addr_postcode', 'addr_city', 'changeset_id',
       'changeset_version', 'changeset_timestamp', 'uuid', 'geometry'],
      dtype='object')


In [4]:
# Unique cities listed in data
print(gdf['addr_city'].unique())

# Check how many features are from each city
print(gdf['addr_city'].value_counts())

# View some metadata columns
print(gdf[['addr_city', 'changeset_id', 'changeset_version', 'changeset_timestamp']].head())


['' 'Madi Okherbote Bazzar' 'Bhairahawa' 'Nilkantha Municipality-3'
 'Chhirreshwornath' 'kathmandu municipality' 'Sankhar' 'Tansen'
 'Kathmandu' 'kathmandu' 'Birendranagar' 'Pokhara'
 'Modi Rural Municipality' 'राप्ती गाउँपालिका दाङ'
 'Bhimdutta Municipality' 'Pokhara Lekhnath Metropolitan city' 'Rampur'
 'samakhusi' 'Bhajani Municipality' 'Barahathawa,Sarlahi' 'Shivagunj'
 'Taukhel' 'Ghorahi Sub Metropolitan City 10' 'Hetauda' 'khajura-RM'
 'Annapurna Rural Municipality, Ward No. 5' 'Besishahar'
 'Dhangadhi Sub-Metropolitan City' 'Pokhara Lekhanath Metropolitan City'
 'Annapurna Rural Mun,1' 'Bharatpur' 'BABAI RM' 'Annapurna Rural Mun,4'
 'bhatkekopul,Tokha,kathmandu' 'Duduwa Rural Municipality' 'Chainpur'
 'Kathmandu Municipality' 'Gulmi,tamghas' 'Modi Gaunpalika' 'Chandragiri'
 'barahathawa  SARLAHI' 'pokhara lekhnath municipality' 'Harihar Bhawan'
 'Lubhu' 'Charikot' 'Dhangadhi Submetropolitan City-1'
 'Danchhi-5;Kathmandu' 'Lekbeshi' 'Lainchaur, Kathmandu' 'madhyapur Thimi'
 'Bhat

In [6]:
import geopandas as gpd

# Load your GeoJSON
clinics = gpd.read_file("nepal.geojson")

# Check unique values in 'amenity'
unique_amenities = clinics['amenity'].unique()
print(unique_amenities)


['clinic' 'dentist' 'pharmacy' 'hospital' 'doctors' '' 'health_post'
 'healthpost']


In [7]:
# List of Bagmati Province cities/towns
bagmati_cities = [
    # Kathmandu & surroundings
    "Kathmandu",
    "kathmandu",
    "Kathmandu Municipality",
    "kathmandu municipality",
    "Lainchaur, Kathmandu",
    "Tokha / Bhatkekopul, Tokha, Kathmandu",
    "Danchhi-5; Kathmandu",
    "Chapagaun",
    "Samakhusi",
    "Madhyapur Thimi",
    "Madhyapur Thmi",
    "Madhyapur Thimi Municipality",
    "Thecho",
    
    # Lalitpur
    "Lalitpur",
    "Godawari",
    
    # Bhaktapur
    "Lubhu",
    
    # Other towns in Bagmati (Dhading, Makwanpur, Dolakha, etc.)
    "Nilkantha Municipality-3",
    "Galchhi",
    "Charikot",
    "Hetauda",
    "Chandragiri",
    "Baddanda",
    "Kalikasthan"
]


In [8]:
# Filter for Bagmati Province
bagmati_clinics = clinics[clinics['addr_city'].isin(bagmati_cities)]

# Further filter for only 'clinic' and 'hospital' amenities
bagmati_clinics = bagmati_clinics[bagmati_clinics['amenity'].isin(['clinic', 'hospital'])]

# Optional: remove entries with missing geometry
bagmati_clinics = bagmati_clinics[bagmati_clinics.geometry.notnull()]

# Save filtered GeoJSON
bagmati_clinics.to_file("bagmati_clinics_filtered.geojson", driver="GeoJSON")

# Quick check
print(f"Number of clinics/hospitals in Bagmati: {len(bagmati_clinics)}")
print(bagmati_clinics['amenity'].value_counts())

Number of clinics/hospitals in Bagmati: 36
amenity
hospital    27
clinic       9
Name: count, dtype: int64


In [9]:
# Show first few rows
print(bagmati_clinics.head())

# Check columns
print(bagmati_clinics.columns)

# Count unique clinics/hospitals
print(bagmati_clinics['name'].nunique())

# Check distribution of amenities
print(bagmati_clinics['amenity'].value_counts())


          osm_id osm_type  completeness   amenity healthcare  \
42     361656700      way        34.375  hospital   hospital   
72     501793250      way        21.875    clinic              
246    648643548      way        31.250    clinic     clinic   
670  12750345708     node        28.125  hospital   hospital   
778   2236622338     node        18.750  hospital   hospital   

                                      name               operator source  \
42                 Sahid Memorial Hospital                private          
72                 Orchid Home Care Center                private          
246      Marie Stopes Centre - Putalisadak  Sunaulo Parivar Nepal          
670  Siddhasthali Rural Community Hospital                                 
778                         भारोसा अस्पताल                private          

                                            speciality operator_type  ... url  \
42                                                                    ...    

In [10]:
# Define scoring function
def score_clinic(row):
    text = ' '.join(str(v).lower() for v in [row.get('amenity'), row.get('speciality')])
    if any(k in text for k in ['delivery', 'maternity', 'obstetric']):
        return 10
    elif any(k in text for k in ['antenatal', 'prenatal']):
        return 7
    elif 'clinic' in text or 'health' in text:
        return 4
    return 2

# Apply scoring
bagmati_clinics['pregnancy_score'] = bagmati_clinics.apply(score_clinic, axis=1)


In [13]:
bagmati_clinics.to_file("bagmati_clinics_scored.geojson", driver="GeoJSON")


In [14]:
import geopandas as gpd

# Load your filtered/scored Bagmati clinics GeoJSON
bagmati_clinics = gpd.read_file("bagmati_clinics_scored.geojson")

# Show first 5 rows
print(bagmati_clinics.head())


        osm_id osm_type  completeness   amenity healthcare  \
0    361656700      way        34.375  hospital   hospital   
1    501793250      way        21.875    clinic              
2    648643548      way        31.250    clinic     clinic   
3  12750345708     node        28.125  hospital   hospital   
4   2236622338     node        18.750  hospital   hospital   

                                    name               operator source  \
0                Sahid Memorial Hospital                private          
1                Orchid Home Care Center                private          
2      Marie Stopes Centre - Putalisadak  Sunaulo Parivar Nepal          
3  Siddhasthali Rural Community Hospital                                 
4                         भारोसा अस्पताल                private          

                                          speciality operator_type  ...  \
0                                                                   ...   
1                               

In [17]:
columns_to_keep = [
    'name', 'amenity', 'speciality', 'addr_city', 'addr_street', 'addr_housenumber',
    'operator', 'operator_type', 'opening_hours', 'beds', 'staff_doctors', 'staff_nurses',
    'geometry', 'pregnancy_score'  # add this after scoring
]

bagmati_clinics_clean = bagmati_clinics[columns_to_keep]

# Save a clean version
bagmati_clinics_clean.to_file("bagmati_clinics_clean.geojson", driver="GeoJSON")


In [19]:
import geopandas as gpd
# Load your data
clinics = gpd.read_file("bagmati_clinics_clean.geojson")
# Count missing (NaN) values in each column
print(clinics.isnull().sum())


name                0
amenity             0
speciality          0
addr_city           0
addr_street         0
addr_housenumber    0
operator            0
operator_type       0
opening_hours       0
beds                0
staff_doctors       0
staff_nurses        0
pregnancy_score     0
geometry            0
dtype: int64
