In [2]:
import json

import folium
import h3
import pandas as pd
from shapely.geometry import Polygon

## Load Health Centres Data

In [3]:
# Read the health-centre CSV into a DataFrame and report its (rows, columns) shape.
health_df = pd.read_csv(filepath_or_buffer='TS_health_centres.csv')
print(f"Health centers loaded: {health_df.shape}")

Health centers loaded: (7001, 13)


In [4]:
# Missing-value count for every column of the health-centre table.
missing_per_column = health_df.isna().sum()
print(missing_per_column)

State                    0
District                 0
Taluka                 277
Facility Name            0
Facility Type            0
Latitude                12
longitude               12
Street                 578
Landmark               611
Locality               419
Region Indicator        17
Operational Status       0
Ownership Authority     18
dtype: int64


## Function to get the H3 index at resolution 7

In [5]:
def latlon_to_h3(lat, lon, res=7):
    """Look up the H3 cell index for a latitude/longitude pair.

    Thin wrapper around ``h3.latlng_to_cell`` that fixes the argument order
    (latitude first, then longitude) and supplies a default resolution.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        res: H3 resolution to index at; defaults to 7.

    Returns:
        Whatever ``h3.latlng_to_cell`` returns for the given point and
        resolution (the H3 cell index).
    """
    cell_index = h3.latlng_to_cell(lat, lon, res)
    return cell_index

## Adding h3 index to health centres

In [None]:
# remove rows with invalid coordinates
# `between` evaluates to False for NaN, so rows with missing coordinates are
# excluded by the same mask and no separate dropna is needed.
valid_coords = (
    health_df['Latitude'].between(-90, 90)
    & health_df['longitude'].between(-180, 180)
)
# .copy() makes the filtered frame independent of the original, so adding a
# column below cannot raise SettingWithCopyWarning.
health_df = health_df.loc[valid_coords].copy()

# assign h3 index
# A plain comprehension over the two coordinate columns replaces the row-wise
# DataFrame.apply: it avoids building a Series per row and, unlike
# apply(axis=1), still yields a valid (empty) column when no rows survive.
health_df['h3_index'] = [
    latlon_to_h3(lat, lon)
    for lat, lon in zip(health_df['Latitude'], health_df['longitude'])
]

## Count health centers per h3 cell

In [7]:
# Number of health-centre rows that fall in each H3 cell, as a two-column frame.
centers_per_cell = health_df.groupby('h3_index').size()
health_counts = centers_per_cell.rename('health_centers').reset_index()
print(f"Health centers grouped by H3: {health_counts.shape}")

Health centers grouped by H3: (4612, 2)


## Load the population file

In [None]:
population_file = 'TS_density_all_points.csv'

chunk_size = 10**6  # 1 million rows per chunk
nodata_value = -99999.0  # value the original filter treats as an invalid population
pop_per_h3 = {}  # h3 index -> running sum of 'Value' across all chunks

# The file is streamed in chunks so it never has to fit in memory at once.
for chunk in pd.read_csv(population_file, chunksize=chunk_size):
    # One combined mask: coordinates in range (`between` is False for NaN, so
    # missing coordinates are dropped too), population present and not the
    # invalid-value marker.
    valid_rows = (
        chunk['Latitude'].between(-90, 90)
        & chunk['Longitude'].between(-180, 180)
        & chunk['Value'].notna()
        & (chunk['Value'] != nodata_value)
    )
    chunk = chunk.loc[valid_rows]

    # compute h3 index for each population record
    # A comprehension over the coordinate columns is far cheaper than a
    # row-wise apply on a million rows, and it still works when the chunk has
    # no valid rows (apply(axis=1) returns a DataFrame in that case, which
    # cannot be assigned to a single column).
    h3_cells = [
        latlon_to_h3(lat, lon)
        for lat, lon in zip(chunk['Latitude'], chunk['Longitude'])
    ]
    # assign() returns a new frame, so nothing is written into a filtered slice.
    chunk = chunk.assign(h3_index=h3_cells)

    # aggregate population value per h3 cell
    pop_chunk = chunk.groupby('h3_index')['Value'].sum()
    for h3_idx, val in pop_chunk.items():
        pop_per_h3[h3_idx] = pop_per_h3.get(h3_idx, 0) + val

    print(f"Processed chunk, current total hexagons: {len(pop_per_h3)}")


Processed chunk, current total hexagons: 1721
Processed chunk, current total hexagons: 3362
Processed chunk, current total hexagons: 4991
Processed chunk, current total hexagons: 6589
Processed chunk, current total hexagons: 8198
Processed chunk, current total hexagons: 9778
Processed chunk, current total hexagons: 11381
Processed chunk, current total hexagons: 12967
Processed chunk, current total hexagons: 14550
Processed chunk, current total hexagons: 16127
Processed chunk, current total hexagons: 17728
Processed chunk, current total hexagons: 19319
Processed chunk, current total hexagons: 20965
Processed chunk, current total hexagons: 21657


- convert population dict to dataframe

In [9]:
# Turn the {h3 index: population} mapping into a two-column frame,
# keeping the dictionary's insertion order.
pop_df = pd.DataFrame({
    'h3_index': list(pop_per_h3.keys()),
    'population': list(pop_per_h3.values()),
})

* merge health centres and population dataframes on h3_index

In [10]:
# Left join keeps every populated cell; cells without a matching health-centre
# count come back as NaN and are turned into an integer 0.
df = (
    pop_df
    .merge(health_counts, how='left', on='h3_index')
    .assign(
        health_centers=lambda merged: merged['health_centers'].fillna(0).astype(int)
    )
)

In [None]:
# Area of every H3 cell in km^2, looked up per cell index via h3.cell_area.
df['area_km2'] = df['h3_index'].map(lambda cell: h3.cell_area(cell, unit='km^2'))

## Overall Statistics

In [12]:
# A cell counts as "covered" when at least one health centre falls inside it.
has_center = df['health_centers'] > 0

# NOTE(review): df is built by a left merge onto pop_df, so health centres in
# cells with no population record are not part of this sum - confirm that is
# the intended meaning of "total".
total_health_centers = df['health_centers'].sum()

# Area split: covered cells versus everything else.
total_area = df['area_km2'].sum()
covered_area = df.loc[has_center, 'area_km2'].sum()
not_covered_area = total_area - covered_area

# Population split along the same covered / not-covered line.
pop_covered = df.loc[has_center, 'population'].sum()
pop_not_covered = df['population'].sum() - pop_covered

In [13]:
# Print the coverage summary.
# Population figures are rounded with `.0f` rather than truncated with int(),
# matching the total-population print and the map's stats box; truncating made
# the two printed parts fall one short of the printed total.
print(f"Total health care centers: {total_health_centers:,}")
print(f"Total area covered by health care centers (km2): {covered_area:,.2f}")
print(f"Total area not covered by health care centers (km2): {not_covered_area:,.2f}")
print(f"Total population covered by health care centers: {pop_covered:,.0f}")
print(f"Total population not covered by health care centers: {pop_not_covered:,.0f}")

Total health care centers: 6,945
Total area covered by health care centers (km2): 24,284.02
Total area not covered by health care centers (km2): 90,826.34
Total population covered by health care centers: 19,859,247
Total population not covered by health care centers: 19,907,615


In [14]:
# Quick sanity figures for the per-cell population column.
population = df['population']
zero_population_cells = population.eq(0).sum()

print(f"Total population: {population.sum():,.0f}")
print(f"Average population per h3 cell: {population.mean():,.0f}")
print(f"Cells with zero population: {zero_population_cells}")


Total population: 39,766,863
Average population per h3 cell: 1,836
Cells with zero population: 0


## Folium map

In [None]:
# telangana approx center
map_center = [17.5, 79]
m = folium.Map(location=map_center, zoom_start=8)

def h3_to_polygon(h3_index):
    """Build a shapely Polygon for an H3 cell.

    The (lat, lon) pairs returned by ``h3.cell_to_boundary`` are swapped to
    (lon, lat) before the Polygon is constructed.

    Args:
        h3_index: H3 cell index to convert.

    Returns:
        shapely.geometry.Polygon with vertices in (lon, lat) order.
    """
    boundary = h3.cell_to_boundary(h3_index)
    return Polygon([(lon, lat) for lat, lon in boundary])

# add only green polygons where health centers > 0
# Filter once up front instead of testing every row inside the loop, and use
# itertuples, which is cheaper than iterrows because no Series is built per row.
covered_cells = df[df['health_centers'] > 0]
for cell in covered_cells.itertuples(index=False):
    # The boundary is unpacked as (lat, lon) pairs above, which is the order
    # this cell already passed to folium, so it is handed over directly
    # instead of going through shapely and swapping the axes twice.
    coords = list(h3.cell_to_boundary(cell.h3_index))

    folium.Polygon(
        locations=coords,
        color='green',
        fill=True,
        fill_opacity=0.4,
        weight=1,
        popup=(
            f"Population: {int(cell.population):,}<br>"
            f"Health Centers: {cell.health_centers:,}<br>"
            f"Area (km²): {cell.area_km2:,.2f}"
        )
    ).add_to(m)

# add telangana boundary outline
# GeoJSON is UTF-8 by specification; stating the encoding keeps the read from
# depending on the platform's default locale encoding.
with open("../Boundaries/Telangana/TS_State_Boundary.geojson", "r", encoding="utf-8") as f:
    ts_boundary = json.load(f)

folium.GeoJson(
    ts_boundary,
    name="Telangana Boundary",
    style_function=lambda x: {
        'fillColor': 'none',
        'color': 'black',
        'weight': 2
    }
).add_to(m)

# stats box
# Fixed-position HTML overlay showing the summary figures on top of the map.
stats_html = f"""
<div style="position: fixed; 
            top: 50px; left: 50px; width: 250px; 
            z-index: 9999; font-size: 14px; 
            background-color: white; padding: 10px; 
            border: 2px solid black;">
<b>Total Health Centers:</b> {total_health_centers:,}<br>
<b>Area Covered (km²):</b> {covered_area:,.2f}<br>
<b>Area Not Covered (km²):</b> {not_covered_area:,.2f}<br>
<b>Population Covered:</b> {pop_covered:,.0f}<br>
<b>Population Not Covered:</b> {pop_not_covered:,.0f}
</div>
"""
m.get_root().html.add_child(folium.Element(stats_html))

# save map
m.save('telangana_health_map.html')
m
