## This notebook builds Upazila-level health indicators and links them to geospatial (administrative boundary) data

In [3]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
import rasterio
from shapely.geometry import Point, Polygon
from functools import reduce
import fhv

# UPAZILA SHAPEFILE
# ------------------------------------------------- #
shape = gpd.read_file('./data/admin_boundary/bgd_admbnda_adm3_bbs_20180410.shp')
# Rows of the Mymensingh division (ADM1_PCODE '45', 378 unions in total) get the
# first two characters of their ADM3_PCODE replaced by '30' (45 -> 30).
in_div45 = shape['ADM1_PCODE'] == '45'
f45t30 = '30' + shape.loc[in_div45, 'ADM3_PCODE'].str[2:]
shape.loc[in_div45, 'ADM3_PCODE'] = f45t30.values
# From here on ADM3_PCODE is an integer key
shape['ADM3_PCODE'] = shape['ADM3_PCODE'].astype(int)
# Optional export of the Upazila list (disabled by default)
if False:
    upazila_list = shape[['ADM2_PCODE','ADM2_EN','ADM3_PCODE','ADM3_EN']].sort_values(by='ADM3_PCODE')
    upazila_list.reset_index(drop=True).to_excel('./data/upazila_list.xlsx')
# ------------------------------------------------- #

# POPULATION DATA
# ------------------------------------------------- #
# BGD Census total population in 2011:  144,043,697
# BGD World Bank population in 2011:    149,273,778
# BGD World Bank population in 2017:    159,670,593
# ------------------------------------------------- #
df = fhv.LoadCensusBBS('./data/census2011/age 5 years group.xls')
# Row totals of the census table (sum over all columns)
popu2011 = df.sum(axis=1)
# Keep each row's 2011 share and rescale it to the 2017 World Bank total;
# astype(int) truncates the fractional part.
share2011 = popu2011 / popu2011.sum()
popu2017 = (share2011 * 159670593).astype(int)

### The main datasets are obtained from the [Directorate General of Health Services (DGHS)](https://dghs.gov.bd/index.php/en/home) - [Health Dashboard](http://103.247.238.81/webportal/pages/index.php).
- (a) The number of hospital beds per 1,000 people in each Upazila
- (b) The number of physicians per 10,000 people in each Upazila

In [4]:
# Metadata table describing the two health indicators produced by this notebook.
# The first six columns are given per indicator; Scale and Source are the same for both.
health_table = pd.DataFrame(
    [
        ['NHOSPITALBED', 'neg', 'Person', 'Health', 'Number of hospital beds per 1,000 people', 'MinMax'],
        ['NPHYSICIAN', 'neg', 'Person', 'Health', 'Number of physicians per 10,000 people', 'MinMax'],
    ],
    columns=['Name', 'Sign', 'Type', 'Domain', 'Description', 'Normalization'],
).assign(Scale='Upazila', Source='DGHS (2020)')
health_table

Unnamed: 0,Name,Sign,Type,Domain,Description,Normalization,Scale,Source
0,NHOSPITALBED,neg,Person,Health,"Number of hospital beds per 1,000 people",MinMax,Upazila,DGHS (2020)
1,NPHYSICIAN,neg,Person,Health,"Number of physicians per 10,000 people",MinMax,Upazila,DGHS (2020)


### (a) NHOSPITALBED: Number of hospital beds per 1,000 people

- The health facility information is obtained from DGHS > [Facility Registry](http://facilityregistry.dghs.gov.bd/index.php) (downloaded on Apr-13-2019, Total Facilities: 23,886).
- The national average is 0.6 (World Bank: 0.8 in 2015).


In [5]:
# The number of hospital beds per 1000 people in each Upazila
# - Tertiary Health Care: Medical College Hospitals, Specialized Institutes, Maternity Hospital Located at different regional level
# - Secondary Health Care: District Hospitals, General Hospitals, 100-250 Bed Hospitals
# - Primary Health Care: Upazila Health Complexes, TB Clinics, Upazila Family Planning Office, MCWCs
# - Daycare facilities : Upazila Sub-centers, UH&FWCs, Community Clinics
df = pd.read_excel('./data/health_facility/health_facility_bgd_aggregated.xlsx')

# Pre-processing: rename Division/District values before linking
df.loc[df['Division'] == 'Mymensingh', 'Division'] = 'Dhaka'        # Mymensingh -> Dhaka
df.loc[df['Division'] == 'Chattogram', 'Division'] = 'Chittagong'   # Chattogram -> Chittagong
df.loc[df['District'] == 'Chattogram', 'District'] = 'Chittagong'   # Chattogram -> Chittagong
# Drop facilities without an Upazila. The explicit .copy() makes the filtered
# frame independent, so the column assignments below never write into a slice
# of the raw table (avoids SettingWithCopyWarning).
df = df[df['Upazila'].notna()].copy()

# - Map District spellings in the facility table to the shapefile's ADM2_EN spellings
df['District'] = df['District'].replace({'Barishal':'Barisal',
                                         'Bogura':'Bogra',
                                         'Brahmanbaria':'Brahamanbaria',
                                         'Chapai Nawabganj':'Nawabganj',
                                         'Cumilla':'Comilla', 'Jashore':'Jessore',
                                         'Kishorganj':'Kishoreganj'})

# Example for checking different names
# df.loc[~df['Upazila'].isin(shape['ADM3_EN']),['District','Upazila']].groupby(['District','Upazila']).sum()

# - Every District name must now exist in the shapefile
assert len(df) == df['District'].isin(shape['ADM2_EN']).sum()
# - Map Upazila spellings in the facility table to the shapefile's ADM3_EN spellings
df['Upazila'] = df['Upazila'].replace({'Banaripara':'Banari Para',
                                       'Barisal Sadar':'Barisal Sadar (Kotwali)',
                                       'Charfession':'Char Fasson',
                                       'Haimchar':'Haim Char',
                                       'Jibannagar':'Jiban Nagar',
                                       'Brahmanpara':'Brahman Para',
                                       "COX'S BAZAR SADAR":"Cox's Bazar Sadar",
                                       'Saghata':'Saghatta','Kotalipara':'Kotali Para',
                                       'Tungipara':'Tungi Para',
                                       'Bagherpara':'Bagher Para',
                                       'Monirampur':'Manirampur',
                                       'Kuliarchar':'Kuliar Char',
                                       'Rajibpur':'Char Rajibpur',
                                       'Shibchar':'Shib Char',
                                       'Maulvibazar Sadar':'Maulvi Bazar Sadar',
                                       'Mujibnagar':'Mujib Nagar',
                                       'Bagatipara':'Bagati Para',
                                       'Chapai Nababganj Sadar':'Nawabganj Sadar',
                                       'Noakhali Sadar':'Noakhali Sadar (Sudharam)',
                                       'Kalapara':'Kala Para',
                                       'Banani':'Darus Salam',
                                       'Bhasan Tek':'Kotwali',
                                       'Bhatara':'Kotwali',
                                       'Mugda Para':'Ramna',
                                       'Uttara  Paschim':'Uttara',
                                       'Uttara  Purba':'Uttar Khan',
                                       'Wari':'Hazaribagh',
                                       'Jessore Sadar':'Kotwali',
                                       'Tarakanda':'Mymensingh Sadar',
                                       'Naldanga':'Natore Sadar',
                                       'Rangabali':'Galachipa',
                                       'Indurkani':'Zianagar',
                                       'Nesarabad':'Nesarabad (Swarupkati)',
                                       'Baliakandi':'Balia Kandi',
                                       'Goalanda':'Goalandaghat',
                                       'Baghaichhari':'Baghai Chhari',
                                       'Belaichhari':'Belai Chhari',
                                       'Juraichhari':'Jurai Chhari',
                                       'Kowkhali':'Kawkhali (Betbunia)',
                                       'Rangamati Sadar  Up':'Rangamati Sadar',
                                       'Mithapukur':'Mitha Pukur',
                                       'Ullahpara':'Ullah Para',
                                       'Beanibazar':'Beani Bazar',
                                       'Golapganj':'Golabganj'})
# Every Upazila name must now exist in the shapefile
assert len(df) == df['Upazila'].isin(shape['ADM3_EN']).sum()

# Bed types
bed_type = ['Approved Bed Number','Revenue Bed Number','Development Bed Number']
# Insert ADM3_PCODE to the DataFrame by matching on the (District, Upazila) name pair
# NOTE(review): the asserts above validate District and Upazila names separately;
# a pair that does not exist together in the shapefile would be silently dropped
# by this inner merge -- confirm the merged row count if the inputs change.
df_left = df[['District','Upazila',*bed_type]]
df_right = shape[['ADM2_EN','ADM3_EN','ADM3_PCODE']]
upazila_bed = df_left.merge(df_right, how='inner', left_on=['District','Upazila'], right_on=['ADM2_EN','ADM3_EN'])
# Upazila beds ['Approved Bed Number','Revenue Bed Number','Development Bed Number']
# GroupBy.sum() already ignores NaN; the former skipna=True keyword is not part of
# its signature on pandas 1.x/2.x and raised a TypeError there.
upazila_bed = upazila_bed[['ADM3_PCODE',*bed_type]].groupby('ADM3_PCODE').sum()
# Upazila beds total (sum across the three bed types)
upazila_bed = upazila_bed.sum(1)    # Total beds: 95,051

### The Bakalia Upazila (ADM3_PCODE: 201510) does not have any health facilities 
### even though its population is over 200,000 (checked from the website)

# The number of hospital beds per 1000 people
# Upazilas without any matched facility get 0 beds via fillna(0)
nbed = pd.concat([popu2017,upazila_bed],axis=1).fillna(0)
nbed.columns = ['popu', 'nbed']
nbed_per_popu = nbed['nbed'] / nbed['popu'] * 1000

# National average: 0.595
# Columns are addressed by label; integer keys on a label-indexed Series are deprecated.
nbed1000 = nbed['nbed'].sum() / nbed['popu'].sum() * 1000

### (b) NPHYSICIAN: Number of physicians per 10,000 people
- The physician data is obtained from the [Directorate General of Health Services (DGHS)](https://dghs.gov.bd/index.php/en/home) - [Health Dashboard](http://103.247.238.81/webportal/pages/index.php).
- The original data was in Tableau format and was extracted to CSV (Excel) format (downloaded on Apr-13-2019, Total Physicians: 9,313).
- The national average is 0.58 (World Bank: 0.53 in 2017).

In [6]:
# Load the physician table once; `temp` keeps the untouched original and
# `df` is a working copy.
hrm_path = './data/health_physician/hrm.xlsx'
temp = pd.read_excel(hrm_path)
df = temp.copy()
# A spatial join would be much more efficient, but the physician coordinates are
# not reliable: many points fall outside the national boundary.
# The abandoned attempt is kept below for reference.
# -------- #
# # Source: https://gis.stackexchange.com/a/165413/161718
# point = gpd.GeoDataFrame(df['Number of Records'], geometry=gpd.points_from_xy(df.Longitude, df.Latitude), crs = {'init' :'epsg:4326'})
# shape_sub = shape[['ADM3_PCODE','geometry']]
# pointInPolys = sjoin(point, shape_sub, how='left', op='within')
# pointInPolys
# -------- #

In [7]:
# Keep only the administrative name columns and give them short names
df = temp[['Division Name','District Name','Upazila Name']].rename(
    columns={'Division Name':'Division', 'District Name':'District', 'Upazila Name':'Upazila'})

# Pre-processing: drop rows without an Upazila
df = df[df['Upazila'].notna()].reset_index(drop=True)

# - Map District spellings in the physician table to the shapefile's ADM2_EN spellings
df['District'] = df['District'].replace({'Kishorgonj':'Kishoreganj',
                                         'Brahmanbaria':'Brahamanbaria',
                                         'Chapai Nawabganj':'Nawabganj'})
# Every District name must now exist in the shapefile
assert len(df) == df['District'].isin(shape['ADM2_EN']).sum()

# - Map Upazila spellings in the physician table to the shapefile's ADM3_EN spellings
# Example for checking different names 
# df.loc[~df['Upazila'].isin(shape['ADM3_EN']),['Division','District','Upazila']].groupby(['District','Upazila']).count().index
df['Upazila'] = df['Upazila'].replace({'Barisal Sadar (kotwali)':'Barisal Sadar (Kotwali)',
                                       'Daulat Khan':'Daulatkhan',
                                       "COX'S BAZAR SADAR":"Cox's Bazar Sadar",
                                       'Banani':'Darus Salam',
                                       'Saghata':'Saghatta',
                                       'Kotalipara':'Kotali Para',
                                       'Tungipara':'Tungi Para',
                                       'Sarishabari Upazila':'Sarishabari',
                                       'Jessore Sadar':'Kotwali',
                                       'Monirampur':'Manirampur',
                                       'Shibchar':'Shib Char',
                                       'Maulvibazar Sadar':'Maulvi Bazar Sadar',
                                       'Tarakanda':'Mymensingh Sadar',
                                       'Bagatipara':'Bagati Para',
                                       'Chapai Nababganj Sadar':'Nawabganj Sadar',
                                       'Domar Upazila':'Domar',
                                       'Saidpur Upazila':'Saidpur',
                                       'Noakhali Sadar':'Noakhali Sadar (Sudharam)',
                                       'Kalapara':'Kala Para',
                                       'Indurkani':'Zianagar',
                                       'Nesarabad (swarupkati)':'Nesarabad (Swarupkati)',
                                       'Baliakandi':'Balia Kandi',
                                       'Goalanda':'Goalandaghat',
                                       'Baghaichhari':'Baghai Chhari',
                                       'Belai Chhari  Upazi':'Belai Chhari',
                                       'Jurai Chhari Upazil':'Jurai Chhari',
                                       'Kaptai  Upazila':'Kaptai',
                                       'Kawkhali (betbunia)':'Kawkhali (Betbunia)',
                                       'Langadu  Upazila':'Langadu',
                                       'Naniarchar  Upazila':'Naniarchar',
                                       'Rangamati Sadar  Up':'Rangamati Sadar',
                                       'Golapganj':'Golabganj'})
# Every Upazila name must now exist in the shapefile
assert len(df) == df['Upazila'].isin(shape['ADM3_EN']).sum()

# Insert ADM3_PCODE to the DataFrame by matching on the (District, Upazila) name pair
# NOTE(review): names are validated separately above; a (District, Upazila) pair
# that does not exist together in the shapefile is silently dropped by the inner merge.
df_left = df[['District','Upazila']]
df_right = shape[['ADM2_EN','ADM3_EN','ADM3_PCODE']]
upazila_phys = df_left.merge(df_right, how='inner', left_on=['District','Upazila'], right_on=['ADM2_EN','ADM3_EN'])
# Count the merged rows per Upazila code
upazila_phys = upazila_phys[['ADM2_EN','ADM3_PCODE']].groupby('ADM3_PCODE').count()

# The number of physicians per 10000 people
# Upazilas without any matched row get 0 via fillna(0)
nphys = pd.concat([popu2017,upazila_phys],axis=1).fillna(0)
nphys.columns = ['popu', 'nphys']
# Scale by 10,000 to match the indicator definition and the national average
# below (the previous factor of 1000 understated every Upazila value tenfold).
nphys_per_popu = nphys['nphys'] / nphys['popu'] * 10000

# National average: 0.583
# Columns are addressed by label; integer keys on a label-indexed Series are deprecated.
nphys10000 = nphys['nphys'].sum() / nphys['popu'].sum() * 10000

### Save the data

In [13]:
# Combine both indicators column-wise into a single table
health = pd.concat([nbed_per_popu, nphys_per_popu],axis=1)
health.columns = ['NHOSPITALBED', 'NPHYSICIAN']
# The combined table must be aligned row-for-row with the population index
assert all(health.index == popu2017.index)
health.index.name = 'ADM3_PCODE'

# Save data
# `key` is passed by keyword: positional use is deprecated in pandas 2.x and
# `to_hdf` accepts only `path_or_buf` positionally from pandas 3.0.
if True:
    fn = './data/health.hdf'
    health.to_hdf(fn, key='data')
    print('%s is saved.' % fn)
    fn = './data/health_table.hdf'
    health_table.to_hdf(fn, key='table')
    print('%s is saved.' % fn)

./data/health.hdf is saved.
./data/health_table.hdf is saved.
