## This notebook imports health data and links it to geospatial data
The main datasets are obtained from the [Directorate General of Health Services (DGHS)](https://dghs.gov.bd/index.php/en/home) - [Health Dashboard](http://103.247.238.81/webportal/pages/index.php).
- (a) The number of hospital beds per 1000 people in each Upazila
- (b) The number of physicians per 1000 people in each Upazila

In [14]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from functools import reduce
import fhv

# UPAZILA SHAPEFILE
# ------------------------------------------------- #
# Read the administrative level-3 boundary shapefile into a GeoDataFrame.
shape = gpd.read_file('./data/admin_boundary/bgd_admbnda_adm3_bbs_20180410.shp')
# Recode ADM3_PCODE for the Mymensingh (45) division (total 378 unions):
# the first two characters of the code are replaced by '30' (45 -> 30).
in_div45 = shape['ADM1_PCODE'] == '45'
f45t30 = '30' + shape.loc[in_div45, 'ADM3_PCODE'].str.slice(start=2)
# Assign by position (.values) so the write does not depend on index alignment.
shape.loc[in_div45, 'ADM3_PCODE'] = f45t30.values
# Codes are kept as integers from here on.
shape['ADM3_PCODE'] = shape['ADM3_PCODE'].astype(int)
# ------------------------------------------------- #

# POPULATION DATA
# ------------------------------------------------- #
# Reference totals for Bangladesh (BGD):
#   Census total population in 2011:      144,043,697
#   World Bank population in 2011:        149,273,778
#   World Bank population in 2017:        159,670,593
# ------------------------------------------------- #
# NOTE(review): fhv.LoadCensusBBS is a project helper; presumably it returns one
# row per administrative unit with one column per 5-year age group -- confirm.
df = fhv.LoadCensusBBS('./data/census2011/age 5 years group.xls')
# Row-wise sum over all columns gives the 2011 population of each row.
popu2011 = df.sum(axis=1)
# Each row's share of the 2011 total, rescaled to the 2017 World Bank total.
# astype(int) truncates, so the rescaled values may sum to slightly less than
# 159,670,593.
popu_share = popu2011 / popu2011.sum()
popu2017 = (popu_share * 159670593).astype(int)

### (a) The number of hospital beds per 1000 people in each Upazila

The health facility information is obtained from DGHS > [Facility Registry](http://facilityregistry.dghs.gov.bd/index.php) (downloaded on Apr-13-2019; total facilities: 23,886).


In [72]:
# The number of hospital beds per 1000 people in each Upazila
# - Tertiary Health Care: Medical College Hospitals, Specialized Institutes, Maternity Hospital Located at different regional level
# - Secondary Health Care: District Hospitals, General Hospitals, 100-250 Bed Hospitals
# - Primary Health Care: Upazila Health Complexes, TB Clinics, Upazila Family Planning Office, MCWCs
# - Daycare facilities : Upazila Sub-centers, UH&FWCs, Community Clinics
df = pd.read_excel('./data/health_facility/health_facility_bgd_aggregated.xlsx')

# Pre-processing for data mining: rename Division/District values so they can
# be matched against the names used in the boundary shapefile.
df.loc[df['Division'] == 'Mymensingh', 'Division'] = 'Dhaka'        # Mymensingh -> Dhaka
df.loc[df['Division'] == 'Chattogram', 'Division'] = 'Chittagong'   # Chattogram -> Chittagong
df.loc[df['District'] == 'Chattogram', 'District'] = 'Chittagong'   # Chattogram -> Chittagong
# Keep only rows with an Upazila. The explicit copy makes the filtered frame
# independent, so the column assignments below do not raise
# SettingWithCopyWarning.
df = df[df['Upazila'].notna()].copy()

# - Change District names to be linked
district_alias = {
    'Barishal': 'Barisal',
    'Bogura': 'Bogra',
    'Brahmanbaria': 'Brahamanbaria',
    'Chapai Nawabganj': 'Nawabganj',
    'Cumilla': 'Comilla',
    'Jashore': 'Jessore',
    'Kishorganj': 'Kishoreganj',
}
df['District'] = df['District'].replace(district_alias)

# Example for checking different names
# df.loc[~df['Upazila'].isin(shape['ADM3_EN']),['District','Upazila']].groupby(['District','Upazila']).sum()

# - Check District names are identical: every row's District must appear in
#   the shapefile's ADM2_EN column. On failure, report the offending names.
unmatched_district = df.loc[~df['District'].isin(shape['ADM2_EN']), 'District'].unique().tolist()
assert not unmatched_district, \
    'District names not found in shapefile ADM2_EN: {}'.format(unmatched_district)
# - Change Upazila names to be linked
# Facility-registry spelling -> spelling used in the shapefile's ADM3_EN column.
# NOTE(review): the replacement is applied by Upazila name only, regardless of
# District -- confirm that no name below occurs in more than one District with
# different intended targets.
upazila_alias = {
    'Banaripara': 'Banari Para',
    'Barisal Sadar': 'Barisal Sadar (Kotwali)',
    'Charfession': 'Char Fasson',
    'Haimchar': 'Haim Char',
    'Jibannagar': 'Jiban Nagar',
    'Brahmanpara': 'Brahman Para',
    "COX'S BAZAR SADAR": "Cox's Bazar Sadar",
    'Saghata': 'Saghatta',
    'Kotalipara': 'Kotali Para',
    'Tungipara': 'Tungi Para',
    'Bagherpara': 'Bagher Para',
    'Monirampur': 'Manirampur',
    'Kuliarchar': 'Kuliar Char',
    'Rajibpur': 'Char Rajibpur',
    'Shibchar': 'Shib Char',
    'Maulvibazar Sadar': 'Maulvi Bazar Sadar',
    'Mujibnagar': 'Mujib Nagar',
    'Bagatipara': 'Bagati Para',
    'Chapai Nababganj Sadar': 'Nawabganj Sadar',
    'Noakhali Sadar': 'Noakhali Sadar (Sudharam)',
    'Kalapara': 'Kala Para',
    'Banani': 'Darus Salam',
    'Bhasan Tek': 'Kotwali',
    'Bhatara': 'Kotwali',
    'Mugda Para': 'Ramna',
    'Uttara  Paschim': 'Uttara',
    'Uttara  Purba': 'Uttar Khan',
    'Wari': 'Hazaribagh',
    'Jessore Sadar': 'Kotwali',
    'Tarakanda': 'Mymensingh Sadar',
    'Naldanga': 'Natore Sadar',
    'Rangabali': 'Galachipa',
    'Indurkani': 'Zianagar',
    'Nesarabad': 'Nesarabad (Swarupkati)',
    'Baliakandi': 'Balia Kandi',
    'Goalanda': 'Goalandaghat',
    'Baghaichhari': 'Baghai Chhari',
    'Belaichhari': 'Belai Chhari',
    'Juraichhari': 'Jurai Chhari',
    'Kowkhali': 'Kawkhali (Betbunia)',
    'Rangamati Sadar  Up': 'Rangamati Sadar',
    'Mithapukur': 'Mitha Pukur',
    'Ullahpara': 'Ullah Para',
    'Beanibazar': 'Beani Bazar',
    'Golapganj': 'Golabganj',
}
df['Upazila'] = df['Upazila'].replace(upazila_alias)
# Check Upazila names are identical: every row's Upazila must now appear in
# the shapefile's ADM3_EN column.
upazila_known = df['Upazila'].isin(shape['ADM3_EN'])
assert upazila_known.all()

# Bed types: the three bed-count columns that are added up per Upazila
bed_type = ['Approved Bed Number','Revenue Bed Number','Development Bed Number']
# Insert ADM3_PCODE to the DataFrame by matching (District, Upazila) against
# the shapefile's (ADM2_EN, ADM3_EN) name pair.
df_left = df[['District','Upazila',*bed_type]]
df_right = shape[['ADM2_EN','ADM3_EN','ADM3_PCODE']]
# NOTE(review): the inner join silently drops any facility whose
# (District, Upazila) *pair* is absent from the shapefile, even when each name
# exists there on its own -- confirm no rows are lost (compare row counts).
upazila_bed = df_left.merge(df_right, how='inner', left_on=['District','Upazila'], right_on=['ADM2_EN','ADM3_EN'])
# Upazila beds ['Approved Bed Number','Revenue Bed Number','Development Bed Number']
# GroupBy.sum ignores NaN by default. The explicit skipna=True keyword is not
# accepted by pandas releases whose GroupBy.sum lacks that parameter (it raises
# TypeError there), so it is omitted; the result is the same.
upazila_bed = upazila_bed[['ADM3_PCODE',*bed_type]].groupby('ADM3_PCODE').sum()
# Upazila beds total: add the three bed types together for each ADM3_PCODE
upazila_bed = upazila_bed.sum(axis=1)    # Total beds: 95,051

### The Bakalia Upazila (ADM3_PCODE: 201510) does not have any health facilities
### even though its population is over 200,000 (checked from the website)

# The number of hospital beds per 1000 people
# Put population and bed totals side by side, aligned on their index; a value
# missing on either side (e.g. an Upazila with no facilities) becomes 0.
nbed = pd.concat([popu2017, upazila_bed], axis=1, keys=['popu', 'nbed']).fillna(0)
# NOTE(review): a row whose 'popu' is 0 yields inf (or NaN when beds are also 0)
# -- confirm every index in upazila_bed also exists in popu2017.
nbed_per_popu = nbed['nbed'].div(nbed['popu']).mul(1000)

### (b) The number of physicians per 1000 people in each Upazila