## This notebook loads post-flood disaster damage/loss/health data 

In [89]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
import rasterio
from shapely.geometry import Point, Polygon
from functools import reduce
import fhv

# UPAZILA SHAPEFILE
# ------------------------------------------------- #
# Administrative level-3 (upazila) boundaries; P-codes are read as strings.
shape = gpd.read_file('./data/admin_boundary/bgd_admbnda_adm3_bbs_20180410.shp')

# Rows of the Mymensingh division (ADM1_PCODE '45') get the leading two
# characters of their ADM3/ADM2 codes replaced by '30' (45 -> 30); each code
# column is then cast to integer.
in_mymensingh = shape['ADM1_PCODE'] == '45'
for code_col in ('ADM3_PCODE', 'ADM2_PCODE'):
    f45t30 = '30' + shape.loc[in_mymensingh, code_col].str[2:]
    shape.loc[in_mymensingh, code_col] = f45t30.values
    shape[code_col] = shape[code_col].astype(int)

# District (ADM2) name <-> code lookup table, one row per unique pair.
ADM2 = shape[['ADM2_EN','ADM2_PCODE']].drop_duplicates().astype({'ADM2_PCODE': int})

# Optional one-off export of the upazila list (disabled).
if False:
    shape[['ADM2_PCODE','ADM2_EN','ADM3_PCODE','ADM3_EN']].sort_values(
        by='ADM3_PCODE').reset_index(drop=True).to_excel('./data/upazila_list.xlsx')
# ------------------------------------------------- #

# POPULATION DATA
# ------------------------------------------------- #
# BGD Census total population in 2011:  144,043,697
# BGD World Bank population in 2011:    149,273,778
# BGD World Bank population in 2017:    159,670,593
# ------------------------------------------------- #
POPU2017_TOTAL = 159670593  # World Bank 2017 total listed above

df = fhv.LoadCensusBBS('./data/census2011/age 5 years group.xls')
# Row-wise total over all columns of the census table.
popu2011 = df.sum(axis=1)
# Scale each row's 2011 share of the total to the 2017 national total.
popu2017 = (popu2011/popu2011.sum()*POPU2017_TOTAL).astype(int)

# Aggregate to district level: dividing the index code by 100 and truncating
# drops its last two digits.
adm2_codes = (popu2017.index / 100).astype(int)
popu2017_adm2 = popu2017.groupby(adm2_codes).sum()
popu2017_adm2.index.name = 'ADM2_PCODE'
popu2017_adm2.name = 'Population'

### Post-flood disaster damage and loss data
The impacts (damage and loss) of the August 2017 flood are obtained from Shelter Cluster DDM, MoDMR, NIRAPAD, etc.
- [Bangladesh Monsoon Floods 2017, data table on Sep-3](https://www.sheltercluster.org/bangladesh-monsoon-floods-2017/documents/assessment-flood-damage-data-government-03092017)
- [Bangladesh Monsoon Floods 2017, data table on Aug-30, from NIRAPAD Monthly Hazard Incident Report](https://www.nirapad.org.bd/home/resources/monthlyHazard)
- [72 hours Rapid Assessment Report NAWG V1](https://www.sheltercluster.org/bangladesh-monsoon-floods-2017/documents/nawg-72-hours-rapid-assessment-report-v1)

Here we use the following variables to represent the flood impacts (damage and loss):
- Distress: Percent of affected population, Percent of displaced population, Number of deaths
- Damage: Number of damaged houses, Length of damaged roads (km), Area of damaged crop land (Hect)
- Disruption: Number of affected educational institutions, Number of damaged tube wells

In [247]:
# Catalogue of the damage/loss variables: short name, domain, human-readable
# description and the normalization method assigned to each variable.
damage_columns = ['Name','Domain','Description','Normalization']
damage_rows = [
    # Distress
    ('PAFFCPOPU','Distress','Percent of affected population','MinMax'),
    ('PDISPPOPU','Distress','Percent of displaced population','MinMax'),
    ('NDEATH','Distress','Number of death','MinMax'),
    # Damage
    ('NDAMGHOUS','Damage','Number of damaged houses','Quantile'),
    ('DAMGROAD','Damage','Damaged roads (Km)','Quantile'),
    ('DAMGCLAND','Damage','Damaged crop land (Hect)','Quantile'),
    # Disruption
    ('NAFFCINST','Disruption','Number of affected educational institutions','Quantile'),
    ('NDAMGTUBE','Disruption','Number of damaged tubewell','Quantile'),
]
# Every variable in this table is recorded at the district scale.
damage_table = pd.DataFrame(damage_rows, columns=damage_columns).assign(Scale='District')
damage_table

Unnamed: 0,Name,Domain,Description,Normalization,Scale
0,PAFFCPOPU,Distress,Percent of affected population,MinMax,District
1,PDISPPOPU,Distress,Percent of displaced population,MinMax,District
2,NDEATH,Distress,Number of death,MinMax,District
3,NDAMGHOUS,Damage,Number of damaged houses,Quantile,District
4,DAMGROAD,Damage,Damaged roads (Km),Quantile,District
5,DAMGCLAND,Damage,Damaged crop land (Hect),Quantile,District
6,NAFFCINST,Disruption,Number of affected educational institutions,Quantile,District
7,NDAMGTUBE,Disruption,Number of damaged tubewell,Quantile,District


In [250]:
# Build the district-level `damage` DataFrame (indexed by ADM2_PCODE) from two
# sheets of damagedata_DDM.xlsx, attach district codes via the ADM2 lookup and
# express displaced people as a percentage of the 2017 district population.

# DDM and NDRCC published in Sep-03-2017
# - Skip the first row and the footer row, drop the 'SL' column, and
#   treat missing cells as 0.
df = pd.read_excel('./data/disaster_records/damagedata_DDM.xlsx', 
                   sheet_name='DDM, Sep 3',skiprows=1,skipfooter=1).drop('SL', axis=1).fillna(0)
# - Keep only the columns used below (header strings must match the sheet
#   exactly, including the repeated spaces and the 'affecetd' spelling)
df = df[['Name of affected Districts','No of Total  affecetd Families','No of Total  damaged Houses','No of Total   damaged Crops land (Hect)','No of Death People','No of Damaged Water point (Tube well)']]
df = df.rename(columns={'Name of affected Districts': 'ADM2_EN',
                       'No of Total  affecetd Families': 'Affected families',
                       'No of Total  damaged Houses': 'Affected houses',
                       'No of Total   damaged Crops land (Hect)': 'Affected crops land (Hect)',
                       'No of Death People': 'Death',
                       'No of Damaged Water point (Tube well)': 'Damaged tube well'})
# - Change the district names to be consistent with shapefile
df['ADM2_EN'] = df['ADM2_EN'].replace({'Rajshahi Dist': 'Rajshahi',
                                       'Moulvibazar': 'Maulvibazar',
                                       'Sunamjanj': 'Sunamganj',
                                       'Netrokona': 'Netrakona'})
# - Merge with ADM2 (Name and Code) just like join
#   NOTE: the inner join silently drops any row whose district name is not
#   present in ADM2 (e.g. a spelling not covered by the mapping above).
df = pd.merge(df,ADM2,how='inner',left_on='ADM2_EN',right_on='ADM2_EN').drop('ADM2_EN',axis=1)
# - Positional reorder: ADM2_PCODE (last column, appended by the merge) first,
#   followed by the five data columns in their existing order.
df0903 = df[df.columns[[-1,0,1,2,3,4]]]
# We will remove the Rajshahi district (ADM2_PCODE: 5081)
df0903 = df0903.loc[df0903['ADM2_PCODE'] != 5081].reset_index(drop=True)


# DDM and NDRCC published in Aug-30-2017
# - Only the footer row is skipped for this sheet; missing cells become 0.
df = pd.read_excel('./data/disaster_records/damagedata_DDM.xlsx', 
                   sheet_name='DDM, Aug 30 (modified)',skiprows=0,skipfooter=1).fillna(0)
# - 'Affected population' is already a percentage according to its source header.
df = df.rename(columns={'Affected Districts': 'ADM2_EN',
                        'Affected People ( %. of total population)':'Affected population',
                        'No. of Damaged House':'Damaged houses',
                        'Affected Crops land (Hec.)':'Affected crops land (Hect)',
                        'No. of Death':'Death',
                        'No. of Displaced':'Displaced',
                        'No. of Affected Tube well':'Damaged tube well'})
# - Change the district names to be consistent with shapefile
df['ADM2_EN'] = df['ADM2_EN'].replace({'Brahmanbaria': 'Brahamanbaria',
                                       'Chadpur': 'Chandpur',
                                       'Moulvibazar': 'Maulvibazar',
                                       'Munsiganj': 'Munshiganj',
                                       'Netrokona': 'Netrakona',
                                       'Panchaghar': 'Panchagarh'})
# - Merge with ADM2 (Name and Code) just like join
#   (inner join: rows with unmatched district names are dropped)
df = pd.merge(df,ADM2,how='inner',left_on='ADM2_EN',right_on='ADM2_EN').drop('ADM2_EN',axis=1)
# - Positional selection: ADM2_PCODE (last column) followed by columns 3..13.
#   NOTE(review): this depends on the exact column layout of the sheet, which
#   is not visible here -- confirm the indices if the workbook changes.
df0830 = df[df.columns[[-1,3,4,5,6,7,8,9,10,11,12,13]]]

# Merge to single DataFrame
# - 'Affected Institution' and 'Affected Road (km)' are not renamed above, so
#   they are presumably the sheet's own headers -- TODO confirm.
temp1 = df0830[['ADM2_PCODE', 'Affected population', 'Affected Institution', 'Affected Road (km)', 'Displaced']]
temp2 = df0903[['ADM2_PCODE', 'Affected houses', 'Affected crops land (Hect)', 'Death', 'Damaged tube well']]
# - Outer join keeps districts that appear in only one of the two reports
#   (their missing values are NaN at this point).
damage = pd.merge(temp1, temp2, how='outer', left_on='ADM2_PCODE', right_on='ADM2_PCODE')

# Merge with the 2017 district population and convert the displaced count
# into a percentage of that population
damage = pd.merge(damage,popu2017_adm2,how='inner',left_on='ADM2_PCODE', right_on='ADM2_PCODE')
damage['Displaced'] = damage['Displaced']/damage['Population']*100
damage = damage.drop('Population', axis=1).set_index('ADM2_PCODE')

# Reorder and Rename DataFrame
damage = damage.rename(columns={'Affected population':'PAFFCPOPU',
                                'Affected Institution':'NAFFCINST',
                                'Affected Road (km)':'DAMGROAD',
                                'Displaced':'PDISPPOPU',
                                'Affected houses':'NDAMGHOUS',
                                'Affected crops land (Hect)':'DAMGCLAND',
                                'Death':'NDEATH',
                                'Damaged tube well':'NDAMGTUBE'})
# - Column order follows the variable order listed in damage_table
damage = damage[damage_table.Name]

Unnamed: 0_level_0,PAFFCPOPU,PDISPPOPU,NDEATH,NDAMGHOUS,DAMGROAD,DAMGCLAND,NAFFCINST,NDAMGTUBE
ADM2_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5010,10.77,0.515356,0,9602,325.0,39170,124,2124
2012,0.12,0.0,0,73,308.0,1180,6,0
2019,2.35,0.0,2,284,880.0,3175,0,0
2013,0.23,0.006124,0,1696,614.0,57,30,164
5527,4.73,0.0,30,47247,1996.0,121170,500,2163


### Post-flood health data
The post-flood health data is obtained from the [Directorate General of Health Services (DGHS)](https://dghs.gov.bd/index.php/en/home) - [Health Dashboard](http://103.247.238.81/webportal/pages/index.php).<br>
In the dashboard, the DGHS published a page in Tableau format on the public health situation during and after the 2017 (July-August) flood.
- After removing the duplicate rows, the numbers are similar to those on the [dashboards](http://103.247.238.81/webportal/pages/flood_affected.php).
- There are some differences because the data cover different periods.
 
Here we use the following variables to represent the impacts on public health:
- Injuries Trauma Affected
- Diarrhoea Affected
- RTI, Drowning, snake bite, injury, eye disease, skin disease, and other cases

In [253]:
# Catalogue of the public-health variables: short name, domain, human-readable
# description and the normalization method assigned to each variable.
phealth_columns = ['Name','Domain','Description','Normalization']
phealth_rows = [
    ('NTRAUMA','Health','Number of people with trauma from injuries','Quantile'),
    ('NDIARRHEA','Health','Number of diarrhea cases','Quantile'),
    ('NODIEASE','Health','Number of other diseases','Quantile'),
]
# Every variable in this table is recorded at the district scale.
phealth_table = pd.DataFrame(phealth_rows, columns=phealth_columns).assign(Scale='District')
phealth_table

Unnamed: 0,Name,Domain,Description,Normalization,Scale
0,NTRAUMA,Health,Number of people with trauma from injuries,Quantile,District
1,NDIARRHEA,Health,Number of diarrhea cases,Quantile,District
2,NODIEASE,Health,Number of other diseases,Quantile,District


In [256]:
# Build the district-level `phealth` DataFrame (indexed by ADM2_PCODE) with
# trauma counts from the upazila-scale records (1) and diarrhea / other-disease
# counts from the district-scale records (2).

# (1) Health records from Jul-01-2017 to Aug-15-2017 (Upazila scale)
cols = ['Division Code', 'Division Name',
        'District Code', 'District Name',
        'Upazila Code', 'Upazila Name',
        'Diarrhoea Affected', 'Diarrhoea Death',
        'Drowning Affected', 'Drowning Death',
        'Injuries Trauma Affected','Injuries Trauma Death',
        'Skin Disease Affected',
        'Snakebite Affected','Snakebite Death',
        'Is This Upazilla Currently Flood Affected',
#         'Latitude', 'Longitude',
        'Period']
# - Read only the listed columns; the trailing [cols] puts them in list order
df = pd.read_excel('./data/health_impact_2017Flood/dhis2_flood_affected (dbmis).xlsx',usecols=cols)[cols]
# - Convert Mymensingh (45) to Dhaka (30)
adm1_pcode = df['Division Code'].copy(); adm1_pcode.loc[adm1_pcode == 45] = 30
# - Assign ADM2_PCODE and ADM3_PCODE
#   ADM2 = division*100 + district; ADM3 = division*10000 + district*100 + upazila.
#   Each assert checks that every generated code exists in the shapefile.
df['ADM2_PCODE'] = adm1_pcode*10**2 + df['District Code']
assert np.isin(df['ADM2_PCODE'].unique(), shape.ADM2_PCODE).sum() == len(df['ADM2_PCODE'].unique())
df['ADM3_PCODE'] = adm1_pcode*10**4 + df['District Code']*10**2 + df['Upazila Code']
assert np.isin(df['ADM3_PCODE'].unique(), shape.ADM3_PCODE).sum() == len(df['ADM3_PCODE'].unique())
# - Reorder the DataFrame
#   'Period' is parsed as a YYYYMMDD date; cols[6:-1] spans 'Diarrhoea Affected'
#   through 'Is This Upazilla Currently Flood Affected' (everything but 'Period').
df['Date'] = pd.DatetimeIndex(pd.to_datetime(df['Period'],format='%Y%m%d'))
df = df[['ADM2_PCODE','Date',*cols[6:-1]]]
# - Remove duplicate rows
#   NOTE(review): the upazila identifiers were dropped on the previous line, so
#   rows from different upazilas of the same district and date with identical
#   values are also treated as duplicates here -- confirm this is intended.
df = df.drop_duplicates()
# - Group by ADM2_PCODE
health_trauma = df.groupby('ADM2_PCODE')['Injuries Trauma Affected'].sum()
health_trauma.name = 'Trauma'


# (2) Health data from Jul-23-2017 to Aug-26-2017 (District scale)
df = pd.read_excel('./data/health_impact_2017Flood/controlroom_flood (dbmis).xlsx')
# - Keep 'District' plus the columns at positions 6 .. second-to-last.
#   NOTE(review): positional selection depends on the sheet layout, which is
#   not visible here -- confirm the indices if the workbook changes.
df = df[['District',*df.columns[6:-1]]]
# - Sum all records of each district
df = df.groupby('District').sum().reset_index()
df = df.rename(columns={'District':'ADM2_EN'})
# - Change the district name to be consistent with shapefile
df['ADM2_EN'] = df['ADM2_EN'].replace({'Brahmanbaria': 'Brahamanbaria'})
# - Attach district codes (inner join: unmatched district names are dropped)
df = pd.merge(df,ADM2,how='inner',left_on='ADM2_EN',right_on='ADM2_EN').drop('ADM2_EN',axis=1)
df = df.set_index('ADM2_PCODE')
# - Diarrhea and Sum of other disease cases
#   NOTE(review): the list below also contains three death-count columns
#   (eye disease, injury, snake bite), so 'Other' sums cases and deaths
#   together -- confirm this is intended.
col_other = ['No of RTI cases','No Of Eye Disease cases','No Of Drowning cases',
             'No Of Eye Disease Deaths','No Of Injury cases','No Of Injury Deaths',
             'No Of Snake Bite cases','No Of Snake Bite Deaths','No Of Skin Disease cases','No Of Other cases']
health_diarrhea = df['No Of Diarrhea Cases']
health_diarrhea.name = 'Diarrhea'
health_other = df[col_other].sum(1)
health_other.name = 'Other'
health_disease = pd.merge(health_diarrhea,health_other, how='inner', left_index=True, right_index=True)

# Merged DataFrame
# - Outer join keeps districts present in either source; missing values become 0
phealth = pd.merge(health_trauma,health_disease,how='outer',left_index=True,right_index=True).fillna(0)
# - Drop districts whose three counts sum to zero
phealth = phealth[phealth.sum(1) > 0]

# Reorder and Rename Dataframe
phealth = phealth.rename(columns={'Trauma':'NTRAUMA',
                                  'Diarrhea':'NDIARRHEA',
                                  'Other':'NODIEASE'})

Unnamed: 0_level_0,NTRAUMA,NDIARRHEA,NODIEASE
ADM2_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1004,1855.0,0.0,0.0
2012,251.0,0.0,0.0
2013,0.0,10.0,5.0
2019,365.0,120.0,0.0
3026,0.0,110.0,594.0


### Save the data

In [267]:
# Combine the damage/loss and health variable catalogues into one table, and
# the corresponding district-level data into one DataFrame. The outer join
# keeps districts present in either source; missing values are filled with 0.
impact_table = pd.concat([damage_table, phealth_table]).reset_index(drop=True)
impact = pd.merge(damage, phealth, how='outer',left_index=True, right_index=True).fillna(0)

# Save data
# The original cell saved `health` and `health_table`, names that are never
# defined in this notebook (NameError on a fresh kernel, or stale data from
# leftover kernel state). The objects built above are saved instead.
if True:
    fn = './data/impact.hdf'
    impact.to_hdf(fn, key='data'); print('%s is saved.' % fn)
    # NOTE(review): the output path is kept unchanged so existing readers of
    # this file keep working; consider renaming it to match `impact_table`.
    fn = './data/health_table.hdf'
    impact_table.to_hdf(fn, key='table'); print('%s is saved.' % fn)

Unnamed: 0_level_0,PAFFCPOPU,PDISPPOPU,NDEATH,NDAMGHOUS,DAMGROAD,DAMGCLAND,NAFFCINST,NDAMGTUBE,NTRAUMA,NDIARRHEA,NODIEASE
ADM2_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1855.0,0.0,0.0
2012,0.12,0.0,0.0,73.0,308.0,1180.0,6.0,0.0,251.0,0.0,0.0
2013,0.23,0.006124,0.0,1696.0,614.0,57.0,30.0,164.0,0.0,10.0,5.0
2019,2.35,0.0,2.0,284.0,880.0,3175.0,0.0,0.0,365.0,120.0,0.0
2084,4.9,0.0,0.0,3560.0,20.0,2370.0,0.0,0.0,0.0,0.0,0.0
3026,1.52,0.0,0.0,11481.0,3.0,380.0,144.0,0.0,0.0,110.0,594.0
3029,2.6,0.0,0.0,520.0,3.0,1731.0,47.0,0.0,0.0,84.0,278.0
3039,41.11,0.0,18.0,20257.0,1104.0,50127.0,853.0,6871.0,237.0,1128.0,873.0
3054,0.8,0.000387,0.0,0.0,8.0,362.0,0.0,5.0,0.0,75.0,249.0
3056,9.75,0.0,2.0,51820.0,288.0,16729.0,210.0,5996.0,0.0,256.0,643.0
