In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from tabula import read_pdf

### Bangladesh Population in union scale
- [BBS Geo Location Registry](http://app.dghs.gov.bd/bbscode/)
- Bangladesh population is **144,043,697** (BBS) and **149,273,778** (WorldBank) in 2011, and **159,670,593** (WorldBank) in 2017.
- 'bgd_admbnda_adm4_bbs_20180410.shp' has 5,160 unions
- Municipal Corporations, also known as **paurashava**, are the local governing bodies of the cities and towns in Bangladesh. There are 327 such municipal corporations in eight divisions of Bangladesh.
- A paurashava consists of wards but is represented as a single unit with a PCODE like "XXXXXX99". However, a city corporation has its own wards that are spatially represented in the shapefile.
- Mymensingh (45) division consists of 4 districts: Sherpur (4589), Jamalpur (4539), Mymensingh (4561), Netrakona (4572). These districts are included in Dhaka division (30) in the census data.

### Load BBS Geocode Union PDFs
We need to import these files in order to identify/classify city corporations and paurashavas.

In [25]:
# Column layout shared by every BBS geocode table once it has been cleaned.
GEOCODE_COLUMNS = ['Division','Zila','Upazila','Paurasava','Union','Name']


def _as_single_frame(tables):
    """Return the result of ``read_pdf`` as one DataFrame.

    Depending on the installed tabula-py release, ``read_pdf`` hands back
    either a DataFrame or a list of DataFrames (newer releases return a list
    even with ``multiple_tables=False`` -- see the tabula-py docs). Both
    shapes are accepted here so the cell runs on either.
    """
    if isinstance(tables, pd.DataFrame):
        return tables
    if not tables:
        raise ValueError('read_pdf returned no tables')
    if len(tables) == 1:
        return tables[0]
    return pd.concat(tables, ignore_index=True)


def _label_geocode(raw, drop_last_column=False):
    """Drop fully-empty rows, renumber the index and apply GEOCODE_COLUMNS.

    Parameters
    ----------
    raw : DataFrame
        Table as read from the PDF / Excel file.
    drop_last_column : bool
        When True the right-most column is removed before cleaning
        (used for the Barisal file only).
    """
    if drop_last_column:
        raw = raw.drop(raw.columns[-1], axis=1)
    cleaned = raw.dropna(axis=0, how='all').reset_index(drop=True)
    cleaned.columns = GEOCODE_COLUMNS
    return cleaned


def _read_geocode_pdf(path, skiprows=2, drop_last_column=False):
    """Read every page of a geocode PDF into one cleaned, labelled DataFrame."""
    tables = read_pdf(path, pages='all', multiple_tables=False,
                      pandas_options={'header':0,'skiprows':skiprows})
    return _label_geocode(_as_single_frame(tables), drop_last_column)


# Barisal (10) -- this file needs one more skipped row and has an extra trailing column
df10 = _read_geocode_pdf('./data/union/Geocode Union_Barisal2015.pdf',
                         skiprows=3, drop_last_column=True)
# Chittagong (20) -- provided as an Excel sheet instead of a PDF
chittagong = pd.read_excel('./data/union/Geocode Union_Chittagong2015.xlsx',header=0, skiprows=1)
chittagong = _label_geocode(chittagong)
# Discard rows whose Division cell holds the literal text 'Division' or the value -1
df20 = chittagong.loc[(chittagong['Division'] != 'Division') & (chittagong['Division'] != -1)]
# Dhaka (30)
df30 = _read_geocode_pdf('./data/union/Geocode Union_Dhaka2015.pdf')
# Khulna (40)
df40 = _read_geocode_pdf('./data/union/Geocode Union_Khulna2015.pdf')
# Rajshahi (50)
df50 = _read_geocode_pdf('./data/union/Geocode Union_Rajshahi2015.pdf')
# Rangpur (55)
df55 = _read_geocode_pdf('./data/union/Geocode Union_Rangpur2015.pdf')
# Sylhet (60)
df60 = _read_geocode_pdf('./data/union/Geocode Union_Sylhet2015.pdf')
# Merge all divisions into one table with a fresh 0..n-1 index
df = pd.concat([df10, df20, df30, df40, df50, df55, df60])
geocode = df.reset_index(drop=True)

In [152]:
# Remove unnecessary rows: missing names and the header fragments ('Name', '(6)')
# that ended up in the 'Name' column.
temp = geocode.copy()
remove = temp['Name'].isna() | temp['Name'].isin(['Name', '(6)'])
# Explicit copy so the .loc assignments below write to an independent frame
# rather than a slice of the filtered one.
temp = temp[~remove].copy()
# Split merged codes in Paurasava column: a 5-character entry is split on its
# first space into the Paurasava part and the Union part.
targ = temp['Paurasava'].str.len() == 5
if targ.any():
    # Guarded because splitting an empty selection yields no columns to index.
    new = temp.loc[targ, 'Paurasava'].str.split(' ',n=1,expand = True)
    temp.loc[targ, 'Paurasava'] = new[0]
    temp.loc[targ, 'Union'] = new[1]
temp = temp.reset_index(drop=True)
# Change Dtype (float first then Int64) for every column except the last one.
# The builtin `float` replaces `np.float`, which was removed from NumPy.
code_cols = temp.columns[:-1]
temp[code_cols] = temp[code_cols].astype(float)
temp[code_cols] = temp[code_cols].astype("Int64")
# temp.head(50)
temp.to_excel('./data/union/Geocode_temp.xlsx')

In [147]:
# 
# 



### Load Union Statistics (BBS, 2011)

In [172]:
# Read the extracted union statistics workbook (first row is the header,
# nothing skipped at the top or bottom).
union_stats_path = './data/union/union_stats_extracted.xlsx'
df = pd.read_excel(union_stats_path, header=0, skiprows=0, skipfooter=0)
# Add an all-NaN "Paurashava" column at position 3.
df.insert(loc=3, column="Paurashava", value=np.full(len(df), np.nan))

In [169]:
# Take an independent (deep) copy of `df` and display it.
temp = df.copy(deep=True)
temp

In [170]:
# Display the current contents of `df` (pandas abbreviates long frames when rendering).
df

Unnamed: 0,Division,Zila,Upazila,t,Paurashava,Union,Name,Area (in Acres),Number of Household,all_both,...,1014_all,1014_male,1014_female,18p_all,18p_male,18p_female,lit7p_all,lit7p_male,lit7p_female,Married Women Aged 15-49 Years
0,,,,,,,Bangladesh,,32173630,144043697,...,16646615,8614889,8031726,86897083,42521618,44375465,51.8,54.1,49.4,34242977
1,,,,,,,Bangladesh Rural,,24671590,110480514,...,13036749,6746461,6290288,65340055,31203857,34136198,47.2,49.0,45.4,26446955
2,,,,,,,Bangladesh Urban,,3543504,15810187,...,1772352,921371,850981,9935984,4993022,4942962,64.2,66.8,61.6,3794007
3,,,,,,,Bangladesh Other Urban,,1369028,6094394,...,698282,363116,335166,3708305,1889382,1818923,58.6,61.2,55.9,1431540
4,,,,,,,Cantonment/Restricted Area,,29571,187310,...,15929,7985,7944,137259,98844,38415,89.4,93.2,81.3,30827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10033,60.0,91.0,94.0,,47.0,47.0,Kajalshar union,8969.0,4705,27184,...,3502,1732,1770,13872,6560,7312,47.2,48.7,45.8,5258
10034,60.0,91.0,94.0,,57.0,57.0,Kholachhara union,6823.0,3464,20759,...,2731,1420,1311,10722,5331,5391,33.5,36.8,30.1,3829
10035,60.0,91.0,94.0,,66.0,66.0,Manikpur union,9813.0,5600,32557,...,4588,2527,2061,16718,8054,8664,55.1,57.2,53.0,5980
10036,60.0,91.0,94.0,,76.0,76.0,Sultanpur union,5469.0,4549,26062,...,3344,1634,1710,14420,7198,7222,55.7,59.9,51.5,4754


In [206]:
# Pick one name uniformly at random from the list.
cheongdu = ['Joan', 'Donghoon','Seohyun','Sungmee','Mirang']
# Upper bound comes from the list itself, so adding or removing a name
# cannot leave someone unreachable or index out of range.
idx = np.random.randint(len(cheongdu))
cheongdu[idx]


'Joan'

In [29]:
# Read the admin-level-4 boundary shapefile and list its PCODE / English name
# pairs ordered by PCODE.
shp_fn = './data/admin_boundary/bgd_admbnda_adm4_bbs_20180410.shp'
gdf = gpd.read_file(shp_fn)
union_columns = ['ADM4_PCODE', 'ADM4_EN']
shp_union = gdf[union_columns]
shp_union = shp_union.sort_values(by='ADM4_PCODE')
shp_union = shp_union.reset_index(drop=True)
shp_union

Unnamed: 0,ADM4_PCODE,ADM4_EN
0,10040913,Amtali
1,10040915,Arpangashia
2,10040923,Atharagashia
3,10040939,Barabagi
4,10040943,Chhota Bagi
...,...,...
5155,60919457,Kholachhara
5156,60919466,Manikpur
5157,60919476,Sultanpur
5158,60919485,Zakiganj


In [18]:
# Display the current contents of `df` (pandas abbreviates long frames when rendering).
df

Unnamed: 0,Division,Zila,Upazila,Union,Name,Area (in Acres),Number of Household,all_both,all_male,all_female,...,1014_all,1014_male,1014_female,18p_all,18p_male,18p_female,lit7p_all,lit7p_male,lit7p_female,Married Women Aged 15-49 Years
0,,,,,Bangladesh,,32173630,144043697,72109796,71933901,...,16646615,8614889,8031726,86897083,42521618,44375465,51.8,54.1,49.4,34242977
1,,,,,Bangladesh Rural,,24671590,110480514,54580004,55900510,...,13036749,6746461,6290288,65340055,31203857,34136198,47.2,49.0,45.4,26446955
2,,,,,Bangladesh Urban,,3543504,15810187,8038903,7771284,...,1772352,921371,850981,9935984,4993022,4942962,64.2,66.8,61.6,3794007
3,,,,,Bangladesh Other Urban,,1369028,6094394,3127003,2967391,...,698282,363116,335166,3708305,1889382,1818923,58.6,61.2,55.9,1431540
4,,,,,Cantonment/Restricted Area,,29571,187310,124751,62559,...,15929,7985,7944,137259,98844,38415,89.4,93.2,81.3,30827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10033,60.0,91.0,94.0,47.0,Kajalshar union,8969.0,4705,27184,13274,13910,...,3502,1732,1770,13872,6560,7312,47.2,48.7,45.8,5258
10034,60.0,91.0,94.0,57.0,Kholachhara union,6823.0,3464,20759,10486,10273,...,2731,1420,1311,10722,5331,5391,33.5,36.8,30.1,3829
10035,60.0,91.0,94.0,66.0,Manikpur union,9813.0,5600,32557,16477,16080,...,4588,2527,2061,16718,8054,8664,55.1,57.2,53.0,5980
10036,60.0,91.0,94.0,76.0,Sultanpur union,5469.0,4549,26062,12958,13104,...,3344,1634,1710,14420,7198,7222,55.7,59.9,51.5,4754


### Type of House and Tenancy

In [30]:
# Read the house-type / tenancy sheet: skip the 11 leading rows, drop the
# 8 trailing rows, and use the first column as the index.
house_tenancy_options = dict(skiprows=11, header=0, index_col=0, skipfooter=8)
df = pd.read_excel('./data/union/Type of House and Tenancy.xls',
                   **house_tenancy_options)



In [33]:
# Display the current contents of `df` (pandas abbreviates long frames when rendering).
df

Unnamed: 0,Union,Type of House,Tenancy of House,Unnamed: 4,Unnamed: 5,Unnamed: 6
,Ward no-01,,Own,Rented,Rent-free,Total
,,Pucca,14,3,2,19
,,Semi-pucca,10,1,-,11
,,Kutcha,191,3,2,196
,,Total,215,7,4,226
...,...,...,...,...,...,...
,,Pucca,1859653,1780168,109731,3749552
,,Semi-pucca,4371772,1827686,154614,6354072
,,Kutcha,19556239,890946,692832,21140017
,,Jhupri,703423,100371,126185,929979


In [37]:
# Show only the rows whose 'Union' cell is filled in.
has_union = df['Union'].notna()
df.loc[has_union, 'Union']

NaN     Ward no-01
NaN     Ward no-02
NaN     Ward no-03
NaN     Ward no-04
NaN     Ward no-05
          ...     
NaN      Kajalshar
NaN    Kholachhara
NaN       Manikpur
NaN      Sultanpur
NaN          Total
Name: Union, Length: 7760, dtype: object

### Read Disaster-related Statistics (BBS, 2015)

In [None]:
# TODO: ADD RELIGION



# Disaster-related Statistics (BBS, 2015)
Table 4: Distribution of household by main source of income and received remittance by division and district, 2014
Table 5: Distribution of main source of lighting and cooking fuel by division and district, 2014.
Table 18: Distribution of annual household income from agricultural products by division and district, 2014.
Table 20: Distribution of annual household income from non-agricultural sector by division and district, 2014.
Table 22 : Distribution of annual household income from other source by division and district, 2014.
Table 23: Distribution of Disaster affected times of household by division, 2009-'14.
Table 24: Distribution of affected households by disaster categories by division, 2009-'14.
Table 25: Distribution of affected household and disaster categories by division and district, 2009-'14.
Table 26: Distribution of household number of non working days due to last natural disaster by disaster categories and division, 2009-'14.
Table 27: Distribution of Affected Household got early warning by disaster categories and division, 2009-'14.
Table 28: Distribution of household got early warning by type of media, disaster categories and division, 2009-'14.
Table 29: Distribution of affected area and loss of major crops by type of disaster categories and division, 2009-'14    
Table 30: Distribution of affected area and value of loss and damage of minor crops by type of disaster categories and division, 2009-'14.
Table 31: Distribution of affected area and loss of major crops by division and district, 2009-'14.
Table 32: Distribution of affected area and loss of minor crops by division and district, 2009-'14.
Table 35: Distribution of area and damage value of land by disaster categories and division, 2009-'14.
Table 36: Distribution of area and damage value of land by division and district, 2009-'14.
Table 39: Distribution of population suffering from sickness and injury by sex, disaster categories and division, 2009-'14.
Table 40: Distribution of population suffering from sickness and injury by sex, age group and division, 2009-'14.
Table 41: Distribution of population suffering from sickness and injury by sex, division and district, 2009-'14.  
Table 42: Distribution of number of total children and sick children by division and district, 2009-'14.
Table 48: Distribution of Children did not attend to School Due to Natural Disaster by Division and District, 2009-'14.
Table 51: Distribution of disaster preparedness of household by disaster category and division, 2009-'14.
Table 52: Distribution of disaster preparedness of household by division and district, 2009-'14.
Table 53: Distribution of households having disaster precaution measures according to prior-disaster experience by disaster and division, 2009-'14.
Table 54: Distribution of household preparedness during disaster period until normal situation by disaster and division, 2009-'14.
Table 55: Distribution of household preparedness during disaster period until normal situation by division and district, 2009-'14.
Table 56: Distribution of household taken action (precaution) during disaster period until normal situation by disaster and division, 2009-'14.
Table 57: Distribution of population suffering from disease due to disaster by division and district, 2014.
Table 58: Distribution of population suffering from disease due to natural disaster by sex, age group, division and district, 2014.
Table 59: Distribution of Population Suffering from Disease Due to natural disaster by Type of Disease, Division and District, 2014.    
Table 60: Distribution of household members suffering from disease before disaster by division and district, 2009-'14.
Table 61: Distribution of household members suffering from disease during disaster period by division and district, 2009-'14
Table 62: Distribution of household members suffering from disease post disaster period by division and district, 2009-'14.
Table 63: Distribution of main probable cause of suffering from disease due to disaster by division and district, 2014.    
Table 64: Distribution of source of household drinking water during disaster period by division and district, 2009-'14.
Table 65: Distribution of other use of water (cooking, sewerage, cleanliness etc.) before disaster period by division and district, 2009-'14.
Table 66: Distribution of other use water (cooking, sewerage, cleanliness etc.) during disaster period by division and district, 2009-'14.
Table 67: Distribution of disease status due to insufficient drinking and other use of water supply during/after disaster period by division and district, 2009-'14.    
Table 68: Distribution of cause of main disease due to insufficient drinking and other use of water supply during/after disaster period by division and district, 2009-'14.
Table 71: Distribution of respondent's knowledge and perception about main impact of climate change by division and district, 2014.
Table 73: Distribution of Respondent's knowledge and perception about disaster management by division and district, 2014.
Table 74: Distribution of household received financial/rehabilitation support from government/non-government agency during/post disaster period by division and district, 2009-'14.
Table 75: Distribution of household received financial/rehabilitation support from different organization/ office during/post disaster period by division and district, 2009-'14.
Table 76: Distribution of households received loan from post disaster period by division and district, 2009-'14.
Table A1: Standard error calculation of total income and total damage and loss by division/district.
    
    
    
    