# Flood Vulnerability Index (FVI) Data Preparation
This notebook imports socioeconomic and physical data for FVI assessment.

In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from shapely.geometry import Point
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, QuantileTransformer
import matplotlib.pyplot as plt
from functools import reduce
import fhv
pd.options.mode.chained_assignment = None

## Load 2011 BBS Census Data

This section imports [Bangladesh Bureau of Statistics (BBS)](http://203.112.218.65:8008/) 2011 census data downloaded from [BBS-REDATAM](http://203.112.218.69/binbgd/RpWebEngine.exe/Portal).

In [118]:
census_name = [['PAGE5','pos','person','Percent of children under 5 years'],
               ['PAGE65','pos','person','Percent of elder population (65+ years)'],
               ['PFEMALE','pos','person','Percent of females'],
               ['PRURAL','pos','house','Percent of households in rural areas'], 
               ['PWEAKBUILT','pos','house','Percent of households with weak materials'],
               ['PNOWATER','pos','house','Percent of households without public water supply'],
               ['PNOSANITARY','pos','house','ercent of households without sanitary facilities'],
               ['PNOELEC','pos','house','Percent of households without electricity'],
               ['PDISABL','pos','person','Percent of population with disability'],
               ['PLITERACY','pos','person','Percent of population who cannot read and write'],
               ['PETHNIC','pos','person','Percent of ethnic population'],
               ['PRENT','pos','house','Percent of rented houses'],
               ['PNOPRIEDU','pos','person','Percent of population who don''t complete primary education'],
               ['PNOCOLLEGE','pos','person','Percent of population who don''t attend college'],
               ['PNOEMPLOY','pos','person','Percent of population without employment'],
               ['PAGRICULT','pos','person','Percent of population with agricultural jobs']]
census_name = pd.DataFrame(census_name, columns=['Name','Sign','Type','Description'])

In [119]:
# POPULATION DATA
df = fhv.LoadCensusBBS('./data/census2011/age 5 years group.xls')
popu = df.sum(axis=1)
###
# CARIBRATE POPULATION 
###
census = pd.DataFrame(index=df.index)
census.index.name = 'UID'
# - PAGE5: Percent of children under 5 years
census['PAGE5'] = df[df.columns[0]]/df.sum(axis=1)
# - PAGE65: Percent of elderly population (65+ years)
census['PAGE65'] = df[df.columns[14:]].sum(axis=1)/df.sum(axis=1)
# - PFEMALE: Percent of females
df = fhv.LoadCensusBBS('./data/census2011/sex.xls')
census['PFEMALE'] = df['Female']/df.sum(axis=1)


# BUILT ENVIRONMENT
# - PRURAL: Percent of households in rural areas
df = fhv.LoadCensusBBS('./data/census2011/Area of Residence.xls')
census['PRURAL'] = df['Rural']/df.sum(axis=1)
# - PWEAKBUILT: Percent of households with weak materials
# (#house_Kutcha_and_Jhupri / #house_total)
# *Pucca means high quality materials (e.g., cement or RCC)
# *Kutcha & Jhupri means weaker materials (e.g., mud, clay, lime, or thatched)
df = fhv.LoadCensusBBS('./data/census2011/Type of House.xls')
census['PWEAKBUILT'] = df[['Pucca','Semi-pucca']].sum(axis=1)/df.sum(1)
# - PNOWATER: Percent of households without public water supply
# *This includes "Other", excluding "Tap" and "Tube-well" water supply
df = fhv.LoadCensusBBS('./data/census2011/Source of Drinking Water.xls')
census['PNOWATER'] = df[df.columns[-1]]/df.sum(axis=1)
# - PNOSANITARY: Percent of households without sanitary facilities
# *This includes "Non-Sanitary" and "None" and excludes 
# *"Sanitary (with Water Seal)" and "Sanitary (no Water Seal)"
df = fhv.LoadCensusBBS('./data/census2011/Toilet Facilities.xls')
census['PNOSANITARY'] = df[df.columns[2:]].sum(axis=1)/df.sum(axis=1)
# - PNOELEC: Percent household without electricity
df = fhv.LoadCensusBBS('./data/census2011/Electricity Connection.xls')
census['PNOELEC'] = df['No']/df.sum(axis=1)


# SOCIAL
# - PDISABL: Percent of population with disability
# *This includes all kinds of disabilities (Speech, Vision, Hearing, Physical, Mental, Autistic)
df = fhv.LoadCensusBBS('./data/census2011/Disability.xls')
census['PDISABL'] = df[df.columns[1:]].sum(axis=1)/df.sum(axis=1)
# - PLITERACY: Percent of population who cannot read and write
df = fhv.LoadCensusBBS('./data/census2011/Literacy.xls')
census['PLITERACY'] = df['No']/df.sum(axis=1)
# - PETHNIC: Percent of ethnic population 
df = fhv.LoadCensusBBS('./data/census2011/Ethnic Population.xls')
census['PETHNIC'] = df['Yes']/df.sum(axis=1)
# - PRENT: Percent of rented houses
df = fhv.LoadCensusBBS('./data/census2011/Tenancy.xls')
census['PRENT'] = df[['Rented', 'Rent-free']].sum(axis=1)/df.sum(axis=1)


# EDUCATION
# - PNOPRIEDU: Percent of population who dont complete primary education
# *BGD's primary education is ClassI-ClassV
# *https://en.wikipedia.org/wiki/Education_in_Bangladesh#/media/File:BangEduSys.png
df = fhv.LoadCensusBBS('./data/census2011/Educational Attainment.xls')
census['PNOPRIEDU'] = df[df.columns[:5]].sum(axis=1)/df.sum(axis=1)
# - PNOCOLLEGE: Percent of population who don't attend college
census['PNOCOLLEGE'] = df[df.columns[:-4]].sum(axis=1)/df.sum(axis=1)


# EMPLOYMENT
# - PNOEMPLOY: Percent of population without employment
# *This includes "Employed" and "Household Work" and excludes "Looking For Job" and "Do Not Work"
df = fhv.LoadCensusBBS('./data/census2011/Activity Status.xls')
census['PNOEMPLOY'] = df[['Looking For Job','Do Not Work']].sum(axis=1)/df.sum(axis=1)
# - PAGRICULT : Percent of population with agricultural jobs
df = fhv.LoadCensusBBS('./data/census2011/Employment Field.xls')
census['PAGRICULT'] = df['Agriculture']/df.sum(axis=1)



In [120]:
census

Unnamed: 0_level_0,PAGE5,PAGE65,PFEMALE,PRURAL,PWEAKBUILT,PNOWATER,PNOSANITARY,PNOELEC,PDISABL,PLITERACY,PETHNIC,PRENT,PNOPRIEDU,PNOCOLLEGE,PNOEMPLOY,PAGRICULT
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
100409,0.103249,0.040339,0.511939,0.919587,0.053392,0.019316,0.330364,0.783000,0.020299,0.472155,0.004445,0.067313,0.541183,0.962848,0.334860,0.722824
100419,0.095860,0.039666,0.504324,0.908695,0.068895,0.020730,0.166402,0.704202,0.023943,0.388951,0.000000,0.026309,0.421312,0.948729,0.329836,0.601251
100428,0.097443,0.040805,0.508003,0.881363,0.092098,0.050962,0.268048,0.680701,0.020743,0.413562,0.000741,0.097429,0.444761,0.939851,0.345242,0.561402
100447,0.093653,0.044466,0.516130,0.886505,0.071055,0.048994,0.218788,0.648306,0.018080,0.398837,0.000000,0.051930,0.432513,0.942979,0.367495,0.607921
100485,0.098751,0.044111,0.508659,0.828357,0.060392,0.436741,0.223187,0.688012,0.022614,0.395149,0.000023,0.102425,0.425012,0.954199,0.325537,0.638243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609141,0.150547,0.026528,0.499579,0.978997,0.195637,0.542778,0.795591,0.686969,0.011467,0.672957,0.008522,0.055613,0.725874,0.982873,0.403099,0.662260
609153,0.143152,0.030035,0.500637,0.945791,0.282478,0.283740,0.587684,0.593275,0.014548,0.588482,0.011797,0.093185,0.663757,0.974846,0.405466,0.527961
609159,0.137528,0.028306,0.510098,0.898787,0.385117,0.746354,0.581728,0.565562,0.014551,0.564579,0.001018,0.031226,0.626126,0.976171,0.427244,0.631287
609162,0.105126,0.023166,0.473312,0.332392,0.741495,0.046615,0.218020,0.132115,0.010691,0.387067,0.007786,0.579076,0.433749,0.875801,0.391028,0.101994


In [None]:
#%% Load GADM4 raster
fn_adm = os.path.join('land', 'boundary_gadm', 'gadm4.tif')
ds = gdal.Open(fn_adm)
code4 = ds.GetRasterBand(1).ReadAsArray().astype('uint32')
# - Load Value-and-CC4
xl = pd.ExcelFile(os.path.join('land', 'boundary_gadm', 'gadm4.xls'))
df = xl.parse('gadm4.tif.vat')
table = np.array([df.Value, df.CC_4]).T
# - Change code4 map to CC4 values
code4[code4 == code4.max()] = 4294967295
for i in range(len(table)):
    code4[code4 == table[i,0]] = table[i,1]
code3 = np.floor(code4/100)
# - Save new raster
fn = os.path.join('land', 'boundary_gadm', 'gadm4_code.tif')
if not os.path.isfile(fn):
    out_ds = fh.make_raster(ds, fn, code4, gdal.GDT_UInt32, 4294967295)
    del out_ds