In [None]:
import pandas as pd
import numpy as np
import string
import json

In [None]:
## Static Config
# Administrative levels: "column" is the dataset header to look up, "name" is the key the
# generated config stores the matching column label under.
locations = [
    {"column": column, "name": name}
    for column, name in (
        ("District", "level1"),
        ("Municipality/Union", "level2"),
        ("Ward Number", "level3"),
    )
]


# Lower-cased headers of the coordinate columns and of the chart (service ladder) columns.
latlong = ["latitude", "longitude"]
charts = ["water service level", "sanitation service level", "hygiene service ladder"]

# Values attached to each chart column in the generated config, keyed by lower-cased header.
default = {
    "water service level": [
        "Safely Managed", "Basic", "Limited", "Unimproved", "Surface Water",
    ],
    "sanitation service level": [
        "Safely Managed", "Basic", "Limited", "Unimproved", "Open Defecation",
    ],
    "hygiene service ladder": [
        "Safely Managed", "Basic", "Limited", "No Facility",
    ],
}


## Table config
# One entry per indicator table: its display name, the service-level column it belongs to,
# and the indicator columns listed under it. "column" and "indicators" are compared
# case-insensitively with the dataset headers when the config is generated.
# NOTE(review): "onite" below looks like a typo for "onsite", but the string has to match the
# dataset header, so it is kept verbatim -- confirm against the CSV before changing it.
table = [
    {"name": name, "column": column, "indicators": indicators}
    for name, column, indicators in (
        (
            "water indicators",
            "water service level",
            [
                "Main source of drinking water",
                "Main source of water for other purposes",
                "Specific Location of Water Collection",
                "Time to Collect Water",
                "Times in Last Month When Drinking Water Quantity Was Insufficient",
                "Fecal and Chemical Levels Acceptable",
            ],
        ),
        (
            "sanitation indicators",
            "sanitation service level",
            [
                "Type of Toilet Facility",
                "Facility is shared with others outside of the household",
                "Location of sanitation facility",
                "Emptying of on-site sanitation facilities",
                "Disposal of excreta from onite sanitation facilities",
            ],
        ),
        (
            "hygiene indicators",
            "hygiene service ladder",
            [
                "Type of Handwashing Facility Used Most Often",
                "Water Available at Handwashing Facility",
                "Soap Available",
            ],
        ),
    )
]

In [None]:
# Load the household survey export. Malformed rows are skipped with a warning instead of
# aborting the load. `on_bad_lines='warn'` (pandas >= 1.3) replaces the removed
# `error_bad_lines=False`, which raises TypeError on pandas 2.x.
df = pd.read_csv('bd_hh_data.csv', on_bad_lines='warn')

In [None]:
## Rename level 2
def refineLevel2Name(x):
    """Remove the ' Municipality' and ' Union' qualifiers from a level-2 location name.

    Every occurrence of the substrings ' Municipality' and ' Union' (with the leading
    space) is removed; a name without them is returned as-is.

    Non-string values are returned unchanged. This function is applied to the raw column
    before empty cells are replaced with "", so it can receive the NaN pandas uses for a
    missing cell; the previous ``'Municipality' in x`` test raised TypeError on such values.
    """
    if not isinstance(x, str):
        return x
    return x.replace(' Municipality', '').replace(' Union', '')

# Clean every level-2 name in the dataset with refineLevel2Name.
level2_column = 'Municipality/Union'
df[level2_column] = df[level2_column].apply(refineLevel2Name)

In [None]:
## Filter Data by Municipality/Union - Agardari & Paurashava
# df = df[(df['Municipality/Union'] == 'Paurashava') | (df['Municipality/Union'] == 'Agardari')]
# df = df.reset_index(drop=True)

In [None]:
## Delete Column with no name
# Keep only the columns whose header does not start with 'Unnamed'.
unnamed_mask = df.columns.str.startswith('Unnamed')
df = df.loc[:, ~unnamed_mask]

In [None]:
### Fill Empty Integer Values
# Every int/float column except the coordinates gets its gaps filled with 0 and is cast to
# int32; Latitude and Longitude are excluded so they are left as loaded.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
numeric_columns = df.select_dtypes(include=numerics)
numeric_columns = numeric_columns.drop(columns=['Latitude', 'Longitude'])
df_num = numeric_columns.fillna(0.0).astype(np.int32)
df[df_num.columns.tolist()] = df_num

In [None]:
### Replace Empty String Values
# Object-dtype columns: missing cells become empty strings.
df_str = df.select_dtypes(include=['object'])
df_str = df_str.fillna("")
df[df_str.columns.tolist()] = df_str

In [None]:
## Generate Settings
# Spreadsheet-style column labels: 'A'..'Z' followed by 'AA'..'ZZ' (26 + 26*26 = 702 labels).
chars = [*string.ascii_uppercase]
chars_col = [*chars, *(first + second for first in chars for second in chars)]


def keyname(x, y):
    """Return a dict mapping each label in ``x`` to the item of ``y`` at the same position.

    A label that is literally 'data_point_id' is mapped to 'data_point_id' instead of the
    positional item.
    NOTE(review): the labels used in this notebook are letter codes, so that special case
    does not appear to trigger here -- confirm whether it was meant to test ``y`` instead.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Original headers, in dataset order, and how many there are.
column_names = df.columns.tolist()
columns_length = len(column_names)

In [None]:
### Generate configs
# One short label per dataset column, then label -> original header.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

# Chart columns: label, original header and the values listed for that chart in `default`.
chart_config = [
    {'key': label, 'name': header, 'value': default[header.lower()]}
    for label, header in configs.items()
    if header.lower() in charts
]

# Coordinate columns: lower-cased header -> label.
latlong_config = {
    header.lower(): label
    for label, header in configs.items()
    if header.lower() in latlong
}

### Locations
# Level name -> label of the column whose header matches it case-insensitively.
# The last matching column wins; "" is stored when no column matches.
loc_config = {}
for loc in locations:
    wanted_header = loc['column'].lower()
    matching_labels = [label for label in configs if configs[label].lower() == wanted_header]
    loc_config[loc['name'].lower()] = matching_labels[-1] if matching_labels else ""

#### Table
# For each table definition: the label of its service-level column (last match wins, ""
# when absent) and the labels of its indicator columns, listed in dataset column order.
lowered_by_label = {label: header.lower() for label, header in configs.items()}

table_config = []
for tb in table:
    service_header = tb['column'].lower()
    indicator_headers = [x.lower() for x in tb['indicators']]
    service_labels = [
        label for label, header in lowered_by_label.items() if header == service_header
    ]
    indicator_labels = [
        label for label, header in lowered_by_label.items() if header in indicator_headers
    ]
    table_config.append({
        "key": service_labels[-1] if service_labels else "",
        "name": tb['name'],
        "indicators": indicator_labels,
    })

            
# Attach the derived sections next to the label -> header entries.
configs.update(
    charts=chart_config,
    locations=loc_config,
    latlong=latlong_config,
    table=table_config,
)

In [None]:
### Replace Dataset Columns
# Relabel the DataFrame in place with the short labels in `index` ('A', 'B', ...);
# `configs` keeps the label -> original header mapping.
# NOTE(review): `chars_col` holds 26 + 26*26 = 702 labels, so a dataset with more columns
# would leave `index` shorter than the column count -- confirm this cannot happen.
df.columns = index

In [None]:
# Show the assembled config: label -> header entries plus the charts, locations, latlong
# and table sections.
configs

In [None]:
# One dict per row (column label -> value), built from the transposed frame; the last
# line previews the first two rows.
rows_by_index = df.T.to_dict()
data = [rows_by_index[row_label] for row_label in rows_by_index]
data[:2]

In [None]:
### to json
# Write the relabelled rows and the matching config as two JSON files.
data_path = '../resources/js/data/bd_hh_data.json'
config_path = '../resources/js/data/bd_hh_config.json'

df.to_json(data_path, orient="records", indent=2)

with open(config_path, 'w') as config_file:
    config_file.write(json.dumps(configs))