In [1]:
import pandas as pd
import numpy as np
import string
import json

In [2]:
## Static Config
# All names below are lower-case; they are compared against the lower-cased
# dataset column names when the config is generated.

# Columns describing administrative areas.
locations = ["woreda", "kebele"]

# Columns holding the coordinates.
latlong = ["latitude", "longitude"]

# Columns that get a chart entry.
charts = ["water service level", "sanitation service level", "hygiene"]

# Category values attached to each chart column (keyed by lower-case column name).
default = {
    "water service level": [
        "Safely Managed",
        "Basic",
        "Limited",
        "Unimproved",
        "Surface Water",
    ],
    "sanitation service level": [
        "Safely Managed",
        "Basic",
        "Limited",
        "Unimproved",
        "Open Defecation",
    ],
    "hygiene": [
        "Safely Managed",
        "Basic",
        "Limited",
        "No Facility",
    ],
}


## Table config
# Each entry names a table, the service-level column it belongs to, and the
# indicator columns listed under it. Indicator names must match the dataset's
# column headers (case-insensitively).
table = [
    dict(
        name="water indicators",
        column="water service level",
        indicators=[
            "Main source of drinking water",
            "Main source of water for other purposes",
            "Specific Location of Water Collection",
            "Time to Collect Water",
            "Times in Last Month When Drinking Water Quantity Was Insufficient",
            "Fecal and Chemical Levels Acceptable",
        ],
    ),
    dict(
        name="sanitation indicators",
        column="sanitation service level",
        indicators=[
            "Type of Toilet Facility",
            "Facility is shared with others outside of the household",
            "Location of sanitation facility",
            "Emptying of on-site sanitation facilities",
            # "onite" (sic): spelled exactly as the dataset's column header.
            "Disposal of excreta from onite sanitation facilities",
        ],
    ),
    dict(
        name="hygiene indicators",
        column="hygiene",
        indicators=[
            "Type of Handwashing Facility Used Most Often",
            "Water Available at Handwashing Facility",
            "Soap Available",
        ],
    ),
]

In [3]:
# Load the household survey export.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` is its replacement: malformed rows are skipped and a
# warning is emitted for each, which is what the old flag did by default.
df = pd.read_csv('ug_hh_data.csv', on_bad_lines='warn')

In [4]:
## Delete Column with no name
# Flag every column whose label starts with "Unnamed", then keep the rest.
unnamed_mask = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask]

In [5]:
### Fill Empty Integer Values
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# Coordinates must stay floating point, so they are excluded from the integer
# cast. They are matched case-insensitively against the `latlong` config
# instead of hard-coded header spellings, and `errors='ignore'` keeps this
# cell from raising KeyError when a coordinate column is missing or was not
# parsed as numeric.
coord_columns = [col for col in df.columns if str(col).lower() in latlong]
df_num = df.select_dtypes(include=numerics).drop(columns=coord_columns, errors='ignore')

# Missing values become 0 and every remaining numeric column is cast to int32.
# NOTE: the cast discards any fractional part.
df_num = df_num.fillna(0.0).astype(np.int32)
df[list(df_num)] = df_num

In [6]:
### Replace Empty String Values
# Missing values in object-dtype columns become empty strings.
object_columns = df.select_dtypes(include=['object'])
df_str = object_columns.fillna("")
df[list(df_str.columns)] = df_str

In [7]:
## Generate Settings
# Spreadsheet-style column labels: "A".."Z" followed by "AA".."ZZ" (702 in total).
chars = [letter for letter in string.ascii_uppercase]
double_letters = [first + second for first in chars for second in chars]
chars_col = chars + double_letters


def keyname(x, y):
    """Map each label in ``x`` to the value at the same position in ``y``.

    A label that is literally ``'data_point_id'`` maps to ``'data_point_id'``
    instead of the corresponding entry of ``y``.

    x: sequence of labels (the keys of the result).
    y: sequence of names, indexed by position; must be at least as long as ``x``.
    Returns a dict in the order of ``x``.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Original column headers, in dataset order, and how many there are.
column_names = list(df.columns)
columns_length = len(column_names)

In [8]:
### Generate configs
# One short label per dataset column, then label -> original column name.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

chart_config = []
loc_config = {}
latlong_config = {}

# Classify each column by its lower-cased name; a column may fall into more
# than one group, so the checks are independent.
for key, column_name in configs.items():
    lowered_name = column_name.lower()
    if lowered_name in charts:
        chart_config.append({'key': key, 'name': column_name, 'value': default[lowered_name]})
    if lowered_name in locations:
        loc_config[lowered_name] = key
    if lowered_name in latlong:
        latlong_config[lowered_name] = key


#### Table
# For every table entry, find the label of its service-level column and the
# labels of its indicator columns (all comparisons are case-insensitive).
table_config = []
for tb in table:
    target_column = tb['column'].lower()
    indicator_names = [x.lower() for x in tb['indicators']]

    column_index = ""  # stays empty when the column is not in the dataset
    tb_indicators = []
    for key, column_name in configs.items():
        lowered_name = column_name.lower()
        if lowered_name == target_column:
            column_index = key
        if lowered_name in indicator_names:
            tb_indicators.append(key)

    table_config.append(dict(key=column_index, name=tb['name'], indicators=tb_indicators))

            
# Attach the derived sections next to the label -> column-name entries.
configs.update(
    charts=chart_config,
    locations=loc_config,
    latlong=latlong_config,
    table=table_config,
)

In [9]:
### Replace Dataset Columns
# Swap the original headers for the short labels in `index`, so the exported
# records use the same keys as `configs`.
# NOTE(review): this mutates `df` in place and is not idempotent. Once it has
# run, `list(df)` returns the labels, so re-running the cells that derive
# `column_names` from `df` would map labels to labels. Restart and run all.
df.columns = index

In [10]:
# Display the assembled config (label -> column name, plus the charts,
# locations, latlong and table sections) for a visual check.
configs

{'A': 'Identifier',
 'B': 'Woreda',
 'C': 'Kebele',
 'D': 'Parish',
 'E': 'Village',
 'F': 'Latitude',
 'G': 'Longitude',
 'H': 'Name of Household',
 'I': 'What is the total number of household members, including yourself?',
 'J': 'Water Service Level',
 'K': 'Main source of drinking water',
 'L': 'Main source of water for other purposes',
 'M': 'Specific Location of Water Collection',
 'N': 'Time to Collect Water',
 'O': 'Times in Last Month When Drinking Water Quantity Was Insufficient',
 'P': 'Fecal and Chemical Levels Acceptable',
 'Q': 'Sanitation Service Level',
 'R': 'Type of Toilet Facility',
 'S': 'Facility is shared with others outside of the household',
 'T': 'Location of sanitation facility',
 'U': 'Emptying of on-site sanitation facilities',
 'V': 'Disposal of excreta from onite sanitation facilities',
 'W': 'Hygiene',
 'X': 'Type of Handwashing Facility Used Most Often',
 'Y': 'Water Available at Handwashing Facility',
 'Z': 'Soap Available',
 'charts': [{'key': 'J',
   '

In [11]:
# One dict per row, keyed by column label. `orient="records"` builds this
# directly, with no transpose of the whole frame. Going through
# `df.T.to_dict()` keys the intermediate dict by index label, so rows with a
# repeated index label would be silently dropped.
data = df.to_dict(orient="records")
# Preview only the first two records.
data[:2]

[{'A': '7cv3-8hk1-kam0',
  'B': 'Agago',
  'C': 'Kotomol',
  'D': 'Lukee',
  'E': 'Odokomit East',
  'F': 2.64303668,
  'G': 33.27401517,
  'H': 'lumumba Patrick',
  'I': 4,
  'J': 'Basic',
  'K': 'Deep tubewell/borehole',
  'L': 0,
  'M': 'Elsewhere',
  'N': 'Less than 30 minutes',
  'O': 'Yes, at least once',
  'P': 0,
  'Q': 'Basic',
  'R': 'Pit latrine with slab',
  'S': 'No',
  'T': 'In own plot/yard',
  'U': 'No, never emptied',
  'V': '',
  'W': 'Basic',
  'X': 'Fixed facility observed in plot/yard',
  'Y': 'Water is available',
  'Z': 'Yes, soap or detergent are present'},
 {'A': 'eh5k-ca64-3k9x',
  'B': 'Agago',
  'C': 'Patongo Town Council',
  'D': 'Oporot',
  'E': 'Oporot South',
  'F': 2.74808792,
  'G': 33.30608985,
  'H': 'Komakech Henry',
  'I': 11,
  'J': 'Basic',
  'K': 'Deep tubewell/borehole',
  'L': 0,
  'M': 'Elsewhere',
  'N': 'Less than 30 minutes',
  'O': 'No, always sufficient',
  'P': 0,
  'Q': 'Basic',
  'R': 'Ventilated Improved Pit latrine (VIP)',
  'S': 'N

In [12]:
### to json
# Output locations (relative to the notebook's working directory).
data_json_path = '../resources/js/data/ug_hh_data.json'
config_json_path = '../resources/js/data/ug_hh_config.json'

# Row data: one JSON object per row, pretty-printed.
df.to_json(data_json_path, orient="records", indent=2)

# Config: label -> column name plus the charts/locations/latlong/table sections.
with open(config_json_path, 'w') as outfile:
    json.dump(configs, outfile)