In [1]:
import pandas as pd
import numpy as np
import string
import json

In [2]:
## Static Config
# Administrative levels: "column" is the dataset header to look up
# (case-insensitively) and "name" is the key emitted under configs['locations'].
locations = [
    {
        "column": "Woreda",
        "name": "level1"
    },
    {
        "column": "Kebele",
        "name": "level2"
    }
]


# Lower-cased headers of the coordinate columns.
latlong = ["latitude", "longitude"]

# Lower-cased headers of the columns that get a chart.
# The hygiene column is accepted under both spellings: the dataset header and
# the table config below call it "hygiene service level", so listing only
# "hygiene service ladder" meant the hygiene chart was never generated.
charts = [
    "water service level",
    "sanitation service level",
    "hygiene service ladder",
    "hygiene service level",
]

# Ordered category values for each chart, keyed by lower-cased header.
_hygiene_levels = ["Safely Managed", "Basic", "Limited", "No Service"]
default = {
    "water service level" : ["Safely Managed", "Basic", "Limited", "Surface Water", "Unimproved","No Service"],
    "sanitation service level": ["Safely Managed", "Basic", "Limited", "Unimproved", "Open Defecation"],
    "hygiene service ladder": list(_hygiene_levels),
    "hygiene service level": list(_hygiene_levels)
}


## Table config
# One entry per indicator table: "column" is the service-level header the table
# is keyed on, "indicators" are the headers of the columns listed in the table.
# Headers are matched case-insensitively when the configs are generated.
table = [
    dict(
        name="water indicators",
        column="water service level",
        indicators=[
            "Specific Location of Water Collection",
            "Time to Collect Water",
            "Times in Last Month When Drinking Water Quantity Was Insufficient",
        ],
    ),
    dict(
        name="sanitation indicators",
        column="sanitation service level",
        indicators=[
            "Type of Toilet Facility",
            "Facility is Shared with Others Outside of the Household",
            "Location of Sanitation Facility",
            "Emptying of On-site Sanitation Facilities",
        ],
    ),
    dict(
        name="hygiene indicators",
        column="hygiene service level",
        indicators=[
            "Type of Handwashing Facility Used Most Often",
            "Water Available at Handwashing Facility",
            "Soap is Available",
        ],
    ),
]

In [3]:
# Load the household survey, skipping malformed rows (with a warning) instead
# of aborting. `error_bad_lines` was deprecated in pandas 1.3 and removed in
# 2.0; `on_bad_lines='warn'` is its replacement. Older pandas rejects the new
# keyword with a TypeError, in which case the legacy spelling is used.
try:
    df = pd.read_csv('eth_hh_data.csv', on_bad_lines='warn')
except TypeError:
    df = pd.read_csv('eth_hh_data.csv', error_bad_lines=False)

In [4]:
## Delete Column with no name
# Flag every column whose label starts with "Unnamed" and keep the rest.
is_unnamed = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~is_unnamed]

In [5]:
### Fill Empty Integer Values
# Every numeric column gets its missing values replaced by 0 and is stored as
# int32. Coordinate columns are left out so the cast does not truncate them.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
df_num = df.select_dtypes(include=numerics)
# Fixed: the original called `df.num.drop(...)` (typo for `df_num`), which
# raised AttributeError whenever a numeric Latitude/Longitude column existed.
# Names are compared lower-cased against `latlong`, the same way the config
# generation matches coordinate columns.
coordinate_columns = [col for col in df_num.columns if str(col).lower() in latlong]
df_num = df_num.drop(columns=coordinate_columns)
df_num = df_num.fillna(0.0).astype(np.int32)
df[list(df_num)] = df_num

In [6]:
### Replace Empty String Values
# Missing values in object-typed columns become empty strings.
object_columns = list(df.select_dtypes(include=['object']))
df_str = df[object_columns].fillna("")
df[object_columns] = df_str

In [7]:
## Generate Settings
# Spreadsheet-style labels: 'A'..'Z' followed by every two-letter pair 'AA'..'ZZ'.
chars = [letter for letter in string.ascii_uppercase]
chars_col = chars + [first + second for first in chars for second in chars]


def keyname(x, y):
    """Map each label in ``x`` to the entry of ``y`` at the same position.

    A label equal to 'data_point_id' is mapped to the literal string
    'data_point_id' and ``y`` is not consulted for that position.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Original headers, in dataset order, and how many there are.
column_names = list(df.columns)
columns_length = len(column_names)

In [8]:
### Generate configs
# One short label per dataset column, then label -> original header.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

# Lower-cased headers, computed once; all matching below is case-insensitive.
# This is built before the extra sections are added to `configs`, so it only
# contains the column labels.
lowered_headers = {label: header.lower() for label, header in configs.items()}

# Charts: every column whose header is listed in `charts`, in column order.
chart_config = [
    {'key': label, 'name': configs[label], 'value': default[header]}
    for label, header in lowered_headers.items()
    if header in charts
]

# Coordinates: lower-cased header -> column label.
latlong_config = {
    header: label
    for label, header in lowered_headers.items()
    if header in latlong
}

### Locations
# Location name -> label of the last column with a matching header ("" if none).
loc_config = {}
for place in locations:
    wanted = place['column'].lower()
    matches = [label for label, header in lowered_headers.items() if header == wanted]
    loc_config[place['name'].lower()] = matches[-1] if matches else ""

#### Table
# Per table: label of the service-level column ("" if none) and the labels of
# its indicator columns, listed in column order.
table_config = []
for spec in table:
    wanted = spec['column'].lower()
    wanted_indicators = [title.lower() for title in spec['indicators']]
    matches = [label for label, header in lowered_headers.items() if header == wanted]
    table_config.append({
        "key": matches[-1] if matches else "",
        "name": spec['name'],
        "indicators": [
            label
            for label, header in lowered_headers.items()
            if header in wanted_indicators
        ]
    })

# Attach the derived sections next to the label -> header entries.
configs['charts'] = chart_config
configs['locations'] = loc_config
configs['latlong'] = latlong_config
configs['table'] = table_config

In [9]:
### Replace Dataset Columns
# Swap the original headers for the short labels in `index`; `configs` holds
# the label -> original header mapping. This mutates `df` in place.
df.columns = index

In [10]:
# Display the generated configuration: label -> header entries plus the
# 'charts', 'locations', 'latlong' and 'table' sections.
configs

{'A': 'Woreda',
 'B': 'Kebele',
 'C': 'Village',
 'D': 'Name of Respondent',
 'E': 'Household Size',
 'F': 'Main Source of Drinking Water',
 'G': 'Water Service Level',
 'H': 'Specific Location of Water Collection',
 'I': 'Time to Collect Water',
 'J': 'Times in Last Month When Drinking Water Quantity Was Insufficient',
 'K': 'Sanitation Service Level',
 'L': 'Type of Toilet Facility',
 'M': 'Facility is Shared with Others Outside of the Household',
 'N': 'Location of Sanitation Facility',
 'O': 'Emptying of On-site Sanitation Facilities',
 'P': 'Hygiene Service Level',
 'Q': 'Type of Handwashing Facility Used Most Often',
 'R': 'Water Available at Handwashing Facility',
 'S': 'Soap is Available',
 'charts': [{'key': 'G',
   'name': 'Water Service Level',
   'value': ['Safely Managed',
    'Basic',
    'Limited',
    'Surface Water',
    'Unimproved',
    'No Service']},
  {'key': 'K',
   'name': 'Sanitation Service Level',
   'value': ['Safely Managed',
    'Basic',
    'Limited',
   

In [11]:
# Rows as a list of {column label: value} dicts. `orient="records"` builds
# them directly: no transpose of the whole frame, and rows are not keyed by
# index label, so duplicate index labels cannot silently drop rows.
data = df.to_dict(orient="records")
data[:2]

[{'A': 'Negele Arsi (Rural)',
  'B': 'Kersa Gara',
  'C': "Bu'ura Gara",
  'D': 'fatuma aman',
  'E': 7,
  'F': 'Public tap/standpipe',
  'G': 'Basic',
  'H': 'In own dwelling',
  'I': 'less than 30 minutes',
  'J': 'No, always sufficient',
  'K': 'Safely Managed',
  'L': 'Toilet that flush/pour to flush piped sewer system',
  'M': 'Yes',
  'N': 'In own dwelling',
  'O': '',
  'P': 'Basic',
  'Q': 'Fixed facility observed in dwelling',
  'R': 'Water is available',
  'S': 'Yes, soap or detergent are present'},
 {'A': 'Shashemene (Rural)',
  'B': 'Chabi',
  'C': 'Chabi',
  'D': 'Burtukan Hedato',
  'E': 7,
  'F': 'Deep tubewell/borehole',
  'G': 'Limited',
  'H': 'Elsewhere',
  'I': 'More than 30 minutes',
  'J': 'Yes, at least once',
  'K': 'Basic',
  'L': 'Pit latrine with slab',
  'M': 'No',
  'N': 'In own dwelling',
  'O': 'No, never emptied',
  'P': 'Basic',
  'Q': 'Mobile object observed (bucket, jug, kettle)',
  'R': 'Water is available',
  'S': 'Yes, soap or detergent are present

In [12]:
### to json
# NOTE(review): the input file is 'eth_hh_data.csv' but both outputs are named
# 'nep_hh_*' — confirm these are the intended target files.
data_json_path = '../resources/js/data/nep_hh_data.json'
config_json_path = '../resources/js/data/nep_hh_config.json'

# Records (one object per row, keyed by column label), pretty-printed.
df.to_json(data_json_path, orient="records", indent=2)

# Column map and derived sections, written as compact JSON.
with open(config_json_path, 'w') as outfile:
    outfile.write(json.dumps(configs))