In [1]:
import pandas as pd
import numpy as np
import string
import json
import random
import uuid

In [2]:
## Static Config
# Administrative columns exposed as location levels ("level1", "level2", ...),
# in the order listed here.
locations = [
    {"column": admin_column, "name": "level%d" % depth}
    for depth, admin_column in enumerate(("Woreda", "Kebele"), start=1)
]

# Lower-cased names of the coordinate columns.
latlong = ["latitude", "longitude"]
# Lower-cased names of the columns that get a chart.
charts = ["water", "sanitation"]
# Value list attached to each chart; every chart gets its own list object.
default = {
    chart: ["Advanced", "Basic", "Limited", "No Service"]
    for chart in charts
}

## Table config
# Every section names the dataset column it is keyed on ("column") and the
# question texts ("indicators") that are looked up among the column names.
# NOTE(review): apart from "sanitation", none of these column/indicator names
# occur in the column listing displayed by the `configs` cell further down --
# confirm they match the real CSV header.

def _table_section(name, column, *indicators):
    """Return one table-section entry: display name, key column and indicator texts."""
    return {"name": name, "column": column, "indicators": list(indicators)}


table = [
    _table_section(
        "water indicators",
        "drinking water",
        "Is there an improved water source that serves the school?",
        "What is the type of improved water source?",
    ),
    _table_section(
        "sanitation indicators",
        "sanitation",
        "Does the school have any sanitation facilities?",
        "What type of sanitation facilities are present?",
        "Is there a sanitation facility accessible to students with disabilities?",
        "Condition of the superstructure, roof, walls, and doors",
        "Condition are the floor/slabs/platform",
    ),
    _table_section(
        "hygiene indicators",
        "hygiene",
        "Is there a place for handwashing?",
        "Is there water available at the handwashing station?",
        "Is there soap at hand washing station?",
    ),
]

## Marker Detail
# "name" is the dataset column used as the marker name. "color" and "detail"
# are lists of rule dicts (none configured here); the config generator reads
# the keys name, column, action, type, value, and, and_value from each rule,
# plus color for the colour rules.
marker = dict(name="School Name", color=[], detail=[])

# Main table: the column identifying each row plus the indicator columns,
# each displayed under the same name as its column.
main = {
    'name': 'School Name',
    'column': 'School Name',
    'indicators': [
        {'name': label, 'column': label}
        for label in ('Water', 'Sanitation', 'Hygiene')
    ],
}

In [3]:
df = pd.read_csv('eth_school_data.csv')
if 'Identifier' not in df.columns:
    # Give every row a random UUID4 string and make it the first column.
    original_columns = list(df.columns)
    df['Identifier'] = [str(uuid.uuid4()) for _ in df.index]
    df = df[['Identifier'] + original_columns]

In [4]:
## Delete Column with no name
# Keep only the columns whose header does not start with "Unnamed".
unnamed_mask = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask]

In [5]:
## Fill Column
#default_column = ["Identifier", "Woreda", "Kebele", "School Name", "Lat", "Lot", "Latitude", "Longitude", "School Type", "Male Pupils", "Female Pupils"]
#for column in df.columns:
#    if column not in default_column and column != "Number of Student Toilets Currently Usable":
#        df[column] = df[column].apply(lambda x: random.choice(df[column].dropna().unique()))

In [6]:
## Fill Number of Student Toilets Currently Usable
def fillToiletUsed(x):
    """Return a random whole number between 0 and the row's total pupil count.

    Args:
        x: mapping-like row (a dict or a DataFrame row) providing the entries
            "Number of Boy Pupils" and "Number of Girl Pupils".

    Returns:
        int: a value drawn with ``random.randint`` from ``[0, total]``, where
        ``total`` is the sum of both pupil counts. A missing count (None or
        NaN) counts as 0 and a float count is truncated to an integer.

    Raises:
        ValueError: if the total is negative (empty range for ``randint``).
    """
    total = 0
    for pupil_column in ("Number of Boy Pupils", "Number of Girl Pupils"):
        count = x[pupil_column]
        # NaN is the only value that differs from itself; a missing count must
        # not turn the bound into NaN/float, which random.randint rejects.
        if count is None or count != count:
            continue
        total += int(count)
    return int(random.randint(0, total))

# Both pupil-count columns are (re)set to 0 on every row, so fillToiletUsed
# sees a total of 0 and the generated toilet count is 0 for every row.
for pupil_column in ("Number of Boy Pupils", "Number of Girl Pupils"):
    df[pupil_column] = 0
toilets_usable = df.apply(fillToiletUsed, axis=1)
df["Number of Student Toilets Currently Usable"] = toilets_usable

In [7]:
### Fill Empty Integer Values
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
# The coordinates are left out of the integer cast so they keep their
# fractional part. errors='ignore' keeps the cell working when a coordinate
# column is absent or was not parsed as a numeric dtype: select_dtypes then
# omits it and a plain drop would raise KeyError.
df_num = df.select_dtypes(include=numerics).drop(
    columns=['Latitude', 'Longitude'], errors='ignore'
)
# Missing numbers become 0; the cast to int32 truncates any fractional part.
df_num = df_num.fillna(0.0).astype(np.int32)
df[list(df_num)] = df_num

In [8]:
### Replace Empty String Values
# Object-typed columns: missing values become "" and surrounding whitespace
# is stripped from every value.
df_str = (
    df.select_dtypes(include=['object'])
    .fillna("")
    .apply(lambda text_column: text_column.str.strip())
)
df[list(df_str)] = df_str

In [9]:
## Generate Settings 
# Spreadsheet-style column labels: A..Z followed by AA..ZZ.
chars = list(string.ascii_uppercase)
chars_col = list(chars)
chars_col.extend(first + second for first in chars for second in chars)


def keyname(x, y):
    """Map each label in ``x`` to the entry of ``y`` at the same position.

    A label equal to 'data_point_id' maps to the literal 'data_point_id'
    instead of the corresponding entry of ``y``.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Column names of the dataset in their current order, and how many there are.
column_names = list(df.columns)
columns_length = len(column_names)

In [10]:
### Generate configs
# Pair each spreadsheet-style label with the dataset column at the same position.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

chart_config = []
latlong_config = {}
marker_config = {}
marker_name = marker['name'].lower()
# Column names are compared lower-cased against the static configuration.
for key, column_name in configs.items():
    lowered = column_name.lower()
    if lowered in charts:
        chart_config.append({'key': key, 'name': column_name, 'value': default[lowered]})
    if lowered in latlong:
        latlong_config[lowered] = key
    if lowered == marker_name:
        marker_config = {"name": key}

### Locations
# Reverse lookup from lower-cased column name to its label; if two columns
# share a lower-cased name, the later label wins.
label_by_column = {column_name.lower(): label for label, column_name in configs.items()}
loc_config = {}
for loc in locations:
    # A location column that is not in the dataset gets an empty label.
    loc_config[loc['name'].lower()] = label_by_column.get(loc['column'].lower(), "")


#### Table
# For each table section, find the label of its key column ("" when absent)
# and the labels of all columns named like one of its indicators.
table_config = []
for tb in table:
    section_column = tb['column'].lower()
    wanted_indicators = [indicator.lower() for indicator in tb['indicators']]
    section_key = ""
    indicator_keys = []
    for key, column_name in configs.items():
        lowered = column_name.lower()
        if lowered == section_column:
            section_key = key
        if lowered in wanted_indicators:
            indicator_keys.append(key)
    table_config.append({
        "key": section_key,
        "name": tb['name'],
        "indicators": indicator_keys
    })

def _resolve_marker_rules(rules, configs, with_color=False):
    """Translate marker rules from column names to column labels.

    Args:
        rules: iterable of rule dicts with the keys 'name', 'column',
            'action', 'type' and 'value'. 'and' and 'and_value' are optional
            (treated as None when absent); 'color' is required when
            ``with_color`` is true.
        configs: mapping of column label -> column name.
        with_color: also copy the rule's 'color' entry into the result.

    Returns:
        list of dicts, one per rule, in which 'column' and 'and' hold the
        label of the column whose name matches case-insensitively, or None
        when there is no such column.
    """
    # When two columns share a lower-cased name the later label wins.
    label_by_column = {column_name.lower(): label for label, column_name in configs.items()}
    resolved = []
    for rule in rules:
        and_column = rule.get('and')
        entry = {
            "name": rule['name'],
            "column": label_by_column.get(rule['column'].lower()),
            "action": rule['action'],
            "type": rule['type'],
            "value": rule['value'],
            "and": label_by_column.get(and_column.lower()) if and_column is not None else None,
            "and_value": rule.get('and_value'),
        }
        if with_color:
            entry["color"] = rule['color']
        resolved.append(entry)
    return resolved


#### Marker
marker_color = _resolve_marker_rules(marker['color'], configs, with_color=True)
marker_config['color'] = marker_color

#### Marker Detail
marker_detail = _resolve_marker_rules(marker['detail'], configs)
marker_config['detail'] = marker_detail
    
## Main Table
# Column names are matched case-insensitively, the same way the chart,
# location, table and marker lookups in this cell match them; an exact-case
# comparison here would silently leave 'key' or indicators out when the CSV
# header differs only in letter case.
main_config = {'name': main['name']}
indicators = []
main_column = main['column'].lower()
for key, column_name in configs.items():
    lowered = column_name.lower()
    if lowered == main_column:
        main_config['key'] = key
    for i in main['indicators']:
        if lowered == i['column'].lower():
            indicators.append({'name': i['name'], 'key': key})
main_config['indicators'] = indicators
            
# Attach the generated sections next to the label -> column-name entries.
configs.update({
    'charts': chart_config,
    'marker': marker_config,
    'locations': loc_config,
    'latlong': latlong_config,
    'table': table_config,
    'main': main_config,
})

In [11]:
### Replace Dataset Columns
# Relabel the dataset columns with the spreadsheet-style labels in `index`,
# i.e. the same labels that key the column names in `configs`.
df.columns = index

In [12]:
# Display the assembled configuration: label -> column name entries plus the
# generated sections.
configs

{'A': 'Identifier',
 'B': 'Woreda',
 'C': 'Kebele',
 'D': 'School Name',
 'E': 'Latitude',
 'F': 'Longitude',
 'G': 'School Type',
 'H': 'Male Pupils',
 'I': 'Female Pupils',
 'J': 'Reason for Inventory',
 'K': 'Water Supply Source',
 'L': 'Water',
 'M': 'Year Commisioned',
 'N': 'Functionality_Status_of_Water_Supply',
 'O': 'Yield',
 'P': 'Type of Latrine',
 'Q': 'Sanitation',
 'R': 'latrine_For_Boys_and_Girls_Separate',
 'S': 'Number of Boy Pupils',
 'T': 'Number of Girl Pupils',
 'U': 'Are there Latrines for Disabled?',
 'V': 'Latrine_Condition',
 'W': 'Presence_of_HandWashingFacility',
 'X': 'hand_washing_in_use',
 'Y': 'availability_of_soap',
 'Z': 'Urinal_present_girls',
 'AA': 'Urinal_present_boys',
 'AB': 'Separate_latrine_for_tolitet_Staff',
 'AC': 'Number of Student Toilets Currently Usable',
 'charts': [{'key': 'L',
   'name': 'Water',
   'value': ['Advanced', 'Basic', 'Limited', 'No Service']},
  {'key': 'Q',
   'name': 'Sanitation',
   'value': ['Advanced', 'Basic', 'Limit

In [13]:
# One dict per row, keyed by column label. orient="records" builds the row
# dicts directly; going through df.T.to_dict() first transposes the whole
# frame and keys rows by index label, which would collapse rows that share
# an index label.
data = df.to_dict(orient="records")
data[:2]

[{'A': '23ecc66b-7c42-44b8-b527-2e9e33198928',
  'B': 'Shashemene',
  'C': 'Bura',
  'D': 'Bura',
  'E': 7.260344141,
  'F': 38.48864077,
  'G': 'Primary(1-8)',
  'H': 644,
  'I': 368,
  'J': 'New Scheme',
  'K': 'Pipeline Connection',
  'L': 'Basic',
  'M': 2006,
  'N': 'Functional',
  'O': 0,
  'P': 'Simple Pit Latrine',
  'Q': 'Limited',
  'R': 'No',
  'S': 0,
  'T': 0,
  'U': 'No',
  'V': 'Clean, Used',
  'W': 'No Service',
  'X': 'No',
  'Y': 'No',
  'Z': 'No',
  'AA': 'No',
  'AB': 'No',
  'AC': 0},
 {'A': 'e778e948-3468-46b0-a014-73bbd27fac23',
  'B': 'Shashemene',
  'C': 'Bura',
  'D': 'B/Sheshera',
  'E': 7.226484876,
  'F': 38.48695793,
  'G': 'Primary(1-8)',
  'H': 215,
  'I': 173,
  'J': '',
  'K': 'No Water Scheme',
  'L': 'No Service',
  'M': 0,
  'N': '',
  'O': 0,
  'P': 'Simple Pit Latrine',
  'Q': 'Limited',
  'R': 'No',
  'S': 0,
  'T': 0,
  'U': 'No',
  'V': 'Dirty But Used',
  'W': 'No Service',
  'X': 'No',
  'Y': 'No',
  'Z': 'No',
  'AA': 'No',
  'AB': 'No',
  '

In [14]:
### to json
# Write the relabelled dataset as a JSON array of records, then the
# configuration as a single JSON document.
data_path = '../resources/js/data/eth_school_data.json'
config_path = '../resources/js/data/eth_school_config.json'

df.to_json(data_path, orient="records", indent=2)

with open(config_path, 'w') as config_file:
    config_file.write(json.dumps(configs))