In [None]:
import pandas as pd
import numpy as np
import string
import json
import random

In [None]:
## Static Config
# Administrative hierarchy: each source column is exposed under a generic level name.
locations = [
    {"column": source_column, "name": level_name}
    for source_column, level_name in (
        ("District", "level1"),
        ("Municipality/Union", "level2"),
        ("Ward Number", "level3"),
    )
]

# Lower-cased names of the coordinate columns.
latlong = ["latitude", "longitude"]

# Lower-cased names of the columns that are charted.
charts = ["drinking water", "sanitation", "hygiene"]

# Every charted column uses the same four service levels (one independent list per chart).
default = {
    chart: ["Advanced", "Basic", "Limited", "No Service"]
    for chart in charts
}

## Table config
# One entry per detail table: a display name, the summary column it belongs to,
# and the survey questions (exact column headers) listed under it.
table = [
    dict(
        name="water indicators",
        column="drinking water",
        indicators=[
            "Is there an improved water source that serves the school?",
            "What is the type of improved water source?",
        ],
    ),
    dict(
        name="sanitation indicators",
        column="sanitation",
        indicators=[
            "Does the school have any sanitation facilities?",
            "What type of sanitation facilities are present?",
            "In total, how many functioning stances are for females?",
            "IN total, how many functioning stances are for Males?",
            "Is there a sanitation facility accessible to students with disabilities?",
            "Condition of the superstructure, roof, walls, and doors",
            "Condition are the floor/slabs/platform",
        ],
    ),
    dict(
        name="hygiene indicators",
        column="hygiene",
        indicators=[
            "Is there a place for handwashing?",
            "Is there water available at the handwashing station?",
            "Is there soap at hand washing station?",
        ],
    ),
]

## Marker Detail
# Marker definition: the column used as the marker name; no colour or detail rules are configured.
marker = dict(name="School Name", color=[], detail=[])

# Main table definition: the naming column plus the three summary indicator columns.
main = dict(
    name='School Name',
    column='School Name',
    indicators=[
        dict(name=label, column=source_column)
        for label, source_column in (
            ('Water', 'Drinking Water'),
            ('Sanitation', 'Sanitation'),
            ('Hygiene', 'Hygiene'),
        )
    ],
)

In [None]:
# Load the raw school dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('bd_school_data.csv')

In [None]:
## Rename level 2
def refineLevel2Name(x):
    """Strip the administrative suffix from a level-2 location name.

    Every occurrence of ' Municipality' and ' Union' (including the leading
    space) is removed, e.g. 'Agardari Union' -> 'Agardari'.

    Parameters
    ----------
    x : value of one cell; normally a str.

    Returns
    -------
    The cleaned string. Non-string values (for example NaN from an empty
    cell) are returned unchanged instead of raising TypeError on the
    substring test.
    """
    if not isinstance(x, str):
        return x
    # str.replace is already a no-op when the suffix is absent, so no
    # membership checks are needed before calling it.
    return x.replace(' Municipality', '').replace(' Union', '')

# Clean every level-2 name element by element.
level2_names = df['Municipality/Union']
df['Municipality/Union'] = level2_names.map(refineLevel2Name)

In [None]:
## Filter Data by Municipality/Union - Agardari & Paurashava
# Keep only the rows of the two selected areas and renumber the index from 0.
selected_level2 = df['Municipality/Union'].isin(['Paurashava', 'Agardari'])
df = df[selected_level2].reset_index(drop=True)

In [None]:
## Delete Column with no name
# Flag the columns whose label starts with "Unnamed", then keep all the others.
unnamed_mask = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask]

In [None]:
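## Fill Column (disabled): replace each non-default column with values drawn at random from that column's own non-null values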
#default_column = ["Identifier", "Woreda", "Kebele", "School Name", "Lat", "Lot", "Latitude", "Longitude", "School Type", "Male Pupils", "Female Pupils"]
#for column in df.columns:
#    if column not in default_column and column != "Number of Student Toilets Currently Usable":
#        df[column] = df[column].apply(lambda x: random.choice(df[column].dropna().unique()))

In [None]:
## Fill Number of Student Toilets Currently Usable (disabled): random integer between 0 and Male Pupils + Female Pupils
# def fillToiletUsed(x):
#     total = x["Male Pupils"] + x["Female Pupils"]
#     return int(random.randint(0,total))

# df["Male Pupils"] = 0
# df["Female Pupils"] = 0
# df["Number of Student Toilets Currently Usable"] = df.apply(fillToiletUsed, axis=1)

In [None]:
### Fill Empty Integer Values
# Every numeric column except the coordinates has its missing values replaced
# by 0 and is stored as a whole number.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
# errors='ignore': the coordinate columns are only part of this selection when
# they exist and were parsed as numbers; without it a missing or text-typed
# 'Latitude'/'Longitude' column aborts the cell with a KeyError.
df_num = df.select_dtypes(include=numerics).drop(columns=['Latitude', 'Longitude'], errors='ignore')
# int64 instead of int32: int64 inputs are selected above, and casting them
# down to 32 bits would corrupt any value outside the int32 range.
df_num = df_num.fillna(0.0).astype(np.int64)
df[list(df_num)] = df_num

In [None]:
### Replace Empty String Values
# Missing values in object columns become "N/A"; surrounding whitespace is
# removed from every string.
df_str = df.select_dtypes(include=['object']).fillna("N/A")
for strcol in list(df_str):
    # Strip only real strings. The `.str` accessor returns NaN for non-string
    # elements, so an object column mixing e.g. booleans with the "N/A" filler
    # would otherwise lose its non-string values right after being filled.
    df_str[strcol] = df_str[strcol].map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
df[list(df_str)] = df_str

In [None]:
## Generate Settings
# Spreadsheet-style keys: 'A'..'Z' followed by 'AA'..'ZZ' (26 + 26*26 entries).
chars = list(string.ascii_uppercase)
chars_col = chars[:]
chars_col.extend(prefix + suffix for prefix in chars for suffix in chars)


def keyname(x, y):
    """Map each key in x to the item of y at the same position.

    A key equal to 'data_point_id' maps to the literal 'data_point_id'
    instead of the corresponding item of y.
    """
    mapping = {}
    for position, short_key in enumerate(x):
        if short_key == 'data_point_id':
            mapping[short_key] = 'data_point_id'
        else:
            mapping[short_key] = y[position]
    return mapping

# Original column headers, in file order, and how many there are.
column_names = list(df.columns)
columns_length = len(column_names)

In [None]:
### Generate configs
# One short key per column; `configs` maps short key -> original column header.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

chart_config, latlong_config, marker_config = [], {}, {}
marker_label_lc = marker['name'].lower()
for key, label in configs.items():
    label_lc = label.lower()
    # Charted columns carry their list of possible values.
    if label_lc in charts:
        chart_config.append({'key': key, 'name': label, 'value': default[label_lc]})
    # Coordinate columns are recorded under their lower-cased name.
    if label_lc in latlong:
        latlong_config[label_lc] = key
    # The marker name points at the column that labels each marker.
    if label_lc == marker_label_lc:
        marker_config = { "name" : key}

### Locations
# Lower-cased header -> short key; when two headers collide the later key wins.
key_by_label = {label.lower(): key for key, label in configs.items()}
# Each level name maps to the key of its column, or "" when no column matches.
loc_config = {
    loc['name'].lower(): key_by_label.get(loc['column'].lower(), "")
    for loc in locations
}


#### Table
# For every table definition, find the key of its summary column ("" when
# absent) and the keys of its indicator columns, in column order.
table_config = []
for tb in table:
    summary_label = tb['column'].lower()
    wanted_labels = {question.lower() for question in tb['indicators']}
    column_index = ""
    tb_indicators = []
    for key, label in configs.items():
        label_lc = label.lower()
        if label_lc == summary_label:
            column_index = key
        if label_lc in wanted_labels:
            tb_indicators.append(key)
    table_config.append({
        "key": column_index,
        "name": tb['name'],
        "indicators": tb_indicators
    })

#### Marker
def _resolve_marker_rules(rules, column_map, extra_fields=()):
    """Translate marker rules from column headers to short column keys.

    Parameters
    ----------
    rules : iterable of dict
        Each rule must provide 'name', 'column', 'action', 'type', 'value',
        'and' and 'and_value', plus every key listed in extra_fields.
        'and' may be None when the rule has no second column.
    column_map : dict
        Short key -> original column header.
    extra_fields : iterable of str
        Additional rule keys copied through unchanged, after the common ones.

    Returns
    -------
    list of dict
        One entry per rule. 'column' and 'and' hold the matching short key
        (headers are compared case-insensitively) or None when nothing matches.
    """
    resolved = []
    for rule in rules:
        column = None
        and_column = None
        for key, label in column_map.items():
            label_lc = label.lower()
            if label_lc == rule['column'].lower():
                column = key
            if rule['and'] is not None and label_lc == rule['and'].lower():
                and_column = key
        entry = {
            "name": rule['name'],
            "column": column,
            "action": rule['action'],
            "type": rule['type'],
            "value": rule['value'],
            "and": and_column,
            "and_value": rule['and_value'],
        }
        for field in extra_fields:
            entry[field] = rule[field]
        resolved.append(entry)
    return resolved


# Colour rules additionally carry their 'color' value.
marker_color = _resolve_marker_rules(marker['color'], configs, extra_fields=('color',))
marker_config['color'] = marker_color

#### Marker Detail
marker_detail = _resolve_marker_rules(marker['detail'], configs)
marker_config['detail'] = marker_detail
    
## Main Table
# NOTE: headers are compared case-sensitively here, unlike the lower-cased
# comparisons used for the other config sections.
main_config = {'name': main['name']}
for key, label in configs.items():
    # 'key' is only present when the main column exists in the dataset.
    if label == main['column']:
        main_config['key'] = key
# Indicator keys, ordered by column position.
indicators = [
    {'name': spec['name'], 'key': key}
    for key, label in configs.items()
    for spec in main['indicators']
    if label == spec['column']
]
main_config['indicators'] = indicators
            
# Attach every derived section next to the key -> header entries.
configs.update({
    'charts': chart_config,
    'marker': marker_config,
    'locations': loc_config,
    'latlong': latlong_config,
    'table': table_config,
    'main': main_config,
})

In [None]:
### Replace Dataset Columns
# Swap the original headers for the generated short keys, so the exported rows
# use the same keys that `configs` maps back to the original headers.
df.columns = index

In [None]:
# Display the assembled configuration for a visual check.
configs

In [None]:
# One dict per row (column key -> value). orient='records' builds the list
# directly, without transposing the whole frame and keying rows by index label.
data = df.to_dict(orient='records')
# Preview the first two rows only.
data[:2]

In [None]:
### to json
# Rows are written as a JSON array of records, indented by two spaces.
df.to_json('../resources/js/data/bd_school_data.json', orient="records", indent=2)

# The configuration is written as one compact JSON document.
with open('../resources/js/data/bd_school_config.json', 'w') as config_file:
    config_file.write(json.dumps(configs))