In [1]:
import pandas as pd
import numpy as np
import string
import json
import random

In [2]:
## Static Config
# Lower-cased column headers that identify administrative areas and coordinates.
locations = ["woreda", "kebele"]
latlong = ["latitude", "longitude"]

# Lower-cased column headers that are rendered as charts.
charts = [
    "water service level",
    "sanitation service level",
    "hygiene service level",
]

# Every chart shares the same ordered set of service levels; each chart gets
# its own list object so they can be edited independently later.
default = {
    chart_name: ["Advanced", "Basic", "Limited", "No Service"]
    for chart_name in charts
}

## Table config
# One entry per table: its display name, the service-level column it is keyed
# on, and the headers of the indicator columns shown alongside it.
table = [
    dict(
        name="water indicators",
        column="water service level",
        indicators=[
            "Main Source of Drinking Water Provided by the School",
            "Main Water Source is Currently Available at the School",
        ],
    ),
    dict(
        name="sanitation indicators",
        column="sanitation service level",
        indicators=[
            "Type of Student Toilets at the School",
            "Toilets are Separate for Girls and Boys",
        ],
    ),
    dict(
        name="hygiene indicators",
        column="hygiene service level",
        indicators=[
            "Handwashing Facilities are at the School",
            "Soap and water available at the handwashing facilities",
        ],
    ),
]

In [3]:
# Load the raw dataset; the CSV path is relative to the current working directory.
df = pd.read_csv('ug_school_data.csv')

In [4]:
## Drop columns without a real header (their names start with "Unnamed")
is_unnamed = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~is_unnamed]

In [5]:
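## Fill Column (disabled): replace each non-default column's values with random picks from that column's existing values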
#default_column = ["Identifier", "Woreda", "Kebele", "School Name", "Lat", "Lot", "Latitude", "Longitude", "School Type", "Male Pupils", "Female Pupils"]
#for column in df.columns:
#    if column not in default_column and column != "Number of Student Toilets Currently Usable":
#        df[column] = df[column].apply(lambda x: random.choice(df[column].dropna().unique()))

In [6]:
## Fill Number of Student Toilets Currently Usable
def fillToiletUsed(x):
    """Return a random usable-toilet count bounded by the row's pupil total.

    Parameters
    ----------
    x : mapping-like row (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must provide "Male Pupils" and "Female Pupils" entries.

    Returns
    -------
    int
        A value drawn uniformly from ``0 .. male + female`` (both inclusive).

    Raises
    ------
    ValueError
        If the combined pupil count is negative (empty range for ``randint``).
    """
    total = 0
    for column in ("Male Pupils", "Female Pupils"):
        pupils = x[column]
        # Missing counts contribute nothing; float counts are truncated because
        # random.randint only accepts integer bounds.
        if not pd.isna(pupils):
            total += int(pupils)
    return int(random.randint(0, total))

# Both pupil columns are set to zero first, so the toilet count computed from
# their sum below is drawn from the range 0..0 for every row.
for pupil_column in ("Male Pupils", "Female Pupils"):
    df[pupil_column] = 0

toilets_usable = df.apply(fillToiletUsed, axis="columns")
df["Number of Student Toilets Currently Usable"] = toilets_usable

In [7]:
### Fill Empty Integer Values
# Every numeric column except the coordinates has its gaps replaced by 0 and is
# then cast to 32-bit integers; the coordinates keep their original values.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
# errors='ignore': the coordinate columns may be missing from the numeric
# selection (absent from the file, or read as text), which must not abort the cell.
df_num = df.select_dtypes(include=numerics).drop(columns=['Latitude', 'Longitude'], errors='ignore')
df_num = df_num.fillna(0.0).astype(np.int32)
df[list(df_num)] = df_num

In [8]:
### Replace Empty String Values
# Work on the object-dtype (text) columns only: missing entries become "".
df_str = df.select_dtypes(include=['object'])
df_str = df_str.fillna("")
text_columns = list(df_str)
df[text_columns] = df_str

In [9]:
## Generate Settings 
# Spreadsheet-style column labels: A..Z followed by AA..ZZ (26 + 26*26 labels).
chars = list(string.ascii_uppercase)
chars_col = chars + [head + tail for head in chars for tail in chars]


def keyname(x, y):
    """Pair each label in ``x`` with the entry of ``y`` at the same position.

    A label equal to 'data_point_id' maps to the literal 'data_point_id'
    instead of the corresponding entry of ``y``.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Snapshot of the current column headers and how many there are.
column_names = list(df)
columns_length = len(column_names)

In [10]:
### Generate configs
# One spreadsheet-style key per column, mapped to the column's header.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

# Lower-cased header for every column key, computed once and reused below.
# NOTE(review): the configs displayed further down list headers such as
# 'Drinking Water' and 'Sanitation', which match neither `charts` nor `table`;
# the chart/table sections may therefore come out empty for this dataset - confirm.
headers_lower = {col_key: header.lower() for col_key, header in configs.items()}

# Chart columns carry their key, original header and the allowed service levels.
chart_config = [
    {'key': col_key, 'name': configs[col_key], 'value': default[header]}
    for col_key, header in headers_lower.items()
    if header in charts
]
# Location / coordinate lookups: lower-cased header -> column key.
loc_config = {
    header: col_key
    for col_key, header in headers_lower.items()
    if header in locations
}
latlong_config = {
    header: col_key
    for col_key, header in headers_lower.items()
    if header in latlong
}

#### Table
table_config = []
for spec in table:
    target_column = spec['column'].lower()
    wanted_indicators = [indicator.lower() for indicator in spec['indicators']]
    column_index = ""   # stays empty when no header matches the table's column
    tb_indicators = []  # indicator keys, collected in column order
    for col_key, header in headers_lower.items():
        if header == target_column:
            column_index = col_key
        if header in wanted_indicators:
            tb_indicators.append(col_key)
    table_config.append({
        "key": column_index,
        "name": spec['name'],
        "indicators": tb_indicators
    })

# The derived sections live in the same dict as the key -> header mapping.
configs['charts'] = chart_config
configs['locations'] = loc_config
configs['latlong'] = latlong_config
configs['table'] = table_config

In [11]:
### Replace Dataset Columns
# Rename every column to its spreadsheet-style key so the exported records use
# the same keys as `configs`.
# NOTE: this mutates `df` in place; re-running the earlier cells that read
# `list(df)` after this one would capture the letter keys instead of the headers.
df.columns = index

In [12]:
# Display the assembled config (column-key -> header mapping plus the derived sections).
configs

{'A': 'Identifier',
 'B': 'Woreda',
 'C': 'Kebele',
 'D': 'Parish',
 'E': 'School Name',
 'F': 'Latitude',
 'G': 'Longitude',
 'H': 'School Type',
 'I': 'Total number of Boys enrolled in the school',
 'J': 'Total number of Girls enrolled in the school',
 'K': 'What is the total enrolment in the school?',
 'L': 'What is the regular number of staff in the school?',
 'M': 'Drinking Water',
 'N': 'Is there an improved water source that serves the school?',
 'O': 'What is the type of improved water source?',
 'P': 'What year was the water facility constructed?',
 'Q': 'Who funded the construction of the water point?',
 'R': 'Sanitation',
 'S': 'Does the school have any sanitation facilities?',
 'T': 'What type of sanitation facilities are present?',
 'U': 'In total, how many functioning stances are for females?',
 'V': 'IN total, how many functioning stances are for Males?',
 'W': 'Is there a sanitation facility accessible to students with disabilities?',
 'X': 'In what year was the latest 

In [13]:
# Build one dict per row (keyed by column letter) and preview the first two.
# orient="records" avoids transposing the whole frame and does not key rows by
# index label, so rows sharing an index label are not collapsed into one.
data = df.to_dict(orient="records")
data[:2]

[{'A': 's5cy-yfp7-b2js',
  'B': 0,
  'C': 0,
  'D': 0,
  'E': 'Abone primary school',
  'F': nan,
  'G': nan,
  'H': 0,
  'I': 300,
  'J': 200,
  'K': 500,
  'L': 15,
  'M': 'Basic',
  'N': 'Yes-school has its own improved water sources',
  'O': 'Deep Borehole',
  'P': 2010,
  'Q': 'Government',
  'R': 'Basic',
  'S': 'Yes',
  'T': 'VIP latrine',
  'U': 2,
  'V': 2,
  'W': 'Yes',
  'X': 2016,
  'Y': 'Good and well-raised wall with roof and door',
  'Z': 'Medium - some minor cracks',
  'AA': 'Never emptied',
  'AB': '',
  'AC': 'Basic',
  'AD': 'Yes',
  'AE': 'Yes',
  'AF': 'Yes',
  'AG': 'YES- collapsed or partially collapsed latrines',
  'AH': 'Rarely',
  'AI': 0,
  'AJ': 0,
  'AK': 0},
 {'A': 'av7y-npt6-59t8',
  'B': 0,
  'C': 0,
  'D': 0,
  'E': 'odokomit primary school',
  'F': nan,
  'G': nan,
  'H': 0,
  'I': 509,
  'J': 470,
  'K': 979,
  'L': 110,
  'M': 'Limited',
  'N': 'Yes-school shares with a community improved water source',
  'O': 'Deep Borehole',
  'P': 0,
  'Q': '',
  

In [14]:
### to json
# Output locations are relative to the notebook's working directory.
data_json_path = '../resources/js/data/ug_school_data.json'
config_json_path = '../resources/js/data/ug_school_config.json'

# Rows are written as a pretty-printed list of records keyed by column letter.
df.to_json(data_json_path, orient="records", indent=2)

# The config mapping is written with the json module's default formatting.
with open(config_json_path, 'w') as config_file:
    json.dump(configs, config_file)