In [1]:
import pandas as pd
import numpy as np
from openpyxl import load_workbook
from IPython.core.display import display, HTML
from pathlib import Path
import json

In [2]:
# Read the table stylesheet that sits next to the notebook and inject it into
# the page; the HTML object is the cell's last expression, so it is rendered.
with open('dataframe.css') as stylesheet:
    css_rules = stylesheet.read()
HTML(f"<style>{css_rules}</style>")

In [3]:
# Excel workbook that the form definitions are derived from.
source = "../backend/source/baseline.xlsx"
# Instance identifier: used to pick matching sheets and as the output folder name.
instance_name = "wai-uganda"

In [4]:
def generate_json_file(data, name, instance=None, base_dir="../backend/source"):
    """Write ``data`` as indented JSON to ``<base_dir>/<instance>/forms/<name>.json``.

    Args:
        data: JSON-serialisable object to store.
        name: Form name; spaces become underscores and the result is
            lower-cased to build the file name.
        instance: Sub-folder of ``base_dir`` to write into. When omitted, the
            notebook-level ``instance_name`` is used, so existing two-argument
            calls behave as before.
        base_dir: Root folder holding the per-instance directories.

    Raises:
        TypeError: If ``data`` cannot be serialised to JSON (raised by
            ``json.dumps`` before any file is touched).
    """
    if instance is None:
        # Fall back to the notebook configuration for backward compatibility.
        instance = instance_name
    file_name = name.replace(" ", "_").lower()
    # Serialise first so a failure never leaves a truncated file behind.
    json_object = json.dumps(data, indent=4)
    target = Path(base_dir) / instance / "forms" / f"{file_name}.json"
    # A fresh checkout may not have the forms folder yet; create it on demand.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as outfile:
        outfile.write(json_object)

In [5]:
def _infer_type_and_options(series, max_options=8):
    """Guess a column's form type from its dtype and distinct values.

    Returns:
        A ``(formtype, options)`` tuple. ``formtype`` is ``"number"`` for
        integer/float columns, ``"option"`` for text columns with between 1 and
        ``max_options`` distinct values, and ``"text"`` otherwise. ``options``
        is a list of lower-cased strings for ``"option"`` and ``None`` for the
        other types.
    """
    dtype = series.dtype
    # dtype predicates cover every integer/float width (and nullable ints),
    # unlike ``dtype == int`` whose meaning depends on platform and numpy version.
    if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype):
        return "number", None
    is_textual = pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)
    if not is_textual:
        return "text", None
    # Cast to str before lower-casing: ``.str.lower()`` alone turns non-string
    # cells into NaN, which then surfaces as a bogus "nan" option or a crash.
    values = series.dropna().astype(str).str.lower()
    options = list(values.unique())
    # No values at all cannot be a choice question; too many is free text.
    if not options or len(options) > max_options:
        return "text", None
    # A lone "yes" or "no" implies the other answer simply never occurred.
    if len(options) == 1 and options[0] in ("yes", "no"):
        options = ["yes", "no"]
    return "option", options


def get_definitions(data, form_name, location):
    """Derive a form definition from a DataFrame, save it as JSON and display it.

    Each column becomes one question. Numeric columns are typed ``number``;
    text columns with at most 8 distinct values are typed ``option``; columns
    named latitude/longitude collapse into a single ``geo`` question and
    columns listed in ``location`` collapse into a single ``administration``
    question; everything else is ``text``. Questions whose name contains
    "name" (and the geo/administration questions) are flagged as meta.

    Args:
        data: DataFrame holding the sheet contents.
        form_name: Name used for the JSON file and the displayed heading.
        location: Collection of lower-case column names that identify
            administrative levels.

    Returns:
        A DataFrame indexed by (ID, QUESTION, TYPE, OPTIONS) summarising the
        derived questions, with one row per option for ``option`` questions.
    """
    forms = []
    jsonforms = []
    metaforms = []
    for index, col in enumerate(list(data)):
        order = index + 1
        # Excel headers can be numbers or dates; normalise to text for matching.
        label = str(col)
        meta = False
        formtype, options = _infer_type_and_options(data[col])
        if label.strip().lower() in ['latitude', 'longitude']:
            label = 'geolocation'
            formtype = 'geo'
        if label.strip().lower() in location:
            formtype = 'administration'
        cname = label.replace("_", " ").lower().strip()
        if "|" in cname:
            # Headers shaped like "prefix|question": keep the part after the first pipe.
            cname = cname.split("|")[1].strip()
        if "name" in cname:
            meta = True
        if formtype == "option":
            option_with_color = [{"name": o, "color": None} for o in options]
            jsonforms.append({"order": order, "question": cname, "type": formtype, "meta": meta, "options": option_with_color})
            for opt in options:
                forms.append({"ID": order, "QUESTION": cname, "TYPE": formtype.upper(), "OPTIONS": opt})
        elif formtype in ["geo", "administration"]:
            cname = "geolocation" if formtype == "geo" else "location"
            # Several source columns map onto one geo/administration question;
            # emit it only the first time it is seen.
            if cname not in metaforms:
                metaforms.append(cname)
                jsonforms.append({"order": order, "question": cname, "type": formtype, "meta": True, "options": None})
                forms.append({"ID": order, "QUESTION": cname, "TYPE": formtype.upper(), "OPTIONS": " - "})
        else:
            jsonforms.append({"order": order, "question": cname, "type": formtype, "meta": meta, "options": None})
            forms.append({"ID": order, "QUESTION": cname, "TYPE": formtype.upper(), "OPTIONS": " - "})
    generate_json_file(jsonforms, form_name)
    # Explicit columns keep the groupby valid even when the sheet has no columns.
    summary_columns = ['ID', 'QUESTION', 'TYPE', 'OPTIONS']
    results = pd.DataFrame(forms, columns=summary_columns).groupby(summary_columns).first()
    display(HTML(f"<h1>{form_name}</h1>"))
    display(HTML(results.to_html()))
    display(HTML("<hr/>"))
    return results

In [None]:
# Open the workbook once: the context manager releases the file handle when
# done, and every sheet is parsed from the same open workbook instead of
# re-reading the file per sheet.
with pd.ExcelFile(source) as workbook:
    all_sheets = workbook.sheet_names
    # Only sheets whose title mentions this instance hold its forms.
    sheets = [sheet for sheet in all_sheets if instance_name in sheet]
    print(sheets)
    for sheet_index, sheet in enumerate(sheets):
        data = workbook.parse(sheet)
        # Header-less spreadsheet columns are loaded as "Unnamed: N"; drop them.
        data = data.drop(columns=data.filter(regex="Unnamed").columns)
        sheet_name = f"{sheet_index + 1}-{sheet}".replace(instance_name, "").replace(" ", "").lower()
        get_definitions(data, f"0{sheet_name}", ["district", "county", "sub-county", "parish"])

['wai-uganda HH', 'wai-uganda Health', 'wai-uganda School', 'wai-uganda WP']
