In [1]:
import pandas as pd
import numpy as np
from openpyxl import load_workbook
from IPython.core.display import display, HTML
from pathlib import Path


In [2]:
# Read the stylesheet that sits next to the notebook and emit it as an inline
# <style> element; as the cell's last expression the HTML object is rendered.
css_rules = Path('dataframe.css').read_text()
HTML(f'<style>{css_rules}</style>')

In [3]:
# Path of the Excel workbook that the cells below open.
source = "./data-input.xlsx"
# Sheet-name substring; presumably meant to select which worksheets are
# processed (verify against the cell that builds the sheet list).
source_filter = "Eth"

In [4]:
def get_definitions(data, form_name, location):
    """Render a form-definition table inferred from the columns of ``data``.

    Every column becomes one question. Its type is derived as follows:

    * column label listed in ``location``            -> ``CASCADE``
    * integer dtype                                   -> ``NUMBER``
    * floating-point dtype                            -> ``DECIMAL``
    * object/string dtype with 1 to 8 distinct
      (capitalised) non-null values                   -> ``OPTION``,
      one output row per distinct value
    * anything else                                   -> ``TEXT``

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns describe the questions of the form.
    form_name : str
        Heading displayed above the table (HTML-escaped before display).
    location : collection
        Column labels that must be rendered as ``CASCADE`` questions.

    Returns
    -------
    None
        A heading, the HTML table and a horizontal rule are emitted through
        ``display``; nothing is returned.
    """
    # Function-scope import so the block does not depend on a new top-level import.
    from html import escape

    max_options = 8  # more distinct values than this is treated as free text
    table_columns = ["ID", "QUESTION", "TYPE", "OPTIONS"]
    dtypes = pd.api.types

    forms = []
    for index, col in enumerate(data.columns):
        # Positional access keeps this a Series even if labels are duplicated.
        column = data.iloc[:, index]
        dtype = column.dtype
        formtype = "text"
        options = None

        # Use pandas' dtype predicates instead of `dtype == int`, whose result
        # depends on the platform's default integer width.
        if dtypes.is_integer_dtype(dtype):
            formtype = "number"
        elif dtypes.is_float_dtype(dtype):
            formtype = "decimal"
        elif dtypes.is_object_dtype(dtype) or dtypes.is_string_dtype(dtype):
            # Stringify first so mixed object columns neither raise nor
            # produce NaN options when capitalised.
            values = column.dropna().astype(str).str.capitalize()
            distinct = list(values.unique())
            # A column without any value must stay a TEXT question; treating
            # it as OPTION would emit zero rows and silently drop the question.
            if 0 < len(distinct) <= max_options:
                formtype = "option"
                options = distinct

        # Location columns always win over the dtype-based guess.
        if col in location:
            formtype = "cascade"
            options = None

        cname = str(col).replace("_", " ").title()
        if "|" in cname:
            # Keep only the segment after the first "|".
            cname = cname.split("|")[1]

        if formtype == "option":
            forms.extend(
                {"ID": index + 1, "QUESTION": cname, "TYPE": formtype.upper(), "OPTIONS": opt}
                for opt in options
            )
        else:
            forms.append(
                {"ID": index + 1, "QUESTION": cname, "TYPE": formtype.upper(), "OPTIONS": " - "}
            )

    # Passing the column list explicitly keeps the groupby valid even when
    # `data` has no columns at all (empty `forms`).
    table = pd.DataFrame(forms, columns=table_columns).groupby(table_columns).first()

    display(HTML(f"<h1>{escape(str(form_name))}</h1>"))
    display(HTML(table.to_html()))
    display(HTML("<hr/>"))

In [5]:
# Read the worksheet names. A workbook opened with read_only=True keeps its
# file handle open until close() is called, so release it right away.
workbook = load_workbook(source, read_only=True)
try:
    all_sheets = workbook.sheetnames
finally:
    workbook.close()

# Keep only the sheets whose name contains the configured filter substring
# (`source_filter` from the configuration cell, instead of a repeated literal).
sheets = [name for name in all_sheets if source_filter in name]

for sheet in sheets:
    data = pd.read_excel(source, sheet_name=sheet)
    # Discard columns whose label matches "Unnamed" (presumably pandas'
    # placeholder labels for header-less columns) without mutating in place.
    data = data.drop(columns=data.filter(regex="Unnamed").columns)
    get_definitions(data, sheet, ["Woreda", "Kebele"])

ID,QUESTION,TYPE,OPTIONS
1,Woreda,CASCADE,-
2,Kebele,CASCADE,-
3,Village,TEXT,-
4,Name Of Respondent,TEXT,-
5,Household Size,NUMBER,-
6,Main Source Of Drinking Water,TEXT,-
7,Water Service Level,OPTION,Basic
7,Water Service Level,OPTION,Limited
7,Water Service Level,OPTION,Surface water
7,Water Service Level,OPTION,Unimproved


ID,QUESTION,TYPE,OPTIONS
1,Woreda,CASCADE,-
2,Kebele,CASCADE,-
3,Name Of Health Facility,TEXT,-
4,Latitude,DECIMAL,-
5,Longitude,DECIMAL,-
6,Type Of Healthy Facility,OPTION,Health post
6,Type Of Healthy Facility,OPTION,Other
7,Description Of Water Supply In Health Facilities,OPTION,No water supply in premises
7,Description Of Water Supply In Health Facilities,OPTION,Pipeline connections
7,Description Of Water Supply In Health Facilities,OPTION,Protected wells


ID,QUESTION,TYPE,OPTIONS
1,Woreda,CASCADE,-
2,Kebele,CASCADE,-
3,School Name,TEXT,-
4,Latitude,DECIMAL,-
5,Longitude,DECIMAL,-
6,School Type,OPTION,High school 9-10
6,School Type,OPTION,Primary(1-4)
6,School Type,OPTION,Primary(1-8)
7,Male Pupils,NUMBER,-
8,Female Pupils,DECIMAL,-


ID,QUESTION,TYPE,OPTIONS
1,Woreda,CASCADE,-
2,Kebele,CASCADE,-
3,Village,TEXT,-
4,Site Name,TEXT,-
5,Latitude,DECIMAL,-
6,Longitude,DECIMAL,-
7,Water Source Type 1,OPTION,Deep well with distribution
7,Water Source Type 1,OPTION,Hand dug well fitted with pump or windlass
7,Water Source Type 1,OPTION,Protected spring
7,Water Source Type 1,OPTION,Shallow well fitted with hand pump


ID,QUESTION,TYPE,OPTIONS
1,Woreda,CASCADE,-
2,Kebele,CASCADE,-
3,Village,TEXT,-
4,Latitude,DECIMAL,-
5,Longitude,DECIMAL,-
6,No. Of Hhs,DECIMAL,-
7,Initial Number Latrines,DECIMAL,-
8,Final Number Of Latrines,DECIMAL,-
9,Date Triggered,TEXT,-
10,Odf Status,OPTION,Declared
