In [1]:
import os
import httpx
import xmltodict
import json
import re
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
from lxml import etree

In [2]:
# Lookup table read from a local CSV; xml_survey() filters it on the
# 'instances' column and reads the matching row's 'bucket' value.
instances = pd.read_csv('./flow-survey.csv')

In [3]:
def get_default_initial_question_group(form_id):
    """Build the fixed "INIT / UPDATE" question group placed first in a form.

    Args:
        form_id: Form identifier; anything accepted by ``int()``.

    Returns:
        A one-element list holding the group dict. The group id is
        ``form_id + 1`` and its three questions use ``form_id + 2`` through
        ``form_id + 4``; question orders run 1, 2, 3 in list order.
    """
    base_id = int(form_id)
    return [{
        "id": base_id + 1,
        "order": 1,
        "name": "INIT / UPDATE",
        "label": "INIT / UPDATE",
        "questions": [
            {
                "id": base_id + 2,
                "order": 1,
                "name": "reg_or_update",
                "label": "Is this a new registration or an update?",
                "type": "option",
                "required": True,
                "meta": False,
                "options": [
                    {
                        "order": 1,
                        "label": "Registration",
                        "value": "new"
                    },
                    {
                        "order": 2,
                        "label": "Update",
                        "value": "update"
                    }
                ],
                "default_value": {
                    "submission_type": {
                        "monitoring": "update",
                        "registration": "new"
                    }
                }
            }, {
                "id": base_id + 3,
                "order": 2,
                "name": "water_point_uuid",
                "label": "Water Point UUID",
                "short_label": "Water Point UUID",
                "meta": False,
                "type": "text",
                "required": True,
                "meta_uuid": True
            }, {
                "id": base_id + 4,
                # Each question needs its own position within the group.
                "order": 3,
                "name": "location",
                "label": "Which Village are you in?",
                "type": "administration",
                "required": True,
                "meta": True,
                "disabled": {
                    "submission_type": ["monitoring"]
                }
            }
        ]
    }]

In [4]:
def xml_survey(instance: str):
    """Return the S3 ``surveys`` base URL for an instance name, or None.

    Rows of the module-level ``instances`` table whose ``instances`` column
    equals ``instance`` are selected; the ``bucket`` value of the first such
    row becomes the bucket part of the URL. ``None`` is returned when no row
    matches.
    """
    matches = instances[instances['instances'] == instance]
    if not len(matches):
        return None
    bucket = next(iter(matches['bucket']))
    return 'https://{}.s3.eu-west-1.amazonaws.com/surveys'.format(bucket)

In [5]:
def readxml(xml_path: str, alias: str):
    """Parse a survey XML file into a dict and tag it with ``alias``.

    Args:
        xml_path: Path of the XML file to read.
        alias: Value stored under the ``"alias"`` key of the result.

    Returns:
        The content of the document's top-level ``survey`` element as a dict,
        with the string values ``"true"``/``"false"`` turned into booleans,
        the ``answer-value`` key renamed to ``answerValue`` and an ``alias``
        entry added.

    Raises:
        KeyError: If the document has no top-level ``survey`` element.
    """
    # Read raw bytes once. Decoding with the platform's default text encoding
    # and re-encoding with the document's declared encoding would corrupt
    # non-ASCII text whenever the two encodings differ.
    with open(xml_path, "rb") as xml_file:
        raw_xml = xml_file.read()
    encoding = etree.parse(BytesIO(raw_xml)).docinfo.encoding
    parsed = xmltodict.parse(raw_xml,
                             encoding=encoding,
                             attr_prefix='',
                             cdata_key='text',
                             force_list={
                                 'questionGroup', 'question', 'option',
                                 'level', 'altText', 'dependency'
                             })
    # Round-trip through JSON text so quoted booleans become real booleans
    # and the hyphenated key is renamed everywhere in one pass.
    as_json = json.dumps(parsed).replace('"true"', 'true').replace(
        '"false"', 'false').replace('"answer-value"', '"answerValue"')
    response = json.loads(as_json)['survey']
    response.update({"alias": alias})
    return response

In [6]:
def to_param(text):
    """Turn free text into an identifier-like name.

    Every character other than ASCII letters, digits and spaces is removed,
    surrounding whitespace is trimmed, and each remaining space becomes an
    underscore.
    """
    letters_digits_spaces = re.sub(r'[^A-Za-z0-9 ]+', '', text)
    pieces = letters_digits_spaces.strip().split(" ")
    return "_".join(pieces)

In [7]:
def convert_form(form: dict):
    """Convert a parsed survey dict into the target form definition.

    The result starts with the groups returned by
    ``get_default_initial_question_group`` followed by one group per entry of
    ``form["questionGroup"]``. Converted groups are numbered after the default
    ones so their ``id`` and ``order`` never repeat a value already used
    there.

    Args:
        form: Survey dict with ``surveyId``, ``name``, ``surveyGroupName`` and
            ``questionGroup`` keys. Each question needs ``id``, ``text``,
            ``type``, ``mandatory`` and ``localeNameFlag``; option questions
            also need ``options``.

    Returns:
        The form definition dict, including a ``question_groups`` list.

    Side effects:
        Prints the stripped ``surveyGroupName`` and the number of converted
        questions.
    """
    form_id = int(form["surveyId"])
    result = {
        "id": form_id,
        "form": form["name"].strip(),
        "description": form["surveyGroupName"].strip(),
        "defaultLanguage": "en",
        "languages": ["en"],
        "version": 30,
        "type": 1,
        "submission_types": ["registration", "monitoring"],
    }
    question_groups = get_default_initial_question_group(form_id)
    # Highest id already handed out to a default group or one of its
    # questions; converted groups are numbered after it to avoid duplicates.
    reserved_id = max(
        [form_id]
        + [default_group["id"] for default_group in question_groups]
        + [
            default_question["id"]
            for default_group in question_groups
            for default_question in default_group.get("questions", [])
        ]
    )
    # Converted groups are displayed after the default ones.
    order_offset = len(question_groups)
    qcount = 0
    for qgix, qg in enumerate(form["questionGroup"], start=1):
        heading = qg["heading"].strip()
        question_group = {
            "id": reserved_id + qgix,
            "order": order_offset + qgix,
            "name": heading,
            "label": heading,
        }
        questions = []
        # A group without any question has no "question" key at all.
        for iq, q in enumerate(qg.get("question") or [], start=1):
            qcount += 1
            options = []
            if q["type"] == "option":
                q_type = "option"
                if q["options"].get("allowMultiple"):
                    q_type = "multiple_option"
                # The option value doubles as its label.
                options = [
                    {"order": io, "value": o["value"], "label": o["value"]}
                    for io, o in enumerate(q["options"]["option"], start=1)
                ]
            elif q["type"] == "free":
                q_type = "text"
            elif q["type"] == "caddisfly":
                q_type = "number"
            else:
                q_type = q["type"].lower()
            # Any question carrying a validation rule is exported as a number.
            if q.get("validationRule"):
                q_type = "number"
            question = {
                "id": int(q["id"]),
                "order": iq,
                "name": to_param(q["text"]),
                "label": q["text"],
                "type": q_type,
                "required": q["mandatory"],
                "meta": q["localeNameFlag"],
            }
            if q.get("dependency"):
                question["dependency"] = [
                    {"id": int(d["question"]), "options": [d["answerValue"]]}
                    for d in q["dependency"]
                ]
            if options:
                question["options"] = options
            questions.append(question)
        question_group["questions"] = questions
        question_groups.append(question_group)
    result["question_groups"] = question_groups
    print(form["surveyGroupName"].strip(), qcount)
    return result

In [8]:
def download_form(alias: str, survey_id: int):
    """Download one survey archive, convert it and write it out as JSON.

    The archive ``<base url>/<survey_id>.zip`` is fetched, extracted into
    ``./xml/<alias>``, and ``./xml/<alias>/<survey_id>.xml`` is converted with
    ``readxml`` and ``convert_form``. The result is written to
    ``./json/<alias>/<survey_id>.json``.

    Args:
        alias: Instance name passed to ``xml_survey`` and used as the
            sub-directory name.
        survey_id: Survey identifier used in the archive and file names.

    Returns:
        ``False`` when the alias is unknown or the download fails; ``None``
        once the JSON file has been written.
    """
    instance = xml_survey(alias)
    if instance is None:
        # Unknown alias: there is no base URL to download from.
        return False
    ziploc = f'./xml/{alias}'
    jsonloc = f'./json/{alias}'
    xml_path = f"{ziploc}/{survey_id}.xml"
    try:
        response = httpx.get(f'{instance}/{survey_id}.zip')
        response.raise_for_status()
    except httpx.HTTPError:
        return False
    # makedirs also creates the missing ./xml and ./json parents.
    os.makedirs(ziploc, exist_ok=True)
    os.makedirs(jsonloc, exist_ok=True)
    with ZipFile(BytesIO(response.content)) as archive:
        archive.extractall(ziploc)
    result = readxml(xml_path=xml_path, alias=alias)
    result = convert_form(result)
    with open(f"{jsonloc}/{survey_id}.json", "w") as json_file:
        json.dump(result, json_file, indent=4)

In [9]:
# Convert each survey of the "fiji-dws" instance in turn.
for survey_id in (8520967, 1044040973, 17260923):
    download_form("fiji-dws", survey_id)

WAF  Water Treatment Plant Inspection 158
WAF's Waste Water Pump Station 11
WAF Waste Water Treatment Plant Inspection 103
