In [1]:
from os import environ, mkdir, path
from flow_handler import flow_handler, parse_data
import json
import requests as r
import pandas as pd
import logging
import copy

In [16]:
# Notebook configuration.
# INSTANCE is interpolated into the Flow API base URL.
# The survey ID can be found on https://webform.akvotest.org/
INSTANCE = "seap"
SURVEY_ID = 290080914
FORM_ID = 300160915
# Passed to parse_data() as its `debug` argument.
DEBUG_PRINT = False
# LIMIT_DATA: False for no limit, or a number; paging stops once at least
# that many form instances have been collected.
LIMIT_DATA = False
# SAVE_TO_CSV: True to write one CSV file per group, False to skip the export.
SAVE_TO_CSV = True

In [3]:
# Root URL of the Flow REST API for the configured instance.
baseURI = f'https://api-auth0.akvo.org/flow/orgs/{INSTANCE}'

# Environment variables read by get_access_token(); report every one that is
# missing up front (this only logs, it does not stop the notebook).
envs = ["AUTH0_CLIENT_ID","FLOW_USERNAME","FLOW_PASSWORD"]
for env in envs:
    if env in environ:
        continue
    logging.error(f"{env} NOT FOUND")

In [4]:
def get_access_token():
    """Request an Auth0 ID token using the password grant.

    Credentials are read from the ``AUTH0_CLIENT_ID``, ``FLOW_USERNAME`` and
    ``FLOW_PASSWORD`` environment variables.

    Returns:
        The ``id_token`` value from the token response, or ``False`` when the
        request fails, the body is not valid JSON, or the response carries no
        ``id_token`` (for example an authentication error payload).

    Raises:
        KeyError: if one of the three environment variables is not set.
    """
    tokenURI = 'https://akvofoundation.eu.auth0.com/oauth/token'
    auth = {
        "client_id": environ['AUTH0_CLIENT_ID'],
        "username": environ['FLOW_USERNAME'],
        "password": environ['FLOW_PASSWORD'],
        "grant_type": "password",
        "scope": "openid email",
    }

    try:
        account = r.post(tokenURI, data=auth).json()
    except (r.RequestException, ValueError):
        # RequestException covers transport failures; ValueError covers a body
        # that is not JSON. Anything else (e.g. KeyboardInterrupt) propagates
        # instead of being silently swallowed.
        logging.error('FAILED TO REQUEST TOKEN', exc_info=True)
        return False

    # A rejected login still yields a JSON body, just without an id_token.
    # Report it through the same False-returning failure path rather than
    # letting a KeyError escape.
    id_token = account.get("id_token") if isinstance(account, dict) else None
    if not id_token:
        logging.error('TOKEN RESPONSE DID NOT CONTAIN id_token')
        return False
    return id_token

In [5]:
def get_response(token, url):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        token: bearer token placed in the ``Authorization`` header.
        url: full URL to request.

    Returns:
        Whatever the response's ``.json()`` call yields.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Accept": "application/vnd.akvo.flow.v2+json",
        "Authorization": f"Bearer {token}",
    }
    reply = r.get(url, headers=request_headers)
    return reply.json()

In [6]:
def get_folders(token, parent_id = 0):
    """Fetch the folders endpoint for ``parent_id`` and return the full JSON response."""
    folders_url = f"{baseURI}/folders?parent_id={parent_id}"
    return get_response(token, folders_url)

In [7]:
def get_surveys(token, folder_id = 0):
    """Return the ``"surveys"`` entry of the surveys endpoint response for ``folder_id``.

    The result is ``None`` when the response has no ``"surveys"`` key.
    """
    surveys_url = f"{baseURI}/surveys?folder_id={folder_id}"
    payload = get_response(token, surveys_url)
    return payload.get("surveys")

In [8]:
def get_forms(token, survey_id):
    """Return the ``"forms"`` entry of the survey detail response for ``survey_id``.

    The result is ``None`` when the response has no ``"forms"`` key.
    """
    survey_url = f"{baseURI}/surveys/{survey_id}"
    survey = get_response(token, survey_url)
    return survey.get("forms")

In [9]:
def get_form_instances(token, survey_id, form_id, limit=False, next_page_url=False, result=None):
    """Collect form instances for a form, following ``nextPageUrl`` links.

    Args:
        token: bearer token passed through to ``get_response``.
        survey_id: survey identifier used to build the first page URL.
        form_id: form identifier used to build the first page URL.
        limit: falsy for no limit; otherwise paging stops as soon as at least
            ``limit`` instances have been collected. Whole pages are appended,
            so the returned list may hold more than ``limit`` items.
        next_page_url: when truthy, start from this URL instead of the first
            page.
        result: optional list to append to (extended in place). A fresh list
            is created when omitted, so separate calls never share state.

    Returns:
        The list of collected form instances.
    """
    # A literal ``[]`` default would be created once and reused by every call,
    # so re-running the fetch would keep appending to the previous results.
    if result is None:
        result = []

    url = next_page_url or f"{baseURI}/form_instances?survey_id={survey_id}&form_id={form_id}"
    # Iterate over pages instead of recursing once per page.
    while url:
        res = get_response(token, url)
        # Tolerate a page without a "formInstances" key.
        result += res.get("formInstances") or []
        if limit and len(result) >= limit:
            break
        url = res.get("nextPageUrl")
    return result

In [10]:
# Authenticate, list the survey's forms and pick the configured one.
token = get_access_token()
if not token:
    # get_access_token() returns False on failure; stop here instead of
    # sending a request with an unusable bearer token.
    raise RuntimeError("Could not obtain an access token; check the logged error")

forms = get_forms(token, SURVEY_ID)
if not forms:
    raise ValueError(f"Survey {SURVEY_ID} returned no forms")

# Select the form whose id matches FORM_ID and fail with a readable message
# (rather than an IndexError) when there is no such form.
form = next((candidate for candidate in forms if int(candidate["id"]) == FORM_ID), None)
if form is None:
    available = [candidate["id"] for candidate in forms]
    raise ValueError(f"Form {FORM_ID} not found in survey {SURVEY_ID}; available form ids: {available}")

In [11]:
# Download the raw submissions for the selected form (optionally capped by
# LIMIT_DATA), then hand them to parse_data together with the form definition.
form_instances = get_form_instances(
    token,
    SURVEY_ID,
    FORM_ID,
    limit=LIMIT_DATA,
)
results = parse_data(
    raw_data=form_instances,
    form=form,
    debug=DEBUG_PRINT,
)

In [12]:
# print(json.dumps(results, indent=1))

In [13]:
# transform results
# Build `data`: group name -> list of flat rows, one row per entry in the
# group's 'data' list. Each row carries the submission-level fields, the
# entry's repeat_index and its answers.
#
# The loop variable is deliberately not called `r`: that name is the alias of
# the `requests` module (`import requests as r`), and rebinding it here would
# break every HTTP helper for the rest of the kernel session.
data = {}
for record in results:
    groups = record.get('groups')
    if not groups:
        continue

    # Submission-level fields shared by every row: everything except 'groups'.
    meta = copy.copy(record)
    meta.pop('groups', None)

    for g in groups:
        group_name = g.get('name')
        for d in g.get('data'):
            # transform value, add meta
            newObj = {}
            newObj.update(meta)
            newObj.update({'repeat_index': d.get('repeat_index')})
            newObj.update(d.get('answers'))

            # add transformed value into data
            data.setdefault(group_name, []).append(newObj)

In [14]:
# print(json.dumps(data, indent=1))

In [19]:
if SAVE_TO_CSV:
    # Create the output directory on first use.
    folder = 'csv_files'
    folder_missing = not path.exists(folder)
    if folder_missing:
        mkdir(folder)

    # Write one CSV per key of `data`, named <FORM_ID>_<key>.csv.
    for dkey, rows in data.items():
        filename = f"{folder}/{FORM_ID}_{dkey}.csv"
        df = pd.DataFrame.from_records(rows)
        df.to_csv(filename)
        print(filename, "saved")

csv_files/300160915_Registration Form.csv saved
csv_files/300160915_More Details.csv saved
csv_files/300160915_Item Selection.csv saved
csv_files/300160915_Repeated Group.csv saved
