In [1]:
import requests
import json
import pandas as pd
import random

In [107]:
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 200)

In [2]:
SURVEY_ID = 709692367

In [21]:
AUTH_URL = "https://akvofoundation.eu.auth0.com/oauth/token"
SURVEY_URL = f"https://api-auth0.akvo.org/flow/orgs/maep/surveys/{SURVEY_ID}"
MONITORING_FORM_NAME = "2_INFORMATIONS SUR L’EXPLOITATION ET LA PARCELLE (Formulaire de suivi)"

In [4]:
USERNAME = ""
PASSWORD = ""

In [5]:
r = requests.Session()

In [6]:
metadata_keys = ['formVersion',
 'deviceIdentifier',
 'dataPointId',
 'submissionDate',
 'modifiedAt',
 'id',
 'identifier',
 'displayName',
 'formId',
 'surveyalTime',
 'submitter',
 'createdAt']

In [7]:
def get_token():                                                                                                                                                     
    data = {                                                                                                                                                                                                         
        "client_id": "S6Pm0WF4LHONRPRKjepPXZoX1muXm1JS",                                                                                                                                                             
        "username": USERNAME,                                                                                                                                                                                        
        "password": PASSWORD,                                                                                                                                                                     
        "grant_type": "password",                                                                                                                                                                                    
        "scope": "offline_access"                                                                                                                                                                                    
    }                                                                                                                                                                                                                
    req = r.post(AUTH_URL, data=data)                                                                                                                                                                             
    if req.status_code != 200:                                                                                                                                                                                       
        print("LOGIN ERROR")                                                                                                                                                              
    return req.json()

In [8]:
def get_data(target_url, headers):
    # print(f"GET: {target_url}")
    res = r.get(target_url, headers=headers)
    if res.status_code != 200:                                                                                                                                                                                       
        print("FETCH DATA ERROR")
    return res.json()

In [9]:
def get_headers(refresh_token):                                                                             
    login = {                                                                                            
        'client_id': 'S6Pm0WF4LHONRPRKjepPXZoX1muXm1JS',                                                 
        'grant_type': 'refresh_token',                                                                   
        'refresh_token': refresh_token,                                                                          
        'scope': 'openid email'                                                                          
    }                                                                                                    
    req = r.post(AUTH_URL, data=login)                          
    if req.status_code != 200:                                                                           
        return False                                                                                     
    return {                                                                                             
        'Content-Type': 'application/json',                                                              
        'Accept': 'application/vnd.akvo.flow.v2+json',                                                   
        'Authorization': 'Bearer {}'.format(req.json().get('id_token'))                                  
    }

In [12]:
def fetch_all(url, headers, formInstances=[]):                                                                                                                                                                       
    data = get_data(url, headers)                                                                                                                                                                                    
    next_url = data.get('nextPageUrl')                                                                                                                                                                               
    data = data.get('formInstances')
    # LIMITING DATA TO 100
    # if len(formInstances) == 100:
    #    print("FINISH THE LIMIT")
    #   return formInstances
    # END TEMPORARY CODE
    if data:                                                                                                                                                                                                         
        for d in data:                                                                                                                                                                                               
            formInstances.append(d)                                                                                                                                                                                  
        if next_url:                                                                                                                                                                                                 
            fetch_all(next_url, headers, formInstances)                                                                                                                                                              
    return formInstances 

In [13]:
def data_handler(data, qType):                                                                                                                                                                                       
    if data:                                                                                                                                                                                                         
        if qType in [                                                                                                                                                                                                
                'FREE_TEXT', 'NUMBER', 'BARCODE', 'DATE', 'GEOSHAPE', 'SCAN',                                                                                                                                        
                'CADDISFLY'                                                                                                                                                                                          
        ]:                                                                                                                                                                                                           
            return data                                                                                                                                                                                              
        if qType == 'OPTION':                                                                                                                                                                                        
            return handle_list(data, "text")                                                                                                                                                                         
        if qType == 'CASCADE':                                                                                                                                                                                       
            return handle_list(data, "name")                                                                                                                                                                         
        if qType in ['PHOTO', 'VIDEO']:                                                                                                                                                                              
            return data.get('filename')                                                                                                                                                                              
        if qType == 'VIDEO':                                                                                                                                                                                         
            return data.get('filename')                                                                                                                                                                              
        if qType == 'GEO':                                                                                                                                                                                           
            return {'lat': data.get('lat'), 'long': data.get('long')}                                                                                                                                                
        if qType == 'SIGNATURE':                                                                                                                                                                                     
            return data.get("name")                                                                                                                                                                                  
    return None

In [14]:
def handle_list(data, target):                                                                                                                                                                                       
    response = []                                                                                                                                                                                                    
    for value in data:                                                                                                                                                                                               
        response.append(value.get(target))                                                                                                                                                                       
    return "|".join(response)

In [15]:
def get_page(form_instance_url, form_definition, refresh_token):                                                                                                                                               
    headers = get_headers(refresh_token)                                                                                                                                                                        
    collections = fetch_all(form_instance_url, headers)                                                                                                                                                          
    results = []                                                                                                                                                                                                     
    for col in collections:                                                                                                                                                                                          
        dt = {}                                                                                                                                                                                                      
        for c in col:                                                                                                                                                                                                
            if c != 'responses':                                                                                                                                                                                     
                dt.update({c: col[c]})                                                                                                                                                                               
            else:                                                                                                                                                                                                    
                for g in form_definition:                                                                                                                                                                            
                    answers = col.get(c)                                                                                                                                                                             
                    answers = answers.get(g['id']) if answers else None
                    for q in g['questions']:                                                                                                                                                                         
                        d = None
                        try:
                            a = answers[0].get(q['id'])
                            d = data_handler(a, q['type'])                                                                                                                                                           
                        except Exception as e:                                                                                                                                                                       
                            pass                                                                                                                                                                         
                        # n = "{}|{}".format(q['id'], q['name'])                                                                                                                                                       
                        dt.update({q['name']: d})                                                                                                                                                                            
        results.append(dt)                                                                                                                                                                                           
    return results

In [16]:
refresh_token = get_token().get("refresh_token")
headers = get_headers(refresh_token)
surveys = get_data(SURVEY_URL, headers)

GET: https://api-auth0.akvo.org/flow/orgs/maep/surveys/709692367


In [17]:
registration_form_id = surveys.get("registrationFormId")
monitoring_forms = list(filter(lambda x: x.get("id") != registration_form_id, surveys.get("forms")))

In [18]:
data = {}

In [19]:
for form in surveys.get("forms"):
    form_id = form.get("id")
    form_definition = form.get("questionGroups")
    collections = get_page(form.get("formInstancesUrl"), form_definition, refresh_token)
    data.update({form_id: collections})

GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508
GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508&cursor=CjASKmoOZX5ha3ZvZmxvdy0xODJyGAsSDlN1cnZleUluc3RhbmNlGNeZpMcCDBgAIAA
GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508&cursor=CjASKmoOZX5ha3ZvZmxvdy0xODJyGAsSDlN1cnZleUluc3RhbmNlGMvm2skCDBgAIAA
GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508&cursor=CjASKmoOZX5ha3ZvZmxvdy0xODJyGAsSDlN1cnZleUluc3RhbmNlGOHK-s4CDBgAIAA
GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508&cursor=CjASKmoOZX5ha3ZvZmxvdy0xODJyGAsSDlN1cnZleUluc3RhbmNlGMSn09ACDBgAIAA
GET: https://api-auth0.akvo.org/flow/orgs/maep/form_instances?survey_id=709692367&form_id=715752508&cursor=CjASKmoOZX5ha3ZvZmxvdy0xODJyGAsSDlN1cnZleUluc3RhbmNlGPH3zNECDBgAIAA
GET: https://api-auth0.ak

In [22]:
def get_monitoring_id_by_name(name:str):
    target = list(filter(lambda x: x.get("name").lower() == name.lower(), monitoring_forms))
    if len(target):
        return target[0].get("id")
    else:
        print(f"NOT FOUND: {name}")

In [None]:
## DATA COLLECTIONS ENDS HERE
# WE ARE GOING TO TRANSFORM HERE

In [51]:
monitoring_form_id = get_monitoring_id_by_name(MONITORING_FORM_NAME)
registration_data = data.get("715752508")
monitoring_data = data.get(monitoring_form_id)

In [114]:
registration = pd.DataFrame(registration_data)
monitoring = pd.DataFrame(monitoring_data)

In [115]:
def splitter(x, pos):
    administration = x.split("|")
    if pos > len(administration):
        return ""
    return  administration[pos]

def extract_area(x):
    if x != x:
        return 0
    area_list = x.get("features")
    area = 0
    if area_list:
        for a in area_list:
            subarea = float(a.get("properties").get("area").replace(",","."))
            area += subarea
    return area

In [116]:
registration["Pole"] = registration["Localisation administrative"].apply(lambda x: splitter(x, 0))
registration["Département"] = registration["Localisation administrative"].apply(lambda x: splitter(x, 1))
registration["Commune"] = registration["Localisation administrative"].apply(lambda x: splitter(x, 2))
registration["Arrondissement"] = registration["Localisation administrative"].apply(lambda x: splitter(x, 3))
registration["Village"] = registration["Localisation administrative"].apply(lambda x: splitter(x, 4))
registration["AREA"] = registration["Superficie mesurée de la parcelle (smartphone)"].apply(extract_area)

In [117]:
merged = registration.merge(monitoring, on='dataPointId', suffixes=('_registration', '_monitoring'))

In [124]:
aggregation = merged.groupby(["Commune"], as_index = False)["AREA"].agg(
    ['min','mean','max', 'sum'])

In [125]:
aggregation

Unnamed: 0_level_0,min,mean,max,sum
Commune,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ADJA OUERE,117.521439,507.107109,896.69278,1014.214
BANIKOARA,2.072357,272.464086,480.111511,1634.785
BANTE,0.01,296.47993,998.768082,3261.279
BASSILA,0.0,181.104914,597.062744,2535.469
COPARGO,0.389343,731.042938,1461.696533,1462.086
DASSA ZOUME,3.641281,16762.79466,53989.594879,754325.8
DJIDJA,84.152298,162703.379179,645725.605316,13016270.0
DJOUGOU,98.547745,735.677158,2087.1259,3678.386
GLAZOUE,0.0,64.301392,167.238983,192.9042
GOGOUNOU,0.0,521.524609,873.683044,2607.623


In [126]:
registration.groupby(["Pole","Département","Commune","Arrondissement"], as_index = False)["AREA"].agg(
    ['mean','sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,sum
Pole,Département,Commune,Arrondissement,Unnamed: 4_level_1,Unnamed: 5_level_1
PDA1,ALIBORI,KARIMAMA,BIRNI LAFIA,0.0,0.0
PDA1,ALIBORI,KARIMAMA,KARIMAMA,126.481842,252.9637
PDA2,ALIBORI,BANIKOARA,BANIKOARA,480.111511,480.1115
PDA2,ALIBORI,BANIKOARA,FOUNOUGO,117.894318,235.7886
PDA2,ALIBORI,BANIKOARA,SOROKO,436.7005,436.7005
PDA2,ALIBORI,GOGOUNOU,GOGOUNOU,873.683044,873.683
PDA2,ALIBORI,GOGOUNOU,SORI,433.485,866.97
PDA2,ALIBORI,KANDI,KANDI 1,652.588409,1305.177
PDA2,ALIBORI,SEGBANA,SEGBANA,3.234924,3.234924
PDA2,ATACORA,KEROU,KEROU,580.25766,580.2577
