<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Init-Config" data-toc-modified-id="Init-Config-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Init Config</a></span><ul class="toc-item"><li><span><a href="#Import-Dependencies" data-toc-modified-id="Import-Dependencies-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Import Dependencies</a></span></li><li><span><a href="#Define-Environment" data-toc-modified-id="Define-Environment-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Define Environment</a></span></li><li><span><a href="#Dependency-Settings" data-toc-modified-id="Dependency-Settings-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Dependency Settings</a></span></li></ul></li><li><span><a href="#Helpers" data-toc-modified-id="Helpers-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Helpers</a></span><ul class="toc-item"><li><span><a href="#Generate-Column-Type" data-toc-modified-id="Generate-Column-Type-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Generate Column Type</a></span></li><li><span><a href="#Format-Column-Name" data-toc-modified-id="Format-Column-Name-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Format Column Name</a></span></li><li><span><a href="#Compare-Two-Datasets" data-toc-modified-id="Compare-Two-Datasets-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Compare Two Datasets</a></span></li></ul></li><li><span><a href="#Main-Functions" data-toc-modified-id="Main-Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Main Functions</a></span><ul class="toc-item"><li><span><a href="#Survey-Forms" data-toc-modified-id="Survey-Forms-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Survey Forms</a></span><ul class="toc-item"><li><span><a href="#Get-Data" data-toc-modified-id="Get-Data-3.1.1"><span class="toc-item-num">3.1.1&nbsp;&nbsp;</span>Get Data</a></span></li><li><span><a href="#Create-Table" data-toc-modified-id="Create-Table-3.1.2"><span 
class="toc-item-num">3.1.2&nbsp;&nbsp;</span>Create Table</a></span></li><li><span><a href="#Inspect-and-Check-Table" data-toc-modified-id="Inspect-and-Check-Table-3.1.3"><span class="toc-item-num">3.1.3&nbsp;&nbsp;</span>Inspect and Check Table</a></span></li><li><span><a href="#Transform-Data-&amp;-Define-Table-Type" data-toc-modified-id="Transform-Data-&amp;-Define-Table-Type-3.1.4"><span class="toc-item-num">3.1.4&nbsp;&nbsp;</span>Transform Data &amp; Define Table Type</a></span></li><li><span><a href="#Create-Columns-&amp;-Append-Meta" data-toc-modified-id="Create-Columns-&amp;-Append-Meta-3.1.5"><span class="toc-item-num">3.1.5&nbsp;&nbsp;</span>Create Columns &amp; Append Meta</a></span></li></ul></li></ul></li><li><span><a href="#Survey-Datapoints" data-toc-modified-id="Survey-Datapoints-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Survey Datapoints</a></span><ul class="toc-item"><li><span><a href="#Transform-Datapoints" data-toc-modified-id="Transform-Datapoints-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Transform Datapoints</a></span></li><li><span><a href="#Compare-Datapoints" data-toc-modified-id="Compare-Datapoints-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Compare Datapoints</a></span></li><li><span><a href="#Record-Datapoints" data-toc-modified-id="Record-Datapoints-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>Record Datapoints</a></span></li></ul></li></ul></div>

# Init Config

In [1]:
import sys
import getpass
sys.path.append('..')

## Import Dependencies

In [2]:
import os
import re
from urllib.parse import quote

import pandas as pd
from geoalchemy2 import Geography, WKTElement
from sqlalchemy import create_engine, text, inspect, MetaData, Table, Column, Integer, Float, String, Text
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.orm import sessionmaker

from FlowHandler import FlowHandler
from Akvo import Flow

## Define Environment

In [3]:
# Credentials and database name used to build the PostgreSQL connection URL.
# The database user is the login name of whoever runs the notebook.
PSQL_USER = getpass.getuser()
# The password is read from the environment so it is never stored in the
# notebook; a missing KEYCLOAK_PWD variable raises KeyError here.
# NOTE(review): the variable is named after Keycloak but is used as the
# database password -- confirm this is intended.
PSQL_PWD = os.environ['KEYCLOAK_PWD']
PSQL_DB = 'flow_test'

In [4]:
# Akvo Flow instance and survey to import.
instanceURI = 'seap'
surveyID = '285250912'

# Base URL of the Flow REST API for the selected instance.
requestURI = 'https://api.akvo.org/flow/orgs/{}'.format(instanceURI)

## Dependency Settings

In [5]:
# Widen pandas' display limits so wide survey tables are shown in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Percent-encode the credentials before embedding them in the URL: a password
# containing '@', '/', ':' or '%' would otherwise be split or decoded
# incorrectly when SQLAlchemy parses the connection string.
engine = create_engine("postgresql://{}:{}@localhost/{}".format(
    quote(PSQL_USER, safe=''),
    quote(PSQL_PWD, safe=''),
    PSQL_DB))
connection = engine.connect()
# Metadata bound to the engine; later cells call meta.create_all() without
# passing a bind, so the binding must stay.
meta = MetaData(engine)

# Session factory and a session bound to the same engine.
Session = sessionmaker(bind=engine)
session = Session()

# Helpers

## Generate Column Type

In [6]:
def generate_column_type(ctype, cname):
    """Build the SQLAlchemy column used to store answers to one question.

    Args:
        ctype: Question type string; 'GEO' and 'NUMBER' get dedicated
            column types, every other value is stored as text.
        cname: Name of the column to create.

    Returns:
        A new ``Column`` named ``cname``: a POINT geography (SRID 4326) for
        'GEO', a nullable float for 'NUMBER', a nullable text column otherwise.
    """
    if ctype == 'GEO':
        return Column(cname, Geography(geometry_type='POINT', srid=4326))
    sql_type = Float() if ctype == 'NUMBER' else Text()
    return Column(cname, sql_type, nullable=True)

## Format Column Name

In [7]:
# Punctuation stripped from question names before they become column names.
# Defined next to the function so it does not depend on a global that a later
# cell happens to create.
_COLUMN_NAME_PUNCTUATION = re.compile(r'[,\.!?]')


def format_column_name(name, identifier):
    """Turn a question name and id into a column name.

    Commas, periods, exclamation marks and question marks are removed, the
    text is lower-cased, spaces become underscores, and ``identifier`` is
    appended after an underscore.

    Args:
        name: Question text.
        identifier: Value appended to the name (the question id at the call
            sites in this notebook).

    Returns:
        The column name as a string, e.g. ``'what_is_your_name_123'``.
    """
    cleaned = _COLUMN_NAME_PUNCTUATION.sub('', name).lower().replace(' ', '_')
    return '{}_{}'.format(cleaned, identifier)

## Compare Two Datasets

In [8]:
def dataframe_difference(df1, df2):
    """Return the rows that appear in only one of two dataframes.

    The frames are outer-merged on their shared columns with a ``_merge``
    indicator column; rows flagged 'both' are dropped.

    Args:
        df1: Left dataframe.
        df2: Right dataframe.

    Returns:
        The merged dataframe restricted to rows whose ``_merge`` value is
        'left_only' or 'right_only'. The ``_merge`` column is kept.
    """
    merged = pd.merge(df1, df2, how='outer', indicator=True)
    only_one_side = merged['_merge'] != 'both'
    return merged.loc[only_one_side]

# Main Functions

## Survey Forms

### Get Data

In [9]:
# Fetch the survey definition; only its first form is imported.
surveyForm = Flow.getResponse('/'.join([requestURI, 'surveys', surveyID]))
formDetails = surveyForm['forms'][0]
questionGroups = formDetails['questionGroups']

# First page of submitted form instances for that form.
formInstancesUrl = formDetails['formInstancesUrl']
formInstances = Flow.getResponse(formInstancesUrl)
formInstancesData = formInstances['formInstances']

In [10]:
# Follow the pagination links until a page has no 'nextPageUrl', collecting
# every page's instances into formInstancesData.
while 'nextPageUrl' in formInstances:
    nextPageUrl = formInstances['nextPageUrl']
    print(nextPageUrl)
    nextPageData = Flow.getResponse(nextPageUrl)
    formInstancesData.extend(nextPageData['formInstances'])
    formInstances = nextPageData

https://api.akvo.org/flow/orgs/seap/form_instances?survey_id=312920912&form_id=288920912&cursor=Ci8SKWoNZX5ha3ZvZmxvdy02MnIYCxIOU3VydmV5SW5zdGFuY2UYpIv7mQEMGAAgAA


### Create Table

In [11]:
# Table name derived from the form name: lower-cased, spaces -> underscores.
PSQL_TABLE = '_'.join(formDetails['name'].lower().split(' '))

# Register the table on `meta` with its two fixed columns; the per-question
# columns are added by a later cell.
Table(
    PSQL_TABLE,
    meta,
    Column('id', Integer, primary_key=True, autoincrement=True),
    Column('datapoint_id', Integer),
)

Table('registration_form', MetaData(bind=Engine(postgresql://dedenbangkit:***@localhost/flow_test)), Column('id', Integer(), table=<registration_form>, primary_key=True, nullable=False), Column('datapoint_id', Integer(), table=<registration_form>), schema=None)

### Inspect and Check Table

In [12]:
# Names of the columns already present in the target table. A table that has
# not been created yet is the only expected failure and yields an empty list;
# any other error (connection problems, permissions, ...) is allowed to
# surface instead of being silently treated as "no columns".
try:
    columnInspector = inspect(engine).get_columns(PSQL_TABLE)
    colExisted = [x['name'] for x in columnInspector]
except NoSuchTableError:
    colExisted = []

### Transform Data & Define Table Type

In [13]:
# Punctuation pattern compiled once instead of once per question. It stays
# bound to the global name `regex` in case other code in the notebook reads it.
regex = re.compile(r'[,\.!?]')

# One record per question, enriched with its column name, its SQLAlchemy
# column definition, whether that column still has to be created, and the
# id/name of the group it belongs to.
question_records = []
for group in questionGroups:
    group_meta = {'group_id': group['id'], 'group_name': group['name']}
    for q in group['questions']:
        q['column_name'] = format_column_name(q['name'], q['id'])
        q['query'] = generate_column_type(q['type'], q['column_name'])
        # Exact-name membership: a substring test would report 'name_12' as
        # existing whenever a column such as 'full_name_123' exists.
        q['create'] = q['column_name'] not in colExisted
        q.update(group_meta)
        question_records.append(q)
questions = pd.DataFrame(question_records)
create_columns = questions[['create', 'query']].to_dict('records')

### Create Columns & Append Meta

In [14]:
# Attach every question column to the table definition registered on `meta`
# (the 'create' flag of each record is not consulted here), then create the
# tables known to `meta`.
for column_spec in create_columns:
    Table(PSQL_TABLE, meta, column_spec['query'], extend_existing=True)
meta.create_all()

# Survey Datapoints

## Transform Datapoints

In [15]:
# Flatten every form instance into one dict keyed by column name.
datapoints = []
for datapoint in formInstancesData:
    dataupdate = {'datapoint_id': int(datapoint['dataPointId'])}
    for group in questionGroups:
        gid = group['id']
        # When a group has several responses, later ones overwrite earlier
        # ones because they share the same column names.
        for response in datapoint['responses'][gid]:
            for q in group['questions']:
                cname = format_column_name(q['name'], q['id'])
                try:
                    answer = FlowHandler(response, q['id'], q['type'])
                    if q['type'] == 'GEO':
                        # presumably answer is (lat, lon); WKT expects "lon lat" -- confirm
                        answer = WKTElement('POINT({} {})'.format(answer[1], answer[0]))
                except Exception:
                    # Best effort: an answer that cannot be parsed is stored
                    # as NULL. Catching Exception (not a bare except) keeps
                    # KeyboardInterrupt and SystemExit working.
                    answer = None
                dataupdate[cname] = answer
    datapoints.append(dataupdate)

POINT(111.45035335345423 -0.02810165456234)
POINT(112.64164424354121 -7.27544384354543)
POINT(106.85136433543413 -6.22426776455634)
POINT(115.23634298 -8.67642098)
POINT(115.24004709 -8.6948913)
POINT(103.60154335465452 -1.60272164532343)
POINT(123.60568934354534 -10.1702217854943)
POINT(115.24005236104131 -8.694890434853733)
POINT(101.44619736465242 0.51482545634524)
POINT(122.50209874353212 -4.00472175435431)
POINT(115.23633138276637 -8.676439560949802)
POINT(115.23639136 -8.67644363)
POINT(110.37196936574631 -7.80315775643561)


## Compare Datapoints

In [16]:
# Keep only the fetched datapoints that are not already stored in the table.
new_data = pd.DataFrame(datapoints)
try:
    rows = connection.execute('SELECT * FROM {}'.format(PSQL_TABLE))
    old_data = pd.DataFrame([dict(row.items()) for row in rows])
    # An empty table has nothing to compare against: every fetched row is new.
    if not old_data.empty:
        old_data = old_data.drop(columns=['id'])
        merged = old_data.merge(new_data, indicator=True, how='outer')
        # 'right_only' rows exist only in the fetched data. 'left_only' rows
        # are already stored and must not be inserted again, and the '_merge'
        # helper column is dropped because it is not one of the columns this
        # notebook creates on the table.
        new_data = merged[merged['_merge'] == 'right_only'].drop(columns=['_merge'])
except Exception as error:
    # Best effort, as before, but no longer silent: on failure new_data is
    # left untouched, so every fetched datapoint will be inserted.
    print('Could not compare with existing rows; all fetched datapoints will be inserted: {}'.format(error))

## Record Datapoints

In [17]:
# Convert to a list of row dicts under a new name, so `new_data` stays a
# DataFrame and this cell can be re-run.
new_records = new_data.to_dict('records')
table_update = Table(PSQL_TABLE, meta)
# Skip the INSERT when there is nothing new: executing it with an empty
# parameter list would still run the statement.
if new_records:
    connection.execute(table_update.insert(), new_records)

<sqlalchemy.engine.result.ResultProxy at 0x10d400400>

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Init-Config" data-toc-modified-id="Init-Config-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Init Config</a></span><ul class="toc-item"><li><span><a href="#Import-Dependencies" data-toc-modified-id="Import-Dependencies-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Import Dependencies</a></span></li><li><span><a href="#Define-Environment" data-toc-modified-id="Define-Environment-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Define Environment</a></span></li><li><span><a href="#Dependency-Settings" data-toc-modified-id="Dependency-Settings-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Dependency Settings</a></span></li></ul></li><li><span><a href="#Helpers" data-toc-modified-id="Helpers-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Helpers</a></span><ul class="toc-item"><li><span><a href="#Generate-Column-Type" data-toc-modified-id="Generate-Column-Type-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Generate Column Type</a></span></li><li><span><a href="#Format-Column-Name" data-toc-modified-id="Format-Column-Name-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Format Column Name</a></span></li><li><span><a href="#Compare-Two-Datasets" data-toc-modified-id="Compare-Two-Datasets-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Compare Two Datasets</a></span></li></ul></li><li><span><a href="#Main-Functions" data-toc-modified-id="Main-Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Main Functions</a></span><ul class="toc-item"><li><span><a href="#Survey-Forms" data-toc-modified-id="Survey-Forms-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Survey Forms</a></span><ul class="toc-item"><li><span><a href="#Get-Data" data-toc-modified-id="Get-Data-3.1.1"><span class="toc-item-num">3.1.1&nbsp;&nbsp;</span>Get Data</a></span></li><li><span><a href="#Create-Table" data-toc-modified-id="Create-Table-3.1.2"><span 
class="toc-item-num">3.1.2&nbsp;&nbsp;</span>Create Table</a></span></li><li><span><a href="#Inspect-and-Check-Table" data-toc-modified-id="Inspect-and-Check-Table-3.1.3"><span class="toc-item-num">3.1.3&nbsp;&nbsp;</span>Inspect and Check Table</a></span></li><li><span><a href="#Transform-Data-&amp;-Define-Table-Type" data-toc-modified-id="Transform-Data-&amp;-Define-Table-Type-3.1.4"><span class="toc-item-num">3.1.4&nbsp;&nbsp;</span>Transform Data &amp; Define Table Type</a></span></li><li><span><a href="#Create-Columns-&amp;-Append-Meta" data-toc-modified-id="Create-Columns-&amp;-Append-Meta-3.1.5"><span class="toc-item-num">3.1.5&nbsp;&nbsp;</span>Create Columns &amp; Append Meta</a></span></li></ul></li></ul></li><li><span><a href="#Survey-Datapoints" data-toc-modified-id="Survey-Datapoints-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Survey Datapoints</a></span><ul class="toc-item"><li><span><a href="#Transform-Datapoints" data-toc-modified-id="Transform-Datapoints-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Transform Datapoints</a></span></li><li><span><a href="#Compare-Datapoints" data-toc-modified-id="Compare-Datapoints-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Compare Datapoints</a></span></li><li><span><a href="#Record-Datapoints" data-toc-modified-id="Record-Datapoints-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>Record Datapoints</a></span></li></ul></li></ul></div>