## Init Dependencies

In [68]:
import requests
import os
import pandas as pd
from datetime import datetime
import json

### Workspace Setup

In [69]:
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

## Set Static Variables

In [92]:
URL = 'http://rsr.akvo.org/rest/v1/'
PROJECT_ID = '7924'
RSR_TOKEN = os.environ['RSR_TOKEN']
FMT = '/?format=json&limit=1'
FMT100 = '/?format=json&limit=100'

## Set Authentication

In [71]:
headers = {
    'content-type': 'application/json',
    'Authorization': RSR_TOKEN
}

## Helper Functions

In [72]:
def get_response(endpoint, param, value):
    uri = '{}{}{}&{}={}'.format(URL, endpoint, FMT100, param, value)
    print(get_time() + ' Fetching - ' + uri)
    data = requests.get(uri, headers=headers)
    data = data.json()
    return data

In [73]:
def get_time():
    now = datetime.now().time().strftime("%H:%M:%S")
    return now

In [74]:
def get_sibling_id(x):
    for k,v in x.items():
        return k

In [75]:
def get_report_type(ps,pe):
    rt = {'is_yearly':False}
    psm = ps.split('-')[1]
    pem = pe.split('-')[1]
    if psm == '01' and pem == '12':
        rt = {'is_yearly':True}
    if psm == '01' and pem == '01':
        rt = {'is_yearly':True}
    return rt

In [76]:
def get_dimension_country(dv):
    dp = dv['value'].split(' - ')
    dv = {}
    if dp[0].lower() in ['zambia','malawi','mozambique']:
        dv.update({
            'commodity':'',
            'country':dp[0],
            'has_commodity':False,
            'has_country':True
        })
    else:
        dv.update({
            'commodity':dp[0],
            'country':'',
            'has_commodity':False,
            'has_country':True
        })
    if len(dp) == 2:
        dv.update({
            'commodity':dp[0],
            'country':dp[1],
            'has_commodity':True,
            'has_country':True
        })
    return dv

## Find Related Project

In [77]:
related_project = get_response('related_project','related_project',PROJECT_ID)

16:39:14 Fetching - http://rsr.akvo.org/rest/v1/related_project/?format=json&limit=100&related_project=7924


In [78]:
results_framework_list = list(pd.DataFrame(related_project['results'])['project'])

### Trace All Children (Alternative)

In [98]:
all_results_framework = []
def trace_childrens(project_id):
    related = get_response('related_project','related_project',project_id)        
    if len(related['results']) > 0:
        for result in related['results']:
            all_results_framework.append(result)
            trace_childrens(result['project'])
    else:
        return all_results_framework

### Concat Results Frameworks (Alternative)

In [99]:
def trace_onechildren():
    results_framework = []
    for i, rf in enumerate(results_framework_list):
        result_framework = get_response('results_framework','project',rf)['results']
        if i == 0:
            results_framework = result_framework
        else:
            for res in result_framework:
                results_framework.append(res)
    return results_framework

### Only Parents

In [100]:
results_framework = []
def no_trace():
    results_framework = get_response('results_framework','project',PROJECT_ID)['results']
    return results_framework

In [130]:
results_framework = no_trace()

16:58:38 Fetching - http://rsr.akvo.org/rest/v1/results_framework/?format=json&limit=100&project=7924


## Begin Transformations

In [131]:
results_framework = pd.DataFrame(results_framework)

### Remove Project Without Childs

In [132]:
results_framework['child_projects'] = results_framework['child_projects'].apply(get_sibling_id)

In [133]:
#results_framework = results_framework[results_framework['child_projects'].notnull()]

In [134]:
results_framework = results_framework.to_dict('records')

In [135]:
indicators = []
periods = []
dimension_names = []
dimension_values = []
data_disaggregations = []
for result_framework in results_framework:
    rf_id = {'result':result_framework['id']}
    for indicator in result_framework['indicators']:
        indicator_id = indicator['id']
        for period in indicator['periods']:
            is_yearly = get_report_type(period['period_start'],period['period_end'])
            period.update(is_yearly)
            period.update(rf_id)
            period.update({'indicator':indicator_id})
            periods.append(period)
            for data in period['data']:
                if len(data) > 0:
                    period_id = data['period']
                    for disaggregations in data['disaggregations']:
                        disaggregations.update({'data_id': data['id']})
                        disaggregations.update({'period_id': data['period']})
                        data_disaggregations.append(disaggregations)
        del indicator['periods']
        for dimension_name in indicator['dimension_names']:
            for dimension_value in dimension_name['values']:
                dimension_value.update(rf_id)
                dimension_update = get_dimension_country(dimension_value)
                dimension_value.update(dimension_update)
                dimension_values.append(dimension_value)
            del dimension_name['values']
            dimension_name.update(rf_id)
            dimension_name.update({'indicator':indicator_id})
            dimension_names.append(dimension_name)
        del indicator['dimension_names']
        indicators.append(indicator)

### Joining Dimension Values with Data Disaggregations

In [157]:
dimension_values = pd.DataFrame(dimension_values).groupby(['id']).first().reset_index()

In [160]:
dimension_values

Unnamed: 0,id,value,name,parent_dimension_value,result,commodity,country,has_commodity,has_country
0,1385,Number of improved seed varieties,576,1232,37006,Number of improved seed varieties,,False,True
1,1386,Maize,576,1233,37006,Maize,,False,True
2,1387,Rice,576,1234,37006,Rice,,False,True
3,1388,Legumes,576,1235,37006,Legumes,,False,True
4,1389,Cassava,576,1236,37006,Cassava,,False,True
5,1390,"Number of improved agronomic, pest and disease...",576,1237,37006,"Number of improved agronomic, pest and disease...",,False,True
6,1391,"Number of improved post-harvest storage, labor...",576,1238,37006,"Number of improved post-harvest storage, labor...",,False,True
7,1392,Lead farmer,577,1239,37006,Lead farmer,,False,True
8,1393,Follower farmer,577,1240,37006,Follower farmer,,False,True
9,1394,Female farmer,577,1241,37006,Female farmer,,False,True


In [158]:
data_disaggregations = pd.DataFrame(data_disaggregations).merge(dimension_values, how='inner', left_on='dimension_value', right_on='id')

In [182]:
data_disaggregations = data_disaggregations.rename(columns={
    'value_x':'aggr_value',
    'id_y':'id',
    'id_x':'data_id'
})

In [183]:
data_disaggregations

Unnamed: 0,data_id,dimension_name,created_at,last_modified_at,aggr_value,numerator,denominator,narrative,incomplete_data,dimension_value,update,data_id.1,period_id,id,value_y,name,parent_dimension_value,result,commodity,country,has_commodity,has_country
0,3892,576,2019-08-22T11:39:54.708977,2019-08-22T11:39:54.709009,55.0,,,,True,1385,23093,23093,288162,1385,Number of improved seed varieties,576,1232,37006,Number of improved seed varieties,,False,True
1,3899,576,2019-08-22T11:41:10.613311,2019-08-22T11:41:10.613338,11.0,,,,True,1385,23094,23094,288176,1385,Number of improved seed varieties,576,1232,37006,Number of improved seed varieties,,False,True
2,3910,576,2019-08-22T11:44:54.807644,2019-08-22T11:44:54.807691,3.0,,,,True,1385,23098,23098,288218,1385,Number of improved seed varieties,576,1232,37006,Number of improved seed varieties,,False,True
3,3893,576,2019-08-22T11:39:54.735719,2019-08-22T11:39:54.735750,12.0,,,,True,1386,23093,23093,288162,1386,Maize,576,1233,37006,Maize,,False,True
4,3900,576,2019-08-22T11:41:10.639458,2019-08-22T11:41:10.639499,4.0,,,,True,1386,23094,23094,288176,1386,Maize,576,1233,37006,Maize,,False,True
5,3911,576,2019-08-22T11:44:54.832976,2019-08-22T11:44:54.833004,0.0,,,,True,1386,23098,23098,288218,1386,Maize,576,1233,37006,Maize,,False,True
6,3894,576,2019-08-22T11:39:54.761077,2019-08-22T11:39:54.761105,9.0,,,,True,1387,23093,23093,288162,1387,Rice,576,1234,37006,Rice,,False,True
7,3901,576,2019-08-22T11:41:10.664501,2019-08-22T11:41:10.664528,3.0,,,,True,1387,23094,23094,288176,1387,Rice,576,1234,37006,Rice,,False,True
8,3912,576,2019-08-22T11:44:54.855823,2019-08-22T11:44:54.855854,2.0,,,,True,1387,23098,23098,288218,1387,Rice,576,1234,37006,Rice,,False,True
9,3895,576,2019-08-22T11:39:54.787192,2019-08-22T11:39:54.787229,34.0,,,,True,1388,23093,23093,288162,1388,Legumes,576,1235,37006,Legumes,,False,True


## Test periods

In [184]:
test = pd.DataFrame(periods)

In [185]:
test['empty_disagregation'] = test['disaggregation_targets'].apply(lambda x: 1 if len(x) > 1 else 0)

In [186]:
test = test[test['empty_disagregation'] == 1]

In [187]:
test = test.reset_index()

In [188]:
test['disaggregation_targets'][1]

[{'period': 288162, 'id': 166, 'value': 0.0, 'dimension_value': 1389},
 {'period': 288162, 'id': 167, 'value': 4.0, 'dimension_value': 1391},
 {'period': 288162, 'id': 168, 'value': 23.0, 'dimension_value': 1388},
 {'period': 288162, 'id': 169, 'value': 17.0, 'dimension_value': 1390},
 {'period': 288162, 'id': 170, 'value': 55.0, 'dimension_value': 1385},
 {'period': 288162, 'id': 171, 'value': 8.0, 'dimension_value': 1387},
 {'period': 288162, 'id': 172, 'value': 24.0, 'dimension_value': 1386}]

### About Period

- Has many Data
- Belongs to Indicator
- Data has Many Disaggregation

_Data Format_ :

In [None]:
with open('example_data_rsr_indicator.json') as example:
    print(example)

### About Disaggregation Target

- Specific Per-projects
- Doesn't have any effect to children or parent projects
- Only for verification of Actual Value
- Belongs to Period
- Has many dimension value

## Update Result Framework

In [189]:
periods_df = pd.DataFrame(periods)
periods_df = periods_df.groupby(['is_yearly','result']).size().to_frame('size').reset_index().to_dict('records')

In [190]:
reports_annual = []
reports_semester = []
reports_both = []

In [191]:
for period_df in periods_df:
    if period_df['is_yearly']:
        reports_annual.append(period_df['result'])
    else:
        reports_semester.append(period_df['result'])
for y in reports_annual:
    for s in reports_semester:
        if y == s:
            reports_both.append(y)
for m in reports_both:
    reports_annual.remove(m)
    reports_semester.remove(m)

In [192]:
reports_both

[37006]

In [193]:
results_framework_new = []
for rf in results_framework:
    report_type = 'both'
    if rf['id'] in reports_annual:
        report_type = 'annual'
    if rf['id'] in reports_semester:
        report_type = 'semeseter'
    rf.update({'report_type':report_type})
    #try:
    #    child_project = get_sibling_id(rf['child_projects'])
    #    rf.update({'child_projects': child_project})
    #except:
    #    rf.update({'child_projects': None})
    try:
        parent_project = get_sibling_id(rf['parent_project'])
        rf.update({'parent_project': parent_project})
    except:
        rf.update({'parent_project': None})
    del rf['indicators']
    results_framework_new.append(rf)

### API Response

In [196]:
response = {
    'results_framework':results_framework_new,
    'indicators':indicators,
    'periods':periods,
    'dimension_names':dimension_names,
    'dimension_values':dimension_values,
    'dimension_data':data_disaggregations
}

In [477]:
d_indicators = pd.DataFrame(response['indicators'])

In [478]:
d_periods = pd.DataFrame(response['periods'])

In [481]:
d_periods['period_time'] = d_periods['period_start'] + ' - ' + d_periods['period_end']

In [482]:
d_periods = d_periods.groupby(['id',
                               'is_yearly',
                               'indicator',
                               'percent_accomplishment',
                               'period_time',
                               'target_value',
                               'actual_value']).size().to_frame('period_total').reset_index()

In [483]:
d_periods = d_periods.merge(d_indicators,
                            how='inner', 
                            left_on='indicator',
                            right_on='id').groupby(['id_x',
                                                    'id_y',
                                                    'title',
                                                    'period_time',
                                                    'description',
                                                    'indicator',
                                                    'target_value',
                                                    'percent_accomplishment',
                                                    'actual_value']).size().to_frame('total').reset_index()

In [484]:
d_periods = d_periods.drop(columns='id_y').rename(columns={'id_x':'id'})

In [485]:
d_periods

Unnamed: 0,id,title,period_time,description,indicator,target_value,percent_accomplishment,actual_value,total
0,288162,PDO 1 Number of technologies that are being ma...,2017-01-01 - 2017-12-31,"Technology refers to the tools, methods or mac...",80788,76,94.7,72.0,1
1,288176,PDO 3 Number of technologies generated or prom...,2017-01-01 - 2017-12-31,“Technology generated” - innovation developed....,80790,15,120.0,18.0,1
2,288197,Number of collaborative research and developme...,2017-01-01 - 2017-12-31,,80792,30,73.3,22.0,1
3,288204,Percentage of collaborative research and devel...,2017-01-01 - 2017-12-31,,80793,80,60.0,48.0,1
4,288218,Total number of improved technologies formally...,2017-01-01 - 2017-12-31,,80795,43,7.0,3.0,1
5,288225,Percentage of APPSA funded R&D dissemination p...,2017-01-01 - 2017-12-31,,80796,30,173.3,52.0,1
6,288239,Number of staff trained per research center,2017-01-01 - 2017-12-31,,80798,47,5104.3,2399.0,1
7,288246,Total # of funded scholarship holder that have...,2017-01-01 - 2017-12-31,,80799,25,16.0,4.0,1
8,288274,Overall implementation performance of change m...,2017-01-01 - 2017-12-31,,80803,4,25.0,1.0,1


## Output Schema

In [486]:
d_names = pd.DataFrame(response['dimension_names'])
d_names = d_names.rename(columns={'name':'disaggregation_type'})

In [487]:
d_results = pd.DataFrame(response['results_framework'])

In [488]:
d_names = d_names.merge(d_periods, how='inner', left_on='indicator', right_on='indicator')

In [489]:
d_names

Unnamed: 0,id_x,disaggregation_type,project,parent_dimension_name,result,indicator,id_y,title,period_time,description,target_value,percent_accomplishment,actual_value,total
0,576,Type of technology,7924,540,37006,80788,288162,PDO 1 Number of technologies that are being ma...,2017-01-01 - 2017-12-31,"Technology refers to the tools, methods or mac...",76,94.7,72.0,1
1,576,Type of technology,7924,540,37006,80790,288176,PDO 3 Number of technologies generated or prom...,2017-01-01 - 2017-12-31,“Technology generated” - innovation developed....,15,120.0,18.0,1
2,1656,Type of research project,7924,1653,37007,80792,288197,Number of collaborative research and developme...,2017-01-01 - 2017-12-31,,30,73.3,22.0,1
3,1656,Type of research project,7924,1653,37007,80793,288204,Percentage of collaborative research and devel...,2017-01-01 - 2017-12-31,,80,60.0,48.0,1
4,576,Type of technology,7924,540,37007,80795,288218,Total number of improved technologies formally...,2017-01-01 - 2017-12-31,,43,7.0,3.0,1
5,575,Type of training,7924,539,37008,80798,288239,Number of staff trained per research center,2017-01-01 - 2017-12-31,,47,5104.3,2399.0,1
6,579,Gender RCoL,7924,543,37008,80798,288239,Number of staff trained per research center,2017-01-01 - 2017-12-31,,47,5104.3,2399.0,1
7,1661,Type of scholarship,7924,1658,37008,80799,288246,Total # of funded scholarship holder that have...,2017-01-01 - 2017-12-31,,25,16.0,4.0,1


### Childs Aggregation

In [490]:
d_data = pd.DataFrame(response['dimension_data']).drop(['result','value_y','has_commodity','has_country'], axis=1)

In [491]:
d_names = d_names[['id_y',
                   'indicator',
                   'title',
                   'description',
                   'disaggregation_type',
                   'period_time',
                   'target_value',
                   'actual_value',
                   'percent_accomplishment'
                  ]].rename(columns={'id_y':'period_id'})

In [492]:
d_data = d_names.merge(d_data, how='inner', left_on='period_id',right_on='period_id')

In [494]:
d_data = d_data[[
    'title',
    'description',
    'disaggregation_type',
    'period_time',
    'target_value',
    'actual_value',
    'percent_accomplishment',
    'commodity',
    'country',
    'aggr_value'
]].rename(columns={'commodity':'dimension'}).sort_index()

In [496]:
d_data.groupby([
    'title',
    'description',
    'disaggregation_type',
    'period_time',
    'target_value',
    'actual_value',
    'percent_accomplishment',
    'dimension',
    'country',
    'aggr_value'
]).first()

title,description,disaggregation_type,period_time,target_value,actual_value,percent_accomplishment,dimension,country,aggr_value
Number of collaborative research and development projects under implementation,,Type of research project,2017-01-01 - 2017-12-31,30,22.0,73.3,Technology dissemination,,9.0
Number of collaborative research and development projects under implementation,,Type of research project,2017-01-01 - 2017-12-31,30,22.0,73.3,Technology generation,,13.0
Number of staff trained per research center,,Gender RCoL,2017-01-01 - 2017-12-31,47,2399.0,5104.3,Female,,1008.0
Number of staff trained per research center,,Gender RCoL,2017-01-01 - 2017-12-31,47,2399.0,5104.3,Male,,1391.0
Number of staff trained per research center,,Gender RCoL,2017-01-01 - 2017-12-31,47,2399.0,5104.3,management and leadership training,,23.0
Number of staff trained per research center,,Gender RCoL,2017-01-01 - 2017-12-31,47,2399.0,5104.3,technical research and dissemination training,,1916.0
Number of staff trained per research center,,Gender RCoL,2017-01-01 - 2017-12-31,47,2399.0,5104.3,training in administrative processes,,460.0
Number of staff trained per research center,,Type of training,2017-01-01 - 2017-12-31,47,2399.0,5104.3,Female,,1008.0
Number of staff trained per research center,,Type of training,2017-01-01 - 2017-12-31,47,2399.0,5104.3,Male,,1391.0
Number of staff trained per research center,,Type of training,2017-01-01 - 2017-12-31,47,2399.0,5104.3,management and leadership training,,23.0


### All APPSA

In [258]:
d_values = pd.DataFrame(response['dimension_values']).drop(['result','value','has_commodity','has_country'], axis=1)

In [259]:
d_values = d_values.merge(d_names, how='inner', left_on='name', right_on='id')
d_values = d_values.merge(d_results, how='inner', left_on='result', right_on='id')

In [260]:
d_values = d_values.rename(columns={
    'title_x':'indicator_title',
    'title_y': 'title'
})

In [261]:
d_values = d_values.groupby(['title','indicator_title','period_time','actual_value','disaggregation_type','commodity','country','report_type','child_projects']).size().to_frame('size').sort_index().reset_index()

In [262]:
#FINAL d_values.groupby(['title','child_projects','report_type','indicator_title','size','actual_value','disaggregation_type','country','commodity','period_time']).first()

In [263]:
d_values.groupby(['title','child_projects','report_type','indicator_title','size','actual_value','disaggregation_type','country','commodity','period_time']).first()

## Test Reports

In [None]:
test_report = d_values[['title','indicator_title','actual_value','disaggregation_type','commodity','country','period_time']]

In [None]:
test_report = test_report.groupby(['title','indicator_title','actual_value','period_time','disaggregation_type','commodity','country']).first()

In [None]:
test_report

In [None]:
#test_report.to_excel('test_final_with_zuhdil_060919.xlsx')