# Create country profiles

In [59]:
import csv
import hashlib
import json
import math
import os
import re
import statistics
import urllib

import numpy as np
import pandas as pd
import urllib3  # allows to access a URL with python

### User parameters

In [47]:
# Release tag of the data; make sure to have the correct release here.
release = '2019.Q2.G.01'

# '__file__' is passed as a *string literal*: realpath() resolves it against
# the current working directory, so dirname() yields the directory the
# notebook is being run from.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Working directory for the country-profile inputs, relative to the notebook.
wd_dir = '../../data/unsd/countryProfiles'
print('data inputs dir: {}'.format(wd_dir))

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdCountryProfiles
data inputs dir: ../../data/unsd/countryProfiles


## Utilities

#### Compute a hash of a dictionary

In [48]:
def dict_hash(d):
    """Return an MD5 hex digest identifying the contents of a dictionary.

    The digest is used only as a lookup key for de-duplication, not for
    security purposes.

    Two dictionaries holding the same key/value pairs produce the same
    digest regardless of insertion order. Keys and values may be of any
    type: each is serialised with ``repr`` so that non-string values
    (e.g. ``None`` or numbers) do not raise, and so that ``None`` and the
    string ``'None'`` remain distinguishable.

    Args:
        d: Dictionary to hash.

    Returns:
        A 32-character hexadecimal string.

    Note:
        Values whose ``repr`` is not stable across equal objects (for
        example nested dicts built in a different insertion order) may
        still hash differently.
    """
    out = hashlib.md5()
    # Sort the pairs so that insertion order does not influence the digest.
    for key, value in sorted(d.items(), key=lambda item: repr(item[0])):
        for part in (key, value):
            token = repr(part).encode('utf-8')
            # A length prefix keeps field boundaries unambiguous, so that
            # {'ab': 'c'} and {'a': 'bc'} cannot feed identical bytes.
            out.update(str(len(token)).encode('utf-8') + b':' + token)
    return out.hexdigest()


#### Get unique dictionaries in a list

In [49]:
def unique_dicts(dictionary_list):
    """Return the distinct dictionaries of a list, as judged by ``dict_hash``.

    Dictionaries are keyed by their hash; when several entries share a hash,
    a later entry overwrites the earlier one stored under that key.

    Args:
        dictionary_list: Iterable of dictionaries.

    Returns:
        A list with one dictionary per distinct hash.
    """
    by_digest = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_digest.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [50]:
def subdict_list(dict_list, keys_list, exclude = False):
    """Project every dictionary in a list onto a subset of its keys.

    Args:
        dict_list: Iterable of dictionaries to project.
        keys_list: Keys to keep (default) or to drop (when ``exclude`` is true).
        exclude: If false, each result holds exactly the keys in
            ``keys_list``; a key missing from a dictionary raises
            ``KeyError``. If true, each result holds every key of the
            original dictionary that is *not* in ``keys_list``.

    Returns:
        A new list of new dictionaries, one per input dictionary.
    """
    if exclude:
        def project(record):
            # Keep everything except the listed keys.
            return {name: record[name] for name in record.keys() if name not in keys_list}
    else:
        def project(record):
            # Keep only the listed keys, in the order they are listed.
            return {name: record[name] for name in keys_list}

    return [project(record) for record in dict_list]




#### Select the dictionaries in a list whose value for a given key matches

In [51]:
def select_dict(dict_list, k, v):
    """Return every dictionary in ``dict_list`` whose value under ``k`` equals ``v``.

    Args:
        dict_list: Iterable of dictionaries to search.
        k: Key to look up in each dictionary; a dictionary lacking it
            raises ``KeyError``.
        v: Value to compare against.

    Returns:
        A list of the matching dictionaries, in their original order.
    """
    return [candidate for candidate in dict_list if candidate[k] == v]

### Read fact-builder conditions

In [52]:
# Load every row of the fact-builder CSV as a plain dict keyed by column header.
fact_builder = []
with open(wd_dir + r'/input/CountryProfileBuilder_new.csv', newline = '') as countryProfileBuilder:
    country_profile_builder = csv.DictReader(countryProfileBuilder, delimiter=',')
    fact_builder.extend(dict(row) for row in country_profile_builder)

# Print the record at index 1 (the second row) as a sanity check.
print(fact_builder[1])

#fact_builder


{'countryProfile': '1', 'goal': '1', 'target': '1.2', 'indicator': '1.2.1', 'seriesCode': 'SI_POV_NAHC', 'dashboardId': '62f44e7da0864b1eb6f9d4bb5f9d2f54', 'seriesDesc': 'Proportion of population living below the national poverty line (%)', 'sliceDesc': 'Proportion of population living below the national poverty line (percent)', 'ageCode': '', 'locationCode': '_T', 'sexCode': '', 'boundsCode': '', 'educationLevelCode': '', 'typeOfProductCode': '', 'typeOfMobileTechnologyCode': '', 'typeOfSpeedCode': '', 'zero.removal': '1', 'Text.type': '1', 'unit1': '%', 'unit2': '', 'DA2.1': 'The proportion of population living below the national poverty line was approximately', 'DA2.2': '', 'DA3.1': 'The proportion of population living below the national poverty line', 'DA3.2': '', 'Down': ' declined from ', 'Up': ' increased from ', 'Threshold': ''}


### Get catalogue of UNSD items on SDG open data groups

In [53]:
# Build the catalogue path from components so it resolves on Windows and
# POSIX alike (a hard-coded backslash path only works on Windows).
catalogue_path = os.path.join('..', '..', 'data', 'unsd', 'unsd_catalogue_20190711.json')

# Read as UTF-8 explicitly instead of relying on the platform default encoding.
with open(catalogue_path, encoding='utf-8') as json_file:
    hub_catalogue = json.load(json_file)

# Show the first series of the first catalogue entry as a sample.
hub_catalogue[0]['series'][0]

{'seriesCode': 'SI_POV_EMP1',
 'seriesName': 'Employed population below international poverty line  by sex and age (%)',
 'release': '2019.Q2.G.01',
 'id': '254e360b5e434665b99233dd1d6747cb',
 'owner': 'unstats_admin',
 'created': 1562809876000,
 'modified': 1562837282000,
 'title': 'Indicator 1.1.1: Employed population below international poverty line, by sex and age (percent)',
 'type': 'Feature Service',
 'tags': ['poverty line',
  'poverty',
  'standard of living',
  'basic needs',
  'decent work',
  '2019.Q2.G.01'],
 'url': 'https://services7.arcgis.com/gp50Ao2knMlOM89z/arcgis/rest/services/SI_POV_EMP1_1_1_1_2019Q2G01/FeatureServer'}

## Analyze fact builder against UNSD catalogue

In [54]:
# Attach to each fact-builder record the catalogue item id ('hubID') of the
# series with the same goal and seriesCode; records without a match keep None.
for fact in fact_builder:
    fact['hubID'] = None
    for goal_entry in hub_catalogue:
        # The fact's goal is compared against the string form of the catalogue goal.
        if fact['goal'] == str(goal_entry['goal']):
            for series in goal_entry['series']:
                if series['seriesCode'] == fact['seriesCode']:
                    # No early exit: if several series match, the last one wins.
                    fact['hubID'] = series['id']

In [57]:
#for f in fact_builder:
    #display(f['seriesCode'])
    #display(f['hubID'])
    #print('----')

In [58]:
# Display the first fact-builder record to inspect its fields, including 'hubID'.
fact_builder[0]

{'countryProfile': '1',
 'goal': '1',
 'target': '1.1',
 'indicator': '1.1.1',
 'seriesCode': 'SI_POV_DAY1',
 'dashboardId': '62f44e7da0864b1eb6f9d4bb5f9d2f54',
 'seriesDesc': 'Proportion of population below international poverty line (%)',
 'sliceDesc': 'Population below international poverty line (percent)',
 'ageCode': '',
 'locationCode': '',
 'sexCode': '',
 'boundsCode': '',
 'educationLevelCode': '',
 'typeOfProductCode': '',
 'typeOfMobileTechnologyCode': '',
 'typeOfSpeedCode': '',
 'zero.removal': '1',
 'Text.type': '1',
 'unit1': '%',
 'unit2': '%',
 'DA2.1': 'The proportion of the population living below the extreme poverty line was approximately ',
 'DA2.2': '',
 'DA3.1': 'The proportion of the population living below the extreme poverty line',
 'DA3.2': '',
 'Down': ' declined from ',
 'Up': ' increased from ',
 'Threshold': '',
 'hubID': 'd80b4105e32a4832bbea4e0bd3dcbf40'}

In [61]:
# Turn the list of record dicts into a table: one row per record, one column per key.
fact_builder_df = pd.DataFrame(fact_builder)

In [65]:
# Write the enriched fact builder (now including 'hubID') to CSV, without the index column.
fact_builder_df.to_csv(wd_dir + '/input/CountryProfileBuilder_20190711.csv', index=False)

# Keep the preview as the cell's last expression: Jupyter only renders the
# value of the final expression, so a head() call placed earlier is discarded.
fact_builder_df.head(4)