# Create country profiles

In [36]:
import csv
import hashlib
import json
import math
import os
import re
import statistics
import urllib

import numpy as np
import urllib3  # allows to access a URL with python

### User parameters

In [37]:
# Release tag of the data being processed -- make sure it is the correct one.
release = '2019.Q2.G.01'

# Location of the data inputs, given relative to the folder the notebook runs in.
wd_dir = '../../data/unsd/countryProfiles'

# '__file__' is a quoted literal here (not the module attribute), so realpath()
# resolves it against the current working directory; its parent folder is
# therefore the directory the kernel was started in.
dir_path = os.path.split(os.path.realpath('__file__'))[0]

print(dir_path)
print('data inputs dir:', wd_dir)

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdCountryProfiles
data inputs dir: ../../data/unsd/countryProfiles


## Utilities

#### Compute a hash of a dictionary

In [38]:
def dict_hash(d):
    """Return an MD5 hex digest that identifies the contents of a dictionary.

    The digest is meant for de-duplication only (it is not a security
    feature) and is only meaningful when compared with other digests
    produced by this same function.

    Args:
        d: Dictionary to fingerprint. Keys and values may be of any type;
            each one is fed to the hash through its ``repr()``.

    Returns:
        A 32-character hexadecimal string.
    """
    out = hashlib.md5()
    # Visit the items in a canonical order so that two dictionaries with the
    # same content hash identically regardless of insertion order.
    for key, value in sorted(d.items(), key=lambda item: repr(item[0])):
        for part in (key, value):
            # repr() accepts non-string values (None, numbers, ...) and keeps
            # 1 and '1' distinct.
            data = repr(part).encode('utf-8')
            # Length-prefix every piece so that adjacent pieces cannot run
            # together: ('a', 'bc') and ('ab', 'c') yield different streams.
            out.update(str(len(data)).encode('ascii') + b':')
            out.update(data)
    return out.hexdigest()


#### Get unique dictionaries in a list

In [39]:
def unique_dicts(dictionary_list):
    """Drop duplicate dictionaries from a list.

    Two dictionaries count as duplicates when ``dict_hash`` returns the same
    digest for both. For each digest the last dictionary seen is the one
    kept, placed where that digest first appeared.

    Args:
        dictionary_list: Iterable of dictionaries accepted by ``dict_hash``.

    Returns:
        A new list with one dictionary per distinct digest.
    """
    # Later entries overwrite earlier ones that share the same digest.
    by_digest = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_digest.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [40]:
def subdict_list(dict_list, keys_list, exclude = False):
    """Project every dictionary of a list onto a subset of its keys.

    Args:
        dict_list: Iterable of dictionaries.
        keys_list: Keys to keep (default) or to drop (when ``exclude`` is true).
        exclude: If true, return each dictionary *without* the keys in
            ``keys_list``; otherwise return only those keys.

    Returns:
        A new list with one new dictionary per input dictionary.

    Raises:
        KeyError: In keep mode, when a dictionary lacks one of ``keys_list``.
    """
    if exclude:
        # Drop mode: keep whatever is not listed, in the dictionary's own order.
        return [
            {name: value for name, value in record.items() if name not in keys_list}
            for record in dict_list
        ]

    # Keep mode: keys come out in the order given by keys_list.
    return [{name: record[name] for name in keys_list} for record in dict_list]




#### Select the dicts in a list whose value for a given key matches

In [41]:
def select_dict(dict_list, k, v):
    """Return the dictionaries whose value under key ``k`` equals ``v``.

    Args:
        dict_list: Iterable of dictionaries; each must contain key ``k``.
        k: Key to look up in every dictionary.
        v: Value to compare against with ``==``.

    Returns:
        A new list holding the matching dictionaries (the same objects, not
        copies) in their original order; empty when nothing matches.

    Raises:
        KeyError: If a dictionary does not contain ``k``.
    """
    return [candidate for candidate in dict_list if candidate[k] == v]

### Read fact-builder conditions

In [42]:
# Load the fact-builder conditions: one plain dict per CSV row, keyed by the
# column names found in the file's header line.
fact_builder = []
with open(wd_dir + r'/input/CountryProfileBuilder_new.csv', newline='') as builder_file:
    fact_builder.extend(
        dict(record) for record in csv.DictReader(builder_file, delimiter=',')
    )

# Show one record (the second row) as a quick sanity check of the parsed columns.
print(fact_builder[1])

#fact_builder


{'countryProfile': '1', 'goal': '1', 'target': '1.2', 'indicator': '1.2.1', 'seriesCode': 'SI_POV_NAHC', 'dashboardId': '62f44e7da0864b1eb6f9d4bb5f9d2f54', 'seriesDesc': 'Proportion of population living below the national poverty line (%)', 'sliceDesc': 'Proportion of population living below the national poverty line (percent)', 'ageCode': '', 'locationCode': '_T', 'sexCode': '', 'boundsCode': '', 'educationLevelCode': '', 'typeOfProductCode': '', 'typeOfMobileTechnologyCode': '', 'typeOfSpeedCode': '', 'zero.removal': '1', 'Text.type': '1', 'unit1': '%', 'unit2': '', 'DA2.1': 'The proportion of population living below the national poverty line was approximately', 'DA2.2': '', 'DA3.1': 'The proportion of population living below the national poverty line', 'DA3.2': '', 'Down': ' declined from ', 'Up': ' increased from ', 'Threshold': ''}


### Get catalogue of UNSD items on SDG open data groups

In [43]:
# Load the catalogue of UNSD items published on the SDG open data groups.
# The path uses forward slashes (like `wd_dir`) so that it resolves on
# Windows as well as on Linux/macOS; backslash separators only work on Windows.
with open('../../data/unsd/unsd_catalogue_20190711.json') as json_file:
    hub_catalogue = json.load(json_file)

# Preview the first series of the first catalogue entry to show the record layout.
hub_catalogue[0]['series'][0]

{'seriesCode': 'SI_POV_EMP1',
 'seriesName': 'Employed population below international poverty line  by sex and age (%)',
 'release': '2019.Q2.G.01',
 'id': '254e360b5e434665b99233dd1d6747cb',
 'owner': 'unstats_admin',
 'created': 1562809876000,
 'modified': 1562837282000,
 'title': 'Indicator 1.1.1: Employed population below international poverty line, by sex and age (percent)',
 'type': 'Feature Service',
 'tags': ['poverty line',
  'poverty',
  'standard of living',
  'basic needs',
  'decent work',
  '2019.Q2.G.01'],
 'url': 'https://services7.arcgis.com/gp50Ao2knMlOM89z/arcgis/rest/services/SI_POV_EMP1_1_1_1_2019Q2G01/FeatureServer'}

## Match fact-builder entries against the UNSD catalogue

In [44]:
# Attach to every fact-builder entry the id of the catalogue series that has
# the same goal and series code. Entries without a match keep hubID = None;
# if several catalogue series match, the last one visited wins.
for fact in fact_builder:
    fact['hubID'] = None
    for goal_entry in hub_catalogue:
        # The fact-builder goal is compared against the string form of the
        # catalogue goal.
        if fact['goal'] == str(goal_entry['goal']):
            for series in goal_entry['series']:
                if series['seriesCode'] == fact['seriesCode']:
                    fact['hubID'] = series['id']

In [45]:
# List each series code with the hub id matched to it, so that entries left
# without a match (hubID of None) are easy to spot.
for fact in fact_builder:
    display(fact['seriesCode'], fact['hubID'])
    print('----')

'SI_POV_DAY1'

'd80b4105e32a4832bbea4e0bd3dcbf40'

----


'SI_POV_NAHC'

'ee9dd63027a442cb9af00d1d289a14e4'

----


'SH_STA_OVRWGT'

'17d4351d412c4c05a0e45c31f5cc1385'

----


'SH_DYN_MORT'

'f7f1ee3e71f14210a777d25aa95b846c'

----


'SH_DTH_NCOM'

'68674946646d4303a8429ca180b14481'

----


'SG_GEN_PARL'

'29b8dbe8209c4966802edf3a38623f2c'

----


'EG_ELC_ACCS'

'efe5c0628d4749b38b4be596740e2956'

----


'NY_GDP_PCAP'

'97f4c9aba77f4fed8f217784dfdd649f'

----


'SL_TLF_UEM'

'c386199f78d041d5b1c7b5403ed6bc52'

----


'EN_ATM_CO2GDP'

'36ff192d48504b34bbc8af3ff30f565b'

----


'EN_LND_SLUM'

'27d1c1e5ebc6410597603e9bf52829f3'

----


'EN_MAT_DOMCMPC'

'b224da3fb5df4cbe83dc8b20de71dc89'

----


'AG_LND_FRST'

'2f7acf84fdad47d183f7e2041b474e02'

----


'DT_TDS_DECT'

'e061fd67f9f84933addcd27ca6154354'

----


'SI_POV_EMP1'

'254e360b5e434665b99233dd1d6747cb'

----


'SH_STA_STUNT'

'c4bdefff898c4183a01dcdc6d085ff26'

----


'SE_PRE_PARTN'

'8bb6d84e7a5a4253ba5dba1c9144439b'

----


'SP_DYN_MRBF18'

'c87948a7e6ab4aec92abffb70bab4e7c'

----


'SH_H2O_SAFE'

'db95089773654b6898ef4af01c440140'

----


'SH_SAN_DEFECT'

'6d84807ee23f4ddbaf2b9ba0768c8a12'

----


'SH_SAN_SAFE'

'961186c499834224bb09e32f68092855'

----


'IT_MOB_NTWK'

'bcf2ed2c448a4a3bafe7ea8951d4a259'

----


'SL_EMP_GTOTL'

'54681bb23185478eb1e01e147bf32f5d'

----


'SG_REG_BRTH'

'359a92a4ce7545a7a6614a31440f9f5a'

----


'SE_GPI_MATACH'

'9e748f270b064e76b062cba1d81982a5'

----


'SE_GPI_REAACH'

'164c07649d7d4c0fa50ea2175619ba61'

----


'SN_ITK_DEFC'

'08f4e15e1afe43d8a9bbeeae7f4bd3ca'

----


'EN_MAT_DOMCMPG'

'7794a89241654512bc4117205a9d6d00'

----


'ER_PTD_FRWRT'

'9fcf31b0eb6b40ddb8a4c963bf78d761'

----


'ER_PTD_TERRS'

'50b0017bc20a400e8bfb92da7591a28b'

----


'EG_EGY_CLEAN'

'1dc5d53ca0834e8da4594e86180cf55e'

----


'ER_MRN_MARIN'

'f205b31db8bf44ea9f8d23b41cd4a9a9'

----


'GB_XPD_RSDV'

'06bbe00c627d46f884b9caf36413ad97'

----


'BX_TRF_PWKR'

'00e65cf61d8042f591c27ddb56d10063'

----


'EN_ATM_PM25'

'e9df4c5818464ca89b1f0cc403897836'

----


'VC_IHR_PSRC'

'f0b52ccea69a48b09df3f5fd58e58e84'

----


'SG_DSR_LGRGSR'

'184b20498cbc4f48831c47e5d9433d4c'

----


'SG_STT_NSDSFND'

'f503b6296a1b48439c57788ac9a10619'

----


'SH_HIV_INCD'

'6e8b1982b7a643b9bb1906a8edfd31a2'

----


'SH_STA_MMR'

'1e40231c75c64219952a92c61d916452'

----


'IT_NET_BBP'

'9e7ac88c99e1423abcac29a5f5a3ab28'

----


'SH_STA_WASTE'

'1206e053761f4924869688ab3bb6a53c'

----


'SH_TBS_INCID'

'3a49aa2198544ff5ad4f856f1a71135b'

----


'SH_STA_MALR'

'6070453223094596870eb5f16996d0bb'

----


'SE_GPI_TRATEA'

'2220f2631b044c80811e3209baed8871'

----


'IC_GEN_MGTL'

'b23f560693c240b29009e570903b5ff2'

----


'SH_SAN_DEFECT'

'6d84807ee23f4ddbaf2b9ba0768c8a12'

----


'SH_SAN_DEFECT'

'6d84807ee23f4ddbaf2b9ba0768c8a12'

----


'SH_SAN_SAFE'

'961186c499834224bb09e32f68092855'

----


'SH_SAN_SAFE'

'961186c499834224bb09e32f68092855'

----


'SL_TLF_UEM'

'c386199f78d041d5b1c7b5403ed6bc52'

----


'SL_TLF_UEM'

'c386199f78d041d5b1c7b5403ed6bc52'

----


'SL_TLF_UEM'

'c386199f78d041d5b1c7b5403ed6bc52'

----


'SG_STT_NSDSIMPL'

'007e996bd74146de86baa15033995ee2'

----
