# Create country profiles

In [1]:
import csv
import decimal
import hashlib
import json
import math
import os
import re
import statistics

### User parameters

In [2]:
# '__file__' is a plain string literal here (notebooks do not define __file__),
# so realpath() resolves it against the current working directory and
# dirname() then yields that directory.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Locations of the notebook's data, relative to the working directory.
wd_dir = r'../../data/unsd/countryProfiles'
syb_dir = r'../../data/unsd/UNSYB/output'
print(f'data inputs dir: {wd_dir}')



# Echo the value of every top-level expression in a cell, not only the last one.
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdCountryProfiles
data inputs dir: ../../data/unsd/countryProfiles


## Utilities

#### Commercial rounding

In [3]:
def round_KFM(x, n):
    """Round ``x`` commercially (ties go away from zero) to ``n`` decimal places.

    The rounding is carried out in decimal arithmetic on the shortest
    decimal representation of ``x`` (its ``repr``).  Scaling a binary float
    by ``10**n`` instead can turn a tie such as 1.005 into
    100.49999999999999, which would then be rounded the wrong way.

    Parameters
    ----------
    x : int or float
        Value to round.
    n : int
        Number of decimal places to keep; negative values round to tens,
        hundreds, and so on.

    Returns
    -------
    float
        The rounded value, carrying the sign of ``x``.

    Raises
    ------
    ValueError
        If ``x`` is NaN.
    OverflowError
        If ``x`` is infinite.
    """
    value = float(x)
    places = int(n)

    # Non-finite inputs cannot be rounded to a number of decimal places.
    if math.isnan(value):
        raise ValueError('cannot round NaN')
    if math.isinf(value):
        raise OverflowError('cannot round infinity')

    # Decimal with coefficient 1 and exponent -places, e.g. 0.01 for places == 2.
    quantum = decimal.Decimal((0, (1,), -places))

    with decimal.localcontext() as ctx:
        # A float has at most 309 integer digits; leave room for the requested
        # decimals so quantize() never runs out of precision.
        ctx.prec = 350 + abs(places)
        rounded = decimal.Decimal(repr(value)).quantize(
            quantum, rounding=decimal.ROUND_HALF_UP)

    return float(rounded)

print(round_KFM(3.5123,2))
print(round_KFM(4.0,1))

3.51
4.0


#### Regular expression to capture numeric values (including those in scientific notation)
The regex is

```
-?      # an optional -
\d+     # a series of digits
(?:     # start non capturing group
  \.    # a dot
  \d+   # a series of digits
)?      
(?:     # start non capturing group
  e     # "e"
  -?    # an optional -
  \d+   # digits
)?
```


In [4]:
# Optional minus sign, digits, optional fractional part, optional exponent.
# The exponent accepts 'e' or 'E' and an optional '+' or '-' sign, so that
# strings such as '1e+16' or '5E-05' are captured in full.
_NUMERIC_PART_RE = re.compile(r'-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?')


def numeric_part(v):
    """Return the first number found in ``v`` as a float.

    Parameters
    ----------
    v : str or any
        Text to scan, e.g. ``'<4'`` or ``'-23.2e2 or more'``.  ``None``
        yields ``None``; any other non-string value is converted with
        ``str()`` before scanning.

    Returns
    -------
    float or None
        The first numeric token (plain or scientific notation) converted to
        float, or ``None`` when ``v`` contains no number.
    """
    if v is None:
        return None
    if not isinstance(v, str):
        v = str(v)

    match = _NUMERIC_PART_RE.search(v)
    if match is None:
        return None
    return float(match.group(0))


In [5]:
# Try numeric_part on a few representative inputs, printing one result per line.
for sample in ('<4', 'abs', '-23.2e2', '-23.2e2 or more', '0.01'):
    print(numeric_part(sample))

4.0
None
-2320.0
-2320.0
0.01


#### Compute a hash of a dictionary

In [6]:
def dict_hash(d):
    """Return an MD5 hex digest that identifies the content of dictionary ``d``.

    The dictionary is serialised to JSON with sorted keys before hashing, so:

    * two dictionaries with the same items hash identically regardless of
      the order in which their keys were inserted;
    * key/value boundaries are unambiguous (``{'ab': 'c'}`` and
      ``{'a': 'bc'}`` produce different digests);
    * values need not be strings; anything JSON cannot encode natively is
      converted with ``str()``.

    Parameters
    ----------
    d : dict
        Dictionary to fingerprint.  Its keys must be mutually sortable
        (e.g. all strings).

    Returns
    -------
    str
        32-character hexadecimal MD5 digest.
    """
    canonical = json.dumps(d, sort_keys=True, ensure_ascii=False, default=str)
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()


#### Get unique dictionaries in a list

In [7]:
def unique_dicts(dictionary_list):
    """Return the distinct dictionaries of ``dictionary_list``.

    Dictionaries are considered duplicates when ``dict_hash`` gives them the
    same digest.  For each digest the last dictionary seen is kept, placed
    at the position where that digest first appeared.
    """
    by_digest = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_digest.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [8]:
def subdict_list(dict_list, keys_list, exclude = False):
    """Project every dictionary of ``dict_list`` onto a subset of its keys.

    Parameters
    ----------
    dict_list : list of dict
        Dictionaries to project.
    keys_list : collection
        Keys to keep, or to drop when ``exclude`` is true.
    exclude : bool, optional
        ``False`` (default): each result holds exactly the keys in
        ``keys_list``; a key missing from a dictionary raises ``KeyError``.
        ``True``: each result holds every key that is *not* in ``keys_list``.

    Returns
    -------
    list of dict
        One new dictionary per input dictionary, in the same order.
    """
    if exclude:
        return [
            {name: content for name, content in record.items() if name not in keys_list}
            for record in dict_list
        ]

    return [{name: record[name] for name in keys_list} for record in dict_list]




#### Select the dictionaries in a list whose value for a given key matches

In [9]:
def select_dict(dict_list, k, v):
    """Return the dictionaries of ``dict_list`` whose entry for key ``k`` equals ``v``.

    The result preserves the input order and holds the original dictionary
    objects (not copies).  Every dictionary must contain ``k``; otherwise
    ``KeyError`` is raised.
    """
    return [candidate for candidate in dict_list if candidate[k] == v]

### Create list of countries

In [10]:
# Read the tab-delimited reference-area list into a list of dicts, one per row.
# The path is assembled with os.path.join so it is not tied to the Windows
# backslash separator.
countryArray = []
ref_areas_path = os.path.join('..', '..', 'globalResources', 'refAreas.txt')
with open(ref_areas_path, newline = '') as ref_areas_file:
    for row in csv.DictReader(ref_areas_file, delimiter='\t'):
        countryArray.append(dict(row))

# Show a sample record and the number of records loaded.
print(countryArray[1])


print("Total number or countries: ", len(countryArray))

{'X': '20.06660928', 'Y': '41.13897007', 'ISO3': 'ALB', 'M49': '8', 'areaName': 'Albania', 'UN_Member': '1', 'Country_Profile': '1'}
Total number or countries:  248


### Get catalogue of UN SYB series

In [18]:
# Directory holding the UN SYB outputs, written with forward slashes so the
# path is not tied to the Windows backslash separator.
syb_dir = r'../../data/unsd/UNSYB/output'

# Load the catalogue of SYB series from syb_dir instead of repeating the path.
with open(os.path.join(syb_dir, 'SYB_Series_Catalog.json')) as json_file:
    syb_catalogue = json.load(json_file)

# Inspect the nesting of the catalogue: first topic -> first table -> first series.
syb_catalogue[0].keys()
syb_catalogue[0]['tables'][0].keys()
syb_catalogue[0]['tables'][0]['series'][0].keys()

syb_catalogue[0]['tables'][0]['series'][0]

dict_keys(['topicId', 'topicNameEN', 'topicNameFR', 'tables'])

dict_keys(['tableCode', 'tableId', 'tableName', 'tableNote', 'tableStatusId', 'tbBkCode', 'tbBkPrint', 'series'])

dict_keys(['newSeriesName', 'units', 'multiplier', 'baseYear', 'SELECT', 'seriesCode', 'SYB_series'])

{'newSeriesName': 'Infant mortality ratio',
 'units': 'Number of deaths per 1,000 live births',
 'multiplier': 0,
 'baseYear': None,
 'SELECT': None,
 'seriesCode': 'SYB011',
 'SYB_series': [253]}

### Get catalogue of SYB items on SDG open data groups

In [19]:
# Load the catalogue of SYB items from the directory held in syb_dir, rather
# than from a hard-coded backslash path.
with open(os.path.join(syb_dir, 'unsd_syb_catalogue_201907.json')) as json_file:
    hub_catalogue = json.load(json_file)

# Inspect the first series entry of the first catalogue group.
hub_catalogue[0]['series'][0]

{'seriesName': 'Asylum seekers  including pending cases',
 'publicationYear': '2018',
 'id': 'a1bc495b927c4873b6d314a0ce3b7bd1',
 'owner': 'unstats_admin',
 'created': 1561274445000,
 'modified': 1561310873000,
 'title': 'SYB indicator: Asylum seekers, including pending cases',
 'type': 'Feature Service',
 'tags': ['SYB'],
 'url': 'https://services7.arcgis.com/gp50Ao2knMlOM89z/arcgis/rest/services/SYB072_2018/FeatureServer',
 'seriesCode': 'SYB072'}

### Get SYB fact builder

In [20]:
# Read the comma-delimited country-profile builder into a list of dicts,
# one per CSV row, keyed by the header row.
fact_builder = []
builder_path = wd_dir + r'/input/SYB_CountryProfileBuilder_201907.csv'
with open(builder_path, newline = '') as countryProfileBuilder:
    country_profile_builder = csv.DictReader(countryProfileBuilder, delimiter=',')
    fact_builder.extend(dict(row) for row in country_profile_builder)

# Show a sample row.
print(fact_builder[1])


{'countryProfile': '1', 'topicId': '2', 'topicName': 'Population and migration', 'tableId': '89', 'tableName': 'Population, density and surface area', 'seriesCode': 'SYB087', 'hub': '493ee3f35221493fa582b807034fe488', 'seriesDesc': 'Share of population by age group', 'units': 'Percent', 'multiplier': '0', 'baseYear': '', 'multiplierDesc': '', 'personnelTypeCode': '', 'personnelTypeDesc': '', 'activityCode': '', 'activityDesc': '', 'speciesCode': '', 'speciesDesc': '', 'ODACode': '', 'ODADesc': '', 'urbanizationCode': '', 'urbanizationDesc': '', 'ageCode': 'Y_GE65', 'ageDesc': '65 years old and over', 'sexCode': '', 'sexDesc': '', 'educationLevCode': '', 'educationLevDesc': '', 'Country_profiles': 'x', 'FactName': 'Share of population 65 years of age an over', 'Theme': 'Population and migration', 'Units': 'Percent'}


## Create SYB country facts

In [21]:
# Inspect the first country record: its field names, then the full record.
# Both expressions are echoed because ast_node_interactivity is set to "all".
countryArray[0].keys()
countryArray[0]

dict_keys(['X', 'Y', 'ISO3', 'M49', 'areaName', 'UN_Member', 'Country_Profile'])

{'X': '66.02688198',
 'Y': '33.83160199',
 'ISO3': 'AFG',
 'M49': '4',
 'areaName': 'Afghanistan',
 'UN_Member': '1',
 'Country_Profile': '1'}

In [None]:
# Observations are kept only for years in the half-open range [FIRST_YEAR, END_YEAR).
FIRST_YEAR = 2000
END_YEAR = 2020

# Themes of a country profile, in output order.  Fact-builder rows are matched
# to a theme through their 'Theme' column.
THEME_TITLES = ['Population and migration',
                'National accounts and labour market',
                'Trade and balance of payments',
                'Environment']

# Fact-builder columns that, when non-empty, restrict a fact to the records of
# its series carrying the same value under the same key.
DIMENSION_KEYS = ['personnelTypeCode',
                  'activityCode',
                  'speciesCode',
                  'ODACode',
                  'urbanizationCode',
                  'ageCode',
                  'sexCode',
                  'educationLevCode']

# Parsed series files keyed by path.  A series file does not depend on the
# country, so each one is read from disk once instead of once per country.
# The cache is reset every time this cell runs.
_series_cache = {}


def _load_series(fb):
    """Return the parsed JSON series file referenced by fact-builder row ``fb``."""
    data_file = r'../../data/unsd/UNSYB/output/Topic' + fb['topicId'].zfill(2) + '_Table' + fb['tableId'].zfill(3) + '_Series' + fb['seriesCode']+ '.json'
    if data_file not in _series_cache:
        with open(data_file) as json_file:
            _series_cache[data_file] = json.load(json_file)
    return _series_cache[data_file]


def _tidy_value(value):
    """Return ``value`` prepared for output, without modifying the source record.

    * ``None`` is returned unchanged.
    * A value whose text form has no decimal point is converted with ``int()``.
    * A value whose text form has more than one character after the decimal
      point is rounded with ``round(value, 2)``.
    * Anything else is returned unchanged.
    """
    text = str(value)
    decimal_pos = text.find('.')

    if decimal_pos == -1 and value is not None:
        return int(value)

    if decimal_pos > 0 and len(text) - decimal_pos > 2:
        return round(value, 2)

    return value


def _build_fact(fb, ref_area_code):
    """Build the fact described by fact-builder row ``fb`` for one reference area.

    Returns ``None`` when the series file has no entry whose 'refAreaCode'
    equals ``ref_area_code``.  Otherwise returns a dict with the series
    metadata plus parallel 'years' and 'values' lists sorted by year (both
    lists are empty when no record survives the dimension and year filters).
    """
    dim_values = {d: fb[d] for d in DIMENSION_KEYS if fb[d] != ''}

    area_entries = select_dict(_load_series(fb)['refAreas'], 'refAreaCode', ref_area_code)
    if len(area_entries) == 0:
        return None

    # Keep only the records that match every requested dimension value.
    data = area_entries[0]['data']
    for k, v in dim_values.items():
        data = select_dict(data, k, v)

    # Values are tidied only for records inside the year window; list.sort is
    # stable, so records sharing a year keep their order from the file.
    observations = [(i['year'], _tidy_value(i['value']))
                    for i in data
                    if FIRST_YEAR <= i['year'] < END_YEAR]
    observations.sort(key=lambda obs: obs[0])

    return {'seriesCode': fb['seriesCode'],
            'seriesTitle': fb['FactName'],
            'units': fb['Units'],
            'hub': fb['hub'],
            'years': [year for year, _ in observations],
            'values': [value for _, value in observations]}


count_country = 0

for this_country in countryArray:

    # Only areas flagged for a country profile are processed.
    if this_country['Country_Profile'] != '1':
        continue

    print('Building SYB country facts for: ' + this_country['M49'] + '-' + this_country['areaName'])

    count_country += 1

    country_code = str(this_country['M49'])
    country_name = this_country['areaName']
    # The series files are matched on 'refAreaCode' using the integer form of the M49 code.
    ref_area_code = int(country_code)

    country_syb_facts = {}
    country_syb_facts['country_code'] = country_code
    country_syb_facts['country_name'] = country_name
    country_syb_facts['UNMember'] = this_country['UN_Member']
    country_syb_facts['CountryProfile'] = this_country['Country_Profile']
    country_syb_facts['themes'] = [{'themeTitle': title} for title in THEME_TITLES]

    #----Loop through the series of the SYB fact builder:

    for th in country_syb_facts['themes']:

        thisTheme_facts = []

        for fb in select_dict(fact_builder, 'Theme', th['themeTitle']):

            # Only rows marked with 'x' take part in country profiles.
            if fb['Country_profiles'] != 'x':
                continue

            # NOTE(review): series SYB068 is always skipped; the reason is not
            # recorded in this notebook - confirm it is still intended.
            if fb['seriesCode'] == 'SYB068':
                continue

            this_fact = _build_fact(fb, ref_area_code)
            if this_fact is not None:
                thisTheme_facts.append(this_fact)

        th['themeFacts'] = thisTheme_facts

    # One JSON file per country, named with the zero-padded three-digit M49 code.
    with open(wd_dir + r'/output/SYB_country_profile' + str(country_code).zfill(3) + ".json", 'w') as outfile:
        json.dump(country_syb_facts, outfile, indent=4 )

        
        

   

Building SYB country facts for: 4-Afghanistan
Building SYB country facts for: 8-Albania
Building SYB country facts for: 10-Antarctica
Building SYB country facts for: 12-Algeria
Building SYB country facts for: 16-American Samoa
Building SYB country facts for: 20-Andorra
Building SYB country facts for: 24-Angola
Building SYB country facts for: 28-Antigua and Barbuda
Building SYB country facts for: 31-Azerbaijan
Building SYB country facts for: 32-Argentina
Building SYB country facts for: 36-Australia
Building SYB country facts for: 40-Austria
Building SYB country facts for: 44-Bahamas
Building SYB country facts for: 48-Bahrain
Building SYB country facts for: 50-Bangladesh
Building SYB country facts for: 51-Armenia
Building SYB country facts for: 52-Barbados
Building SYB country facts for: 56-Belgium
Building SYB country facts for: 60-Bermuda
Building SYB country facts for: 64-Bhutan
Building SYB country facts for: 68-Bolivia (Plurinational State of)
Building SYB country facts for: 70-Bosn