# Create SYB country fact builder

In [1]:
import csv
import hashlib
import json
import math
import os
import re
import statistics

### User parameters

In [22]:
# NOTE: '__file__' is a quoted string literal here, not the module attribute,
# so realpath() resolves it against the current working directory and
# dirname() therefore yields the working directory itself.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Relative locations of the input folders, written with forward slashes.
wd_dir = r'../../data/unsd/countryProfiles'
syb_dir = r'../../data/unsd/UNSYB/output'
print('data inputs dir: ' + wd_dir)



# Display the value of every expression statement in a cell, not only the last one.
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdCountryProfiles
data inputs dir: ../../data/unsd/countryProfiles


## Utilities

#### Commercial rounding

In [3]:
def round_KFM(x, n):
    """Round ``x`` to ``n`` decimal places, with halves rounded away from zero.

    The magnitude of ``x`` is scaled by ``10**n``, shifted up by 0.5 and
    truncated; the result is scaled back and the original sign is re-applied.

    Args:
        x: Number to round.
        n: Number of decimal places to keep.

    Returns:
        The rounded value as a float carrying the sign of ``x``.
    """
    scale = 10**n
    sign = math.copysign(1, x)
    # Adding 0.5 before truncating turns truncation into round-half-up on the magnitude.
    shifted = math.trunc(abs(x) * scale + 0.5)
    return shifted / scale * sign

print(round_KFM(3.5123,2))
print(round_KFM(4.0,1))

3.51
4.0


#### Regular expression to capture numeric values (including those in scientific notation)
The regex is

```
-?      # an optional -
\d+     # a series of digits
(?:     # start non capturing group
  \.    # a dot
  \d+   # a series of digits
)?      
(?:     # start non capturing group
  e     # "e"
  -?    # an optional -
  \d+   # digits
)?
```


In [4]:
def numeric_part(v):
    """Return the first number found in the string ``v`` as a float.

    A number is an optional minus sign, digits, an optional decimal part and
    an optional lowercase ``e`` exponent (itself with an optional minus sign).

    Args:
        v: Text to scan.

    Returns:
        The first matching number converted to float, or None when ``v``
        contains no match.
    """
    first_match = re.search(r'-?\d+(?:\.\d+)?(?:e-?\d+)?', v)
    return float(first_match.group(0)) if first_match is not None else None


In [5]:
# Spot-check numeric_part on a few representative inputs: a censored value,
# plain text, scientific notation (alone and followed by text) and a decimal.
for sample_text in ('<4', 'abs', '-23.2e2', '-23.2e2 or more', '0.01'):
    print(numeric_part(sample_text))

4.0
None
-2320.0
-2320.0
0.01


#### Compute a hash of a dictionary

In [6]:
def dict_hash(d):
    """Return an MD5 hex digest computed from the keys and values of ``d``.

    Keys and values are fed to the hash in the dictionary's iteration order,
    each converted to text and encoded as UTF-8. Converting with ``str()``
    lets the function accept non-string entries (for example None or numbers),
    which previously raised AttributeError on ``.encode``. Digests for
    dictionaries containing only strings are unchanged.

    NOTE(review): keys and values are concatenated without a separator, so
    ``{'ab': 'c'}`` and ``{'a': 'bc'}`` produce the same digest, and two dicts
    with the same items in a different order produce different digests.

    Args:
        d: Dictionary to hash.

    Returns:
        The 32-character hexadecimal MD5 digest.
    """
    out = hashlib.md5()
    for key, value in d.items():
        # str() is a no-op for strings and makes other types hashable here.
        out.update(str(key).encode('utf-8'))
        out.update(str(value).encode('utf-8'))
    return out.hexdigest()


#### Get unique dictionaries in a list

In [7]:
def unique_dicts(dictionary_list):
    """Return the dictionaries of ``dictionary_list`` with duplicates removed.

    Dictionaries are considered duplicates when ``dict_hash`` gives them the
    same digest. For each digest the last dictionary seen is the one kept,
    placed at the position where that digest first appeared.

    Args:
        dictionary_list: Iterable of dictionaries accepted by ``dict_hash``.

    Returns:
        A list with one dictionary per distinct digest.
    """
    by_digest = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_digest.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [8]:
def subdict_list(dict_list, keys_list, exclude = False):
    """Project every dictionary in ``dict_list`` onto a subset of its keys.

    Args:
        dict_list: List of dictionaries to project.
        keys_list: Keys to keep, or keys to drop when ``exclude`` is true.
        exclude: When false (default) each result holds exactly the keys in
            ``keys_list``. When true each result holds every key of the
            source dictionary that is not in ``keys_list``.

    Returns:
        A new list of new dictionaries, one per input dictionary.

    Raises:
        KeyError: In keep mode, when a dictionary lacks a requested key.
    """
    if exclude:
        return [
            {name: record[name] for name in record.keys() if name not in keys_list}
            for record in dict_list
        ]

    return [{name: record[name] for name in keys_list} for record in dict_list]




#### Select the dictionaries in a list whose value for a given key matches a given value

In [9]:
def select_dict(dict_list, k, v):
    """Return the dictionaries in ``dict_list`` whose entry for ``k`` equals ``v``.

    Args:
        dict_list: List of dictionaries to filter; each must contain ``k``.
        k: Key to look up in every dictionary.
        v: Value the entry must equal for the dictionary to be selected.

    Returns:
        A list of the matching dictionaries, in their original order.

    Raises:
        KeyError: When a dictionary does not contain ``k``.
    """
    return [record for record in dict_list if record[k] == v]

### Create list of countries

In [10]:
# Load the reference areas from a tab-delimited text file into a list of
# plain dicts, one per row, keyed by the file's header columns.
countryArray = []
# Forward slashes keep this relative path valid on every operating system;
# the previous backslash form only resolved on Windows.
with open(r'../../globalResources/refAreas.txt', newline = '') as countryFile:
    # Separate names for the file handle and the reader, so the handle is
    # not shadowed inside the with-block.
    countryListXY = csv.DictReader(countryFile, delimiter='\t')
    for row in countryListXY:
        countryArray.append(dict(row))

# Show the second record as a sample of the row structure.
print(countryArray[1])


print("Total number or countries: ", len(countryArray))

{'X': '20.06660928', 'Y': '41.13897007', 'ISO3': 'ALB', 'M49': '8', 'areaName': 'Albania', 'UN_Member': '1', 'Country_Profile': '1'}
Total number or countries:  248


In [18]:
# Folder holding the UNSYB outputs. Forward slashes keep the relative path
# valid on every operating system; the previous backslash form only resolved
# on Windows.
syb_dir = r'../../data/unsd/UNSYB/output'

# Build the catalogue path from syb_dir rather than repeating the folder, so
# the location is defined in one place.
with open(os.path.join(syb_dir, 'SYB_Series_Catalog.json')) as json_file:
    syb_catalogue = json.load(json_file)

# Display the keys of the first catalogue entry.
syb_catalogue[0].keys()

{'topicId': 2,
 'topicNameEN': 'Population and migration',
 'topicNameFR': 'Population et migration',
 'tables': [{'tableCode': 'lifecbmort',
   'tableId': 8,
   'tableName': 'Population growth and indicators of fertility and mortality',
   'tableNote': None,
   'tableStatusId': 3,
   'tbBkCode': 'X03 ',
   'tbBkPrint': 'Y',
   'series': [{'newSeriesName': 'Infant mortality ratio',
     'units': 'Number of deaths per 1,000 live births',
     'multiplier': 0,
     'baseYear': None,
     'SELECT': None,
     'seriesCode': 'SYB011',
     'SYB_series': [253]},
    {'newSeriesName': 'Life expectancy at birth',
     'units': 'Number of years',
     'multiplier': 0,
     'baseYear': None,
     'SELECT': None,
     'seriesCode': 'SYB012',
     'SYB_series': [36, 268, 269]},
    {'newSeriesName': 'Maternal mortality ratio',
     'units': 'Number of deaths per 1,000 population',
     'multiplier': 0,
     'baseYear': None,
     'SELECT': None,
     'seriesCode': 'SYB013',
     'SYB_series': [115