# Create SYB country fact builder

In [1]:
import csv
import hashlib
import json
import math
import os
import re
import statistics

### User parameters

In [2]:
# Directory the notebook is run from. Note that '__file__' is a string literal
# here (not the module attribute), so realpath() resolves it against the
# current working directory and dirname() then yields that directory.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Relative locations of the input data folders.
wd_dir = r'../../data/unsd/countryProfiles'
syb_dir = r'../../data/unsd/UNSYB/output'
print('data inputs dir: ' + wd_dir)



# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
# Echo the value of every expression statement in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdCountryProfiles
data inputs dir: ../../data/unsd/countryProfiles


## Utilities

#### Commercial rounding

In [3]:
def round_KFM(x, n):
    """Commercial rounding of ``x`` to ``n`` decimal places.

    The magnitude of ``x`` is scaled by ``10**n``, shifted up by one half and
    truncated, so halves are rounded away from zero (subject to the usual
    binary floating-point representation of the input). The sign of ``x`` is
    re-applied at the end.

    Parameters
    ----------
    x : number to round.
    n : number of decimal places to keep.

    Returns
    -------
    float : the rounded value.
    """
    scale = 10**n
    sign = math.copysign(1, x)
    # Work on the absolute value so that truncation always rounds halves
    # "outwards", then restore the sign.
    shifted = abs(x) * scale + 0.5
    return math.trunc(shifted) / scale * sign
    
# Quick check of round_KFM; the recorded output is 3.51 and 4.0.
print(round_KFM(3.5123,2))
print(round_KFM(4.0,1))

3.51
4.0


#### Regular expression to capture numeric values (including those in scientific notation)
The regex is

```
-?      # an optional -
\d+     # a series of digits
(?:     # start non capturing group
  \.    # a dot
  \d+   # a series of digits
)?      
(?:     # start non capturing group
  e     # "e"
  -?    # an optional -
  \d+   # digits
)?
```


In [4]:
# Compiled once at definition time instead of on every call.
# Pattern: optional minus sign, integer digits, optional fractional part and an
# optional exponent. The exponent accepts "e" or "E" and an optional "+" or "-"
# sign, so strings such as "1.5e+3" (the form Python itself prints for large
# floats) are read in full instead of being cut off at the exponent.
_NUMERIC_PART_RE = re.compile(r'-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?')


def numeric_part(v):
    """Return the first number found in the string ``v`` as a float.

    Any text before or after the number is ignored, so values such as
    ``'<4'`` or ``'-23.2e2 or more'`` can be parsed.

    Parameters
    ----------
    v : str
        Text that may contain a number, optionally in scientific notation.

    Returns
    -------
    float or None
        The first numeric value in ``v``, or ``None`` when ``v`` contains no
        digits.
    """
    match = _NUMERIC_PART_RE.search(v)
    if match is None:
        return None
    return float(match.group(0))


In [5]:
# Examples: a leading non-numeric character is skipped, text without digits
# gives None, and text following the number is ignored.
print(numeric_part('<4'))
print(numeric_part('abs'))
print(numeric_part('-23.2e2'))
print(numeric_part('-23.2e2 or more'))

print(numeric_part('0.01'))

4.0
None
-2320.0
-2320.0
0.01


#### Compute a hash of a dictionary

In [6]:
def dict_hash(d):
    """Return an MD5 hex digest that identifies the content of dictionary ``d``.

    The dictionary is first serialised to canonical JSON (keys sorted), which
    makes the digest:

    * independent of key insertion order, so equal dictionaries always get
      the same digest;
    * unambiguous: ``{'ab': 'c'}`` and ``{'a': 'bc'}`` no longer collide, as
      they did when keys and values were simply concatenated;
    * usable with non-string values (numbers, ``None``, nested lists/dicts).
      Values that JSON cannot represent are converted with ``str()``.

    The digest is only meant for de-duplication, not for security. Digest
    values differ from those produced by the earlier concatenation-based
    implementation.

    Parameters
    ----------
    d : dict
        Dictionary to fingerprint. Keys must be mutually sortable (for
        example, all strings).

    Returns
    -------
    str
        32-character hexadecimal MD5 digest.
    """
    canonical = json.dumps(d, sort_keys=True, default=str)
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()


#### Get unique dictionaries in a list

In [7]:
def unique_dicts(dictionary_list):
    """Return the distinct dictionaries found in ``dictionary_list``.

    Dictionaries are compared through ``dict_hash``. When several entries
    share a digest, the last one is kept, at the position where that digest
    first appeared.

    Parameters
    ----------
    dictionary_list : iterable of dict

    Returns
    -------
    list of dict
    """
    # A dict keyed by digest keeps one entry per distinct content.
    by_digest = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_digest.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [8]:
def subdict_list(dict_list, keys_list, exclude = False):
    """Project every dictionary in ``dict_list`` onto a subset of its keys.

    Parameters
    ----------
    dict_list : iterable of dict
        Dictionaries to trim.
    keys_list : collection
        Keys to keep, or keys to drop when ``exclude`` is true.
    exclude : bool, optional
        ``False`` (default): keep exactly the keys in ``keys_list``, in that
        order; a key missing from a dictionary raises ``KeyError``.
        ``True``: keep every key that is *not* in ``keys_list``.

    Returns
    -------
    list of dict
        One new dictionary per input dictionary.
    """
    if exclude:
        return [
            {key: value for key, value in record.items() if key not in keys_list}
            for record in dict_list
        ]

    return [{key: record[key] for key in keys_list} for record in dict_list]




#### Select the dictionaries in a list whose value for a given key matches

In [9]:
def select_dict(dict_list, k, v):
    """Return every dictionary in ``dict_list`` whose value under key ``k`` equals ``v``.

    Parameters
    ----------
    dict_list : iterable of dict
        Dictionaries to filter; each one must contain the key ``k``
        (otherwise ``KeyError`` is raised).
    k : hashable
        Key to look up in each dictionary.
    v : object
        Value to compare against, using ``==``.

    Returns
    -------
    list of dict
        The matching dictionaries (the same objects, not copies), in their
        original order.
    """
    return [record for record in dict_list if record[k] == v]

### Get SYB Series catalogue

In [12]:
# Folder holding the UNSYB output files. Forward slashes are used because they
# are accepted on Windows as well as on POSIX systems, whereas a
# backslash-separated path only resolves on Windows.
syb_dir = r'../../data/unsd/UNSYB/output'

# Load the series catalogue. It is traversed further down as a list of topics,
# each holding 'tables', which in turn hold 'series'.
with open(os.path.join(syb_dir, 'SYB_Series_Catalog.json')) as json_file:
    syb_catalogue = json.load(json_file)
    
#syb_catalogue

In [13]:
# Walk the catalogue hierarchy (topic -> table -> series), load the data file
# of each series and echo a short description of it followed by its data.
for t1 in syb_catalogue:

    for t2 in t1['tables']:

        for s in t2['series']:

            # --- Read data for this series ---
            # File name pattern:
            #   wide_Topic<topicId, 2 digits>_Table<tableId, 3 digits>_Series<seriesCode>.json
            filename = 'wide_Topic' + str(t1['topicId']).zfill(2) + '_Table'+ str(t2['tableId']).zfill(3) + '_Series' + s['seriesCode'] + '.json'
            # Use the configured output folder instead of repeating the path.
            with open(os.path.join(syb_dir, filename)) as json_file:
                data = json.load(json_file)

            # NOTE(review): `slices` is reset on every iteration and never
            # filled within this cell -- confirm whether it is still needed.
            slices = []

            # Identification of the current series.
            # NOTE(review): the name `slice` shadows the Python builtin; it is
            # kept because later cells may read this notebook-level variable.
            slice = {
                'topicId': t1['topicId'],
                'topicName': t1['topicNameEN'],
                'tableId': t2['tableId'],
                'tableName': t2['tableName'],
                'seriesCode': s['seriesCode'],
                'seriesDesc': s['newSeriesName'],
            }

            # The bare expressions below are echoed in the cell output because
            # ast_node_interactivity is set to "all" in the parameters cell.
            slice
            print('---------------')
            data['data']
            
                
            



{'topicId': 2,
 'topicName': 'Population and migration',
 'tableId': 8,
 'tableName': 'Population growth and indicators of fertility and mortality',
 'seriesCode': 'SYB011',
 'seriesDesc': 'Infant mortality ratio'}

---------------


[{'parentRegionId': '34',
  'parentRegion_DescEN': 'Southern Asia',
