### Load required libraries

In [1]:
import json
import requests 
import os
from bs4 import BeautifulSoup
import re
import pandas as pd

### Create folder for data

In [181]:
# Create the output folder for the metadata JSON file.
# makedirs(..., exist_ok=True) also creates missing parent folders and does not
# raise when the folder already exists, so this cell can be re-run safely.
os.makedirs('..\\data\\ILO_Metadata', exist_ok=True)

### Interactive shell

In [2]:
# Show the value of every top-level expression in a cell, not only the last one.
# Later cells rely on this to display a frame in the middle of a cell.
# Source of the tip:
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Load the indicator list (codes used to build the URLs)

In [3]:
# Read the indicator table that was transcribed from the email.
# Each row holds an indicator name, a disaggregation (male / female / bothsex)
# and a `code` string of 'key=value' fields separated by blank lines
# (e.g. 'collection=YI\n\nindicator=...').
idctr = pd.read_csv('..\\data\\ILO_indicators\\email.csv')
idctr

Unnamed: 0,indicator,disaggregation,code
0,03a Labour force participation rate for person...,male,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
1,03a Labour force participation rate for person...,female,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
2,03a Labour force participation rate for person...,bothsex,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
3,03b Labour force participation rate for person...,male,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
4,03b Labour force participation rate for person...,female,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
5,03b Labour force participation rate for person...,bothsex,collection=YI\n\nindicator=EAP_DWAP_SEX_AGE_RT...
6,04 Proportion of employed who are own-account ...,male,collection=YI\n\nindicator=EMP_TEMP_SEX_STE_DT...
7,04 Proportion of employed who are own-account ...,female,collection=YI\n\nindicator=EMP_TEMP_SEX_STE_DT...
8,04 Proportion of employed who are own-account ...,bothsex,collection=YI\n\nindicator=EMP_TEMP_SEX_STE_DT...
9,05 Proportion of employed who are contributing...,male,collection=YI\n\nindicator=EMP_TEMP_SEX_STE_DT...


### get data structures

In [44]:
# Build the set of data-structure URLs needed for the indicators.
# Several indicators share a data structure, so a set keeps each URL once
# (author's note: 27 indicators map onto 4 data structures, e.g.
# EAP_DWAP_SEX_AGE_RT serves indicators 03a and 03b).
base_url = 'https://www.ilo.org/sdmx/rest/datastructure/ILO/YI_ALL_'
url_list = set()

for code_text in idctr.code:
    # Each entry is a list of 'key=value' fields separated by a blank line;
    # keep only the values, in their original order.
    values = list(map(lambda field: field.split('=')[1].strip(),
                      code_text.split('\n\n')))
    # The second field is the indicator id, which names the data structure.
    url_list.add(base_url + values[1])

url_list



{'https://www.ilo.org/sdmx/rest/datastructure/ILO/YI_ALL_EAP_DWAP_SEX_AGE_RT',
 'https://www.ilo.org/sdmx/rest/datastructure/ILO/YI_ALL_EMP_PTER_SEX_RT',
 'https://www.ilo.org/sdmx/rest/datastructure/ILO/YI_ALL_EMP_TEMP_SEX_ECO_DT',
 'https://www.ilo.org/sdmx/rest/datastructure/ILO/YI_ALL_EMP_TEMP_SEX_STE_DT'}

### find all related code lists

In [149]:
# Find the union of dimensions and attributes across the data structures and
# record, for each one, the id of the code list that enumerates its values.
headers = {'Accept' : 'application/vnd.sdmx.structure+xml;version=2.1'}


def _enumeration_id(component):
    """Return the id of the code list referenced by a DSD component, or None.

    `component` is a parsed <structure:Dimension> / <structure:Attribute> tag.
    Components without an enumerated local representation (free text, time
    period, ...) have no code list, in which case None is returned.
    """
    local_repr = component.find('structure:localrepresentation')
    if local_repr is None:
        return None
    enumeration = local_repr.find('structure:enumeration')
    if enumeration is None:
        return None
    ref = enumeration.find('ref')
    return ref.get('id') if ref is not None else None


metadata = {}

for url in url_list:
    # Fail loudly on HTTP errors instead of silently parsing an error page,
    # and never hang forever on an unresponsive server.
    resp = requests.get(url, headers = headers, timeout = 60)
    resp.raise_for_status()
    # Pin the parser so results do not depend on which optional parsers are
    # installed; an HTML parser lower-cases tag names, hence the lower-case
    # tag names used in the lookups.
    soup = BeautifulSoup(resp.text, 'html.parser')
    # Dimensions first, then attributes, so key order matches the original loops.
    for tag_name in ('structure:dimension', 'structure:attribute'):
        for component in soup.find_all(tag_name):
            metadata[component['id']] = {'Code_list': _enumeration_id(component)}

metadata

{'COLLECTION': {'Code_list': 'CL_COLLECTION'},
 'REF_AREA': {'Code_list': 'CL_AREA'},
 'FREQ': {'Code_list': 'CL_FREQ'},
 'SURVEY': {'Code_list': 'CL_SURVEY'},
 'MEASURE': {'Code_list': 'CL_MEASURE'},
 'SEX': {'Code_list': 'CL_SEX'},
 'ECO': {'Code_list': 'CL_ECO'},
 'S3': {'Code_list': 'CL_NOTE_S3'},
 'T3': {'Code_list': 'CL_NOTE_T3'},
 'S11': {'Code_list': 'CL_NOTE_S11'},
 'T12': {'Code_list': 'CL_NOTE_T12'},
 'I13': {'Code_list': 'CL_NOTE_I13'},
 'R1': {'Code_list': 'CL_NOTE_R1'},
 'S4': {'Code_list': 'CL_NOTE_S4'},
 'T5': {'Code_list': 'CL_NOTE_T5'},
 'S14': {'Code_list': 'CL_NOTE_S14'},
 'S7': {'Code_list': 'CL_NOTE_S7'},
 'S9': {'Code_list': 'CL_NOTE_S9'},
 'I11': {'Code_list': 'CL_NOTE_I11'},
 'C5': {'Code_list': 'CL_NOTE_C5'},
 'T2': {'Code_list': 'CL_NOTE_T2'},
 'S5': {'Code_list': 'CL_NOTE_S5'},
 'OBS_STATUS': {'Code_list': 'CL_OBS_STATUS'},
 'UNIT_MEASURE_TYPE': {'Code_list': 'CL_UNIT_MEASURE_TYPE'},
 'UNIT_MEASURE': {'Code_list': 'CL_UNIT_MEASURE'},
 'UNIT_MULT': {'Code_lis

### parse each code list to get the full dictionary

In [179]:
#
def _english_name(tag):
    """Return the text of the first English <common:Name> inside `tag`, or None."""
    name_tag = tag.find('common:name', attrs = {'xml:lang' : 'en'})
    return name_tag.get_text() if name_tag is not None else None


# For every dimension/attribute that is backed by a code list, download the
# code list and store its English title plus a {code id: English label} mapping.
for code in metadata:
    code_list_id = metadata[code]['Code_list']
    if code_list_id is None:
        continue  # free-text / non-enumerated component: nothing to look up

    url = 'https://www.ilo.org/sdmx/rest/codelist/ILO/' + code_list_id
    # Fail loudly on HTTP errors and never hang on an unresponsive server.
    resp = requests.get(url, timeout = 60)
    resp.raise_for_status()
    # Pin the parser so results do not depend on which optional parsers are installed.
    soup = BeautifulSoup(resp.text, 'html.parser')

    codelist = soup.find('structure:codelist')
    if codelist is None:
        raise ValueError('No code list found in response from ' + url)

    # The first English name inside the code list is the code list's own title.
    metadata[code]['Description'] = _english_name(codelist)
    # Look each label up inside its own <structure:Code> element rather than
    # pairing two flat lists by position: a single code without an English name
    # would otherwise shift every following description onto the wrong code.
    metadata[code]['Values'] = {
        code_tag['id']: _english_name(code_tag)
        for code_tag in codelist.find_all('structure:code')
    }
metadata

{'COLLECTION': {'Code_list': 'CL_COLLECTION',
  'Description': 'Collection',
  'Values': {'ILOEST': 'ILO estimates',
   'SDG': 'SDG labour market indicators',
   'YI': 'Yearly indicators',
   'STI': 'Short term indicators',
   'IR': 'Industrial relations',
   'ILMS': 'Labour Migration Statistics',
   'KI': 'Key Indicators',
   'CP': 'Country Profiles',
   'SSI': 'Social Security Indicators',
   'YTH': 'youthSTATS'}},
 'REF_AREA': {'Code_list': 'CL_AREA',
  'Description': 'Reference Area',
  'Values': {'DNK': 'Denmark',
   'EGY': 'Egypt',
   'ERI': 'Eritrea',
   'EST': 'Estonia',
   'ETH': 'Ethiopia',
   'FIN': 'Finland',
   'FJI': 'Fiji',
   'FLK': 'Falkland Islands (Malvinas)',
   'FRO': 'Faeroe Islands',
   'GBR': 'United Kingdom',
   'GEO': 'Georgia',
   'GGY': 'Guernsey',
   'GHA': 'Ghana',
   'GIB': 'Gibraltar',
   'GMB': 'Gambia',
   'GRC': 'Greece',
   'GRD': 'Grenada',
   'GRL': 'Greenland',
   'GUM': 'Guam',
   'GUY': 'Guyana',
   'HKG': 'Hong Kong, China',
   'HRV': 'Croatia'

### Save to json file

In [182]:

# Persist the collected metadata dictionary as a single JSON document.
file = '..\\data\\ILO_Metadata\\ILO_Metadata.json'
with open(file, 'w') as json_file:
    # Serialise to a string first, then write it out in one call.
    json_file.write(json.dumps(metadata))

### usage example -- translating one indicator

In [202]:
ind_num = 1 #select one indicator


# Load the raw observations for the selected indicator/disaggregation.
# Everything is cast to str so codes such as the survey id compare equal to
# the (string) keys stored in the metadata JSON.
data_file = '..\\data\\ILO_indicators\\' + idctr.indicator[ind_num] + '_' + idctr.disaggregation[ind_num] + '.csv'
data = pd.read_csv(data_file).astype(str)
data

meta_file = '..\\data\\ILO_Metadata\\ILO_Metadata.json'
with open(meta_file, 'r') as f:
    meta = json.load(f)


# Translate coded cell values into their English labels, column by column.
for col in data.columns:
    if col in ('TIME_PERIOD', 'OBS_VALUE', 'DATAFLOW'):  # these columns are self-explanatory
        continue
    # Columns that are absent from the metadata, or that have no code list,
    # are left untouched. This is an explicit lookup rather than a bare
    # `except`, so genuine errors are no longer swallowed.
    col_elem_dict = meta.get(col, {}).get('Values')
    if not col_elem_dict:
        continue
    # Assign the result back: `data[col].replace(..., inplace=True)` acts on a
    # temporary Series and does not update `data` under pandas Copy-on-Write.
    data[col] = data[col].replace(col_elem_dict)

# Rename the columns themselves from code ids to their English descriptions.
col_dict = {k:meta[k]['Description'] for k in meta if meta[k].get('Description') is not None}

data = data.rename(columns = col_dict)
data # seems quite readable

Unnamed: 0,DATAFLOW,COLLECTION,REF_AREA,FREQ,SURVEY,MEASURE,SEX,AGE,TIME_PERIOD,OBS_VALUE,...,S5,S6,OBS_STATUS,UNIT_MEASURE_TYPE,UNIT_MEASURE,UNIT_MULT,FREE_TEXT_NOTE,DECIMALS,SOURCE,INDICATOR
0,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,NOR,A,1505,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1946,46.87,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
1,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,NOR,A,1505,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1950,49.82,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
2,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,NOR,A,1505,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1960,44.76,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
3,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,NOR,A,1505,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1970,40.28,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
4,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,NOR,A,1505,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,2001,64.87,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
5,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,LUX,A,1370,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1947,47.13,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
6,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,LUX,A,1370,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1960,52.44,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
7,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,LUX,A,1370,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1966,53.22,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
8,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,LUX,A,1370,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1970,54.27,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT
9,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),YI,LUX,A,1370,EAP_DWAP_RT,SEX_F,AGE_YTHADULT_Y15-24,1981,57.67,...,,,,RT,PT,0,,1,AA,EAP_DWAP_SEX_AGE_RT


Unnamed: 0,DATAFLOW,Collection,Reference Area,FREQ,Survey,Measure,Classification: SEX,Classification: AGE,TIME_PERIOD,OBS_VALUE,...,Population coverage,Establishment size coverage,OBS_STATUS,UNIT_MEASURE_TYPE,UNIT_MEASURE,UNIT_MULT,FREE_TEXT_NOTE,DECIMALS,Source,Indicator
0,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Norway,Annual,Population Census,Labour force participation rate,Female,15-24,1946,46.87,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
1,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Norway,Annual,Population Census,Labour force participation rate,Female,15-24,1950,49.82,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
2,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Norway,Annual,Population Census,Labour force participation rate,Female,15-24,1960,44.76,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
3,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Norway,Annual,Population Census,Labour force participation rate,Female,15-24,1970,40.28,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
4,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Norway,Annual,Population Census,Labour force participation rate,Female,15-24,2001,64.87,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
5,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Luxembourg,Annual,Population Census,Labour force participation rate,Female,15-24,1947,47.13,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
6,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Luxembourg,Annual,Population Census,Labour force participation rate,Female,15-24,1960,52.44,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
7,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Luxembourg,Annual,Population Census,Labour force participation rate,Female,15-24,1966,53.22,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
8,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Luxembourg,Annual,Population Census,Labour force participation rate,Female,15-24,1970,54.27,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
9,ILO:DF_YI_ALL_EAP_DWAP_SEX_AGE_RT(1.0),Yearly indicators,Luxembourg,Annual,Population Census,Labour force participation rate,Female,15-24,1981,57.67,...,,,,Rate,Percentage,Units,,1,Population census,Labour force participation rate by sex and age
