# Integrate datasets

In [1]:
import pandas as pd
import os
from IPython.display import clear_output
import requests
import json
from datetime import datetime

## Load prepared SDG dataset¶

In [2]:
# Load the Excel file
xls = pd.ExcelFile('../inputs/sdg_dataset.xlsx')

# Get the names of all sheets in the Excel file
sheet_names = xls.sheet_names

# Read each sheet into a separate DataFrame and store them in a dictionary
dfs_sdg = {}
for sheet in sheet_names:
    dfs_sdg[sheet.lower()] = pd.read_excel(xls, sheet_name=sheet).fillna('')


In [3]:
dfs_sdg.keys()

dict_keys(['data', 'schema', 'cl_geo'])

In [4]:
# Load the Excel file
xls = pd.ExcelFile('../inputs/ilo_dataset.xlsx')

# Get the names of all sheets in the Excel file
sheet_names = xls.sheet_names

# Read each sheet into a separate DataFrame and store them in a dictionary
dfs_ilo = {}
for sheet in sheet_names:
    dfs_ilo[sheet.lower()] = pd.read_excel(xls, sheet_name=sheet).fillna('')


In [5]:
dfs_ilo.keys()

dict_keys(['data', 'schema', 'cl_geo'])

In [6]:

# Load the Excel file
xls = pd.ExcelFile('../inputs/Example_mappings.xlsx')

# Get the names of all sheets in the Excel file
sheet_names = xls.sheet_names

# Read each sheet into a separate DataFrame and store them in a dictionary
dfs_mappings = {}
for sheet in sheet_names:
    dfs_mappings[sheet.lower()] = pd.read_excel(xls, sheet_name=sheet).fillna('')


In [7]:
dfs_mappings.keys()

dict_keys(['concept_mapping__undata_ilo', 'concept_mapping__undata_sdg', 'enum_mapping_geo__undata_sdg', 'enum_mapping_geo__undata_ilo'])

## Integrate datasets into a single table

In [8]:
dfs_sdg['schema'].head(3)

Unnamed: 0,ConceptID,ConceptName,Coded,Role
0,sdg:SERIES_CODE,Series Code,True,Dimension
1,sdg:SERIES_DESCRIPTION,Series Description,False,Attribute
2,sdg:VARIABLE_CODE,Variable Code,True,Attribute


In [9]:
dfs_ilo['schema'].head(3)

Unnamed: 0,ConceptID,ConceptName,Coded,Role
0,ilo:DATAFLOW,Dataflow,False,Attribute
1,ilo:REF_AREA,Ref Area,True,Dimension
2,ilo:FREQ,Freq,True,Dimension


In [10]:
dfs_mappings['concept_mapping__undata_ilo'].head(10)

Unnamed: 0,subject_id,subject_label,predicate_id,object_id,object_label,mapping_justification,mapping_date,author_id,subject_source,subject_source_version,object_source,object_source_version,confidence
0,undata:ACTIVE_DIMS,Active dimensions,,,,,,,,,,,
1,undata:AGE,Age group,,,,,,,,,,,
2,undata:BASE_PERIOD,Base period,,,,,,,,,,,
3,undata:DATAFLOW,Dataflow,skos:exactMatch,ilo:DATAFLOW,Dataflow,,,,,,,,
4,undata:DECIMALS,Decimals,skos:exactMatch,ilo:DECIMALS,Decimals,,,,,,,,
5,undata:FOOTNOTE,Footnote,skos:exactMatch,ilo:NOTE_CLASSIF,Note Classif,,,,,,,,
6,undata:FOOTNOTE,Footnote,skos:exactMatch,ilo:NOTE_INDICATOR,Note Indicator,,,,,,,,
7,undata:FOOTNOTE,Footnote,skos:exactMatch,ilo:NOTE_SOURCE,Note Source,,,,,,,,
8,undata:FREQ,Frequency,skos:exactMatch,ilo:FREQ,Freq,,,,,,,,
9,undata:GEOGRAPHY,Geography,skos:exactMatch,ilo:REF_AREA,Ref Area,,,,,,,,


## 1a. Change column names

In [11]:
list(dfs_sdg['data'])

['sdg:SERIES_CODE',
 'sdg:SERIES_DESCRIPTION',
 'sdg:VARIABLE_CODE',
 'sdg:VARIABLE_DESCRIPTION',
 'sdg:VARIABLE_ACTIVE_DIMS',
 'sdg:GEOGRAPHY_CODE',
 'sdg:GEOGRAPHY_NAME',
 'sdg:GEOGRAPHY_TYPE',
 'sdg:GEO_AREA_CODE',
 'sdg:GEO_AREA_NAME',
 'sdg:CITIES',
 'sdg:SAMPLING_STATIONS',
 'sdg:IS_LATEST_PERIOD',
 'sdg:TIME_PERIOD',
 'sdg:TIME_DETAIL',
 'sdg:TIME_COVERAGE',
 'sdg:FREQ',
 'sdg:AGE',
 'sdg:SEX',
 'sdg:OBS_VALUE',
 'sdg:VALUE_TYPE',
 'sdg:UPPER_BOUND',
 'sdg:LOWER_BOUND',
 'sdg:UNIT_MEASURE',
 'sdg:UNIT_MULT',
 'sdg:BASE_PERIOD',
 'sdg:NATURE',
 'sdg:SOURCE',
 'sdg:GEO_INFO_URL',
 'sdg:FOOT_NOTE',
 'sdg:REPORTING_TYPE',
 'sdg:OBS_STATUS',
 'sdg:RELEASE_STATUS',
 'sdg:RELEASE_NAME']

In [12]:
# Create a dictionary from 'old' to 'new' columns
m = dfs_mappings['concept_mapping__undata_sdg'].loc[(dfs_mappings['concept_mapping__undata_sdg']['subject_id'] != '') & (dfs_mappings['concept_mapping__undata_sdg']['object_id'] != '')]
rename_dict = dict(zip(m['object_id'], m['subject_id']))
# Rename columns in DataFrame X
dfs_sdg['data'].rename(columns=rename_dict, inplace=True)
dfs_sdg['data'].head(6)
list(dfs_sdg['data'])

['undata:MEASURE',
 'sdg:SERIES_DESCRIPTION',
 'undata:VARIABLE',
 'sdg:VARIABLE_DESCRIPTION',
 'undata:ACTIVE_DIMS',
 'undata:GEOGRAPHY',
 'sdg:GEOGRAPHY_NAME',
 'undata:GEOGRAPHY_TYPE',
 'sdg:GEO_AREA_CODE',
 'sdg:GEO_AREA_NAME',
 'sdg:CITIES',
 'sdg:SAMPLING_STATIONS',
 'undata:IS_LATEST_PERIOD',
 'undata:TIME_PERIOD',
 'undata:TIME_DETAIL',
 'undata:TIME_COVERAGE',
 'undata:FREQ',
 'undata:AGE',
 'undata:SEX',
 'undata:OBS_VALUE',
 'undata:VALUE_TYPE',
 'undata:UPPER_BOUND',
 'undata:LOWER_BOUND',
 'undata:UNIT_MEASURE',
 'undata:UNIT_MULT',
 'undata:BASE_PERIOD',
 'undata:NATURE',
 'undata:SOURCE',
 'sdg:GEO_INFO_URL',
 'undata:FOOTNOTE',
 'undata:REPORTING_TYPE',
 'undata:OBS_STATUS',
 'undata:RELEASE_STATUS',
 'undata:RELEASE']

In [13]:
rename_dict

{'sdg:VARIABLE_ACTIVE_DIMS': 'undata:ACTIVE_DIMS',
 'sdg:AGE': 'undata:AGE',
 'sdg:BASE_PERIOD': 'undata:BASE_PERIOD',
 'sdg:FOOT_NOTE': 'undata:FOOTNOTE',
 'sdg:FREQ': 'undata:FREQ',
 'sdg:GEOGRAPHY_CODE': 'undata:GEOGRAPHY',
 'sdg:GEOGRAPHY_TYPE': 'undata:GEOGRAPHY_TYPE',
 'sdg:IS_LATEST_PERIOD': 'undata:IS_LATEST_PERIOD',
 'sdg:LOWER_BOUND': 'undata:LOWER_BOUND',
 'sdg:SERIES_CODE': 'undata:MEASURE',
 'sdg:NATURE': 'undata:NATURE',
 'sdg:OBS_STATUS': 'undata:OBS_STATUS',
 'sdg:OBS_VALUE': 'undata:OBS_VALUE',
 'sdg:RELEASE_NAME': 'undata:RELEASE',
 'sdg:RELEASE_STATUS': 'undata:RELEASE_STATUS',
 'sdg:REPORTING_TYPE': 'undata:REPORTING_TYPE',
 'sdg:SEX': 'undata:SEX',
 'sdg:SOURCE': 'undata:SOURCE',
 'sdg:TIME_COVERAGE': 'undata:TIME_COVERAGE',
 'sdg:TIME_DETAIL': 'undata:TIME_DETAIL',
 'sdg:TIME_PERIOD': 'undata:TIME_PERIOD',
 'sdg:UNIT_MEASURE': 'undata:UNIT_MEASURE',
 'sdg:UNIT_MULT': 'undata:UNIT_MULT',
 'sdg:UPPER_BOUND': 'undata:UPPER_BOUND',
 'sdg:VALUE_TYPE': 'undata:VALUE_TYP

## 2a. Transcode values for geography concept

In [14]:
dfs_mappings['enum_mapping_geo__undata_sdg'].head(4)

Unnamed: 0,subject_id,subject_label,predicate_id,object_id,object_label,mapping_justification,mapping_date,author_id,subject_source,subject_source_version,object_source,object_source_version,confidence
0,undata:GEOGRAPHY/G00000010,Abu Dhabi,,,,,NaT,,,,,,
1,undata:GEOGRAPHY/G00000020,Afghanistan,skos:exactMatch,sdg:GEOGRAPHY_CODE/4,Afghanistan,semapv:LexicalSimilarityThresholdMatching,NaT,,,,,,1.0
2,undata:GEOGRAPHY/G00000030,Ajman,,,,,NaT,,,,,,
3,undata:GEOGRAPHY/G00000040,Åland Islands,skos:exactMatch,sdg:GEOGRAPHY_CODE/248,Åland Islands,semapv:LexicalSimilarityThresholdMatching,NaT,,,,,,1.0


In [15]:
# Create a dictionary from 'old' to 'new' columns
transcode_dict = dict(zip(dfs_mappings['enum_mapping_geo__undata_sdg']['object_id'], dfs_mappings['enum_mapping_geo__undata_sdg']['subject_id']))
dfs_sdg['data']['undata:GEOGRAPHY'] = dfs_sdg['data']['undata:GEOGRAPHY'].replace(transcode_dict)


In [16]:
transcode_dict

{'': 'undata:GEOGRAPHY/G00900070',
 'sdg:GEOGRAPHY_CODE/4': 'undata:GEOGRAPHY/G00000020',
 'sdg:GEOGRAPHY_CODE/248': 'undata:GEOGRAPHY/G00000040',
 'sdg:GEOGRAPHY_CODE/8': 'undata:GEOGRAPHY/G00000050',
 'sdg:GEOGRAPHY_CODE/12': 'undata:GEOGRAPHY/G00000060',
 'sdg:GEOGRAPHY_CODE/16': 'undata:GEOGRAPHY/G00000070',
 'sdg:GEOGRAPHY_CODE/20': 'undata:GEOGRAPHY/G00000080',
 'sdg:GEOGRAPHY_CODE/24': 'undata:GEOGRAPHY/G00000090',
 'sdg:GEOGRAPHY_CODE/660': 'undata:GEOGRAPHY/G00000100',
 'sdg:GEOGRAPHY_CODE/10': 'undata:GEOGRAPHY/G00000110',
 'sdg:GEOGRAPHY_CODE/28': 'undata:GEOGRAPHY/G00000120',
 'sdg:GEOGRAPHY_CODE/32': 'undata:GEOGRAPHY/G00000130',
 'sdg:GEOGRAPHY_CODE/51': 'undata:GEOGRAPHY/G00000140',
 'sdg:GEOGRAPHY_CODE/533': 'undata:GEOGRAPHY/G00000150',
 'sdg:GEOGRAPHY_CODE/36': 'undata:GEOGRAPHY/G00000170',
 'sdg:GEOGRAPHY_CODE/40': 'undata:GEOGRAPHY/G00000180',
 'sdg:GEOGRAPHY_CODE/31': 'undata:GEOGRAPHY/G00000190',
 'sdg:GEOGRAPHY_CODE/920': 'undata:GEOGRAPHY/G00000200',
 'sdg:GEOGR

In [17]:
dfs_sdg['data']

Unnamed: 0,undata:MEASURE,sdg:SERIES_DESCRIPTION,undata:VARIABLE,sdg:VARIABLE_DESCRIPTION,undata:ACTIVE_DIMS,undata:GEOGRAPHY,sdg:GEOGRAPHY_NAME,undata:GEOGRAPHY_TYPE,sdg:GEO_AREA_CODE,sdg:GEO_AREA_NAME,...,undata:UNIT_MULT,undata:BASE_PERIOD,undata:NATURE,undata:SOURCE,sdg:GEO_INFO_URL,undata:FOOTNOTE,undata:REPORTING_TYPE,undata:OBS_STATUS,undata:RELEASE_STATUS,undata:RELEASE
0,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
1,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
2,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
3,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
4,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29275,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y_GE25__SEX--M,Employed population below international povert...,"['AGE', 'SEX']",undata:GEOGRAPHY/G00003490,Zambia,Country,sdg:GEO_AREA_CODE/894,Zambia,...,,,sdg:NATURE/M,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
29276,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y_GE25__SEX--M,Employed population below international povert...,"['AGE', 'SEX']",undata:GEOGRAPHY/G00003490,Zambia,Country,sdg:GEO_AREA_CODE/894,Zambia,...,,,sdg:NATURE/M,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
29277,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y_GE25__SEX--M,Employed population below international povert...,"['AGE', 'SEX']",undata:GEOGRAPHY/G00003490,Zambia,Country,sdg:GEO_AREA_CODE/894,Zambia,...,,,sdg:NATURE/M,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
29278,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y_GE25__SEX--M,Employed population below international povert...,"['AGE', 'SEX']",undata:GEOGRAPHY/G00003490,Zambia,Country,sdg:GEO_AREA_CODE/894,Zambia,...,,,sdg:NATURE/M,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01


## 1b. Change column names

In [18]:
list(dfs_ilo['data'])

['ilo:DATAFLOW',
 'ilo:REF_AREA',
 'ilo:FREQ',
 'ilo:MEASURE',
 'ilo:QTL',
 'ilo:TIME_PERIOD',
 'ilo:OBS_VALUE',
 'ilo:OBS_STATUS',
 'ilo:UNIT_MEASURE_TYPE',
 'ilo:UNIT_MEASURE',
 'ilo:UNIT_MULT',
 'ilo:SOURCE',
 'ilo:NOTE_SOURCE',
 'ilo:NOTE_INDICATOR',
 'ilo:NOTE_CLASSIF',
 'ilo:DECIMALS',
 'ilo:UPPER_BOUND',
 'ilo:LOWER_BOUND']

In [19]:
m = dfs_mappings['concept_mapping__undata_ilo'].loc[(dfs_mappings['concept_mapping__undata_ilo']['subject_id'] != '') & (dfs_mappings['concept_mapping__undata_sdg']['object_id'] != '')]
rename_dict = dict(zip(m['object_id'], m['subject_id']))

# Rename columns in DataFrame X
dfs_ilo['data'].rename(columns=rename_dict, inplace=True)
dfs_ilo['data'].head(6)

Unnamed: 0,ilo:DATAFLOW,undata:GEOGRAPHY,undata:FREQ,undata:MEASURE,undata:QUANTILE,undata:TIME_PERIOD,undata:OBS_VALUE,ilo:OBS_STATUS,undata:UNIT_MEASURE_TYPE,undata:UNIT_MEASURE,undata:UNIT_MULT,undata:SOURCE,undata:FOOTNOTE,undata:FOOTNOTE.1,undata:FOOTNOTE.2,ilo:DECIMALS,undata:UPPER_BOUND,undata:LOWER_BOUND
0,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_01,2013,0.44,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
1,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_02,2013,1.01,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
2,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_03,2013,1.7,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
3,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_04,2013,2.88,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
4,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_05,2013,4.79,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
5,ILO:DF_LAP_2LID_QTL_RT(1.0),ilo:REF_AREA/AFG,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_06,2013,6.89,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,


## 2a. Transcode values for geography concept

In [20]:
dfs_mappings['enum_mapping_geo__undata_ilo'].head(4)

Unnamed: 0,subject_id,subject_label,predicate_id,object_id,object_label,mapping_justification,mapping_date,author_id,subject_source,subject_source_version,object_source,object_source_version,confidence
0,undata:GEOGRAPHY/G00000010,Abu Dhabi,,,,,,,,,,,
1,undata:GEOGRAPHY/G00000020,Afghanistan,skos:exactMatch,ilo:REF_AREA/AFG,Afghanistan,,,,,,,,
2,undata:GEOGRAPHY/G00000030,Ajman,,,,,,,,,,,
3,undata:GEOGRAPHY/G00000040,Åland Islands,,,,,,,,,,,


In [21]:
# Create a dictionary from 'old' to 'new' columns
transcode_dict = dict(zip(dfs_mappings['enum_mapping_geo__undata_ilo']['object_id'], dfs_mappings['enum_mapping_geo__undata_ilo']['subject_id']))
dfs_ilo['data']['undata:GEOGRAPHY'] = dfs_sdg['data']['undata:GEOGRAPHY'].replace(transcode_dict)
dfs_ilo['data']

Unnamed: 0,ilo:DATAFLOW,undata:GEOGRAPHY,undata:FREQ,undata:MEASURE,undata:QUANTILE,undata:TIME_PERIOD,undata:OBS_VALUE,ilo:OBS_STATUS,undata:UNIT_MEASURE_TYPE,undata:UNIT_MEASURE,undata:UNIT_MULT,undata:SOURCE,undata:FOOTNOTE,undata:FOOTNOTE.1,undata:FOOTNOTE.2,ilo:DECIMALS,undata:UPPER_BOUND,undata:LOWER_BOUND
0,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00100000,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_01,2013,0.44,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
1,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00100000,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_02,2013,1.01,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
2,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00100000,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_03,2013,1.70,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
3,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00100000,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_04,2013,2.88,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
4,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00100000,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_05,2013,4.79,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21675,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00800370,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_06,2020,1.40,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
21676,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00800370,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_07,2020,2.47,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
21677,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00800370,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_08,2020,5.45,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,
21678,ILO:DF_LAP_2LID_QTL_RT(1.0),undata:GEOGRAPHY/G00800370,ilo:FREQ/A,ilo:MEASURE/LAP_2LID_RT,ilo:QTL/QTL_DECILE_09,2020,17.10,ilo:OBS_STATUS/M,ilo:UNIT_MEASURE_TYPE/RT,ilo:UNIT_MEASURE/PT,ilo:UNIT_MULT/0,ILO - Modelled Estimates,,,,1,,


---
# Create integrated dataset

In [34]:
x1 = dfs_sdg['data']
x1 = x1.reset_index(drop=True, inplace=True)

In [40]:
x2 =dfs_ilo['data']

x2 = x2.reset_index(drop=True, inplace=True)

In [41]:
x1.head(3)

Unnamed: 0,undata:MEASURE,sdg:SERIES_DESCRIPTION,undata:VARIABLE,sdg:VARIABLE_DESCRIPTION,undata:ACTIVE_DIMS,undata:GEOGRAPHY,sdg:GEOGRAPHY_NAME,undata:GEOGRAPHY_TYPE,sdg:GEO_AREA_CODE,sdg:GEO_AREA_NAME,...,undata:UNIT_MULT,undata:BASE_PERIOD,undata:NATURE,undata:SOURCE,sdg:GEO_INFO_URL,undata:FOOTNOTE,undata:REPORTING_TYPE,undata:OBS_STATUS,undata:RELEASE_STATUS,undata:RELEASE
0,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
1,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01
2,sdg:SERIES_CODE/SI_POV_EMP1,Employed population below international povert...,sdg:VARIABLE_CODE/SI_POV_EMP1@AGE--Y15T24,Employed population below international povert...,['AGE'],undata:GEOGRAPHY/G00100000,World,Region,sdg:GEO_AREA_CODE/1,World,...,,,sdg:NATURE/N,"Source = ILO modelled estimates, Nov. 2022",,,sdg:REPORTING_TYPE/G,sdg:OBS_STATUS/E,Published,2023.Q2.G.01


In [42]:
x2.head(3)

AttributeError: 'NoneType' object has no attribute 'head'

In [43]:
print(list(x1))
print(list(x2))

['undata:MEASURE', 'sdg:SERIES_DESCRIPTION', 'undata:VARIABLE', 'sdg:VARIABLE_DESCRIPTION', 'undata:ACTIVE_DIMS', 'undata:GEOGRAPHY', 'sdg:GEOGRAPHY_NAME', 'undata:GEOGRAPHY_TYPE', 'sdg:GEO_AREA_CODE', 'sdg:GEO_AREA_NAME', 'sdg:CITIES', 'sdg:SAMPLING_STATIONS', 'undata:IS_LATEST_PERIOD', 'undata:TIME_PERIOD', 'undata:TIME_DETAIL', 'undata:TIME_COVERAGE', 'undata:FREQ', 'undata:AGE', 'undata:SEX', 'undata:OBS_VALUE', 'undata:VALUE_TYPE', 'undata:UPPER_BOUND', 'undata:LOWER_BOUND', 'undata:UNIT_MEASURE', 'undata:UNIT_MULT', 'undata:BASE_PERIOD', 'undata:NATURE', 'undata:SOURCE', 'sdg:GEO_INFO_URL', 'undata:FOOTNOTE', 'undata:REPORTING_TYPE', 'undata:OBS_STATUS', 'undata:RELEASE_STATUS', 'undata:RELEASE']


TypeError: 'NoneType' object is not iterable

In [46]:
pd.concat([x1,x2]).to_excel("../inputs/integrated_dataset.xlsx", index=False)