# Development of National Data Structure Definition for the SDGs
This notebook illustrates a series of steps necessary to create a national Data Structure Definition for SDG indicators that is aligned to the Global SDG Indicator Framework

# Initial settings

In [48]:
import pandas as pd
import os 
import json
import urllib.request
from pandas.io.json import json_normalize    


dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

global_dsd_dir = r'../global DSD'
print('data inputs dir: ' + global_dsd_dir)


C:\Users\L.GonzalezMorales\Documents\GitHub\PCBS\notebooks
data inputs dir: ../global DSD


The steps are as follows:

1. **Global DSD**

  1. Create a data frame with the most recent version of the global indicator framework
  2. Collect the catalogue of data series from the global indicator database
  3. Collect the code lists from the global DSD


## 1. Global DSD

### 1.A. Global indicator framework

Get the global indicator framework from Global SDG Indicators Database API

In [31]:
series = pd.read_json('https://unstats.un.org/SDGAPI/v1/sdg/Series/List?allreleases=false')
series.head(10)

Unnamed: 0,code,description,goal,indicator,release,target,uri
0,DC_ODA_BDVDL,Total official development assistance for biod...,"[15, 15]","[15.a.1, 15.b.1]",2019.Q1.G.01,"[15.a, 15.b]",/v1/sdg/Series/DC_ODA_BDVDL
1,DC_TOF_HLTHNT,Total official development assistance to medic...,[3],[3.b.2],2019.Q1.G.01,[3.b],/v1/sdg/Series/DC_TOF_HLTHNT
2,DC_TOF_INFRAL,"Total official flows for infrastructure, by re...",[9],[9.a.1],2019.Q1.G.01,[9.a],/v1/sdg/Series/DC_TOF_INFRAL
3,DC_TOF_SCHIPSL,"Total official flows for scholarships, by reci...",[4],[4.b.1],2019.Q1.G.01,[4.b],/v1/sdg/Series/DC_TOF_SCHIPSL
4,DC_TOF_TRDCMDL,Total official flows (commitments) for Aid for...,[8],[8.a.1],2019.Q1.G.01,[8.a],/v1/sdg/Series/DC_TOF_TRDCMDL
5,DC_TOF_WASHL,Total official development assistance (gross d...,[6],[6.a.1],2019.Q1.G.01,[6.a],/v1/sdg/Series/DC_TOF_WASHL
6,DC_TRF_TOTDL,"Total assistance for development, by donor cou...",[10],[10.b.1],2019.Q1.G.01,[10.b],/v1/sdg/Series/DC_TRF_TOTDL
7,DT_TDS_DECT,Debt service as a proportion of exports of goo...,[17],[17.4.1],2019.Q1.G.01,[17.4],/v1/sdg/Series/DT_TDS_DECT
8,EG_EGY_CLEAN,Proportion of population with primary reliance...,[7],[7.1.2],2019.Q1.G.01,[7.1],/v1/sdg/Series/EG_EGY_CLEAN
9,EN_ATM_CO2,Carbon dioxide emissions from fuel combustion ...,[9],[9.4.1],2019.Q1.G.01,[9.4],/v1/sdg/Series/EN_ATM_CO2


In [34]:
global_series = []

for index, row in series.iterrows():
    
    series_dict = {}
    
    series = row['code']
    series_desc = row['description']
    
    for i in range(len(row['indicator'])):
        
        goal =  row['goal'][i]
        target = row['target'][i]
        indicator = row['indicator'][i]
        indicator = row['indicator'][i]
        release = row['release']
        
        series_dict['goal_code'] = goal_code
        series_dict['target_code'] = target_code
        series_dict['indicator_code'] = indicator_code
        series_dict['series_code'] = series_code
        series_dict['series_desc'] = series_desc
        
        global_series.append(series_dict)

global_series[0]

{'goal_code': '15',
 'indicator_code': '15.b.1',
 'series_code': 'DC_ODA_BDVDL',
 'series_desc': 'Total official development assistance for biodiversity, by donor countries (millions of constant 2016 United States dollars)',
 'target_code': '15.b'}

In [33]:
indicators = pd.read_json('https://unstats.un.org/SDGAPI/v1/sdg/Indicator/List')
indicators.head(10)

Unnamed: 0,code,description,goal,series,target,tier,uri
0,1.1.1,Proportion of population below the internation...,1,[],1.1,1,/v1/sdg/Indicator/1.1.1
1,1.2.1,Proportion of population living below the nati...,1,[],1.2,1,/v1/sdg/Indicator/1.2.1
2,1.3.1,Proportion of population covered by social pro...,1,[],1.3,1,/v1/sdg/Indicator/1.3.1
3,1.5.1,"Number of deaths, missing persons and directly...",1,[],1.5,1,/v1/sdg/Indicator/1.5.1
4,1.5.2,Direct economic loss attributed to disasters i...,1,[],1.5,1,/v1/sdg/Indicator/1.5.2
5,1.5.3,Number of countries that adopt and implement n...,1,[],1.5,1,/v1/sdg/Indicator/1.5.3
6,1.5.4,Proportion of local governments that adopt and...,1,[],1.5,2,/v1/sdg/Indicator/1.5.4
7,1.a.2,Proportion of total government spending on ess...,1,[],1.a,2,/v1/sdg/Indicator/1.a.2
8,2.1.1,Prevalence of undernourishment,2,[],2.1,1,/v1/sdg/Indicator/2.1.1
9,2.1.2,Prevalence of moderate or severe food insecuri...,2,[],2.1,1,/v1/sdg/Indicator/2.1.2


In [43]:
for s in range(len(global_series)):
    
    indicator_desc = indicators.loc[indicators['code'] == global_series[s]['indicator_code']]['description'].to_string(index=False)
    global_series[s]['indicator_desc']=indicator_desc

In [None]:
indicators = pd.read_json('https://unstats.un.org/SDGAPI/v1/sdg/Indicator/List')
indicators.head(10)

In [44]:
global_series[4]

{'goal_code': '4',
 'indicator_code': '4.b.1',
 'indicator_desc': 'Volume of official development assistance flow...',
 'series_code': 'DC_TOF_SCHIPSL',
 'series_desc': 'Total official flows for scholarships, by recipient countries (millions of constant 2016 United States dollars)',
 'target_code': '4.b'}

In [45]:
x = pd.read_json('https://unstats.un.org/SDGAPI/v1/sdg/Goal/List?includechildren=true')
x.head(10)

Unnamed: 0,code,description,targets,title,uri
0,1,Goal 1 calls for an end to poverty in all its ...,"[{'goal': None, 'code': '1.1', 'title': 'By 20...",End poverty in all its forms everywhere,/v1/sdg/Goal/1
1,2,Goal 2 seeks to end hunger and all forms of ma...,"[{'goal': None, 'code': '2.1', 'title': 'By 20...","End hunger, achieve food security and improved...",/v1/sdg/Goal/2
2,3,Goal 3 aims to ensure health and well-being fo...,"[{'goal': None, 'code': '3.1', 'title': 'By 20...",Ensure healthy lives and promote well-being fo...,/v1/sdg/Goal/3
3,4,Goal 4 focuses on the acquisition of foundatio...,"[{'goal': None, 'code': '4.1', 'title': 'By 20...",Ensure inclusive and equitable quality educati...,/v1/sdg/Goal/4
4,5,Goal 5 aims to empower women and girls to reac...,"[{'goal': None, 'code': '5.1', 'title': 'End a...",Achieve gender equality and empower all women ...,/v1/sdg/Goal/5
5,6,"Goal 6 goes beyond drinking water, sanitation ...","[{'goal': None, 'code': '6.1', 'title': 'By 20...",Ensure availability and sustainable management...,/v1/sdg/Goal/6
6,7,Goal 7 seeks to promote broader energy access ...,"[{'goal': None, 'code': '7.1', 'title': 'By 20...","Ensure access to affordable, reliable, sustain...",/v1/sdg/Goal/7
7,8,Goal 8 aims to provide opportunities for full ...,"[{'goal': None, 'code': '8.2', 'title': 'Achie...","Promote sustained, inclusive and sustainable e...",/v1/sdg/Goal/8
8,9,Goal 9 focuses on the promotion of infrastruct...,"[{'goal': None, 'code': '9.1', 'title': 'Devel...","Build resilient infrastructure, promote inclus...",/v1/sdg/Goal/9
9,10,Goal 10 calls for reducing inequalities in inc...,"[{'goal': None, 'code': '10.1', 'title': 'By 2...",Reduce inequality within and among countries,/v1/sdg/Goal/10


In [51]:
with urllib.request.urlopen('https://unstats.un.org/SDGAPI/v1/sdg/Goal/List?includechildren=true') as url:
    sdgs = json.loads(url.read().decode())

In [62]:
global_frmwk = []

print(len(sdgs))

for g in sdgs:
    
    goal_code = g['code']
    goal_desc = g['description']
    
    for t in g['targets']:
    
        target_code = t['code']
        target_desc = t['description']
            
        for i in t['indicators']:
            
            indicator_code = i['code']
            indicaotr_desc = i['description']
            
            for s in i['series']:
                
                record = {}
                
                series_code = s['code']
                series_desc = s['description']
                series_release = s['release']
                
                record['goal_code'] = goal_code
                record['goal_desc'] = goal_desc
                record['target_code'] = target_code
                record['target_desc'] = target_desc
                record['indicator_code'] = indicator_code
                record['inidcator_desc'] = indicator_desc
                record['series_code'] = series_code
                record['series_desc'] = series_desc
                record['series_release'] = series_release
                
                global_frmwk.append(record)

len(global_frmwk)

17


2485

In [65]:
global_frmwk_df = pd.DataFrame(global_frmwk)
global_frmwk_df.head(12)


Unnamed: 0,goal_code,goal_desc,indicator_code,inidcator_desc,series_code,series_desc,series_release,target_code,target_desc
0,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2017.Q2.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
1,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2018.Q1.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
2,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2018.Q2.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
3,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2018.Q4.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
4,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2018.Q4.G.02,1.1,"By 2030, eradicate extreme poverty for all peo..."
5,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
6,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_EMP1,Employed population below international povert...,2017.Q2.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
7,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_EMP1,Employed population below international povert...,2018.Q1.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
8,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_EMP1,Employed population below international povert...,2018.Q2.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
9,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_EMP1,Employed population below international povert...,2018.Q4.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."


In [67]:
global_frmwk_last_release = global_frmwk_df.loc[global_frmwk_df['series_release'] == '2019.Q1.G.01']
global_frmwk_last_release.head(12)

Unnamed: 0,goal_code,goal_desc,indicator_code,inidcator_desc,series_code,series_desc,series_release,target_code,target_desc
5,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
11,1,Goal 1 calls for an end to poverty in all its ...,1.1.1,Number of countries with sustainable consumpti...,SI_POV_EMP1,Employed population below international povert...,2019.Q1.G.01,1.1,"By 2030, eradicate extreme poverty for all peo..."
17,1,Goal 1 calls for an end to poverty in all its ...,1.2.1,Number of countries with sustainable consumpti...,SI_POV_NAHC,Proportion of population living below the nati...,2019.Q1.G.01,1.2,"By 2030, reduce at least by half the proportio..."
23,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_MATNL,[ILO] Proportion of mothers with newborns rece...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
29,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_POOR,[ILO] Proportion of poor population receiving ...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
35,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_SOCAST,[World Bank] Proportion of population covered ...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
41,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_SOCASTPQ,[World Bank] Poorest quintile covered by socia...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
47,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_SOCINS,[World Bank] Proportion of population covered ...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
53,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_CHLD,[ILO] Proportion of children/households receiv...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...
59,1,Goal 1 calls for an end to poverty in all its ...,1.3.1,Number of countries with sustainable consumpti...,SI_COV_SOCINSPQ,[World Bank] Poorest quintile covered by socia...,2019.Q1.G.01,1.3,Implement nationally appropriate social protec...


In [70]:
global_frmwk_last_release.shape

(414, 9)