# Analyze SDG Data availability

## Load libraries

In [7]:
import pandas as pd
import os
import csv
import json
import statistics

import numpy as np
from pathlib import Path

# Record and show the directory the notebook is being run from.
# NOTE: '__file__' is a quoted string literal here, not the __file__ variable,
# so realpath() resolves it against the current working directory and dirname()
# then yields that working directory.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Display the value of every expression statement in a cell, not only the last
# one. Later cells rely on this to show several .head() / .keys() results at once.
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

C:\Users\L.GonzalezMorales\Documents\Windows Projects\fis4sdg_python\notebooks\reports


### Read M49 countries and groups

In [9]:
# Both tables live in the same M49 workbook, so name its path once and read
# the two sheets from it.
m49_workbook = '../../data/external/m49_availability.xlsx'

ghana, regions = (
    pd.read_excel(m49_workbook, sheet_name=sheet)
    for sheet in ('Ghana', 'Regions')
)

# Preview both tables (both are displayed because the notebook sets
# ast_node_interactivity = "all").
ghana.head()
regions.head()

Unnamed: 0,countryCode,countryName,LDC,LLDC,SIDS,Development,regionCode,regionName
0,288,Ghana,,,,Developing,202,Sub-Saharan Africa


Unnamed: 0,Code,SDG_Region_Name
0,1,World
1,747,Northern Africa and Western Asia
2,202,Sub-Saharan Africa
3,419,Latin America and the Caribbean
4,513,Europe and Northern America


### Read metadata

In [10]:
metadatafile = 'metadata_2020.Q1.G.02.json'

# Read the goal -> target -> indicator -> series metadata tree.
# The encoding is given explicitly: without it open() falls back to the
# platform locale (a legacy code page on Windows), which is not a reliable way
# to decode a JSON file that carries multilingual labels.
with open('../../data/external/' + metadatafile, encoding='utf-8') as json_file:
    metadata = json.load(json_file)

# Show which keys are available at the goal, target and indicator levels.
metadata[0].keys()
metadata[0]['targets'][0].keys()
metadata[0]['targets'][0]['indicators'][0].keys()

dict_keys(['code', 'labelEN', 'descEN', 'labelES', 'descES', 'labelFR', 'descFR', 'labelRU', 'descRU', 'labelZN', 'descZN', 'hex', 'rgb', 'ColorScheme', 'thumbnail', 'targets'])

dict_keys(['code', 'labelEN', 'descEN', 'labelES', 'descES', 'labelFR', 'descFR', 'labelRU', 'descRU', 'labelZN', 'descZN', 'indicators'])

dict_keys(['code', 'reference', 'labelEN', 'descEN', 'labelES', 'descES', 'labelFR', 'descFR', 'labelRU', 'descRU', 'labelZN', 'descZN', 'series'])

### Functions

#### 1. Read dataset

In [11]:
def read_sdg_dataset(csvfile, data_dir='../../data/processed/2020.Q1.G.02/'):
    """Load one processed SDG series file into a DataFrame.

    Parameters
    ----------
    csvfile : str
        File name of the series CSV (for example the value returned by
        ``csv_file``).
    data_dir : str, optional
        Folder that contains the processed CSV files. Defaults to the
        2020.Q1.G.02 release folder used throughout this notebook, so existing
        one-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # os.path.join leaves the default (which already ends in '/') unchanged,
    # while also accepting folders given without a trailing separator.
    return pd.read_csv(os.path.join(data_dir, csvfile))


#### 2. Build csv file name

In [12]:
def csv_file(indicator, series):
    """Return the CSV file name used for one indicator/series pair.

    Dots in the indicator reference are turned into underscores, giving names
    of the form ``Indicator_<ref>__Series_<series>.csv``.
    """
    indicator_slug = indicator.replace('.', '_')
    return f"Indicator_{indicator_slug}__Series_{series}.csv"

#### 3. Identify disaggregations

In [13]:
def get_disaggregations(data):
    """List the columns of ``data`` that act as disaggregation dimensions.

    A column counts as a disaggregation when it is not one of the known
    identifier / metadata columns, does not start with ``value_``, ``upper`` or
    ``lower``, and does not end with ``_desc``. Column order is preserved.
    """
    non_dimension_cols = {
        'goal_code', 'goal_labelEN', 'goal_descEN', 'target_code', 'target_descEN',
        'indicator_code', 'indicator_reference', 'indicator_descEN',
        'series_release', 'series_tags', 'series', 'seriesDescription',
        'geoAreaCode', 'geoAreaName', 'level', 'parentCode', 'parentName', 'type',
        'X', 'Y', 'ISO3', 'UN_Member', 'Country_Profile', 'timeCoverage', 'geoInfoUrl',
        'years', 'min_year', 'max_year', 'n_years',
        'unitsCode', 'unitsDesc', 'reportingTypeCode', 'reportingTypeDesc',
        'latest_value', 'basePeriod', 'valueDetails', 'footnotes', 'sources',
        'timeDetails', 'nature',
    }
    skipped_prefixes = ('value_', 'upper', 'lower')

    disaggregations = []
    for column in list(data.columns):
        if column in non_dimension_cols:
            continue
        if column.startswith(skipped_prefixes) or column.endswith('_desc'):
            continue
        disaggregations.append(column)
    return disaggregations


### Compute data availability for every goal, target, indicator and series

In [14]:
def _summarise_availability(availability, group_cols, total_cols, has_disaggregations):
    """Aggregate per-country availability figures over ``group_cols``.

    Parameters
    ----------
    availability : pandas.DataFrame
        One row per country with the columns ``max_year``, ``n_years``,
        ``n_timeSeries``, ``n_dimensions``, ``values_per_dim``,
        ``data_for_female`` and ``countryCode``, plus every column named in
        ``group_cols`` and ``total_cols``.
    group_cols : list of str
        Columns identifying one row of the summary.
    total_cols : list of str
        Columns over which the total number of countries is counted and then
        joined back onto the summary.
    has_disaggregations : bool
        When False, ``values_per_dim`` and ``data_for_female`` are not
        aggregated; the corresponding summary columns are filled with None.

    Returns
    -------
    pandas.DataFrame
        ``group_cols`` followed by ``n_countries_available``,
        ``median_max_year``, ``median_n_years``, ``median_n_timeSeries``,
        ``n_dimensions``, ``median_values_per_dim``, ``mean_data_for_female``
        and ``n_countries_total`` (plus the ``index`` column added by
        ``reset_index``).
    """
    agg_spec = {'max_year': ['count', 'median'],
                'n_years': ['median'],
                'n_timeSeries': ['median'],
                'n_dimensions': ['median']}
    if has_disaggregations:
        agg_spec['values_per_dim'] = ['median']
        agg_spec['data_for_female'] = ['mean']

    summary = availability.groupby(group_cols, as_index=False).agg(agg_spec)

    if not has_disaggregations:
        summary['values_per_dim'] = None
        summary['data_for_female'] = None

    # The aggregation yields two-level column labels; flatten them to plain names.
    summary.columns = group_cols + ['n_countries_available', 'median_max_year',
                                    'median_n_years', 'median_n_timeSeries',
                                    'n_dimensions', 'median_values_per_dim',
                                    'mean_data_for_female']

    # Number of countries in each group, whether or not they have data.
    totals = availability.groupby(total_cols, as_index=False)['countryCode'].agg('count')
    summary = pd.merge(summary, totals, how='left', on=total_cols).reset_index()
    return summary.rename(columns={'countryCode': 'n_countries_total'})


availability_dir = '../../data/availability/2020.Q1.G.02/'
series_id_cols = ['Goal', 'Target', 'Indicator_ref', 'Series_code', 'Series_desc']

df_list = []

# Walk the metadata tree: goal -> target -> indicator -> series.
for g in metadata:
    for t in g['targets']:
        for i in t['indicators']:
            for s in i['series']:

                f = csv_file(i['reference'], s['code'])
                data = read_sdg_dataset(f)

                # Compute the disaggregation list once per series and reuse it.
                disaggregations = get_disaggregations(data)
                n_dimensions = len(disaggregations)

                # Printed explicitly so the listing does not depend on the
                # notebook-wide ast_node_interactivity = "all" setting.
                print(f"disaggregations for {s['code']}:")
                print(disaggregations)

                country_data_cols = ['geoAreaCode', 'geoAreaName', 'years',
                                     'min_year', 'max_year', 'n_years']
                country_data_cols.extend(disaggregations)

                # ---- Join countries and data (only areas present in both)
                country_data = pd.merge(ghana,
                                        data[country_data_cols],
                                        how='inner',
                                        left_on=['countryCode'],
                                        right_on=['geoAreaCode']).reset_index()

                # ---- Latest year, longest time series and number of time series
                country_availability = country_data.groupby(
                    ['countryCode', 'countryName'], as_index=False
                ).agg({'max_year': ['max'],
                       'n_years': ['max', 'count']})
                country_availability.columns = ['countryCode', 'countryName',
                                                'max_year', 'n_years', 'n_timeSeries']

                # ---- Number of disaggregation dimensions in the data set
                country_availability['n_dimensions'] = n_dimensions

                # ---- Average number of levels per dimension, taken as the
                #      n_dimensions-th root of the number of time series
                if n_dimensions > 0:
                    country_availability['values_per_dim'] = (
                        country_availability['n_timeSeries'] ** (1 / n_dimensions)
                    )
                else:
                    country_availability['values_per_dim'] = None

                # ---- Flag areas that have at least one row with sex_code == 'F'
                if 'sex_code' in disaggregations:
                    areas_with_female_data = (
                        data.loc[data['sex_code'] == 'F', 'geoAreaCode']
                        .drop_duplicates()
                        .tolist()
                    )
                else:
                    areas_with_female_data = []

                country_availability['data_for_female'] = np.where(
                    country_availability['countryCode'].isin(areas_with_female_data), 1, 0)

                # ---- Add regional groupings
                # NOTE(review): the original merged against `countries`, a name no
                # visible cell defines (NameError on Restart & Run All). `ghana` is
                # the only country table this notebook loads and it carries
                # regionCode / regionName, so it is used here. If a full country
                # list was intended, load it explicitly in the M49 cell instead.
                country_availability = pd.merge(ghana,
                                                country_availability,
                                                how='left',
                                                on=['countryCode']).reset_index()

                # Goal and target codes get a leading apostrophe -- presumably so
                # spreadsheet tools keep them as text; confirm before changing.
                country_availability['Goal'] = '\'' + g['code']
                country_availability['Target'] = '\'' + t['code']
                country_availability['Indicator_ref'] = i['reference']
                country_availability['Series_code'] = s['code']
                country_availability['Series_desc'] = s['description']

                # ---- Aggregate by region
                country_availability_by_region = _summarise_availability(
                    country_availability,
                    group_cols=series_id_cols + ['regionCode', 'regionName'],
                    total_cols=['regionCode'],
                    has_disaggregations=n_dimensions > 0)

                # ---- Aggregate by series (reported under a synthetic "World" region)
                country_availability_world = _summarise_availability(
                    country_availability,
                    group_cols=list(series_id_cols),
                    total_cols=['Series_code', 'Series_desc'],
                    has_disaggregations=n_dimensions > 0)
                country_availability_world['regionCode'] = 1
                country_availability_world['regionName'] = 'World'

                # ---- Save the per-series table and keep it for the combined file
                series_availability = pd.concat([country_availability_world,
                                                 country_availability_by_region])
                series_availability.to_csv(availability_dir + 'ghana_' + f)

                df_list.append(series_availability)

availability_by_series_and_region = pd.concat(df_list)

availability_by_series_and_region.to_csv(availability_dir + 'ghana_Availability_by_series.csv')

print('finished!')
                      

disaggregations for SI_POV_DAY1:


[]

disaggregations for SI_POV_EMP1:


['age_code', 'sex_code']

disaggregations for SI_POV_NAHC:


['location_code']

disaggregations for SI_COV_MATNL:


['sex_code']

disaggregations for SI_COV_POOR:


['sex_code']

disaggregations for SI_COV_SOCAST:


['quantile_code']

disaggregations for SI_COV_SOCINS:


['quantile_code']

disaggregations for SI_COV_CHLD:


['sex_code']

disaggregations for SI_COV_UEMP:


['sex_code']

disaggregations for SI_COV_VULN:


['sex_code']

disaggregations for SI_COV_WKINJRY:


['sex_code']

disaggregations for SI_COV_BENFTS:


['sex_code']

disaggregations for SI_COV_DISAB:


['sex_code']

disaggregations for SI_COV_LMKT:


['quantile_code']

disaggregations for SI_COV_PENSN:


['sex_code']

disaggregations for SP_ACS_BSRVH2O:


['location_code']

disaggregations for SP_ACS_BSRVSAN:


['location_code']

disaggregations for VC_DSR_MISS:


[]

disaggregations for VC_DSR_AFFCT:


[]

disaggregations for VC_DSR_MORT:


[]

disaggregations for VC_DSR_MTMP:


[]

disaggregations for VC_DSR_MTMN:


[]

disaggregations for VC_DSR_DAFF:


[]

disaggregations for VC_DSR_IJILN:


[]

disaggregations for VC_DSR_PDAN:


[]

disaggregations for VC_DSR_PDYN:


[]

disaggregations for VC_DSR_PDLN:


[]

disaggregations for VC_DSR_GDPLS:


[]

disaggregations for VC_DSR_LSGP:


[]

disaggregations for VC_DSR_AGLN:


[]

disaggregations for VC_DSR_HOLN:


[]

disaggregations for VC_DSR_CILN:


[]

disaggregations for VC_DSR_CHLN:


[]

disaggregations for VC_DSR_DDPA:


[]

disaggregations for SG_DSR_LGRGSR:


[]

disaggregations for SG_DSR_SILS:


[]

disaggregations for SG_DSR_SILN:


[]

disaggregations for SG_GOV_LOGV:


[]

disaggregations for SD_XPD_ESED:


[]

disaggregations for SN_ITK_DEFC:


[]

disaggregations for SN_ITK_DEFCN:


[]

disaggregations for AG_PRD_FIESMSI:


['observationStatus_code', 'age_code', 'sex_code']

disaggregations for AG_PRD_FIESMSIN:


['observationStatus_code', 'age_code', 'sex_code']

disaggregations for AG_PRD_FIESSI:


['observationStatus_code', 'age_code', 'sex_code']

disaggregations for AG_PRD_FIESSIN:


['observationStatus_code', 'age_code', 'sex_code']

disaggregations for SH_STA_STUNT:


['age_code']

disaggregations for SH_STA_STUNTN:


['age_code']

disaggregations for SH_STA_WASTE:


['age_code']

disaggregations for SH_STA_WASTEN:


['age_code']

disaggregations for SH_STA_OVRWGT:


['age_code']

disaggregations for SH_STA_OVRWGTN:


['age_code']

disaggregations for PD_AGR_SSFP:


[]

disaggregations for SI_AGR_SSFP:


[]

disaggregations for ER_GRF_ANIMRCNTN:


[]

disaggregations for ER_GRF_PLNTSTOR:


[]

disaggregations for ER_RSK_LBREDS:


[]

disaggregations for AG_PRD_ORTIND:


['observationStatus_code']

disaggregations for AG_PRD_AGVAS:


['observationStatus_code']

disaggregations for AG_XPD_AGSGB:


['observationStatus_code']

disaggregations for DC_TOF_AGRL:


[]

disaggregations for AG_PRD_XSUBDY:


[]

disaggregations for AG_FPA_COMM:


['typeOfProduct_code']

disaggregations for AG_FPA_CFPI:


[]

disaggregations for SH_STA_MMR:


[]

disaggregations for SH_STA_BRTC:


[]

disaggregations for SH_DYN_IMRTN:


['age_code', 'sex_code']

disaggregations for SH_DYN_MORT:


['age_code', 'sex_code']

disaggregations for SH_DYN_IMRT:


['age_code', 'sex_code']

disaggregations for SH_DYN_MORTN:


['age_code', 'sex_code']

disaggregations for SH_DYN_NMRTN:


['age_code', 'sex_code']

disaggregations for SH_DYN_NMRT:


['age_code', 'sex_code']

disaggregations for SH_HIV_INCD:


['age_code', 'sex_code']

disaggregations for SH_TBS_INCID:


[]

disaggregations for SH_STA_MALR:


[]

disaggregations for SH_HAP_HBSAG:


['age_code']

disaggregations for SH_TRP_INTVN:


[]

disaggregations for SH_DTH_NCOM:


['age_code', 'sex_code']

disaggregations for SH_DTH_RNCOM:


['sex_code', 'nameOfNonCommunicableDisease_code']

disaggregations for SH_STA_SCIDE:


['sex_code']

disaggregations for SH_STA_SCIDEN:


['sex_code']

disaggregations for SH_ALC_CONSPT:


['age_code', 'sex_code']

disaggregations for SH_STA_TRAF:


[]

disaggregations for SH_FPL_MTMM:


['age_code', 'sex_code']

disaggregations for SP_DYN_ADKL:


['age_code', 'sex_code']

disaggregations for SH_ACS_UNHC:


[]

disaggregations for SH_XPD_EARN25:


[]

disaggregations for SH_XPD_EARN10:


[]

disaggregations for SH_HAP_ASMORT:


[]

disaggregations for SH_STA_AIRP:


[]

disaggregations for SH_STA_ASAIRP:


[]

disaggregations for SH_AAP_MORT:


[]

disaggregations for SH_AAP_ASMORT:


[]

disaggregations for SH_HAP_MORT:


[]

disaggregations for SH_STA_WASH:


[]

disaggregations for SH_STA_POISN:


['sex_code']

disaggregations for SH_PRV_SMOK:


['age_code', 'sex_code']

disaggregations for SH_ACS_DTP3:


[]

disaggregations for SH_ACS_MCV2:


[]

disaggregations for SH_ACS_PCV3:


[]

disaggregations for SH_ACS_HPV:


[]

disaggregations for DC_TOF_HLTHNT:


[]

disaggregations for DC_TOF_HLTHL:


[]

disaggregations for SH_MED_HEAWOR:


['typeOfOccupation_code']

disaggregations for SH_MED_HWRKDIS:


['sex_code', 'typeOfOccupation_code']

disaggregations for SH_IHR_CAPS:


['ihrCapacity_code']

disaggregations for SE_TOT_PRFL:


['sex_code', 'educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_DEV_ONTRK:


['age_code']

disaggregations for SE_PRE_PARTN:


['sex_code']

disaggregations for SE_ADT_EDUCTRN:


['sex_code']

disaggregations for SE_ADT_ACTS:


['sex_code', 'typeOfSkill_code']

disaggregations for SE_PRE_GPIPARTN:


[]

disaggregations for SE_GPI_TRATEA:


['educationLevel_code']

disaggregations for SE_GPI_PART:


[]

disaggregations for SE_GPI_ICTS:


['typeOfSkill_code']

disaggregations for SE_IMP_FPOF:


['typeOfSkill_code']

disaggregations for SE_NAP_ACHI:


['educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_LGP_ACHI:


['educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_TOT_GPI:


['educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_TOT_SESPI:


['educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_TOT_RUPI:


['educationLevel_code', 'typeOfSkill_code']

disaggregations for SE_ADT_FUNS:


['age_code', 'sex_code', 'typeOfSkill_code']

disaggregations for SE_ACC_COMP:


['educationLevel_code']

disaggregations for SE_ACC_DWAT:


['educationLevel_code']

disaggregations for SE_ACC_ELEC:


['educationLevel_code']

disaggregations for SE_ACC_HNWA:


['educationLevel_code']

disaggregations for SE_ACC_INTN:


['educationLevel_code']

disaggregations for SE_ACC_SANI:


['educationLevel_code']

disaggregations for SE_INF_DSBL:


['educationLevel_code']

disaggregations for DC_TOF_SCHIPSL:


[]

disaggregations for SE_TRA_GRDL:


['sex_code', 'educationLevel_code']

disaggregations for SG_LGL_GENEQLFP:


[]

disaggregations for SG_LGL_GENEQVAW:


[]

disaggregations for SG_LGL_GENEQEMP:


[]

disaggregations for SG_LGL_GENEQMAR:


[]

disaggregations for VC_VAW_MARR:


['age_code', 'sex_code']

disaggregations for SP_DYN_MRBF18:


['age_code', 'sex_code']

disaggregations for SP_DYN_MRBF15:


['age_code', 'sex_code']

disaggregations for SH_STA_FGMS:


['age_code', 'sex_code']

disaggregations for SL_DOM_TSPDCW:


['age_code', 'location_code', 'sex_code']

disaggregations for SL_DOM_TSPDDC:


['age_code', 'location_code', 'sex_code']

disaggregations for SL_DOM_TSPD:


['age_code', 'location_code', 'sex_code']

disaggregations for SG_GEN_PARLN:


['sex_code']

disaggregations for SG_GEN_PARLNT:


['sex_code']

disaggregations for SG_GEN_PARL:


['sex_code']

disaggregations for SG_GEN_LOCGELS:


['sex_code']

disaggregations for IC_GEN_MGTL:


['sex_code']

disaggregations for IC_GEN_MGTN:


['sex_code']

disaggregations for SH_FPL_INFM:


['age_code', 'sex_code']

disaggregations for SH_FPL_INFMSR:


['age_code', 'sex_code']

disaggregations for SH_FPL_INFMCU:


['age_code', 'sex_code']

disaggregations for SH_FPL_INFMRH:


['age_code', 'sex_code']

disaggregations for SH_LGR_ACSRHE:


[]

disaggregations for SH_LGR_ACSRHEC1:


[]

disaggregations for SH_LGR_ACSRHEC10:


[]

disaggregations for SH_LGR_ACSRHEC11:


[]

disaggregations for SH_LGR_ACSRHEC12:


[]

disaggregations for SH_LGR_ACSRHEC13:


[]

disaggregations for SH_LGR_ACSRHEC2:


[]

disaggregations for SH_LGR_ACSRHEC3:


[]

disaggregations for SH_LGR_ACSRHEC4:


[]

disaggregations for SH_LGR_ACSRHEC5:


[]

disaggregations for SH_LGR_ACSRHEC6:


[]

disaggregations for SH_LGR_ACSRHEC7:


[]

disaggregations for SH_LGR_ACSRHEC8:


[]

disaggregations for SH_LGR_ACSRHEC9:


[]

disaggregations for SH_LGR_ACSRHES1:


[]

disaggregations for SH_LGR_ACSRHES2:


[]

disaggregations for SH_LGR_ACSRHES3:


[]

disaggregations for SH_LGR_ACSRHES4:


[]

disaggregations for SP_LGL_LNDAGSEC:


['sex_code']

disaggregations for SP_GNP_WNOWNS:


[]

disaggregations for SG_LGL_LNDWMN:


[]

disaggregations for IT_MOB_OWN:


['sex_code']

disaggregations for SG_GEN_EQPWN:


[]

disaggregations for SH_H2O_SAFE:


['location_code']

disaggregations for SH_SAN_HNDWSH:


['location_code']

disaggregations for SH_SAN_SAFE:


['location_code']

disaggregations for SH_SAN_DEFECT:


['location_code']

disaggregations for EN_WWT_WWDS:


[]

disaggregations for EN_H2O_OPAMBQ:


[]

disaggregations for EN_H2O_RVAMBQ:


[]

disaggregations for EN_H2O_GRAMBQ:


[]

disaggregations for EN_H2O_WBAMBQ:


[]

disaggregations for ER_H2O_WUEYST:


[]

disaggregations for ER_H2O_STRESS:


['observationStatus_code']

disaggregations for ER_H2O_IWRMD:


[]

disaggregations for ER_H2O_IWRMP:


['levelStatus_code']

disaggregations for EG_TBA_H2CO:


[]

disaggregations for EG_TBA_H2COAQ:


[]

disaggregations for EG_TBA_H2CORL:


[]

disaggregations for EN_WBE_PMPR:


[]

disaggregations for EN_WBE_PMNR:


[]

disaggregations for EN_WBE_PMPP:


[]

disaggregations for EN_WBE_PMPN:


[]

disaggregations for EN_WBE_NDETOT:


[]

disaggregations for EN_WBE_NDOPW:


[]

disaggregations for EN_WBE_NDQLGRW:


[]

disaggregations for EN_WBE_NDQLOPW:


[]

disaggregations for EN_WBE_NDQLRVR:


[]

disaggregations for EN_WBE_NDQLTOT:


[]

disaggregations for EN_WBE_NDQTGRW:


[]

disaggregations for EN_WBE_NDQTOPW:


[]

disaggregations for EN_WBE_NDQTRVR:


[]

disaggregations for EN_WBE_NDQTTOT:


[]

disaggregations for EN_WBE_NDRV:


[]

disaggregations for EN_WBE_NDWTL:


[]

disaggregations for DC_TOF_WASHL:


[]

disaggregations for ER_WAT_PROCED:


[]

disaggregations for ER_H2O_PARTIC:


['location_code']

disaggregations for ER_H2O_PROCED:


['location_code']

disaggregations for ER_WAT_PARTIC:


[]

disaggregations for ER_H2O_RURP:


['location_code']

disaggregations for ER_H2O_PRDU:


['location_code']

disaggregations for ER_WAT_PART:


[]

disaggregations for ER_WAT_PRDU:


[]

disaggregations for EG_ELC_ACCS:


['location_code']

disaggregations for EG_EGY_CLEAN:


[]

disaggregations for EG_FEC_RNEW:


[]

disaggregations for EG_EGY_PRIM:


[]

disaggregations for EG_IFF_RANDN:


[]

disaggregations for NY_GDP_PCAP:


[]

disaggregations for SL_EMP_PCAP:


[]

disaggregations for SL_ISV_IFRM:


['sex_code', 'activity_code']

disaggregations for EN_MAT_FTPRPG:


['typeOfProduct_code']

disaggregations for EN_MAT_FTPRPC:


['typeOfProduct_code']

disaggregations for EN_MAT_FTPRTN:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPT:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPG:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPC:


['typeOfProduct_code']

disaggregations for SL_EMP_AEARN:


['sex_code', 'typeOfOccupation_code']

disaggregations for SL_TLF_UEM:


['age_code', 'sex_code']

disaggregations for SL_TLF_UEMDIS:


['sex_code', 'disabilityStatus_code']

disaggregations for SL_TLF_NEET:


['age_code', 'sex_code']

disaggregations for SL_TLF_CHLDEC:


['age_code', 'sex_code']

disaggregations for SL_TLF_CHLDEA:


['age_code', 'sex_code']

disaggregations for SL_EMP_FTLINJUR:


['sex_code', 'migratoryStatus_code']

disaggregations for SL_EMP_INJUR:


['sex_code', 'migratoryStatus_code']

disaggregations for SL_LBR_NTLCPL:


[]

disaggregations for ST_GDP_ZS:


[]

disaggregations for FB_ATM_TOTL:


['observationStatus_code', 'age_code']

disaggregations for FB_CBK_BRCH:


['observationStatus_code', 'age_code']

disaggregations for FB_BNK_ACCSS:


['age_code', 'sex_code']

disaggregations for DC_TOF_TRDCMDL:


[]

disaggregations for DC_TOF_TRDDBMDL:


[]

disaggregations for DC_TOF_TRDDBML:


[]

disaggregations for DC_TOF_TRDCML:


[]

disaggregations for SL_CPA_YEMP:


[]

disaggregations for IS_RDP_FRGVOL:


['modeOfTransportation_code']

disaggregations for IS_RDP_PFVOL:


['modeOfTransportation_code']

disaggregations for IS_RDP_PORFVOL:


[]

disaggregations for IS_RDP_LULFRG:


[]

disaggregations for NV_IND_MANFPC:


[]

disaggregations for NV_IND_MANF:


[]

disaggregations for SL_TLF_MANF:


[]

disaggregations for NV_IND_SSIS:


[]

disaggregations for FC_ACC_SSID:


[]

disaggregations for EN_ATM_CO2:


[]

disaggregations for EN_ATM_CO2MVA:


[]

disaggregations for EN_ATM_CO2GDP:


[]

disaggregations for GB_XPD_RSDV:


[]

disaggregations for GB_POP_SCIERD:


[]

disaggregations for DC_TOF_INFRAL:


[]

disaggregations for NV_IND_TECH:


[]

disaggregations for IT_MOB_2GNTWK:


[]

disaggregations for IT_MOB_3GNTWK:


[]

disaggregations for IT_MOB_4GNTWK:


[]

disaggregations for SI_HEI_TOTL:


['quantile_code']

disaggregations for SL_EMP_GTOTL:


[]

disaggregations for FI_FSI_FSANL:


['observationStatus_code']

disaggregations for FI_FSI_FSERA:


['observationStatus_code']

disaggregations for FI_FSI_FSKA:


['observationStatus_code']

disaggregations for FI_FSI_FSKNL:


['observationStatus_code']

disaggregations for FI_FSI_FSKRTC:


['observationStatus_code']

disaggregations for FI_FSI_FSLS:


['observationStatus_code']

disaggregations for FI_FSI_FSSNO:


['observationStatus_code']

disaggregations for SG_INT_MBRDEV:


['nameOfInternationalInstitution_code']

disaggregations for SG_INT_VRTDEV:


['nameOfInternationalInstitution_code']

disaggregations for SG_CPA_MIGR:


['policyDomains_code']

disaggregations for SG_CPA_MIGRP:


['policyDomains_code']

disaggregations for TM_TRF_ZERO:


['typeOfProduct_code']

disaggregations for DC_TRF_TOTDL:


[]

disaggregations for DC_TRF_TOTL:


[]

disaggregations for DC_TRF_TFDV:


[]

disaggregations for SI_RMT_COST:


[]

disaggregations for EN_LND_SLUM:


[]

disaggregations for VC_DSR_MISS:


[]

disaggregations for VC_DSR_AFFCT:


[]

disaggregations for VC_DSR_MORT:


[]

disaggregations for VC_DSR_MTMP:


[]

disaggregations for VC_DSR_MTMN:


[]

disaggregations for VC_DSR_DAFF:


[]

disaggregations for VC_DSR_IJILN:


[]

disaggregations for VC_DSR_PDAN:


[]

disaggregations for VC_DSR_PDYN:


[]

disaggregations for VC_DSR_PDLN:


[]

disaggregations for VC_DSR_GDPLS:


[]

disaggregations for VC_DSR_LSGP:


[]

disaggregations for VC_DSR_AGLN:


[]

disaggregations for VC_DSR_HOLN:


[]

disaggregations for VC_DSR_CILN:


[]

disaggregations for VC_DSR_CHLN:


[]

disaggregations for VC_DSR_CDAN:


[]

disaggregations for VC_DSR_HFDN:


[]

disaggregations for VC_DSR_EFDN:


[]

disaggregations for VC_DSR_CDYN:


[]

disaggregations for VC_DSR_BSDN:


[]

disaggregations for VC_DSR_ESDN:


[]

disaggregations for VC_DSR_HSDN:


[]

disaggregations for VC_DSR_OBDN:


[]

disaggregations for VC_DSR_DDPA:


[]

disaggregations for EN_REF_WASCOL:


['cities_code']

disaggregations for EN_ATM_PM25:


['location_code']

disaggregations for SG_DSR_LGRGSR:


[]

disaggregations for SG_DSR_SILS:


[]

disaggregations for SG_DSR_SILN:


[]

disaggregations for SG_GOV_LOGV:


[]

disaggregations for SG_SCP_CNTRY:


[]

disaggregations for SG_SCP_CORMEC:


[]

disaggregations for SG_SCP_POLINS:


['policyInstruments_code']

disaggregations for SG_SCP_OTHER:


[]

disaggregations for SG_SCP_TOTL:


[]

disaggregations for EN_MAT_FTPRPG:


['typeOfProduct_code']

disaggregations for EN_MAT_FTPRPC:


['typeOfProduct_code']

disaggregations for EN_MAT_FTPRTN:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPT:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPG:


['typeOfProduct_code']

disaggregations for EN_MAT_DOMCMPC:


['typeOfProduct_code']

disaggregations for AG_FLS_IDX:


[]

disaggregations for SG_HAZ_CMRMNTRL:


[]

disaggregations for SG_HAZ_CMRROTDAM:


[]

disaggregations for SG_HAZ_CMRBASEL:


[]

disaggregations for SG_HAZ_CMRSTHOLM:


[]

disaggregations for SG_HAZ_CMRMNMT:


[]

disaggregations for EN_EWT_GENV:


[]

disaggregations for EN_EWT_GENPCAP:


[]

disaggregations for EN_EWT_RCYV:


[]

disaggregations for EN_EWT_RCYR:


[]

disaggregations for EN_EWT_RCYPCAP:


[]

disaggregations for EN_HAZ_GENV:


[]

disaggregations for EN_HAZ_PCAP:


[]

disaggregations for EN_HAZ_GENGDP:


[]

disaggregations for EN_HAZ_TREATV:


['typeOfWasteTreatment_code']

disaggregations for EN_HAZ_TRTDISR:


[]

disaggregations for EN_HAZ_TRTDISV:


[]

disaggregations for EN_EWT_RCYV:


[]

disaggregations for EN_EWT_RCYR:


[]

disaggregations for EN_EWT_RCYPCAP:


[]

disaggregations for ST_EEV_STDACCT:


[]

disaggregations for ST_EEV_ACCSEEA:


[]

disaggregations for ST_EEV_ACCTSA:


[]

disaggregations for ER_FFS_PRTSST:


[]

disaggregations for ER_FFS_PRTSPC:


[]

disaggregations for ER_FFS_PRTSPR:


[]

disaggregations for VC_DSR_MISS:


[]

disaggregations for VC_DSR_AFFCT:


[]

disaggregations for VC_DSR_MORT:


[]

disaggregations for VC_DSR_MTMP:


[]

disaggregations for VC_DSR_MTMN:


[]

disaggregations for VC_DSR_DAFF:


[]

disaggregations for VC_DSR_IJILN:


[]

disaggregations for VC_DSR_PDAN:


[]

disaggregations for VC_DSR_PDYN:


[]

disaggregations for VC_DSR_PDLN:


[]

disaggregations for SG_DSR_LGRGSR:


[]

disaggregations for SG_DSR_SILS:


[]

disaggregations for SG_DSR_SILN:


[]

disaggregations for SG_GOV_LOGV:


[]

disaggregations for ER_OAW_MNACD:


['samplingStations_code']

disaggregations for ER_H2O_FWTL:


[]

disaggregations for ER_MRN_MARINT:


[]

disaggregations for ER_MRN_MARIN:


[]

disaggregations for ER_MRN_MPA:


[]

disaggregations for ER_REG_UNFCIM:


[]

disaggregations for ER_RDE_OSEX:


[]

disaggregations for ER_REG_SSFRAR:


[]

disaggregations for AG_LND_TOTL:


[]

disaggregations for AG_LND_FRSTN:


[]

disaggregations for AG_LND_FRST:


[]

disaggregations for ER_PTD_FRWRT:


[]

disaggregations for ER_PTD_TERRS:


[]

disaggregations for AG_LND_FRSTBIOPHA:


[]

disaggregations for AG_LND_FRSTCERT:


[]

disaggregations for AG_LND_FRSTCHG:


[]

disaggregations for AG_LND_FRSTMGT:


[]

disaggregations for AG_LND_FRSTPRCT:


[]

disaggregations for AG_LND_DGRD:


[]

disaggregations for ER_PTD_MOTN:


[]

disaggregations for ER_MTN_GRNCVI:


[]

disaggregations for ER_MTN_TOTL:


[]

disaggregations for ER_MTN_GRNCOV:


[]

disaggregations for ER_RSK_LSTI:


[]

disaggregations for ER_CBD_SMTA:


[]

disaggregations for ER_CBD_NAGOYA:


[]

disaggregations for ER_CBD_ABSCLRHS:


[]

disaggregations for ER_CBD_ORSPGRFA:


[]

disaggregations for ER_CBD_PTYPGRFA:


[]

disaggregations for ER_IAS_LEGIS:


[]

disaggregations for ER_IAS_NBSAP:


[]

disaggregations for ER_BDY_ABT2NP:


['levelStatus_code']

disaggregations for ER_BDY_SEEA:


[]

disaggregations for DC_ODA_BDVDL:


[]

disaggregations for DC_ODA_BDVL:


[]

disaggregations for DC_ODA_BDVDL:


[]

disaggregations for DC_ODA_BDVL:


[]

disaggregations for VC_IHR_PSRC:


['sex_code']

disaggregations for VC_IHR_PSRCN:


['sex_code']

disaggregations for VC_VOV_PHYL:


['sex_code']

disaggregations for VC_VOV_ROBB:


['sex_code']

disaggregations for VC_VOV_SEXL:


['sex_code']

disaggregations for VC_SNS_WALN:


[]

disaggregations for VC_VAW_PHYPYV:


['age_code']

disaggregations for VC_HTF_DETVFL:


['age_code', 'sex_code']

disaggregations for VC_HTF_DETVOP:


['age_code', 'sex_code']

disaggregations for VC_HTF_DETVOG:


['age_code', 'sex_code']

disaggregations for VC_HTF_DETVSX:


['age_code', 'sex_code']

disaggregations for VC_HTF_DETV:


['age_code', 'sex_code']

disaggregations for VC_VAW_SXVLN:


['age_code', 'sex_code']

disaggregations for VC_PRR_PHYV:


['sex_code']

disaggregations for VC_PRR_SEXV:


['sex_code']

disaggregations for VC_PRR_ROBB:


['sex_code']

disaggregations for VC_PRS_UNSEC:


[]

disaggregations for IU_COR_BRIB:


['sex_code']

disaggregations for IC_FRM_BRIB:


[]

disaggregations for GF_XPD_GBPC:


[]

disaggregations for SG_INT_MBRDEV:


['nameOfInternationalInstitution_code']

disaggregations for SG_INT_VRTDEV:


['nameOfInternationalInstitution_code']

disaggregations for SG_REG_BRTH:


['age_code']

disaggregations for VC_VAW_MTUHRA:


['sex_code']

disaggregations for SG_INF_ACCSS:


[]

disaggregations for SG_NHR_IMPL:


[]

disaggregations for SG_NHR_IMPLN:


[]

disaggregations for SG_NHR_INTEXST:


[]

disaggregations for SG_NHR_NOSTUSN:


[]

disaggregations for SG_NHR_INTEXSTN:


[]

disaggregations for SG_NHR_NOAPPLN:


[]

disaggregations for GR_G14_GDP:


['observationStatus_code']

disaggregations for GR_G14_XDC:


['observationStatus_code']

disaggregations for GC_GOB_TAXD:


['observationStatus_code']

disaggregations for DC_ODA_SIDSG:


[]

disaggregations for DC_ODA_LDCG:


[]

disaggregations for DC_ODA_LLDC:


[]

disaggregations for DC_ODA_SIDS:


[]

disaggregations for DC_ODA_LDCS:


[]

disaggregations for DC_ODA_LLDCG:


[]

disaggregations for DC_ODA_TOTG:


[]

disaggregations for DC_ODA_TOTL:


[]

disaggregations for DC_ODA_TOTLGE:


[]

disaggregations for DC_ODA_TOTGGE:


[]

disaggregations for BX_TRF_PWKR:


[]

disaggregations for DT_TDS_DECT:


[]

disaggregations for IT_NET_BBN:


['typeOfSpeed_code']

disaggregations for IT_NET_BBP:


['typeOfSpeed_code']

disaggregations for IT_USE_ii99:


[]

disaggregations for DC_FTA_TOTAL:


[]

disaggregations for TM_TAX_WMFN:


['typeOfProduct_code']

disaggregations for TM_TAX_WMPS:


['typeOfProduct_code']

disaggregations for TX_IMP_GBMRCH:


[]

disaggregations for TX_EXP_GBMRCH:


[]

disaggregations for TX_EXP_GBSVR:


[]

disaggregations for TX_IMP_GBSVR:


[]

disaggregations for TM_TAX_DMFN:


['typeOfProduct_code']

disaggregations for TM_TAX_DPRF:


['typeOfProduct_code']

disaggregations for SG_PLN_PRVRIMON:


[]

disaggregations for SG_PLN_RECRIMON:


[]

disaggregations for SG_PLN_PRVNDI:


[]

disaggregations for SG_PLN_RECNDI:


[]

disaggregations for SG_PLN_PRVRICTRY:


[]

disaggregations for SG_PLN_RECRICTRY:


[]

disaggregations for SG_PLN_REPOLRES:


[]

disaggregations for SG_PLN_PRPOLRES:


[]

disaggregations for SG_PLN_MSTKSDG:


[]

disaggregations for SG_STT_FPOS:


[]

disaggregations for SG_STT_NSDSFDGVT:


[]

disaggregations for SG_STT_NSDSFDDNR:


[]

disaggregations for SG_STT_NSDSFDOTHR:


[]

disaggregations for SG_STT_NSDSIMPL:


[]

disaggregations for SG_STT_NSDSFND:


[]

disaggregations for SG_STT_CAPTY:


[]

disaggregations for SG_REG_BRTH90:


[]

disaggregations for SG_REG_DETH75:


[]

disaggregations for SG_REG_CENSUS:


[]

disaggregations for SG_REG_CENSUSN:


[]

disaggregations for SG_REG_BRTH90N:


[]

disaggregations for SG_REG_DETH75N:


[]

finished!


### Availability by indicator, target and goal

In [9]:
# Roll the series-level availability table up to one row per indicator and region.
# Every reported figure is the plain mean of the series-level values in the group.
group_keys = ['Goal', 'Target', 'Indicator_ref', 'regionCode', 'regionName']

# Series-level column -> name its mean takes in the indicator-level table.
averaged_columns = {
    'n_countries_available': 'avg_n_countries_available',
    'median_max_year': 'avg_median_max_year',
    'median_n_years': 'avg_median_n_years',
    'median_n_timeSeries': 'avg_median_n_timeSeries',
    'n_dimensions': 'avg_n_dimensions',
    'median_values_per_dim': 'avg_median_values_per_dim',
    'mean_data_for_female': 'mean_data_for_female',
    'n_countries_total': 'n_countries_total',
}

# These two columns are coerced in place on the series-level frame before averaging
# (presumably they are not stored as numbers upstream -- TODO confirm).
for column in ('median_values_per_dim', 'mean_data_for_female'):
    availability_by_series_and_region[column] = pd.to_numeric(
        availability_by_series_and_region[column]
    )

# One aggregation per column keeps the resulting column index flat.
availability_by_indicator_and_region = (
    availability_by_series_and_region
    .groupby(group_keys, as_index=False)
    .agg({column: 'mean' for column in averaged_columns})
)
availability_by_indicator_and_region.columns = group_keys + list(averaged_columns.values())

# The row index is written too (to_csv default), giving an unnamed first CSV column.
availability_by_indicator_and_region.to_csv('../../data/availability/2020.Q1.G.02/Availability_by_indicator.csv')

print('finished!')
                     
       

finished!


In [11]:
# Roll the indicator-level availability table up to one row per target and region.
# Every reported figure is the unweighted mean of the indicator-level values in the
# group, i.e. a mean of values that are themselves averages.
group_keys = ['Goal', 'Target', 'regionCode', 'regionName']
value_columns = [
    'avg_n_countries_available',
    'avg_median_max_year',
    'avg_median_n_years',
    'avg_median_n_timeSeries',
    'avg_n_dimensions',
    'avg_median_values_per_dim',
    'mean_data_for_female',
    'n_countries_total',
]

# Use item assignment on the real column names: attribute-style assignment to a name
# that is not already a column only sets a Python attribute on the DataFrame and
# leaves the data untouched.
for column in ('avg_median_values_per_dim', 'mean_data_for_female'):
    availability_by_indicator_and_region[column] = pd.to_numeric(
        availability_by_indicator_and_region[column]
    )

# One aggregation per column keeps the column index flat, so the input column names
# carry over unchanged and no manual relabelling is required.
availability_by_target_and_region = (
    availability_by_indicator_and_region
    .groupby(group_keys, as_index=False)
    .agg({column: 'mean' for column in value_columns})
)

# The row index is written too (to_csv default), giving an unnamed first CSV column.
availability_by_target_and_region.to_csv('../../data/availability/2020.Q1.G.02/Availability_by_target.csv')

print('finished!')
                    

finished!


In [13]:
# Roll the target-level availability table up to one row per goal and region.
# Every reported figure is the unweighted mean of the target-level values in the
# group, i.e. a mean of values that are themselves averages.
group_keys = ['Goal', 'regionCode', 'regionName']
value_columns = [
    'avg_n_countries_available',
    'avg_median_max_year',
    'avg_median_n_years',
    'avg_median_n_timeSeries',
    'avg_n_dimensions',
    'avg_median_values_per_dim',
    'mean_data_for_female',
    'n_countries_total',
]

# Use item assignment on the real column names: attribute-style assignment to a name
# that is not already a column only sets a Python attribute on the DataFrame and
# leaves the data untouched.
for column in ('avg_median_values_per_dim', 'mean_data_for_female'):
    availability_by_target_and_region[column] = pd.to_numeric(
        availability_by_target_and_region[column]
    )

# One aggregation per column keeps the column index flat, so the input column names
# carry over unchanged and no manual relabelling is required.
availability_by_goal_and_region = (
    availability_by_target_and_region
    .groupby(group_keys, as_index=False)
    .agg({column: 'mean' for column in value_columns})
)

# The row index is written too (to_csv default), giving an unnamed first CSV column.
availability_by_goal_and_region.to_csv('../../data/availability/2020.Q1.G.02/Availability_by_goal.csv')

print('finished!')
                    

finished!
