# Explore SDG Enumerations


For all SDG statistical variables currently in DC, we have the list of ‘constraintProperties’, i.e., the list of dimensions that are actively used by at least one statistical variable. Each of these properties has a set of values, and each value is in turn an instance of a specific Enumeration class.

In [1]:
# Import Data Commons

import pandas as pd
import numpy as np
import datacommons_pandas as dc

# Import other required libraries
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd

import json
import time

import ast

import os
from dotenv import load_dotenv

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from IPython.display import clear_output, display
from IPython.core.display import HTML, JSON

import requests
from bs4 import BeautifulSoup


import sys
sys.path.append('../') # add the project's root directory to the system path

from utils_excel import * # import the utils_excel module from the project's root directory


---

In [2]:
# Load variables from a local .env file into the process environment,
# then pick up the Data Commons API key from there.
load_dotenv()
api_key = os.environ.get("DC_KEY")

True

### SPARQL Query

In [3]:
# Authentication header and endpoint URL for the Data Commons query API
headers = {"X-API-Key": api_key}
url = "https://api.datacommons.org/v1/query"

### REST API

In [4]:
def call_api(endpoint, parameters):
    """Call a Data Commons REST endpoint and return the decoded JSON body.

    Args:
        endpoint: API path without a leading slash, e.g. 'v1/property/values'.
        parameters: Path suffix appended verbatim to `endpoint`,
            e.g. '/out/<dcid>/typeOf'.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # Use HTTPS so the API key is never transmitted in clear text.
    url = f"https://api.datacommons.org/{endpoint}{parameters}"
    # Log the request without the key so it does not leak into cell output.
    print(url)
    # Passing the key via `params` lets requests URL-encode it; the timeout
    # keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(url, params={"key": api_key}, timeout=60)
    # Fail loudly on HTTP errors instead of returning an error body as data.
    response.raise_for_status()
    return response.json()

---

In [12]:
# Read the statistical-variable properties table and replace missing cells
# with empty strings.
variable_properties = pd.read_excel('../data/output/Variable_Properties.xlsx')
variable_properties = variable_properties.fillna('')
variable_properties.head(3)

Unnamed: 0,dcid,measuredProperty,measuredProperty__name,memberOf,memberOf__name,name,populationType,populationType__name,provenance,provenance__name,...,sdg_typeOfWasteTreatment,sdg_typeOfWasteTreatment__name,sdg_frequencyOfChlorophyllAConcentration,sdg_frequencyOfChlorophyllAConcentration__name,sdg_deviationLevel,sdg_deviationLevel__name,sdg_typeOfRenewableTechnology,sdg_typeOfRenewableTechnology__name,sdg_foodWasteSector,sdg_foodWasteSector__name
0,sdg/VC_DSR_AFFCT,value,value,"['dc/g/SDG_1.5.1', 'dc/g/SDG_11.5.1', 'dc/g/SD...","['1.5.1: Number of deaths, missing persons and...",Number of people affected by disaster,SDG_VC_DSR_AFFCT,Number of people affected by disaster,dc/base/HumanReadableStatVars,HumanReadableStatVars,...,,,,,,,,,,
1,sdg/VC_DSR_DAFF,value,value,"['dc/g/SDG_1.5.1', 'dc/g/SDG_11.5.1', 'dc/g/SD...","['1.5.1: Number of deaths, missing persons and...",Number of directly affected persons attributed...,SDG_VC_DSR_DAFF,Number of directly affected persons attributed...,dc/base/HumanReadableStatVars,HumanReadableStatVars,...,,,,,,,,,,
2,sdg/VC_DSR_IJILN,value,value,"['dc/g/SDG_1.5.1', 'dc/g/SDG_11.5.1', 'dc/g/SD...","['1.5.1: Number of deaths, missing persons and...",Number of injured or ill people attributed to ...,SDG_VC_DSR_IJILN,Number of injured or ill people attributed to ...,dc/base/HumanReadableStatVars,HumanReadableStatVars,...,,,,,,,,,,


### 1. Obtain properties directly linked to SDG statistical variables through the `constraintProperties` relation

In [19]:
# Collect every distinct constraint property referenced by the statistical variables.
# A cell holds either a bare property name or the string repr of a Python list of names.
ConstraintProperties = list(variable_properties['constraintProperties'])

# A set de-duplicates in constant time per element (the list-membership test was quadratic).
unique_properties = set()

for element in ConstraintProperties:
    if not isinstance(element, str):
        continue
    if not element.startswith("["):
        # Simple string: a single property name
        unique_properties.add(element)
    elif element.endswith("]"):
        # Stringified list: parse it safely and add each member
        unique_properties.update(ast.literal_eval(element))

# Blank cells show up as ''; discard() is a no-op when none exist,
# whereas list.remove('') would raise ValueError.
unique_properties.discard('')

ConstraintProperties_unique = sorted(unique_properties)
print(f"{len(ConstraintProperties_unique)=}")
ConstraintProperties_unique

len(ConstraintProperties_unique)=36


['age',
 'disabilityStatus',
 'educationalAttainment',
 'gender',
 'sdg_activity',
 'sdg_counterpart',
 'sdg_customBreakdown',
 'sdg_deviationLevel',
 'sdg_fiscalInterventionStage',
 'sdg_foodWasteSector',
 'sdg_frequencyOfChlorophyllAConcentration',
 'sdg_governmentName',
 'sdg_groundsOfDiscrimination',
 'sdg_ihrCapacity',
 'sdg_levelOfGovernment',
 'sdg_levelStatus',
 'sdg_location',
 'sdg_migratoryStatus',
 'sdg_modeOfTransportation',
 'sdg_mountainElevation',
 'sdg_nameOfInternationalInstitution',
 'sdg_nameOfNonCommunicableDisease',
 'sdg_parliamentaryCommittees',
 'sdg_policyDomains',
 'sdg_policyInstruments',
 'sdg_populationGroup',
 'sdg_quantile',
 'sdg_samplingStations',
 'sdg_serviceAttribute',
 'sdg_substanceUseDisorders',
 'sdg_typeOfOccupation',
 'sdg_typeOfProduct',
 'sdg_typeOfRenewableTechnology',
 'sdg_typeOfSkill',
 'sdg_typeOfSpeed',
 'sdg_typeOfWasteTreatment']

In [44]:
# For every value used by each constraint property, look up the class it is
# an instance of (its `typeOf`) through the Data Commons API.
enumeration_columns = ['dimension', 'dim_value', 'dim_enumeration', 'dim_enumeration_name']
enumeration_rows = []

for dimension in ConstraintProperties_unique:

    # Distinct, non-blank values of this dimension across all statistical variables
    dim_values = sorted(set(variable_properties[dimension]) - {''})

    for dv in dim_values:
        types = call_api('v1/property/values', f'/out/{dv}/typeOf').get('values') or []
        if not types:
            # One node without a typeOf should not abort the whole (long) loop.
            print(f"WARNING: no typeOf found for {dv}; skipping")
            continue
        enumeration_rows.append({
            'dimension': dimension,
            'dim_value': dv,
            'dim_enumeration': types[0]['dcid'],
            'dim_enumeration_name': types[0].get('name', ''),
        })

# Explicit columns keep the frame's schema stable even if no rows were collected.
enumerations = pd.DataFrame(enumeration_rows, columns=enumeration_columns)
enumerations
    
    
    

http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_1-14/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_1-4/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_10-14/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_10-17/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_10GEQ/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_12-14/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_12-24/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_12GEQ/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_14GEQ/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_15-19/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_15-24/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_15-49/typeOf
http://api.datacommons.org/v1/property/values/out/SDG_AgeEnum_15-64/typeOf
http://api.datacommons.org/v

Unnamed: 0,dimension,dim_value,dim_enumeration,dim_enumeration_name
0,age,SDG_AgeEnum_1-14,SDG_AgeEnum,AgeEnum
1,age,SDG_AgeEnum_1-4,SDG_AgeEnum,AgeEnum
2,age,SDG_AgeEnum_10-14,SDG_AgeEnum,AgeEnum
3,age,SDG_AgeEnum_10-17,SDG_AgeEnum,AgeEnum
4,age,SDG_AgeEnum_10GEQ,SDG_AgeEnum,AgeEnum
...,...,...,...,...
979,sdg_typeOfWasteTreatment,SDG_TypeOfWasteTreatmentEnum_INCINRTEGY,SDG_TypeOfWasteTreatmentEnum,TypeOfWasteTreatmentEnum
980,sdg_typeOfWasteTreatment,SDG_TypeOfWasteTreatmentEnum_LANDFIL,SDG_TypeOfWasteTreatmentEnum,TypeOfWasteTreatmentEnum
981,sdg_typeOfWasteTreatment,SDG_TypeOfWasteTreatmentEnum_LANDFILCTL,SDG_TypeOfWasteTreatmentEnum,TypeOfWasteTreatmentEnum
982,sdg_typeOfWasteTreatment,SDG_TypeOfWasteTreatmentEnum_OTHERWM,SDG_TypeOfWasteTreatmentEnum,TypeOfWasteTreatmentEnum


In [47]:
# Collapse the per-value table to one row per (dimension, enumeration) pair,
# using shorter column names, and save it as a workbook.
new_col_names = {
    'dimension': 'dimension',
    'dim_enumeration': 'enumeration',
    'dim_enumeration_name': 'enumeration_name',
}
Dimension_Enumeration = (
    enumerations[['dimension', 'dim_enumeration', 'dim_enumeration_name']]
    .drop_duplicates()
    .rename(columns=new_col_names)
    .reset_index(drop=True)
)
Dimension_Enumeration

write_to_excel(Dimension_Enumeration, '../data/output/Dimension_Enumeration.xlsx', 'Dimension_Enumeration', 90)

Unnamed: 0,dimension,enumeration,enumeration_name
0,age,SDG_AgeEnum,AgeEnum
1,disabilityStatus,SDG_DisabilityStatusEnum,DisabilityStatusEnum
2,educationalAttainment,SDG_EducationLevelEnum,EducationLevelEnum
3,gender,SDG_SexEnum,SexEnum
4,sdg_activity,SDG_ActivityEnum,ActivityEnum
5,sdg_counterpart,SDG_CounterpartEnum,CounterpartEnum
6,sdg_customBreakdown,SDG_CustomBreakdownEnum,CustomBreakdownEnum
7,sdg_deviationLevel,SDG_DeviationLevelEnum,DeviationLevelEnum
8,sdg_fiscalInterventionStage,SDG_FiscalInterventionStageEnum,FiscalInterventionStageEnum
9,sdg_foodWasteSector,SDG_FoodWasteSectorEnum,FoodWasteSectorEnum


In [57]:
# Export the members of every enumeration class to its own workbook.
# Create the target folder up front so the first write cannot fail on a missing directory.
os.makedirs('../data/output/Enumerations', exist_ok=True)

for e in Dimension_Enumeration['enumeration']:
    # Incoming `typeOf` links of the class, i.e. the nodes whose type is `e`
    code_list = call_api('v1/property/values', f'/in/{e}/typeOf')['values']
    write_to_excel(pd.DataFrame(code_list), f'../data/output/Enumerations/{e}.xlsx', 'Enumeration', 90)
        

http://api.datacommons.org/v1/property/values/in/SDG_AgeEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_DisabilityStatusEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_EducationLevelEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_SexEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_ActivityEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_CounterpartEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_CustomBreakdownEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_DeviationLevelEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_FiscalInterventionStageEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_FoodWasteSectorEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_FrequencyOfChlorophyllAConcentrationEnum/typeOf
http://api.datacommons.org/v1/property/values/in/SDG_GovernmentNameEnum/typeOf
http://api.datacommons.org/v1/property/values/in