In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

# import own functions
from fill_and_map import map_econ_sdg, map_inds_sdg, fill_econ, fill_inds_trans, sep_inds

In [4]:
# get data: every loader below reads from the same workbook
fpath = Path('data_raw/SDG-DEA_indicators_V5_20200708.xlsx')
inds, inds_econ = fill_inds_trans(fpath), fill_econ(fpath)
inds_map, econ_map = map_inds_sdg(fpath), map_econ_sdg(fpath)

# align the rows of the economic indicators with the index of the SDG indicators,
# since some countries were excluded due to missing data
inds_econ = inds_econ.loc[inds.index]

Removing countries with >=20.0% missing indicator values:
 AND    0.588235
ATG    0.392157
BHR    0.245098
BHS    0.313725
BRB    0.235294
BRN    0.235294
CPV    0.215686
CUB    0.245098
DMA    0.480392
ERI    0.313725
FSM    0.411765
GNB    0.235294
GNQ    0.294118
GRD    0.460784
KIR    0.450980
KNA    0.549020
LBY    0.294118
LCA    0.323529
LIE    0.735294
MCO    0.696078
MHL    0.529412
NRU    0.617647
PLW    0.578431
PRK    0.323529
SLB    0.205882
SMR    0.627451
SOM    0.254902
SSD    0.343137
STP    0.254902
SYC    0.284314
SYR    0.245098
TKM    0.254902
TLS    0.245098
TON    0.284314
TUV    0.598039
VCT    0.382353
WSM    0.245098
dtype: float64

Removing indicators with >=35.0% missing country values:
 sdg2_foodinsec_inv    0.487179
sdg14_fishstocks      0.403846
dtype: float64


In [5]:
# # remove indicators we want to exclude from analysis
# drop_inds = ['sdg17_govex', 'sdg10_fsi7']
# inds = inds.drop(columns=drop_inds)
# inds_map = {k:[ii for ii in v if ii not in drop_inds] for k, v in inds_map.items()}

In [6]:
# # mapping for 4 SDGs with 4 COFOGs where we have good data
# econ_map = {}
# econ_map[1] = ['Expenditure_on_social_protection']
# econ_map[2] = ['Expenditure_on_agriculture_fishing_forestry__hunting']
# econ_map[3] = ['Expenditure_on_health']
# econ_map[4] = ['Expenditure_on_education']

In [7]:
# updated SDG - COFOG expenditure mapping for all countries, manual mapping.
# Plain assignments replace the mapping of an SDG outright.
econ_map[4] = ['Expenditure_on_education']
econ_map[9] = ['Rest_of_COFOG_not_defined']

# Additions are appended only when they are not already mapped, so re-running
# this cell cannot duplicate an entry (a duplicated column name would be
# selected twice and therefore double-counted when the inputs are summed).
cofog_additions = {
    5: ['Rest_of_COFOG_not_defined'],
    10: ['Rest_of_COFOG_not_defined', 'Expenditure_on_education'],
}
for sdg_key, extra_cofogs in cofog_additions.items():
    missing_cofogs = [name for name in extra_cofogs if name not in econ_map[sdg_key]]
    econ_map[sdg_key].extend(missing_cofogs)

In [8]:
# # alternative economic indicators for countries with more complete data
# inds_econ_alt = pd.read_csv(Path('data_raw/econ_alt.csv'), index_col=[0])
# inds_econ_alt.insert(0, inds_econ.columns[0],inds_econ.iloc[:,0])
# inds_econ = inds_econ_alt

# # updated SDG - COFOG expenditure mapping for countries with more complete data
# econ_map[2].extend(['Expenditure_on_social_protection'])
# econ_map[4] = ['Expenditure_on_education']
# econ_map[5] = ['Expenditure_on_economic_affairs', 'Expenditure_on_social_protection']
# econ_map[6] = ['Expenditure_on_environment_protection', 'Expenditure_on_housing__community_amenities']
# econ_map[7] = ['Expenditure_on_economic_affairs']
# econ_map[8] = ['Expenditure_on_economic_affairs']
# econ_map[9] = ['Expenditure_on_economic_affairs']
# econ_map[10] = ['Expenditure_on_general_public_services', 'Expenditure_on_defense', 'Expenditure_on_public_order__safety', 'Expenditure_on_education', 'Expenditure_on_social_protection']
# econ_map[11] = ['Expenditure_on_public_order__safety', 'Expenditure_on_housing__community_amenities', 'Expenditure_on_recreation_culture__religion']
# econ_map[12] = ['Expenditure_on_economic_affairs', 'Expenditure_on_environment_protection']
# econ_map[13] = ['Expenditure_on_economic_affairs', 'Expenditure_on_environment_protection']
# econ_map[14] = ['Expenditure_on_economic_affairs', 'Expenditure_on_environment_protection']
# econ_map[15] = ['Expenditure_on_economic_affairs', 'Expenditure_on_environment_protection']
# econ_map[16] = ['Expenditure_on_general_public_services', 'Expenditure_on_defense', 'Expenditure_on_public_order__safety']
# econ_map[17] = ['Expenditure_on_general_public_services']

In [9]:
# shorten cofog names
def _third_token(name):
    """Return the third '_'-separated token of ``name``.

    For example 'Expenditure_on_health' becomes 'health'. Names with fewer than
    three tokens are returned unchanged, so running this cell a second time
    (when the names are already short) is a no-op instead of an IndexError.
    """
    tokens = name.split('_')
    return tokens[2] if len(tokens) > 2 else name

# in mapping...
for key in econ_map:
    econ_map[key] = [
        # the "rest" category is reduced to its lower-cased first token instead
        name.split('_')[0].lower() if name == 'Rest_of_COFOG_not_defined' else _third_token(name)
        for name in econ_map[key]
    ]

# ... and in econ data: the first column keeps its name, the last column is
# reduced to its lower-cased first token, everything in between to its third token
econ_cols = list(inds_econ.columns)
inds_econ.columns = [econ_cols[0],
                     *[_third_token(col) for col in econ_cols[1:-1]],
                     econ_cols[-1].split('_')[0].lower()]

In [10]:
# separate indicators which will be used as undesired outputs, or not be used at all
skip_inds, undes_inds, use_inds = sep_inds()
skip_set, undes_set = set(skip_inds), set(undes_inds)

all_sdgs = range(1,18)
gms_input_folder = Path('gams_data_input/')
# make sure the target folder exists so the writes below do not fail on a fresh checkout
gms_input_folder.mkdir(parents=True, exist_ok=True)

# define set for countries
set_c = ', '.join((inds_econ.index))

# placeholder column to use if no outputs or undesired outputs exist for an SDG;
# it is the same for every SDG, so it is built once instead of once per iteration
dm = pd.DataFrame([1]*len(inds_econ.index), index=inds_econ.index, columns=['dm'])

for sdg in all_sdgs:

    # define sets for inputs and outputs
    set_i = '_'.join((econ_map[sdg])) # handle sum of multiple expenses as input

    # De-duplicate while keeping the order given in inds_map. Going through
    # list(set(...)) would make the element/column order depend on string hashing,
    # which differs between kernel sessions and makes the written files irreproducible.
    sdg_inds = list(dict.fromkeys(inds_map[sdg]))
    des_out = [ind for ind in sdg_inds if ind not in undes_set and ind not in skip_set] # desired outputs
    # NOTE(review): skipped indicators are not removed here, so an indicator listed as
    # both "undesired" and "skip" is still written - confirm the two lists are disjoint.
    undes_out = [ind for ind in sdg_inds if ind in undes_set] # undesired outputs

    # placeholder set 'dm' if a set would otherwise be empty
    set_o = ', '.join(des_out) if des_out else 'dm'
    set_o_u = ', '.join(undes_out) if undes_out else 'dm'

    # write sets to .inc file
    with open(gms_input_folder / 'sets_sdg{0}.inc'.format(sdg), 'w') as text_file:
        set_string = 'set c / {0} /;\nset i / {1} /;\nset o / {2} /;\nset o_u / {3} /;'.format(set_c, set_i, set_o, set_o_u)
        text_file.write(set_string)

    # write inputs to .csv file: the mapped expenditure columns are summed per
    # country into a single input column named after the joined column names
    inds_econ[[*econ_map[sdg]]].sum(axis=1).to_csv(gms_input_folder / 'inputs_sdg{0}.csv'.format(sdg), header=[set_i])

    # write outputs to .csv file
    if des_out:
        inds.loc[inds_econ.index,des_out].to_csv(gms_input_folder / 'outputs_sdg{0}.csv'.format(sdg))
    else:
        dm.to_csv(gms_input_folder / 'outputs_sdg{0}.csv'.format(sdg))

    # write undesired outputs to .csv file
    if undes_out:
        inds.loc[inds_econ.index,undes_out].to_csv(gms_input_folder / 'undes_outputs_sdg{0}.csv'.format(sdg))
    else:
        dm.to_csv(gms_input_folder / 'undes_outputs_sdg{0}.csv'.format(sdg))