# Create Metadata file

#### Import required libraries

In [21]:
import json
import os
import urllib3  # allows to access a URL with python
import pandas as pd
import re
import csv

#### Specify the working directory

In [22]:
# Directory the notebook is being run from. A notebook kernel does not define
# `__file__`, so resolve the current working directory directly instead of
# taking the parent of a (non-existent) file literally named '__file__'.
dir_path = os.path.realpath(os.getcwd())
print(dir_path)

# Base directory for all inputs/outputs, two levels above the notebook
# directory. Keep the trailing slash: later cells build file paths by plain
# string concatenation (`wd_dir + 'globalResources/...'`).
wd_dir = r'../../'
print('data inputs dir: ' + wd_dir)

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\updateGlobalResources
data inputs dir: ../../


#### Print multiple outputs when running the code block within a notebook cell

In [23]:
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
# By default a cell only displays the value of its last expression. Setting
# `ast_node_interactivity` to "all" makes every bare expression statement in a
# cell display its value; later cells rely on this to show several results
# (e.g. `head()`, `shape` and a sample record) from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#### Disable insecure request warnings when using `urllib3`.

In [24]:
# Silence urllib3's InsecureRequestWarning, which is emitted for HTTPS
# requests made without certificate verification.
# NOTE(review): no cell visible in this notebook issues a urllib3 request, so
# this may be a leftover from another notebook — confirm before removing.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#### Read `sdgTree.json` 

In [25]:
# Load the SDG tree: a list of goals, each holding targets, indicators and
# (optionally) series, as traversed by the later cells.
# JSON text is UTF-8 by specification; pass the encoding explicitly so that
# decoding does not depend on the platform default (e.g. cp1252 on Windows).
with open(wd_dir + 'globalResources/sdgTree.json', encoding='utf-8') as json_file:
    sdgTree = json.load(json_file)
    



#### Read `sdgColors.json`

In [26]:
# Load the per-goal colour definitions (hex, rgb and an 8-step colour scheme
# per goal, see the sample record displayed below).
# JSON text is UTF-8 by specification; pass the encoding explicitly so that
# decoding does not depend on the platform default (e.g. cp1252 on Windows).
with open(wd_dir + 'globalResources/sdgColors.json', encoding='utf-8') as json_file:
    sdgColors = json.load(json_file)

# Show the first entry as a quick sanity check of the file structure.
sdgColors['ColorScheme'][0]

{'GoalCode': 1,
 'hex': 'e5243b',
 'rgb': [229, 36, 59],
 'ColorScheme': ['FFDA99',
  'FAC590',
  'F5967F',
  'F1786E',
  'ED5C5E',
  'E5233D',
  'BF162F',
  '93071F']}

#### Read the tags template (`tagsTemplate2019.Q2.G.01.txt`)

In [28]:
# Load the tab-separated tags template for release 2019.Q2.G.01.
tags_df = pd.read_csv(
    wd_dir + 'globalResources/tagsTemplate2019.Q2.G.01.txt',
    sep='\t',
)

# Preview the raw table (first rows and dimensions) before trimming it.
tags_df.head(3)
tags_df.shape

# Remove the columns that are not carried into the tag records, then convert
# every remaining row into a plain dict keyed by column name.
tags_df = tags_df.drop(['goalCode', 'targetCode', 'seriesDesc'], axis=1)
tags = tags_df.to_dict(orient='records')

# Sample record: at this point 'TAGS' is still the serialized string.
tags[0]

Unnamed: 0,goalCode,goalDesc,targetCode,targetDesc,indicatorCode,indicatorReference,indicatorDesc,seriesCode,seriesDesc,release,TAGS,Column1
0,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",C010101,1.1.1,Proportion of population below the internatio...,SI_POV_DAY1,Proportion of population below international p...,2019.Q2.G.01,"['poverty line', 'poverty', 'standard of livin...",
1,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",C010101,1.1.1,Proportion of population below the internatio...,SI_POV_EMP1,Employed population below international povert...,2019.Q2.G.01,"['poverty line', 'poverty', 'standard of livin...",
2,1,End poverty in all its forms everywhere,1.2,"By 2030, reduce at least by half the proportio...",C010201,1.2.1,Proportion of population living below the nat...,SI_POV_NAHC,Proportion of population living below the nati...,2019.Q2.G.01,"['poverty line', 'poverty', 'standard of livin...",


(447, 12)

{'goalDesc': 'End poverty in all its forms everywhere',
 'targetDesc': 'By 2030, eradicate extreme poverty for all people everywhere, currently measured as people living on less than $1.25 a day',
 'indicatorCode': 'C010101',
 'indicatorReference': '1.1.1',
 'indicatorDesc': ' Proportion of population below the international poverty line, by sex, age, employment status and geographical location (urban/rural)',
 'seriesCode': 'SI_POV_DAY1',
 'release': '2019.Q2.G.01',
 'TAGS': "['poverty line', 'poverty', 'standard of living', 'basic needs']",
 'Column1': nan}

In [29]:
def _parse_tags(raw):
    """Convert a serialized ``TAGS`` cell into a list of tag strings.

    The template stores tags as the text of a Python list, e.g.
    ``"['poverty line', 'poverty']"``. Brackets and single quotes are removed
    and the remainder is split on commas.

    Parameters
    ----------
    raw : str, list or other
        Value of the ``TAGS`` field of one record.

    Returns
    -------
    list
        * ``raw`` itself when it is already a list, so re-running the cell is
          harmless;
        * an empty list when ``raw`` is not a string (e.g. the NaN pandas
          produces for an empty cell) or contains no tags;
        * otherwise the individual tags, stripped of surrounding whitespace.
    """
    if isinstance(raw, list):
        return raw
    if not isinstance(raw, str):
        return []
    # Raw string: '\[' and '\]' are not valid escapes in a normal literal.
    without_delimiters = re.sub(r"[\[\]']", '', raw)
    return [tag.strip() for tag in without_delimiters.split(',') if tag.strip()]


# Replace the serialized TAGS string of every record with a real list.
for tag_record in tags:
    tag_record['TAGS'] = _parse_tags(tag_record['TAGS'])

tags[5]

{'goalDesc': 'End poverty in all its forms everywhere',
 'targetDesc': 'Implement nationally appropriate social protection systems and measures for all, including floors, and by 2030 achieve substantial coverage of the poor and the vulnerable',
 'indicatorCode': 'C010301',
 'indicatorReference': '1.3.1',
 'indicatorDesc': ' Proportion of population covered by social protection floors/systems, by sex, distinguishing children, unemployed persons, older persons, persons with disabilities, pregnant women, newborns, work-injury victims and the poor and the vulnerable',
 'seriesCode': 'SI_COV_SOCAST',
 'release': '2019.Q2.G.01',
 'TAGS': ['poverty', 'standard of living', 'basic needs', 'social welfare'],
 'Column1': nan}

In [34]:
#for t in tags:
#    display('indicator: ' + t['indicatorReference'] + ', series: ' + t['seriesCode'] + ', tags: ' + str(t['TAGS']))

## Join SDG Tree and Tags

In [35]:
# Build both lookups once instead of rescanning the full lists for every goal
# and every series. When a key occurs more than once the last entry wins,
# which matches the result of scanning the list from start to end.
# Colour entries are keyed by the goal code as a string, because goal codes
# in the SDG tree are strings while `GoalCode` in the colour file is numeric.
colors_by_goal = {str(sc['GoalCode']): sc for sc in sdgColors['ColorScheme']}
# Tags are identified by the (indicator reference, series code) pair.
tags_by_series = {
    (tg['indicatorReference'], tg['seriesCode']): tg['TAGS'] for tg in tags
}

for g in sdgTree:
    goal = g['code']

    # Goal icon: file names use the zero-padded, two-digit goal code.
    thumbnail = 'https://raw.githubusercontent.com/UNStats/FIS4SDGs/master/sdgIcons/sdgIcons_thumbnails/SDG'+goal.zfill(2)+'.png'
    g['thumbnail'] = thumbnail

    # Attach colour information when the colour file has an entry for the goal.
    sc = colors_by_goal.get(goal)
    if sc is not None:
        g['hex'] = sc['hex']
        g['rgb'] = sc['rgb']
        g['colorScheme'] = sc['ColorScheme']

    for t in g['targets']:
        for i in t['indicators']:
            # Not every indicator has series attached.
            for s in i.get('series', []):
                series_key = (i['reference'], s['code'])
                # Series without a tags record are left without a 'tags' key.
                if series_key in tags_by_series:
                    s['tags'] = tags_by_series[series_key]


#### Save metadata as a json file

In [36]:
# Serialize the enriched SDG tree with 4-space indentation and write it out
# as the metadata file.
with open(wd_dir + 'globalResources/metadata.json', 'w') as f:
    f.write(json.dumps(sdgTree, indent=4))

### Diagnostic: export a tags template listing every series (with empty tags where none are assigned)

In [None]:
# Re-read the metadata file and flatten it into one record per series, so
# that series without tags (empty TAGS list) can be spotted and filled in.
with open(wd_dir + 'globalResources/metadata.json', encoding='utf-8') as json_file:
    metadata = json.load(json_file)

tagsTemplate = []
for g in metadata:
    goalCode = g['code']
    goalDesc = g['descEN']
    for t in g['targets']:
        targetCode = t['code']
        targetDesc = t['descEN']
        for i in t['indicators']:
            indicatorCode = i['code']
            indicatorReference = i['reference']
            indicatorDesc = i['descEN']
            # Not every indicator has series attached.
            for s in i.get('series', []):
                seriesCode = s['code']
                seriesDesc = s['description']
                # `release` stays a notebook-level name on purpose: the cell
                # that writes the template uses it to build the file name.
                release = s['release']
                # Series that received no tags get an empty list.
                TAGS = s.get('tags', [])

                # Descriptions are kept as `str`. Encoding them to bytes here
                # would make the csv writer (which stringifies non-string
                # values with str()) emit the literal text "b'...'" instead
                # of the description. The text encoding belongs to the file
                # that is opened for writing, not to the individual values.
                # Key order matters: it defines the column order of the
                # exported template.
                tagsTemplate.append({
                    'goalCode': goalCode,
                    'goalDesc': goalDesc,
                    'targetCode': targetCode,
                    'targetDesc': targetDesc,
                    'indicatorCode': indicatorCode,
                    'indicatorReference': indicatorReference,
                    'indicatorDesc': indicatorDesc,
                    'seriesCode': seriesCode,
                    'seriesDesc': seriesDesc,
                    'release': release,
                    'TAGS': TAGS,
                })



In [None]:
# The output file is named after `release`, i.e. the release of the last
# series visited by the loop that builds `tagsTemplate`.
filename = wd_dir + 'globalResources/' + 'tagsTemplate' + release + '.txt'

# newline='' lets the csv module control line endings itself.
# The file is written as UTF-8 explicitly: pandas reads tag templates as
# UTF-8 by default, whereas open() would otherwise use the platform default
# encoding (e.g. cp1252 on Windows).
# No explicit close is needed: the `with` block closes the file on exit.
with open(filename, "w", newline='', encoding='utf-8') as f:
    # Column order follows the key order of the first record.
    writer = csv.DictWriter(f, fieldnames=list(tagsTemplate[0].keys()), delimiter='\t')
    writer.writeheader()
    writer.writerows(tagsTemplate)

In [None]:
# NOTE(review): redundant. `f` is the handle opened by the `with` statement
# in the previous cell, which already closes it on exit; closing a file that
# is already closed has no effect. This cell only works if that cell has been
# run in the same kernel session. Candidate for removal.
f.close()