# Create Metadata file

#### Import required libraries

In [17]:
import ast
import json
import os
import re

import pandas as pd
import urllib3  # allows to access a URL with python

#### Specify the working directory

In [18]:
# '__file__' is deliberately a quoted string: notebooks do not define __file__,
# so the relative name is resolved against the current working directory and
# its parent folder is therefore the directory the notebook is run from.
dir_path = os.path.split(os.path.realpath('__file__'))[0]

# Root folder (relative to the notebook) that holds the input and output files.
wd_dir = '../../'

print(dir_path)
print('data inputs dir: ' + wd_dir)

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdPublishing
data inputs dir: ../../


#### Print multiple outputs when running the code block within a notebook cell

In [19]:
# Make every top-level expression statement in a cell display its value,
# instead of only the last one (IPython's default is "last_expr").
# Cells below that end with several bare expressions rely on this setting.
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#### Disable insecure request warnings when using `urllib3`.

In [20]:
# Silence urllib3's InsecureRequestWarning (raised for HTTPS requests made
# without certificate verification).
# NOTE(review): no HTTP request is made in the cells visible here - confirm
# this suppression is still needed before keeping it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#### Read `sdgTree.json` 

In [21]:
# Load the SDG tree (iterated later as goals -> targets -> indicators -> series).
# JSON is UTF-8 by specification, so the encoding is passed explicitly; without
# it open() uses the platform's locale encoding (e.g. cp1252 on Windows) and
# non-ASCII text would be mis-decoded or raise UnicodeDecodeError.
with open(wd_dir + 'globalResources/sdgTree.json', encoding='utf-8') as json_file:
    sdgTree = json.load(json_file)
    



#### Read `sdgColors.json`

In [22]:
# Load the per-goal colour definitions. The encoding is given explicitly so the
# file is decoded as UTF-8 regardless of the platform's locale default.
with open(wd_dir + 'globalResources/sdgColors.json', encoding='utf-8') as json_file:
    sdgColors = json.load(json_file)

# Preview the first entry of the 'ColorScheme' list.
sdgColors['ColorScheme'][0]

{'GoalCode': 1,
 'hex': 'e5243b',
 'rgb': [229, 36, 59],
 'ColorScheme': ['FFDA99',
  'FAC590',
  'F5967F',
  'F1786E',
  'ED5C5E',
  'E5233D',
  'BF162F',
  '93071F']}

#### Read `tagsTemplate.txt`

In [23]:
# Load the tab-separated tags template and preview it.
tags_path = wd_dir + 'globalResources/tagsTemplate.txt'
tags_df = pd.read_csv(tags_path, sep='\t')
tags_df.head(3)
tags_df.shape

# Drop the columns that are not needed further on, then turn each remaining
# row into a plain dict (one record per row).
unused_columns = ['goalCode', 'targetCode', 'seriesDesc']
tags_df = tags_df.drop(columns=unused_columns)
tags = tags_df.to_dict(orient='records')
tags[0]

Unnamed: 0,goalCode,targetCode,indicatorCode,seriesCode,seriesDesc,TAGS
0,1,1.1,1.1.1,SI_POV_DAY1,Proportion of population below international p...,"['poverty line', 'poverty', 'standard of livin..."
1,1,1.1,1.1.1,SI_POV_EMP1,Employed population below international povert...,"['poverty line', 'poverty', 'standard of livin..."
2,1,1.2,1.2.1,SI_POV_NAHC,Proportion of population living below the nati...,"['poverty line', 'poverty', 'standard of livin..."


(429, 6)

{'indicatorCode': '1.1.1',
 'seriesCode': 'SI_POV_DAY1',
 'TAGS': "['poverty line', 'poverty', 'standard of living', 'basic needs']"}

In [24]:
def _parse_tag_list(raw):
    """Turn the textual TAGS value of one record into a list of tag strings.

    The template stores each tag list as the text of a Python list literal,
    e.g. "['poverty line', 'poverty']".

    Parameters
    ----------
    raw : str or list
        The value found under 'TAGS'.

    Returns
    -------
    list of str
        The individual tags. A value that is already a list is returned
        unchanged, which makes the cell safe to re-run.
    """
    if isinstance(raw, list):
        # Already parsed by a previous run of this cell.
        return raw
    if not isinstance(raw, str):
        # Missing cell (pandas reads blanks as NaN): treat as "no tags".
        return []
    try:
        # Proper literal parsing keeps apostrophes and commas inside a tag
        # intact and maps "[]" to an empty list.
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(tag) for tag in parsed]
    # Not a list literal: fall back to the legacy behaviour of stripping
    # brackets/quotes and splitting on ', '.
    return re.sub(r"[\[\]']", '', raw).split(', ')


for tag_record in tags:
    tag_record['TAGS'] = _parse_tag_list(tag_record['TAGS'])

tags[5]

{'indicatorCode': '1.3.1',
 'seriesCode': 'SI_COV_DISAB',
 'TAGS': ['poverty',
  'standard of living',
  'basic needs',
  'social welfare',
  'disability benefits',
  'persons with disabilities']}

## Join SDG Tree and Tags

In [28]:
# Build lookup tables once instead of re-scanning the colour and tag lists for
# every goal / series. When a key occurs more than once the last entry wins,
# which matches the result of the previous exhaustive scans.

# Colour entries keyed by goal code as a string (the tree stores codes as strings).
colors_by_goal = {str(sc['GoalCode']): sc for sc in sdgColors['ColorScheme']}

# Tag values keyed by (indicator reference, series code).
tags_by_series = {(tg['indicatorCode'], tg['seriesCode']): tg['TAGS'] for tg in tags}

for goal in sdgTree:
    goal_code = goal['code']

    # Thumbnail file names use a two-digit, zero-padded goal number.
    goal['thumbnail'] = (
        'https://raw.githubusercontent.com/UNStats/FIS4SDGs/master/sdgIcons/sdgIcons_thumbnails/SDG'
        + goal_code.zfill(2)
        + '.png'
    )

    # Goals without a matching colour entry are left without colour keys.
    scheme = colors_by_goal.get(goal_code)
    if scheme is not None:
        goal['hex'] = scheme['hex']
        goal['rgb'] = scheme['rgb']
        goal['colorScheme'] = scheme['ColorScheme']

    for target in goal['targets']:
        for indicator in target['indicators']:
            # Indicators may have no 'series' key at all.
            for series in indicator.get('series', []):
                key = (indicator['reference'], series['code'])
                # Series without a tag record get no 'tags' key.
                if key in tags_by_series:
                    series['tags'] = tags_by_series[key]


#### Save metadata as a json file

In [29]:
# Write the enriched SDG tree to publishing/metadata.json (under wd_dir) as
# JSON indented by four spaces.
metadata_path = wd_dir + 'publishing/metadata.json'
with open(metadata_path, 'w') as metadata_file:
    json.dump(sdgTree, metadata_file, indent=4)