# Update metadata file

This notebook updates the metadata file:
- Get the new list of series

## Load necessary libraries

In [1]:
import csv
import json
import urllib3  # allows to access a URL with python
import math
import os
import io
import collections
import copy
import numpy as np
import pandas as pd
import re
from collections import OrderedDict
import xlsxwriter

# Display the value of every top-level expression in a cell, not only the last one.
# Source: https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


## Working directory

In [2]:
# The quoted '__file__' is a plain string (not the module attribute), so
# realpath() resolves it against the current working directory; the head of
# that path is therefore the folder the notebook is running from.
dir_path = os.path.split(os.path.realpath('__file__'))[0]
print(dir_path)

# Main directory, used as the prefix for the files read and written below.
wd_dir = '../'
print(f'Main dir: {wd_dir}')

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\unsd\notebooks
Main dir: ../


## Utilities

#### Convert string to camelCase

In [3]:
def camelCase(st):
    """
    Convert a string to camelCase.

    The string is title-cased, every non-alphanumeric character is dropped,
    and the first remaining character is lower-cased, e.g.
    ``'hello world'`` -> ``'helloWorld'``.

    Based on:
    https://stackoverflow.com/questions/8347048/camelcase-every-string-any-standard-library

    Parameters
    ----------
    st : str
        Text to convert.

    Returns
    -------
    str
        The camelCase version of ``st``; an empty string when ``st`` contains
        no alphanumeric characters.
    """
    output = ''.join(x for x in st.title() if x.isalnum())
    # Guard against '' / punctuation-only input, where output[0] would raise IndexError.
    if not output:
        return ''
    return output[0].lower() + output[1:]

## Read old `metadata.json`

In [4]:
# Load the previous metadata file. The context manager closes the file handle
# as soon as its content has been read (the original handle was never closed).
with open(wd_dir + 'metadata.json') as json_file:
    json_str = json_file.read()

metadata_old = json.loads(json_str)

# Show the first record as a quick check of the structure.
metadata_old[0]

{'goalCode': '1',
 'goalDesc': 'End poverty in all its forms everywhere',
 'targetCode': '1.1',
 'targetDesc': 'By 2030, eradicate extreme poverty for all people everywhere, currently measured as people living on less than $1.25 a day',
 'indicatorCode': '1.1.1',
 'indicatorDesc': 'Proportion of population below the international poverty line, by sex, age, employment status and geographical location (urban/rural)',
 'indicatorTier': '1',
 'seriesCode': 'SI_POV_DAY1',
 'seriesDesc': 'Proportion of population below international poverty line (%)',
 'seriesRelease': '2019.Q1.G.02',
 'TAGS': ['poverty line', 'poverty', 'standard of living', 'basic needs'],
 'hex': 'e5243b',
 'rgb': [229, 36, 59],
 'iconUrl': 'https://raw.githubusercontent.com/UNStats/FIS4SDGs/master/globalResources/sdgIcons/SDG01.png',
 'ColorScheme': ['FFDA99',
  'FAC590',
  'F5967F',
  'F1786E',
  'ED5C5E',
  'E5233D',
  'BF162F',
  '93071F'],
 'ColorSchemeCredits': "Prepared by Sarah Bell, Esri's Cartography Lab, 27 Jul

### Extract icons and color scheme for each of the 17 goals

In [5]:
# Build one record per goal holding its colour and icon attributes, taken from
# the first entry of `metadata_old` that carries that goal code.
sdg_colors = []
goalCode = None

# Goal codes already collected. Tracking them in a set (instead of comparing
# only with the previous record) keeps one entry per goal even when
# `metadata_old` is not sorted by goal.
seen_goal_codes = set()

for m in metadata_old:

    goalCode = str(m['goalCode'])

    if goalCode in seen_goal_codes:
        continue
    seen_goal_codes.add(goalCode)

    # A dict literal avoids binding a variable called `dict`, which would
    # shadow the builtin for the rest of the notebook session.
    sdg_colors.append({
        'goalCode': goalCode,
        'hex': m['hex'],
        'rgb': m['rgb'],
        'iconUrl': m['iconUrl'],
        'ColorScheme': m['ColorScheme'],
        'ColorSchemeCredits': m['ColorSchemeCredits'],
    })

# Show the first record as a quick check.
sdg_colors[0]

# Save the per-goal colour information as a JSON file in the current directory.
with open('sdg_colors.json', 'w') as file:
    json.dump(sdg_colors, file)

    
    
    
    
    

{'goalCode': '1',
 'hex': 'e5243b',
 'rgb': [229, 36, 59],
 'iconUrl': 'https://raw.githubusercontent.com/UNStats/FIS4SDGs/master/globalResources/sdgIcons/SDG01.png',
 'ColorScheme': ['FFDA99',
  'FAC590',
  'F5967F',
  'F1786E',
  'ED5C5E',
  'E5233D',
  'BF162F',
  '93071F'],
 'ColorSchemeCredits': "Prepared by Sarah Bell, Esri's Cartography Lab, 27 July 2017"}

## Read `file_metadata.json`

This JSON file was produced with the script `FIS4SDG - 02 - fileMetadata.ipynb` and contains a description of the latest dataset files extracted from the SDG API.

In [6]:
# Load the description of the latest dataset files. The context manager closes
# the file handle as soon as its content has been read (the original handle
# was never closed).
with open(wd_dir + 'data/file_metadata.json') as json_file:
    json_str = json_file.read()

file_metadata = json.loads(json_str)

# List the fields available for each series.
file_metadata[0].keys()

dict_keys(['fileName', 'goalCode', 'goalDesc', 'targetCode', 'targetDesc', 'indicatorCode', 'indicatorDesc', 'indicatorTier', 'seriesCode', 'seriesDesc', 'seriesRelease', 'slices', 'time_coverage', 'geo_coverage_groups', 'geo_coverage_regions', 'geo_coverage_countries'])

### Create a new metadata template for the current data release
Add the tags from the old metadata template, as well as the color information, to the metadata extracted from the data files.

In [8]:
# Index the old metadata and the goal colours once, so that each series needs
# a single lookup instead of a full scan of both lists.
# setdefault() keeps the FIRST entry seen for a key, which matches the
# behaviour of scanning the list and stopping at the first match.
old_by_series = {}
for m in metadata_old:
    old_by_series.setdefault((m['seriesCode'], m['indicatorCode']), m)

colors_by_goal = {}
for g in sdg_colors:
    colors_by_goal.setdefault(g['goalCode'], g)

# Field groups, listed in the order in which they appear as template columns.
series_fields = [
    'goalCode', 'goalDesc',
    'targetCode', 'targetDesc',
    'indicatorCode', 'indicatorDesc', 'indicatorTier',
    'seriesCode', 'seriesDesc', 'seriesRelease',
]
color_fields = ['hex', 'rgb', 'iconUrl', 'ColorScheme', 'ColorSchemeCredits']
coverage_fields = [
    'slices', 'time_coverage',
    'geo_coverage_groups', 'geo_coverage_regions', 'geo_coverage_countries',
]

metadata_new = []
for s in file_metadata:

    # Named `record` rather than `dict` so the builtin is not shadowed.
    record = OrderedDict()

    # Descriptive fields come straight from the current data release.
    for field in series_fields:
        record[field] = s[field]

    # Reuse the tags of the same series/indicator pair from the old metadata;
    # series that are new in this release start with an empty tag list.
    old_entry = old_by_series.get((s['seriesCode'], s['indicatorCode']))
    record['TAGS'] = old_entry['TAGS'] if old_entry is not None else []

    # Colour and icon information depends only on the goal. When the goal is
    # not present in `sdg_colors`, these keys are left out of the record.
    goal_colors = colors_by_goal.get(s['goalCode'])
    if goal_colors is not None:
        for field in color_fields:
            record[field] = goal_colors[field]

    for field in coverage_fields:
        record[field] = s[field]

    metadata_new.append(record)

#series_metadata[0]

# Export the template as an Excel workbook in the main directory.
sm = pd.DataFrame.from_records(metadata_new)

sm.to_excel(wd_dir + 'metadata_template.xlsx', engine ='xlsxwriter', index=False)

    

In [9]:
# Display the metadata template DataFrame as a visual check of its content.
sm

Unnamed: 0,goalCode,goalDesc,targetCode,targetDesc,indicatorCode,indicatorDesc,indicatorTier,seriesCode,seriesDesc,seriesRelease,...,hex,rgb,iconUrl,ColorScheme,ColorSchemeCredits,slices,time_coverage,geo_coverage_groups,geo_coverage_regions,geo_coverage_countries
0,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[1990, 1991, 1992, 1993, 1994, 1995, 1996, 199...","[{'geoAreaCode': '910', 'geoAreaName': 'High i...","[{'geoAreaCode': '1', 'geoAreaName': 'World'}]","[{'geoAreaCode': '8', 'geoAreaName': 'Albania'..."
1,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_EMP1,Employed population below international povert...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'ageCode': '15+', 'reportingTypeCode': 'G', ...","[2000, 2001, 2002, 2003, 2004, 2005, 2006, 200...","[{'geoAreaCode': '199', 'geoAreaName': 'Least ...","[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '8', 'geoAreaName': 'Albania'..."
2,1,End poverty in all its forms everywhere,1.2,"By 2030, reduce at least by half the proportio...",1.2.1,Proportion of population living below the nati...,1,SI_POV_NAHC,Proportion of population living below the nati...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'locationCode': 'ALLAREA', 'reportingTypeCod...","[1985, 1987, 1989, 1990, 1991, 1992, 1993, 199...",[],[],"[{'geoAreaCode': '4', 'geoAreaName': 'Afghanis..."
3,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_BENFTS,[ILO] Proportion of population covered by at l...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2016, 2017, 2018]",[],"[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '31', 'geoAreaName': 'Azerbai..."
4,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_CHLD,[ILO] Proportion of children/households receiv...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2016, 2017, 2018]",[],"[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '32', 'geoAreaName': 'Argenti..."
5,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_DISAB,[ILO] Proportion of population with severe dis...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2013, 2014, 2016, 2017, 2018]",[],"[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '12', 'geoAreaName': 'Algeria..."
6,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_LMKT,[World Bank] Proportion of population covered ...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2002, 2004, 2005, 2006, 2007, 2008, 2009, 201...",[],[],"[{'geoAreaCode': '8', 'geoAreaName': 'Albania'..."
7,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_LMKTPQ,[World Bank] Poorest quintile covered by labou...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2002, 2004, 2005, 2006, 2007, 2008, 2009, 201...",[],[],"[{'geoAreaCode': '8', 'geoAreaName': 'Albania'..."
8,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_MATNL,[ILO] Proportion of mothers with newborns rece...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[2016, 2017, 2018]",[],"[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '12', 'geoAreaName': 'Algeria..."
9,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_PENSN,[ILO] Proportion of population above statutory...,2019.Q1.G.02,...,e5243b,"[229, 36, 59]",https://raw.githubusercontent.com/UNStats/FIS4...,"[FFDA99, FAC590, F5967F, F1786E, ED5C5E, E5233...","Prepared by Sarah Bell, Esri's Cartography Lab...","[{'reportingTypeCode': 'G', 'seriesCode': 'SI_...","[1996, 2000, 2001, 2002, 2003, 2004, 2005, 200...",[],"[{'geoAreaCode': '1', 'geoAreaName': 'World'},...","[{'geoAreaCode': '4', 'geoAreaName': 'Afghanis..."
