# Data availability analysis by country

## This is the plan:
1. Read metadata (catalogue of all series available)
2. Create a consolidated file with all the data 
3. Split the consolidated file by country -- this fulfils James' request
4. Compute availability statistics by country series
    - Number of "slices / disaggregations" available
    - Latest year available
    - Number of data points for the most aggregated slice
    - Average no. of data points for the rest of the slices

## Load necessary libraries

In [1]:
import os
import sys
import json
import urllib3  # allows to access a URL with python
import pandas as pd
import math
import os 
import hashlib
import csv
import copy
import unicodedata

In [2]:
# Show the absolute path of the current working directory as the cell output.
os.path.abspath(os.curdir)

'C:\\Users\\L.GonzalezMorales\\Documents\\GitHub\\FIS4SDGs\\notebooks\\unsdDataAvailability'

## Setup interactive shell

In [3]:
# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
# Ask the notebook to display the value of every top-level expression in a
# cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Set path to data and metadata directories in the local branch: 

In [4]:
# Identifier of the data release; it names the data sub-directory used below.
release = '2019.Q2.G.01'

# NOTE: '__file__' is a string literal here, not the __file__ variable, so
# this resolves a path called "__file__" against the current working
# directory and takes its parent -- effectively the working directory.
dir_path = os.path.dirname(os.path.realpath('__file__'))
print(dir_path)

# Relative locations of the release data and of the shared metadata files.
data_dir = r'../../data/unsd/' + release + '/'
metadata_dir = r"../../globalResources/"
metadata_dir


print('data inputs dir: ' + data_dir)

C:\Users\L.GonzalezMorales\Documents\GitHub\FIS4SDGs\notebooks\unsdDataAvailability


'../../globalResources/'

data inputs dir: ../../data/unsd/2019.Q2.G.01/


## Utilities

#### Convert string to camelCase

In [5]:
def camelCase(st, toAscii = True, startLowercase = True):
    """
    Convert a string to camelCase / PascalCase, keeping only alphanumerics.

    Adapted from
    https://stackoverflow.com/questions/8347048/camelcase-every-string-any-standard-library

    Parameters
    ----------
    st : str
        Text to convert. It is title-cased first, then every character that
        is not alphanumeric (spaces, punctuation, ...) is dropped.
    toAscii : bool
        If True, accented characters are decomposed (NFD) and everything
        that cannot be encoded as ASCII is discarded.
    startLowercase : bool
        If True, the first remaining character is lower-cased (camelCase);
        otherwise the title-cased first letter is kept (PascalCase).

    Returns
    -------
    str
        The converted string. It is empty when `st` contains no
        alphanumeric characters.
    """
    output = ''.join(x for x in st.title() if x.isalnum())

    # Only touch the first character when there is one: an empty or
    # punctuation-only input would otherwise raise IndexError on output[0].
    if startLowercase and output:
        output = output[0].lower() + output[1:]

    if toAscii:
        output = unicodedata.normalize('NFD', output).encode('ascii', 'ignore').decode('utf-8')

    return output

### Get data from a json file

In [6]:

def get_json_data(file, print_first_element = True):
    """
    Load a JSON file and return its parsed content.

    Parameters
    ----------
    file : str
        Path of the JSON file to read.
    print_first_element : bool
        If true, print element 0 of the parsed data as an example.

    Returns
    -------
    The parsed JSON data, or None when the file could not be read or
    parsed, or when the first element could not be printed. In those cases
    the exception type is printed instead of being raised.
    """
    try:
        # The context manager closes the file even when parsing fails.
        with open(file) as json_file:
            json_data = json.load(json_file)

        if print_first_element:
            print("\n----This is an example of a series_metadata element----")
            print(json_data[0])
        return json_data

    # Deliberately best-effort, but `Exception` (unlike a bare `except`)
    # still lets KeyboardInterrupt and SystemExit propagate.
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        return None
    
    

#### Print the tree structure of nested dictionaries

In [7]:
def jsonTree(d, depth=0):
    """
    Print the key structure of a nested dictionary, one key per line.

    Each key is prefixed with `depth` dashes. When a value is a non-empty
    list whose first element is a dictionary, that first element is printed
    recursively with two more dashes. Values that are dictionaries
    themselves (not wrapped in a list) are not descended into.

    Parameters
    ----------
    d : dict
        Dictionary to describe; keys must be strings.
    depth : int
        Number of dashes printed before each key of `d`.
    """
    for x, v in d.items():
        print('-'*depth + x)
        # `v and ...` skips empty lists, for which v[0] would raise IndexError.
        if isinstance(v, list) and v and isinstance(v[0], dict):
            jsonTree(v[0], depth=depth+2)

#### Compute a hash of a dictionary

In [8]:
def dict_hash(d):
    """
    Return the hexadecimal MD5 digest of a dictionary.

    The digest covers, for every item in iteration order, the UTF-8 bytes
    of the key followed by the UTF-8 bytes of `str(value)`. Keys must be
    strings. The result depends on the order in which items were inserted.
    """
    payload = b''.join(
        key.encode('utf-8') + str(value).encode('utf-8')
        for key, value in d.items()
    )
    return hashlib.md5(payload).hexdigest()


#### Get unique dictionaries in a list

In [9]:
def unique_dicts(dictionary_list):
    """
    Return the distinct dictionaries of a list, as judged by `dict_hash`.

    Dictionaries with the same hash collapse into one entry: the entry keeps
    the position of the first occurrence and holds the last dictionary seen
    with that hash.
    """
    by_hash = {dict_hash(entry): entry for entry in dictionary_list}
    return list(by_hash.values())


#### Extract a subset of key-value pairs from each dictionary in a list

In [10]:
def subdict_list(dict_list, keys_list, exclude = False):
    """
    Build a list of reduced copies of the dictionaries in `dict_list`.

    With `exclude` false (the default) each copy holds exactly the keys in
    `keys_list`; a key missing from a dictionary raises KeyError. With
    `exclude` true each copy holds every key except those in `keys_list`.
    """
    if exclude:
        return [
            {key: value for key, value in entry.items() if key not in keys_list}
            for entry in dict_list
        ]

    return [{key: entry[key] for key in keys_list} for entry in dict_list]




#### Select the dicts in a list whose value for a given key matches

In [11]:
def select_dict(dict_list, k, v):
    """
    Return the dictionaries of `dict_list` whose value under key `k` equals `v`.

    The original order is kept. Every dictionary must contain `k`;
    otherwise KeyError is raised.
    """
    return [entry for entry in dict_list if entry[k] == v]

### List of countries

In [12]:
def countryListXY(file):
    """
    Read a tab-delimited, latin-1 encoded text file into a DataFrame.

    Every row becomes one record keyed by the header line, and all values
    are read as text. The 'M49' column, which must be present, is cast to
    string explicitly.
    """
    with open(file, newline='', encoding='latin-1') as handle:
        reader = csv.DictReader(handle, delimiter='\t')
        records = [dict(row) for row in reader]

    table = pd.DataFrame(records)
    return table.astype({'M49': 'str'})

In [13]:
# Load the reference-area catalogue from the metadata directory and preview
# its first ten rows.
countries_df = countryListXY(metadata_dir + 'refAreas.txt')
countries_df.head(10)

Unnamed: 0,Country_Profile,ISO3,M49,UN_Member,X,Y,areaName
0,1,AFG,4,1,66.02688198,33.83160199,Afghanistan
1,1,ALB,8,1,20.06660928,41.13897007,Albania
2,1,ATA,10,0,21.47585697,-80.40897662,Antarctica
3,1,DZA,12,1,2.678164227,28.15940032,Algeria
4,1,ASM,16,0,-170.7187269,-14.30587306,American Samoa
5,1,AND,20,1,1.576257417,42.54548611,Andorra
6,1,AGO,24,1,17.57817062,-12.33724746,Angola
7,1,ATG,28,1,-61.7999755,17.07761471,Antigua and Barbuda
8,1,AZE,31,1,50.01064725,40.39229544,Azerbaijan
9,1,ARG,32,1,-65.14563274,-35.19446255,Argentina


# 1. Read metadata

## Get metadata file

In [14]:
# Load the series metadata (without printing the example element), print the
# key structure of its first element and show that element's 'code'.
series_metadata = get_json_data(metadata_dir + 'metadata.json', False)
jsonTree(series_metadata[0])
series_metadata[0]['code']

code
labelEN
descEN
labelES
descES
labelFR
descFR
targets
--code
--labelEN
--descEN
--labelES
--descES
--labelFR
--descFR
--indicators
----code
----reference
----tier
----custodianAgencies
----labelEN
----descEN
----labelES
----descES
----labelFR
----descFR
----series
------code
------description
------release
------tags
thumbnail
hex
rgb
colorScheme


'1'

# 2. Create a consolidated file with all the data

### Utilities

#### Re-arrange columns
- Keep all dimensions together
- Keep all value columns together
- Push `value_latest_year` and `latest_year` to the end

In [23]:
def rearrange(bigtable):
    """
    Return a copy of `bigtable` with columns regrouped and rows sorted.

    Column order of the result:
      1. the fixed identification columns (goal ... Y),
      2. every remaining column, treated as a dimension, alphabetically,
      3. the fixed attribute columns (unitsCode ... nature),
      4. the yearly 'value_*' columns, alphabetically,
      5. 'value_latest_year' and 'latest_year'.

    Rows are sorted by goal, target, indicator, seriesCode, geoAreaName and
    then by the dimension columns. All fixed columns must exist in
    `bigtable`; otherwise the column selection raises KeyError.
    """
    leading = ['goal', 'target', 'indicator', 'seriesCode', 'seriesDesc', 'release',
               'geoAreaCode', 'geoAreaName', 'parentCode', 'parentName', 'type',
               'ISO3', 'UN_Member', 'X', 'Y']

    attributes = ['unitsCode', 'unitsDesc',
                  'unitmultiplierCode', 'unitmultiplierDesc', 'reportingTypeCode',
                  'reportingTypeDesc', 'valueDetails', 'footnotes', 'sources',
                  'timeDetails', 'nature']

    trailing = ['value_latest_year', 'latest_year']

    all_columns = list(bigtable.columns)

    # Yearly value columns; the "latest year" value is placed at the very end.
    yearly_values = sorted(
        name for name in all_columns
        if name.startswith('value_') and name != 'value_latest_year'
    )

    # Whatever is left over is considered a dimension / disaggregation.
    reserved = set(leading) | set(attributes) | set(trailing) | set(yearly_values)
    dimensions = sorted(name for name in all_columns if name not in reserved)

    ordered = bigtable[leading + dimensions + attributes + yearly_values + trailing]

    sort_keys = ['goal', 'target', 'indicator', 'seriesCode', 'geoAreaName'] + dimensions
    return ordered.sort_values(by=sort_keys)

    

In [24]:
def split_by_country(bigtable, domain='', target_dir=''):
    """
    Write one Excel workbook per geographic area found in `bigtable`.

    For every distinct (geoAreaCode, geoAreaName) pair, the rows with that
    code are selected, columns that are entirely empty for the area are
    dropped, and the result is saved without the index as
    `<target_dir>Country_<AreaName>_<code>_<domain>.xlsx`, where the area
    name is converted with `camelCase(..., startLowercase=False)`.
    """
    areas = bigtable[['geoAreaCode', 'geoAreaName']].drop_duplicates()

    for area_code, raw_name in areas.itertuples(index=False, name=None):
        area_name = camelCase(raw_name, startLowercase=False)

        # Keep this area's rows and discard columns without any data for it.
        area_rows = bigtable[bigtable['geoAreaCode'] == area_code]
        area_rows = area_rows.dropna(axis=1, how='all')

        file_name = 'Country_' + area_name + '_' + str(area_code) + '_' + domain + '.xlsx'
        area_rows.to_excel(target_dir + file_name, index=False)
      

## Merge all the data for each goal

In [25]:
# For every goal: gather the series files available for the current release,
# merge them into a single table, then export that table per country and as a
# whole.
for g in series_metadata:
    # Start from an empty list for every goal, so that a goal without any
    # data file can never re-export the table built for the previous goal.
    goal_tables = []

    # NOTE: to process a single goal / target / indicator while debugging,
    # add a `continue` guard on g['code'], t['code'] or i['reference'].
    for t in g['targets']:
        for i in t['indicators']:
            # Indicators without a 'series' entry have nothing to load.
            for s in i.get('series', []):
                filename = 'csv_Indicator_' + i['reference'] + '_Series_' + s['code']+'.csv'

                # Not every series in the metadata has a data file.
                if not os.path.isfile(data_dir + filename):
                    continue

                small_table = pd.read_csv(data_dir + filename, sep='\t')
                # Drop rows without a latest-year value (empty rows).
                goal_tables.append(small_table.dropna(subset=['value_latest_year']))

    if not goal_tables:
        display('no data files found for goal ' + g['code'] + ' - skipped')
        continue

    # A single concatenation avoids re-copying the growing table once per series.
    big_table = pd.concat(goal_tables, ignore_index=True, sort=False)

    big_table = rearrange(big_table)

    split_by_country(big_table, domain='Goal_' + g['code'], target_dir=data_dir)

    big_table.to_excel(data_dir + 'All_Countries_Goal_' + g['code']+'.xlsx', index=False)

    display('finished processing goal ' + g['code'])
    
    

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1990',
 'value_1991',
 'value_1992',
 'value_1993',
 'value_1994',
 'value_1995',
 'value_1996',
 'value_1997',
 'value_1998',
 'value_1999',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 1'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1991',
 'value_1994',
 'value_1995',
 'value_1997',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018',
 'value_2019']

'finished processing goal 2'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1990',
 'value_1991',
 'value_1992',
 'value_1993',
 'value_1994',
 'value_1995',
 'value_1996',
 'value_1997',
 'value_1998',
 'value_1999',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 3'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 4'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018',
 'value_2019']

'finished processing goal 5'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018',
 'value_2019']

'finished processing goal 6'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1990',
 'value_1991',
 'value_1992',
 'value_1993',
 'value_1994',
 'value_1995',
 'value_1996',
 'value_1997',
 'value_1998',
 'value_1999',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017']

'finished processing goal 7'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 8'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 9'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 10'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1990',
 'value_1995',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 11'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017']

'finished processing goal 12'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 13'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 14'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1993',
 'value_1994',
 'value_1995',
 'value_1996',
 'value_1997',
 'value_1998',
 'value_1999',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018',
 'value_2019']

  force_unicode(url))


'finished processing goal 15'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_2000',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018',
 'value_2019']

'finished processing goal 16'

['goal',
 'target',
 'indicator',
 'seriesCode',
 'seriesDesc',
 'release',
 'geoAreaCode',
 'geoAreaName',
 'parentCode',
 'parentName',
 'type',
 'ISO3',
 'UN_Member',
 'X',
 'Y']

['unitsCode',
 'unitsDesc',
 'unitmultiplierCode',
 'unitmultiplierDesc',
 'reportingTypeCode',
 'reportingTypeDesc',
 'valueDetails',
 'footnotes',
 'sources',
 'timeDetails',
 'nature']

['value_latest_year', 'latest_year']

['value_1990',
 'value_1991',
 'value_1992',
 'value_1993',
 'value_1994',
 'value_1995',
 'value_1996',
 'value_1997',
 'value_1998',
 'value_1999',
 'value_2000',
 'value_2001',
 'value_2002',
 'value_2003',
 'value_2004',
 'value_2005',
 'value_2006',
 'value_2007',
 'value_2008',
 'value_2009',
 'value_2010',
 'value_2011',
 'value_2012',
 'value_2013',
 'value_2014',
 'value_2015',
 'value_2016',
 'value_2017',
 'value_2018']

'finished processing goal 17'