# Pre-processing script
This script pulls SDG data from the UN SDG API and transforms it into CSV files.
The steps are:
- Pull data from the [API](https://unstats.un.org/SDGAPI/swagger/)
- Join with geography
- Save as a "long" table
- Pivot into "wide" format and split regional and country data

### Load necessary libraries

In [103]:
import csv
import json
import urllib3  # allows to access a URL with python
import math
import os
import io
import collections
import numpy as np
import pandas as pd
import xlsxwriter


### User parameters

In [2]:
# Release of the SDG database to extract -- make sure this is the correct release.
release = '2019.Q1.G.02'

# The literal string '__file__' (not the `__file__` variable) is resolved
# against the current working directory, so this is the directory the
# notebook is running from.
dir_path = os.path.split(os.path.realpath('__file__'))[0]

# Directory holding the input files, relative to the working directory.
wd_dir = r'../'

print(dir_path)
print('data inputs dir: ' + wd_dir)

/home/jovyan/FIS4SDGs/unsd/notebooks
data inputs dir: ../


## Utilities

#### Convert string to camelCase

In [3]:
def camelCase(st):
    """
    Convert a free-text label to lowerCamelCase.

    Every word is title-cased, all non-alphanumeric characters are removed,
    and the first character of the result is lower-cased, e.g.
    ``'Reporting Type'`` -> ``'reportingType'``.

    Adapted from:
    https://stackoverflow.com/questions/8347048/camelcase-every-string-any-standard-library

    Parameters
    ----------
    st : str
        Label to convert.

    Returns
    -------
    str
        The camelCase label, or an empty string when `st` contains no
        alphanumeric characters.
    """
    output = ''.join(x for x in st.title() if x.isalnum())
    # Nothing left after filtering: indexing output[0] would raise IndexError.
    if not output:
        return output
    return output[0].lower() + output[1:]

#### Disable insecure request warnings when using `urllib3`.

In [4]:
# Suppress urllib3's InsecureRequestWarning messages for the rest of the session.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#### Create a list to collect errors

In [5]:
# Module-level list that accumulates error entries; starts empty.
error_log = []

### List of countries to be plotted on a map (with XY coordinates)

In [91]:
def countryListXY(file):
    """
    Load the list of countries with XY coordinates into a data frame.

    Parameters
    ----------
    file : str
        Path to a tab-delimited text file (read as latin-1) whose header row
        includes a `geoAreaCode` column.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with `geoAreaCode` cast to string.
    """
    with open(file, newline='', encoding='latin-1') as source:
        records = [dict(line) for line in csv.DictReader(source, delimiter='\t')]

    table = pd.DataFrame(records)
    # Codes are kept as strings so they can be joined with API geo-area codes.
    return table.astype({'geoAreaCode': 'str'})

    #print(countryListXY[1])
    #for c in countryListXY:

In [92]:
# Example: load the coordinates file from the inputs directory and preview the first 10 rows.
countryListXY(wd_dir + 'CountryListXY.txt').head(10)

Unnamed: 0,CountryProfile,ISO3CD,UNMember,X,Y,geoAreaCode,geoAreaName
0,1,AFG,1,66.02688198,33.83160199,4,Afghanistan
1,1,ALB,1,20.06660928,41.13897007,8,Albania
2,1,ATA,0,21.47585697,-80.40897662,10,Antarctica
3,1,DZA,1,2.678164227,28.15940032,12,Algeria
4,1,ASM,0,-170.7187269,-14.30587306,16,American Samoa
5,1,AND,1,1.576257417,42.54548611,20,Andorra
6,1,AGO,1,17.57817062,-12.33724746,24,Angola
7,1,ATG,1,-61.7999755,17.07761471,28,Antigua and Barbuda
8,1,AZE,1,50.01064725,40.39229544,31,Azerbaijan
9,1,ARG,1,-65.14563274,-35.19446255,32,Argentina


#### Call the endpoint of the SDG API that provides the list of hierarchical groupings of geographic Areas:

In [86]:
def geoAreaTree():
    """
    Fetch the hierarchical groupings of geographic areas from the SDG API.

    Returns
    -------
    The decoded JSON payload of the `GeoArea/Tree` endpoint.
    """
    endpoint = "https://unstats.un.org/SDGAPI/v1/sdg/GeoArea/Tree"
    reply = urllib3.PoolManager().request('GET', endpoint)
    payload = reply.data.decode('UTF-8')
    return json.loads(payload)

In [87]:
# Fetch the tree once and reuse it, rather than calling the API twice.
geo_tree = geoAreaTree()

# Number of root groupings, followed by the name of each root.
print(len(geo_tree))
for t in geo_tree:
    print('root='+t['geoAreaName'])

7
root=World
root=Least Developed Countries (LDC)
root=Land Locked Developing Countries (LLDC)
root=Small Island Developing States (SIDS)
root=Developed Regions
root=Developing Regions
root=Custom groupings of data providers


#### Traverse a hierarchical tree of geographic areas and convert it to a parent-child hierarchy table:

In [112]:
def traverse(tree, parentCode=None, parentName=None):
    """
    Recursively walk a geographic-area tree, recording one row per node.

    Each visited node is appended as a dictionary to the module-level list
    `hierarchy`. The current depth is read from the function attribute
    `traverse.level`, which the caller must initialise before the first call.

    Parameters
    ----------
    tree : dict
        Node with keys `type`, `geoAreaCode`, `geoAreaName` and `children`.
    parentCode : str, optional
        Code of the parent node (None for the root).
    parentName : str, optional
        Name of the parent node (None for the root).
    """
    node = {
        'level': traverse.level,
        'type': tree['type'],
        'parentCode': parentCode,
        'parentName': parentName,
        'geoAreaCode': str(tree['geoAreaCode']),
        'geoAreaName': tree['geoAreaName'],
    }
    hierarchy.append(node)

    # A missing/empty child list means this node is a leaf.
    for child in tree['children'] or []:
        # Children sit one level deeper; restore the depth after each subtree.
        traverse.level += 1
        traverse(child, node['geoAreaCode'], node['geoAreaName'])
        traverse.level -= 1
            
    

    

            
def flatten(tree):
    """
    Convert a hierarchical tree of geographic areas into a parent-child table.

    Resets the module-level list `hierarchy` and the depth counter
    `traverse.level`, walks `tree` with `traverse`, and then patches in two
    countries that were missing from the API tree (temporary fix).

    Parameters
    ----------
    tree : dict
        Root node of a geographic-area tree, as accepted by `traverse`.

    Returns
    -------
    pandas.DataFrame
        One row per geographic area with the keys written by `traverse`:
        level, type, parentCode, parentName, geoAreaCode, geoAreaName.
    """
    global hierarchy
    hierarchy = []
    traverse.level = 1
    traverse(tree)

    #----Temporary Fix for missing countries----
    missing_countries = [
        {'level' : 5,
         'type' : 'Country',
         'parentCode' : '17',
         'parentName' : 'Middle Africa',
         'geoAreaCode' : '180',
         'geoAreaName' : 'Democratic Republic of the Congo'
        },
        {'level' : 5,
         'type' : 'Country',
         'parentCode' : '14',
         'parentName' : 'Eastern Africa',
         'geoAreaCode' : '834',
         'geoAreaName' : 'Tanzania, United Republic of'
        },
    ]

    # Only add a patched country when the tree did not already supply its
    # code; otherwise the table (and every join on it) would hold duplicates.
    known_codes = {row['geoAreaCode'] for row in hierarchy}
    for country in missing_countries:
        if country['geoAreaCode'] not in known_codes:
            hierarchy.append(country)

    return pd.DataFrame(hierarchy)

In [113]:
# Example: flatten the first root of the geographic tree and preview the first 12 rows.
flatten(geoAreaTree()[0]).head(12)

Unnamed: 0,geoAreaCode,geoAreaName,level,parentCode,parentName,type
0,1,World,1,,,Region
1,10,Antarctica,2,1.0,World,Country
2,2,Africa,2,1.0,World,Region
3,15,Northern Africa,3,2.0,Africa,Region
4,12,Algeria,4,15.0,Northern Africa,Country
5,818,Egypt,4,15.0,Northern Africa,Country
6,434,Libya,4,15.0,Northern Africa,Country
7,504,Morocco,4,15.0,Northern Africa,Country
8,729,Sudan,4,15.0,Northern Africa,Country
9,788,Tunisia,4,15.0,Northern Africa,Country


#### Merge coordinates and list of geographic areas in SDG database

In [114]:
# Outer-join the flattened hierarchy of the first root of the geographic tree
# with the coordinates table, so areas present in only one source are kept.
x = flatten(geoAreaTree()[0]).merge(
    countryListXY(wd_dir + 'CountryListXY.txt'),
    how='outer',
    on=['geoAreaCode', 'geoAreaName'],
)

# Write the joined table to an Excel workbook in the working directory.
x.to_excel('test.xlsx', engine ='xlsxwriter')

### Get the list of goals, targets, indicators and series

In [8]:
def series_list(release):
    """
    List every series of a release together with its goal, target and indicator.

    Calls the `Goal/List` endpoint of the SDG API (with children included)
    and flattens the goal > target > indicator > series nesting into one row
    per series.

    Parameters
    ----------
    release : str
        Release code; only series whose `release` equals this value are kept.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: goalCode, goalDesc, targetCode, targetDesc,
        indicatorCode, indicatorDesc, indicatorTier, seriesCode, seriesDesc,
        seriesRelease. Empty (but with these columns) when no series matches.
    """
    # Output columns, in the order they appear in the returned table.
    keys = ["goalCode",
            "goalDesc",
            "targetCode",
            "targetDesc",
            "indicatorCode",
            "indicatorDesc",
            "indicatorTier",
            "seriesCode",
            "seriesDesc",
            "seriesRelease"
           ]

    # Call the endpoint of the SDG API that provides the list of goals with all their children:
    http = urllib3.PoolManager()
    response = http.request('GET', "https://unstats.un.org/SDGAPI/v1/sdg/Goal/List?includechildren=true")
    responseData = json.loads(response.data.decode('UTF-8'))

    # Iterate over goals, targets, indicators, and series for the specified release:
    rows = []
    for g in responseData:
        for t in g['targets']:
            for i in t['indicators']:
                for s in i['series']:
                    if s['release'] != release:
                        continue
                    values = [g['code'], g['title'],
                              t['code'], t['description'],
                              i['code'], i['description'], i['tier'],
                              s['code'], s['description'], s['release']]
                    rows.append(dict(zip(keys, values)))

    # Passing `columns` fixes the column order and keeps the columns present
    # even when no series matched (selecting them afterwards would raise
    # KeyError on an empty frame).
    return pd.DataFrame(rows, columns=keys)

In [9]:
# Example: list the series of the configured release and preview the first 4 rows.
series_list(release).head(4)

Unnamed: 0,goalCode,goalDesc,targetCode,targetDesc,indicatorCode,indicatorDesc,indicatorTier,seriesCode,seriesDesc,seriesRelease
0,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02
1,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_EMP1,Employed population below international povert...,2019.Q1.G.02
2,1,End poverty in all its forms everywhere,1.2,"By 2030, reduce at least by half the proportio...",1.2.1,Proportion of population living below the nati...,1,SI_POV_NAHC,Proportion of population living below the nati...,2019.Q1.G.02
3,1,End poverty in all its forms everywhere,1.3,Implement nationally appropriate social protec...,1.3.1,Proportion of population covered by social pro...,1,SI_COV_MATNL,[ILO] Proportion of mothers with newborns rece...,2019.Q1.G.02


### Get the data for each series

#### Verify how many pages need to be requested to get all the data for a specific series from the SDG API. 

In [10]:
def series_request_details(seriesCode, release, pageSize=500):
    """
    Work out how many pages are needed to download all data of a series.

    Sends a small probe request to the `Series/Data` endpoint of the SDG API
    and reads `totalElements` from the response.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.
    pageSize : int, optional
        Number of records to request per page (default 500).

    Returns
    -------
    dict
        Keys `series`, `totalElements`, `nPages` and `pageSize`. `nPages` is
        0 when the series has no elements.
    """
    # pageSize=2 in the probe request: only `totalElements` is read from it.
    seriesRequest = 'https://unstats.un.org/SDGAPI/v1/sdg/Series/Data?seriesCode=' + seriesCode + '&releaseCode=' + release + "&pageSize=2"

    http = urllib3.PoolManager()
    response = http.request('GET', seriesRequest)
    responseData = json.loads(response.data.decode('UTF-8'))

    totalElements = responseData['totalElements']
    # Round up: floor(...) + 1 asks for one page too many whenever the total
    # is an exact multiple of the page size.
    nPages = math.ceil(totalElements / pageSize)

    return {'series' : seriesCode,
            'totalElements' : totalElements,
            'nPages' : nPages,
            'pageSize' : pageSize
           }


In [12]:
# Example: paging details for one series in one release.
series_request_details('SI_POV_DAY1', '2019.Q1.G.02')

{'series': 'SI_POV_DAY1', 'totalElements': 1393, 'nPages': 3, 'pageSize': 500}

#### Explore the code lists of the attributes and dimensions of a series
Describe each attribute or dimension as a simple dictionary made of a set of `code`-`description` pairs.  For the code, use the SDMX code, and not the internal code of the database.  Keep all labels in camelCase.

In [15]:
def series_code_lists(seriesCode, release):
    """
    Collect the code lists of the attributes and dimensions of a series.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    dict
        `{'seriesCode': seriesCode, <concept>: {<sdmx code>: <description>}}`
        with one entry per attribute and dimension, keyed by the camelCase
        form of the concept id.
    """
    url = 'https://unstats.un.org/SDGAPI/v1/sdg/Series/Data?seriesCode=' + seriesCode + '&releaseCode=' + release + "&pageSize=2"
    reply = urllib3.PoolManager().request('GET', url)
    payload = json.loads(reply.data.decode('UTF-8'))

    result = {'seriesCode': seriesCode}

    # Attributes are processed before dimensions; both share the same layout.
    for concepts in (payload['attributes'], payload['dimensions']):
        for concept in concepts:
            result[camelCase(concept['id'])] = {
                code['sdmx']: code['description'] for code in concept['codes']
            }

    return result


In [17]:
# Example: code lists of the attributes and dimensions of one series.
series_code_lists('SI_POV_DAY1', '2019.Q1.G.02')

{'seriesCode': 'SI_POV_DAY1',
 'nature': {'G': 'Global monitoring data'},
 'units': {'PERCENT': 'Percentage'},
 'reportingType': {'N': 'National', 'G': 'Global'}}

#### Simplify further by presenting all the codes and their descriptions in a single table:

In [18]:
def series_code_lists2(seriesCode, release):
    """
    Present all codes of a series' attributes and dimensions in one table.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    pandas.DataFrame
        One row per code, built from the keys `series`, `role`
        ('attribute' or 'dimension'), `concept` (camelCase concept id),
        `code` (SDMX code) and `description`.
    """
    url = 'https://unstats.un.org/SDGAPI/v1/sdg/Series/Data?seriesCode=' + seriesCode + '&releaseCode=' + release + "&pageSize=2"
    reply = urllib3.PoolManager().request('GET', url)
    payload = json.loads(reply.data.decode('UTF-8'))

    # Attributes first, then dimensions; each group is tagged with its role.
    groups = (('attribute', payload['attributes']),
              ('dimension', payload['dimensions']))

    rows = []
    for role, concepts in groups:
        for concept in concepts:
            for code in concept['codes']:
                rows.append({
                    'series': seriesCode,
                    'role': role,
                    'concept': camelCase(concept['id']),
                    'code': code['sdmx'],
                    'description': code['description'],
                })

    return pd.DataFrame(rows)


In [19]:
# Example: table of all attribute and dimension codes of one series.
series_code_lists2('SI_POV_DAY1', '2019.Q1.G.02')

Unnamed: 0,code,concept,description,role,series
0,G,nature,Global monitoring data,attribute,SI_POV_DAY1
1,PERCENT,units,Percentage,attribute,SI_POV_DAY1
2,N,reportingType,National,dimension,SI_POV_DAY1
3,G,reportingType,Global,dimension,SI_POV_DAY1


#### Build query string to collect data for a specific series from the global SDG API

In [22]:
def series_query(seriesCode, release, page, pageSize):
    """
    Request one page of data for a series from the global SDG API.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.
    page : int
        Page number to request.
    pageSize : int
        Number of records per page.

    Returns
    -------
    The decoded JSON payload of the `Series/Data` endpoint.
    """
    url_parts = [
        r'https://unstats.un.org/SDGAPI/v1/sdg/Series/Data?seriesCode=', seriesCode,
        '&releaseCode=', release,
        '&page=', str(page),
        '&pageSize=', str(pageSize),
    ]
    reply = urllib3.PoolManager().request('GET', ''.join(url_parts))
    return json.loads(reply.data.decode('UTF-8'))

#### Get data for a specific series from the API
*(!) Notice that a data point may appear more than once if it belongs to a "multi-purpose indicator"*

In [25]:
def series_data(seriesCode, release):
    """
    Download all data records of a series, page by page.

    Prints a progress line for every page requested.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    list
        The concatenated `data` entries of every page; empty when the series
        reports no elements.
    """
    details = series_request_details(seriesCode,release)
    records = []

    # Nothing to download for a series without elements.
    if not details['totalElements'] > 0:
        return records

    total_pages = details['nPages']
    for page in range(1, total_pages + 1):
        print("---Series " + seriesCode + ": Processing page " + str(page) + " of " + str(total_pages))
        page_records = series_query(seriesCode, release, page, details['pageSize'])['data']
        if len(page_records) > 0:
            records = records + page_records

    return records

In [26]:
# Example: download one series and show its first record.
series_data('SI_POV_DAY1','2019.Q1.G.02')[0]

---Series SI_POV_DAY1: Processing page 1 of 3
---Series SI_POV_DAY1: Processing page 2 of 3
---Series SI_POV_DAY1: Processing page 3 of 3


{'goal': ['1'],
 'target': ['1.1'],
 'indicator': ['1.1.1'],
 'series': 'SI_POV_DAY1',
 'seriesDescription': 'Proportion of population below international poverty line (%)',
 'seriesCount': '1393',
 'geoAreaCode': '8',
 'geoAreaName': 'Albania',
 'timePeriodStart': 1996.0,
 'value': '1.1',
 'valueType': 'Float',
 'time_detail': None,
 'source': 'World Bank, Development Research Group. Data are based on primary household survey data obtained from government statistical agencies and World Bank country departments. Data for high-income economies are from the Luxembourg Income Study database. For more information and methodology, please see PovcalNet (http://iresearch.worldbank.org/PovcalNet/index.htm). Accessed 01 November 2018, World Development Indicators Database.',
 'footnotes': [''],
 'attributes': {'Nature': 'G', 'Units': 'PERCENT'},
 'dimensions': {'Reporting Type': 'G'}}

#### Flatten the dictionary, extracting individual attributes and dimensions as key-value pairs in their own right.
Also convert the years (`timePeriod`) variable to `int`

In [27]:
def flat_series_data(seriesCode,release):
    """
    Download a series and flatten each record into a single-level dictionary.

    Per record:
    - list values are joined into one comma-separated string;
    - dict values are expanded into one key per entry, named
      camelCase(<entry key> + ' Code');
    - `time_detail` is renamed to its camelCase form;
    - `timePeriodStart` is renamed `timePeriod` and converted to `int`
      (a missing value stays None);
    - `series` / `seriesDescription` are renamed `seriesCode` / `seriesDesc`;
    - every other key is copied unchanged.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    list of dict
        One flattened dictionary per record returned by `series_data`.
    """
    new_x = []
    for d in series_data(seriesCode,release):
        new_d = {}
        for key, value in d.items():
            if isinstance(value, list):
                new_d[key] = ', '.join(value)
            elif isinstance(value, dict):
                for k, v in value.items():
                    new_d[camelCase(k+' Code')] = v
            elif key == 'time_detail':
                new_d[camelCase(key)] = value
            elif key == 'timePeriodStart':
                # int(None) would raise; keep a missing period as None.
                new_d['timePeriod'] = None if value is None else int(value)
            elif key == 'series':
                new_d['seriesCode'] = value
            elif key == 'seriesDescription':
                new_d['seriesDesc'] = value
            else:
                new_d[key] = value
        new_x.append(new_d)
    return new_x

In [28]:
# Example: flatten one series and show its first record.
flat_series_data('SI_POV_DAY1','2019.Q1.G.02')[0]

---Series SI_POV_DAY1: Processing page 1 of 3
---Series SI_POV_DAY1: Processing page 2 of 3
---Series SI_POV_DAY1: Processing page 3 of 3


{'goal': '1',
 'target': '1.1',
 'indicator': '1.1.1',
 'seriesCode': 'SI_POV_DAY1',
 'seriesDesc': 'Proportion of population below international poverty line (%)',
 'seriesCount': '1393',
 'geoAreaCode': '8',
 'geoAreaName': 'Albania',
 'timePeriod': 1996,
 'value': '1.1',
 'valueType': 'Float',
 'timeDetail': None,
 'source': 'World Bank, Development Research Group. Data are based on primary household survey data obtained from government statistical agencies and World Bank country departments. Data for high-income economies are from the Luxembourg Income Study database. For more information and methodology, please see PovcalNet (http://iresearch.worldbank.org/PovcalNet/index.htm). Accessed 01 November 2018, World Development Indicators Database.',
 'footnotes': '',
 'natureCode': 'G',
 'unitsCode': 'PERCENT',
 'reportingTypeCode': 'G'}

#### Convert flattened dictionary of data series to pandas data frame, remove goal/target/indicator columns, and remove duplicate values

In [29]:
def dataframe_unique_series_data(seriesCode,release):
    """
    Return the flattened data of a series as a de-duplicated data frame.

    The goal/target/indicator/seriesCount columns are removed before
    duplicates are dropped, so identical data rows collapse into one.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    pandas.DataFrame
        One row per distinct record; the original row index is kept.
    """
    x = pd.DataFrame(flat_series_data(seriesCode,release))
    # errors='ignore': a series without data gives a frame without these
    # columns, and a plain drop would raise KeyError.
    x = x.drop(['goal','target','indicator', 'seriesCount'], axis=1, errors='ignore')

    return x.drop_duplicates()

In [30]:
# Example: build the de-duplicated data frame for one series and preview the first 5 rows.
x = dataframe_unique_series_data('SI_POV_DAY1','2019.Q1.G.02')
x.head(5)


---Series SI_POV_DAY1: Processing page 1 of 3
---Series SI_POV_DAY1: Processing page 2 of 3
---Series SI_POV_DAY1: Processing page 3 of 3


Unnamed: 0,footnotes,geoAreaCode,geoAreaName,natureCode,reportingTypeCode,seriesCode,seriesDesc,source,timeDetail,timePeriod,unitsCode,value,valueType
0,,8,Albania,G,G,SI_POV_DAY1,Proportion of population below international p...,"World Bank, Development Research Group. Data a...",,1996,PERCENT,1.1,Float
1,,8,Albania,G,G,SI_POV_DAY1,Proportion of population below international p...,"World Bank, Development Research Group. Data a...",,2002,PERCENT,2.0,Float
2,,8,Albania,G,G,SI_POV_DAY1,Proportion of population below international p...,"World Bank, Development Research Group. Data a...",,2005,PERCENT,1.1,Float
3,,8,Albania,G,G,SI_POV_DAY1,Proportion of population below international p...,"World Bank, Development Research Group. Data a...",,2008,PERCENT,0.4,Float
4,,8,Albania,G,G,SI_POV_DAY1,Proportion of population below international p...,"World Bank, Development Research Group. Data a...",,2012,PERCENT,1.1,Float


#### Add descriptions to coded dimensions and attributes

In [45]:
def annotated_data(seriesCode, release):
    """
    Return the data of a series with descriptions added to every coded column.

    For each concept in the series' code list, the `<concept>Code` column of
    the data is left-joined with the code list to add a `<concept>Desc`
    column. The result is then joined with the goal/target/indicator/series
    descriptions from `series_list(release)` and the columns are reordered.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.

    Returns
    -------
    pandas.DataFrame
        Columns ordered as: series columns, geography, remaining
        (dimension) columns, time, values, then nature/footnotes/source.
    """
    codes = series_code_lists2(seriesCode, release)
    data = dataframe_unique_series_data(seriesCode, release)

    # Visit concepts in order of first appearance. Iterating a `set` here
    # would make the merge order, and so the output column order, vary
    # from run to run.
    for c in codes['concept'].unique():
        code_list = codes.loc[codes['concept'] == c, ['code', 'description']]
        code_list = code_list.rename(columns={'code': c+'Code', 'description': c+'Desc'})
        data = pd.merge(data,
                        code_list,
                        how='left',
                        on=[c+'Code'])

    # seriesDesc is supplied again by the series list joined below.
    data = data.drop(['seriesDesc'], axis=1)

    data = pd.merge(series_list(release),
                    data,
                    how = 'right',
                    on='seriesCode'
                   )

    c_series = ['goalCode', 'goalDesc',
                'targetCode', 'targetDesc',
                'indicatorCode', 'indicatorDesc', 'indicatorTier',
                'seriesCode', 'seriesDesc', 'seriesRelease']

    c_geo  = ['geoAreaCode', 'geoAreaName']

    c_time = ['timePeriod', 'timeDetail']

    c_values = ['value', 'valueType', 'unitsCode', 'unitsDesc']

    c_fn = ['natureCode', 'natureDesc', 'footnotes', 'source']

    fixed_columns = c_series + c_geo + c_time + c_values + c_fn

    # Everything that is not a fixed column is treated as a dimension column.
    c_dim = [c for c in data.columns if c not in fixed_columns]

    return data[c_series + c_geo + c_dim + c_time + c_values + c_fn]
    

In [46]:
# Example: annotate one series, then inspect its columns, shape and first 4 rows.
# `annotated_data` is the function defined in this notebook; the former
# name `described_data` is not defined here.
x = annotated_data('SI_POV_DAY1','2019.Q1.G.02')
print(x.columns)
print(x.shape)
x.head(4)

---Series SI_POV_DAY1: Processing page 1 of 3
---Series SI_POV_DAY1: Processing page 2 of 3
---Series SI_POV_DAY1: Processing page 3 of 3
Index(['goalCode', 'goalDesc', 'targetCode', 'targetDesc', 'indicatorCode',
       'indicatorDesc', 'indicatorTier', 'seriesCode', 'seriesDesc',
       'seriesRelease', 'geoAreaCode', 'geoAreaName', 'reportingTypeCode',
       'reportingTypeDesc', 'timePeriod', 'timeDetail', 'value', 'valueType',
       'unitsCode', 'unitsDesc', 'natureCode', 'natureDesc', 'footnotes',
       'source'],
      dtype='object')
(1393, 24)


Unnamed: 0,goalCode,goalDesc,targetCode,targetDesc,indicatorCode,indicatorDesc,indicatorTier,seriesCode,seriesDesc,seriesRelease,...,timePeriod,timeDetail,value,valueType,unitsCode,unitsDesc,natureCode,natureDesc,footnotes,source
0,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02,...,1996,,1.1,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
1,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02,...,2002,,2.0,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
2,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02,...,2005,,1.1,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
3,1,End poverty in all its forms everywhere,1.1,"By 2030, eradicate extreme poverty for all peo...",1.1.1,Proportion of population below the internation...,1,SI_POV_DAY1,Proportion of population below international p...,2019.Q1.G.02,...,2008,,0.4,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."


#### Add coordinates for countries

In [50]:
def georeferenced_data(seriesCode, release, coordinates_file):
    """
    Join the annotated data of a series with country coordinates.

    Parameters
    ----------
    seriesCode : str
        Code of the series.
    release : str
        Release code.
    coordinates_file : str
        Path to the tab-delimited coordinates file read by `countryListXY`.

    Returns
    -------
    pandas.DataFrame
        Outer join on `geoAreaCode` and `geoAreaName`, so countries without
        data and areas without coordinates are both kept.
    """
    # Use the caller's series and release (these were hard-coded before) and
    # the annotation function defined in this notebook.
    x = pd.merge(countryListXY(coordinates_file),
                 annotated_data(seriesCode, release),
                 how = 'outer',
                 on = ['geoAreaCode', 'geoAreaName'])

    return x

In [51]:
# Example: georeference one series using the coordinates file in the inputs directory.
georeferenced_data('SI_POV_DAY1','2019.Q1.G.02', wd_dir + 'CountryListXY.txt')

---Series SI_POV_DAY1: Processing page 1 of 3
---Series SI_POV_DAY1: Processing page 2 of 3
---Series SI_POV_DAY1: Processing page 3 of 3


Unnamed: 0,CountryProfile,ISO3CD,UNMember,X,Y,geoAreaCode,geoAreaName,goalCode,goalDesc,targetCode,...,timePeriod,timeDetail,value,valueType,unitsCode,unitsDesc,natureCode,natureDesc,footnotes,source
0,1,AFG,1,66.02688198,33.83160199,4,Afghanistan,,,,...,,,,,,,,,,
1,1,ALB,1,20.06660928,41.13897007,8,Albania,1,End poverty in all its forms everywhere,1.1,...,1996.0,,1.1,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
2,1,ALB,1,20.06660928,41.13897007,8,Albania,1,End poverty in all its forms everywhere,1.1,...,2002.0,,2,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
3,1,ALB,1,20.06660928,41.13897007,8,Albania,1,End poverty in all its forms everywhere,1.1,...,2005.0,,1.1,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
4,1,ALB,1,20.06660928,41.13897007,8,Albania,1,End poverty in all its forms everywhere,1.1,...,2008.0,,0.4,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
5,1,ALB,1,20.06660928,41.13897007,8,Albania,1,End poverty in all its forms everywhere,1.1,...,2012.0,,1.1,Float,PERCENT,Percentage,G,Global monitoring data,,"World Bank, Development Research Group. Data a..."
6,1,ATA,0,21.47585697,-80.40897662,10,Antarctica,,,,...,,,,,,,,,,
7,1,DZA,1,2.678164227,28.15940032,12,Algeria,1,End poverty in all its forms everywhere,1.1,...,1995.0,,5.9,Float,PERCENT,Percentage,G,Global monitoring data,The new base year is 1999.,"World Bank, Development Research Group. Data a..."
8,1,DZA,1,2.678164227,28.15940032,12,Algeria,1,End poverty in all its forms everywhere,1.1,...,2011.0,,0.5,Float,PERCENT,Percentage,G,Global monitoring data,The new base year is 1999.,"World Bank, Development Research Group. Data a..."
9,1,ASM,0,-170.7187269,-14.30587306,16,American Samoa,,,,...,,,,,,,,,,


In [None]:

        
        for xy in countryListXY:
            if xy['geoAreaCode'] == record['GeoArea_Code']:
                record['ISO3CD'] = xy['ISO3CD']
                record['X'] = xy['X']
                record['Y'] = xy['Y']
            continue
        
        series_dataset.append(record)
     
    try:
        with open('data\\csv\\'+ record['IndicatorCode'] + "-" + s['seriesCode']+'_long.csv', 'w', newline='') as outfile:
            fp = csv.DictWriter(outfile, series_dataset[0].keys(), quoting=csv.QUOTE_NONNUMERIC)
            fp.writeheader()
            fp.writerows(series_dataset)
            
            print('=====FINISHED WRITING SERIES ' + record['SeriesCode'] + ' TO FILE=====')
            
              
          
    
    except:
        
        error_log.append(record['SeriesCode'])
        
        print('=====SERIES ' + record['SeriesCode'] + ' COULD NOT BE WRITTEN TO FILE=====')
        
