In [139]:
import requests
import xmltodict
import pandas as pd
import logging
import datetime

# http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/ALL
# Extract OECD KeyFamily id (dataset id) and English description

# where to save
logfile = '/Users/onurbaser/Documents/FinTech/FT_Project_1/oecd_keyfamilies.log'
datafile = '/Users/onurbaser/Documents/FinTech/FT_Project_1/OECD_key_names.csv'

# logging
logging.basicConfig(filename=logfile, filemode='w', level=logging.DEBUG)
logging.debug("Log started at %s", str(datetime.datetime.now()))

# get the data structure schema with the key families (dataset ids)
dataStructureUrl = 'http://stats.oecd.org/RESTSDMX/sdmx.ashx/GetDataStructure/ALL/'

try:
    r = requests.get(dataStructureUrl, timeout=61)
except requests.exceptions.ReadTimeout:
    print("Data request read timed out")
    logging.debug('Data read timed out')
except requests.exceptions.Timeout:
    print("Data request timed out")
    logging.debug('Data request timed out')
except requests.exceptions.HTTPError:
    print("HTTP error")
    logging.debug('HTTP error')
except requests.exceptions.ConnectionError:
    print("Connection error")
    logging.debug('Connection error')
else:
    if r.status_code == 200:
        keyFamIdList = []
        keyFamNameList = []

        # use xmltodict and traverse nested ordered dictionaries
        keyfamilies_dict = xmltodict.parse(r.text)
        keyFamilies = keyfamilies_dict['message:Structure']['message:KeyFamilies']['KeyFamily']

        for keyFamily in keyFamilies:
            keyfam_id = keyFamily['@id']
            if (keyfam_id != 'NCM_LIVE' or keyfam_id != 'NCM_STAGING', True):
                keyFamIdList.append(keyfam_id)
                keyNames = keyFamily['Name']
                for keyName in keyNames: 
                    keyfam_lang = keyName['@xml:lang']
                    if keyfam_lang == 'en':
                        keyfam_text = keyName['#text']
                        keyFamNameList.append(keyfam_text)
            #print(keyfam_id, keyfam_text)
       
        keyFamDF = pd.DataFrame({'KeyFamilyId': keyFamIdList, 'KeyFamilyName': keyFamNameList})
        keyFamDF.set_index('KeyFamilyId', inplace=True)
        keyFamDF.to_csv(datafile)  

    else:
        print('HTTP Failed with code', r.status_code)
        logging.debug('HTTP Failed with code %d', r.status_code)
        
     
        # create a 2D list(needed?), and a data frame. Save data frame to csv file
        # keyFamTable = [keyFamIdList, keyFamNameList]
       

print("completed ...")
logging.debug("Log ended at %s", str(datetime.datetime.now()))

TypeError: string indices must be integers

In [140]:
keyfam_id

'NCM_LIVE'

In [94]:
keyfam_lang

'fr'

In [62]:
keyFamily['Name']

OrderedDict([('@xml:lang', 'en'), ('#text', 'NCM Staging cube')])

In [77]:
keyFamilies

[OrderedDict([('@id', 'QNA'),
              ('@agencyID', 'OECD'),
              ('Name',
               [OrderedDict([('@xml:lang', 'en'),
                             ('#text', 'Quarterly National Accounts')]),
                OrderedDict([('@xml:lang', 'fr'),
                             ('#text', 'Comptes nationaux trimestriels')])]),
              ('Annotations',
               OrderedDict([('common:Annotation',
                             [OrderedDict([('common:AnnotationTitle',
                                            'Copyright OECD - All rights reserved')]),
                              OrderedDict([('common:AnnotationTitle',
                                            'Terms and Conditions'),
                                           ('common:AnnotationURL',
                                            'http://www.oecd.org/termsandconditions/')]),
                              OrderedDict([('common:AnnotationTitle',
                                            'Privacy Po