## This notebook extracts data from regions.yaml and writes it to a CSV file
Make sure regions.yaml is in the current working directory before running the cells below.

imports

In [None]:
import pandas as pd
import numpy as np
import yaml

Define the function that extracts data from the nested dictionary tree

In [None]:
def extract_data(region_dict, names=None, kind=None,
                 population=None, gleam_id=None,
                 iana=None, iso_alpha_3=None,
                 country_code=None, country_track=None,
                 lat=None, lon=None):
    '''
    recurse through nested dictionaries in tree
    to extract data from dictionary

    Each visited node contributes exactly one entry to every returned
    list, so the lists stay index-aligned; a key that is missing from a
    node is recorded as None.

    Parameters
    ----------
    region_dict : dict
        Node of the tree. Keys read: 'names' (first element is used),
        'kind', 'population', 'gleam_id', 'iana', 'iso_alpha_3', 'lat',
        'lon' and 'subregions' (iterable of child nodes).
    names, kind, population, gleam_id, iana, iso_alpha_3, country_code, lat, lon : list, optional
        Accumulators to append to. A new empty list is created for each
        one that is omitted, so separate calls never share state.
    country_track : optional
        Value recorded in country_code for nodes that are not below a
        node of kind 'country'. Defaults to an empty list, which is what
        the previous default produced.

    Returns
    -------
    tuple of list
        (names, kind, population, gleam_id, iana, iso_alpha_3,
        country_code, lat, lon)
    '''
    # Defaults are created per call: a list literal in the signature is
    # built once and shared, which made repeated calls (e.g. re-running a
    # notebook cell) append onto the previous call's results.
    names = [] if names is None else names
    kind = [] if kind is None else kind
    population = [] if population is None else population
    gleam_id = [] if gleam_id is None else gleam_id
    iana = [] if iana is None else iana
    iso_alpha_3 = [] if iso_alpha_3 is None else iso_alpha_3
    country_code = [] if country_code is None else country_code
    lat = [] if lat is None else lat
    lon = [] if lon is None else lon
    if country_track is None:
        # keep the historical "no enclosing country" placeholder
        country_track = []

    # fields that are copied straight from the node, None when absent
    plain_fields = (
        ('gleam_id', gleam_id),
        ('iana', iana),
        ('iso_alpha_3', iso_alpha_3),
        ('population', population),
        ('lat', lat),
        ('lon', lon),
    )

    def _walk(node, current_country):
        '''Append one row for node, then descend into its subregions.'''
        for key, column in plain_fields:
            column.append(node.get(key))

        node_names = node.get('names')
        primary_name = node_names[0] if node_names else None
        names.append(primary_name)

        node_kind = node.get('kind')
        kind.append(node_kind)

        # a country starts a new scope: it and everything beneath it are
        # tagged with the country's primary name
        if node_kind == 'country':
            current_country = primary_name
        country_code.append(current_country)

        # `or ()` treats a missing, empty or null 'subregions' as a leaf
        for child in node.get('subregions') or ():
            _walk(child, current_country)

    _walk(region_dict, country_track)
    return (names, kind, population, gleam_id, iana, iso_alpha_3, country_code, lat, lon)

open file and extract data

In [None]:
# parse regions.yaml (looked up relative to the working directory)
with open('regions.yaml') as file:
    regions = yaml.safe_load(file)

# walk the parsed tree once, then unpack the nine parallel lists it returns
extracted = extract_data(regions)
(names, kind, population,
 gleam_id, iana, iso_alpha_3,
 country_code, lat, lon) = extracted

write to csv file

In [None]:
# empty-string placeholder, one entry per extracted region, used for the
# columns that are not filled from the extracted lists
blank = [''] * len(names)

# output column name -> values; insertion order is the CSV column order
columns = {
    'Code': iana,
    'Level': kind,
    'Name': names,
    'OfficialName': blank,
    'OtherNames': blank,
    'CountryCode': blank,
    'CountryCodeISOa3': iso_alpha_3,
    'M49Code': blank,
    'Lat': lat,
    'Lon': lon,
    'Population': population,
    'GleamID': gleam_id,
}

# build the table and order rows by level, then by name within a level
df = pd.DataFrame(data=columns).sort_values(by=['Level', 'Name'])
print(df)

# save without the index column
df.to_csv('regions_new.csv', index=False)