### CRU: Country names ISO standardization

In [1]:
import os
import pandas as pd

#### Set input and output folders

In [2]:
# Input: per-period time-series tables; output: the same tables re-indexed by ISO code.
data_in  = '../data/CRU_1_time_series'
data_out = '../data/CRU_2a_iso_standardization'

# One sub-folder per period is read from data_in and mirrored under data_out.
periods = ['annually', 'monthly', 'quarterly']

# exist_ok=True makes this cell safe to re-run and avoids the check-then-create
# race; makedirs also creates any missing parent directory.
os.makedirs(data_out, exist_ok=True)
for period in periods:
    os.makedirs(f'{data_out}/{period}', exist_ok=True)

#### Read table of country names and ISO codes

In [3]:
# Lookup table indexed by the 'CRU Country Name' column; the
# 'HDR Country Name' column is discarded because this notebook does not use it.
iso_df = (
    pd.read_csv('../data/ISO_country_codes.csv', index_col='CRU Country Name')
      .drop(columns='HDR Country Name')
)
iso_df.head(10)

Unnamed: 0_level_0,alpha-2,alpha-3,ISO Country Name
CRU Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,AF,AFG,Afghanistan
Albania,AL,ALB,Albania
Algeria,DZ,DZA,Algeria
Andorra,AD,AND,Andorra
Angola,AO,AGO,Angola
Antigua_and_Barbuda,AG,ATG,Antigua and Barbuda
Argentina,AR,ARG,Argentina
Armenia,AM,ARM,Armenia
Australia,AU,AUS,Australia
Austria,AT,AUT,Austria


#### Load snippets

In [4]:
# %load '../snippets/df_iso.py'
def df_iso(df_in, iso_df, alpha2=True, remove_unmatched=True):
    """Re-index a country-indexed table by ISO country code.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Table whose index labels are looked up in ``iso_df``'s index.
    iso_df : pandas.DataFrame
        Lookup table with the columns 'alpha-2', 'alpha-3' and
        'ISO Country Name'; it is joined onto ``df_in`` by index.
    alpha2 : bool, default True
        Use the 'alpha-2' column as the new index if True, otherwise 'alpha-3'.
    remove_unmatched : bool, default True
        Drop rows that end up with a missing (NaN) code, i.e. rows that have
        no entry in ``iso_df`` or whose code is missing there.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df_in`` (the three ISO helper columns are removed),
        indexed by the chosen code, with the index named 'Country'.
    """
    if alpha2:
        code_col, other_col = 'alpha-2', 'alpha-3'
    else:
        code_col, other_col = 'alpha-3', 'alpha-2'

    # Left join on the index: rows without a match get NaN in the ISO columns.
    df = df_in.join(iso_df).set_index(code_col)
    df = df.drop([other_col, 'ISO Country Name'], axis=1)

    if remove_unmatched:
        # NaN never compares equal to itself, so a membership test such as
        # `float('nan') in list(df.index)` can never find it; use a null mask.
        df = df.loc[df.index.notna()]

    df.index.name = 'Country'
    return df

#### Read data, replace country names with ISO codes, and write the result to CSV

In [5]:
for period in periods:
    # Maps attribute name (file name up to the first '.') to its converted table.
    all_attrs = dict()

    for attr_file in os.listdir(f'{data_in}/{period}'):
        attr_path = f'{data_in}/{period}/{attr_file}'
        # os.listdir also returns hidden entries and sub-directories
        # (e.g. .ipynb_checkpoints, .DS_Store); those are not data tables.
        if attr_file.startswith('.') or not os.path.isfile(attr_path):
            continue
        attr = attr_file.split('.')[0]

        # -999 is read as a missing value.
        table = pd.read_csv(attr_path, index_col='Time', na_values=-999)
        table = table.drop('all', axis=1)
        # Keep only the columns listed in the ISO lookup table; pandas raises
        # KeyError if one of them is absent from the file.
        table = table[iso_df.index]
        # Rebuild the index from a plain list so the 'Time' index name is not
        # carried over to the columns after transposing.
        table.index = list(table.index)
        # Transpose so the former columns become the row index that
        # df_iso joins on.
        table = table.transpose()
        all_attrs[attr] = df_iso(table, iso_df, alpha2=False)

    # output data to csv format
    for attr, table in all_attrs.items():
        table.to_csv(f'{data_out}/{period}/{attr}.csv', index_label='Country')