# Countries

Create a new master country dataset from the `PPP private conversion factor` sheet of `lib_dataset.xlsx`.

In [1]:
import pandas as pd

In [2]:
# Load the country spreadsheet with pandas' default reader settings.
# NOTE(review): the notebook intro mentions lib_dataset.xlsx, but this cell
# reads countries_LI.xlsx - confirm which file is the intended source.
df = pd.read_excel(io="./countries_LI.xlsx")

In [3]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis="index", how="any")

In [4]:
# Keep only the country-name column. The explicit copy makes `countries` an
# independent frame, so assigning new columns to it afterwards does not
# trigger pandas' SettingWithCopyWarning.
countries = df[["County_LI"]].copy()

In [5]:
# Number the rows 1..N in their current order.
countries["id"] = range(1, len(countries) + 1)

In [6]:
# Give the name column its final header.
countries = countries.rename({"County_LI": "country"}, axis="columns")

In [7]:
# Put `id` first. Copying detaches the result from the frame it was sliced
# from, so adding the currency/abbreviation columns later does not raise
# SettingWithCopyWarning.
countries = countries[["id", "country"]].copy()

In [8]:
# Show the last five rows to check the end of the id sequence.
countries.tail(n=5)

Unnamed: 0,id,country
198,199,Vietnam
199,200,West Bank and Gaza
200,201,"Yemen, Rep."
201,202,Zambia
202,203,Zimbabwe


## Map to Master V1 `countries.csv` to get the currency and abbreviation

In [9]:
# Load the previous (Master V1) country table used as the first lookup source.
prev_countries = pd.read_csv(filepath_or_buffer="../master_v1/countries.csv")

In [10]:
# Inspect which columns the Master V1 table provides.
prev_countries.columns

Index(['id', 'country', 'country_code', 'currency', 'abbreviation'], dtype='object')

In [11]:
def remap_prev_countries(val, column_name):
    """Look up ``column_name`` for country ``val`` in ``prev_countries``.

    The country name is compared case-insensitively against the ``country``
    column of the module-level ``prev_countries`` frame.

    Args:
        val: Country name to search for (converted with ``str``).
        column_name: Column of ``prev_countries`` whose value is returned.

    Returns:
        The value from the first matching row, or ``None`` when no row matches.
    """
    needle = str(val).lower()
    is_match = prev_countries["country"].str.lower() == needle
    matches = prev_countries[is_match]
    if not matches.empty:
        # Several rows may match; only the first one is used.
        return matches[column_name].to_list()[0]
    return None

In [12]:
def get_currency(val):
    """Return the ``currency`` value that ``remap_prev_countries`` finds for ``val``."""
    return remap_prev_countries(val, column_name="currency")

In [13]:
def get_abbreviation(val):
    """Return the ``abbreviation`` value that ``remap_prev_countries`` finds for ``val``."""
    return remap_prev_countries(val, column_name="abbreviation")

In [14]:
# Look up each country's currency in the Master V1 table (None when absent).
countries["currency"] = countries["country"].map(get_currency)

In [15]:
# Look up each country's abbreviation in the Master V1 table (None when absent).
countries["abbreviation"] = countries["country"].map(get_abbreviation)

In [16]:
# Show the first five rows to spot-check the mapped currency columns.
countries.head(n=5)

Unnamed: 0,id,country,currency,abbreviation
0,1,Afghanistan,Afghani,AFN
1,2,Albania,Lek,ALL
2,3,Algeria,Algerian Dinar,DZD
3,4,American Samoa,Euros,EUR
4,5,Angola,Angolan kwanza,AOA


## Map to `updated_country_currencies_with_names.csv` to get the updated currency and abbreviation

In [17]:
# Load the updated currency table used as the second lookup source.
updated_curr = pd.read_csv(filepath_or_buffer="./updated_country_currencies_with_names.csv")

In [18]:
# Inspect which columns the updated currency table provides.
updated_curr.columns

Index(['LI', 'Currency', 'Score', 'ISO Currency Code', 'Currency Long Name'], dtype='object')

In [19]:
# Hand-maintained overrides, matched on the exact country name.
# Maps country -> (ISO currency code, currency long name). Both fields are
# given for every entry so the long-name column never receives an ISO code.
_MANUAL_CURRENCY_OVERRIDES = {
    "British Virgin Islands": ("USD", "US Dollar"),
    "Cote d'Ivoire": ("XOF", "CFA Franc BCEAO"),
    "Eswatini": ("SZL", "Lilangeni"),
    "Korea, Rep.": ("KRW", "Won"),
    "Sint Maarten (Dutch part)": ("ANG", "Netherlands Antillean guilder"),
    "Turkiye": ("TRY", "Lira"),
}


def remap_updated_curr(val, column_name):
    """Return the currency value for country ``val``.

    Sources are tried in this order:

    1. the manual override table (exact, case-sensitive name match);
    2. the module-level ``updated_curr`` frame, matched case-insensitively
       on its ``LI`` column;
    3. the value already stored for that country in the module-level
       ``countries`` frame (``abbreviation`` when ``column_name`` contains
       ``"ISO"``, otherwise ``currency``).

    Args:
        val: Country name to look up.
        column_name: Column of ``updated_curr`` to read. A name containing
            ``"ISO"`` selects the ISO code; anything else selects the long
            currency name for the override and fallback paths.

    Returns:
        The value from the first matching row of the first source that knows
        the country, or ``None`` when none of them does.
    """
    wants_iso = "ISO" in column_name

    override = _MANUAL_CURRENCY_OVERRIDES.get(val)
    if override is not None:
        iso_code, long_name = override
        return iso_code if wants_iso else long_name

    needle = str(val).lower()
    matches = updated_curr[updated_curr["LI"].str.lower() == needle]
    if not matches.empty:
        return matches[column_name].to_list()[0]

    # Not in the updated table: keep whatever was mapped previously.
    fallback = countries[countries["country"].str.lower() == needle]
    if fallback.empty:
        # Unknown everywhere; return None instead of failing on an empty list.
        return None
    fallback_column = "abbreviation" if wants_iso else "currency"
    return fallback[fallback_column].to_list()[0]

In [20]:
def get_updated_currency(val):
    """Return the ``Currency Long Name`` value that ``remap_updated_curr`` resolves for ``val``."""
    return remap_updated_curr(val, column_name="Currency Long Name")

In [21]:
def get_updated_abbreviation(val):
    """Return the ``ISO Currency Code`` value that ``remap_updated_curr`` resolves for ``val``."""
    return remap_updated_curr(val, column_name="ISO Currency Code")

In [22]:
# Overwrite the currency column with the value resolved from the updated table.
countries["currency"] = countries["country"].map(get_updated_currency)

In [23]:
# Overwrite the abbreviation column with the ISO code resolved from the updated table.
countries["abbreviation"] = countries["country"].map(get_updated_abbreviation)

In [24]:
# Renumber the index from 0. Reassigning instead of using inplace=True keeps
# the cell free of in-place mutation, so it behaves the same when re-run.
countries = countries.reset_index(drop=True)

In [25]:
# Write the final table next to the notebook, without the index column.
countries.to_csv(path_or_buf="./countries.csv", index=False)