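This notebook reads the raw country data (the WB-HNP country metadata and a country-capitals list), keeps a fixed set of countries, and writes the result to `country_dimension.csv`.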

In [181]:
import pandas as pd
from constants import COUNTRY_DIMENSION_CSV

In [182]:
# Countries kept in the dimension table. Both raw sources are filtered with
# `isin` against these exact names, so the spelling must match the raw files.
COUNTRY_LIST = [
    "Canada",
    "United States",
    "Mexico",
    "India",
    "Indonesia",
    "Iran",
    "Egypt",
    "Kenya",
    "Nigeria",
]

In [183]:
# Build the base country table from the WB-HNP bulk country metadata file.
# Only the attributes listed below are carried into the dimension table.
selected_column_list = [
    "Short Name",
    "Country Code",
    "2-alpha code",
    "Region",
    "Currency Unit",
    "Income Group",
    "System of trade",
    "Government Accounting concept",
    "Balance of Payments Manual in use",
    "IMF data dissemination standard",
]

country_df = (
    pd.read_csv("../raw_data/HNP_StatsCountry.csv")[selected_column_list]
    # "Country Name" is the key later shared with the capitals table.
    .rename(columns={"Short Name": "Country Name", "2-alpha code": "two_alpha_code"})
    # Keep only the countries of interest; original row labels are preserved.
    .loc[lambda frame: frame["Country Name"].isin(COUNTRY_LIST)]
)
country_df

Unnamed: 0,Country Name,Country Code,two_alpha_code,Region,Currency Unit,Income Group,System of trade,Government Accounting concept,Balance of Payments Manual in use,IMF data dissemination standard
35,Canada,CAN,CA,North America,Canadian dollar,High income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard Plus (SDDS...
67,Egypt,EGY,EG,Middle East & North Africa,Egyptian pound,Lower middle income,General trade system,Budgetary central government,BPM5 (Converted into BPM6 by IMF),Special Data Dissemination Standard (SDDS)
106,Indonesia,IDN,ID,East Asia & Pacific,Indonesian rupiah,Lower middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
109,India,IND,IN,South Asia,Indian rupee,Lower middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
111,Iran,IRN,IR,Middle East & North Africa,Iranian rial,Lower middle income,Special trade system,Consolidated central government,BPM5 (Converted into BPM6 by IMF),Enhanced General Data Dissemination System (e-...
120,Kenya,KEN,KE,Sub-Saharan Africa,Kenyan shilling,Lower middle income,General trade system,Consolidated central government,BPM6,Enhanced General Data Dissemination System (e-...
153,Mexico,MEX,MX,Latin America & Caribbean,Mexican peso,Upper middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
173,Nigeria,NGA,NG,Sub-Saharan Africa,Nigerian naira,Lower middle income,General trade system,Budgetary central government,BPM6,Enhanced General Data Dissemination System (e-...
250,United States,USA,US,North America,U.S. dollar,High income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard Plus (SDDS...


In [184]:
# Load each country's capital and continent from the country-capitals file.
# NOTE(review): the rendered output of this cell shows Canada with continent
# "Central America"; the value is passed through from the raw file unchanged.
# Confirm whether it should be corrected upstream.
selected_column_list = ["CountryName", "CapitalName", "ContinentName"]

more_country_df = (
    pd.read_csv("../raw_data/country-capitals.csv")[selected_column_list]
    # Align the column names with the WB-HNP table ("Country Name" is the join key).
    .rename(
        columns={
            "CountryName": "Country Name",
            "CapitalName": "Capital",
            "ContinentName": "Continent",
        }
    )
    # Keep only the countries of interest.
    .loc[lambda frame: frame["Country Name"].isin(COUNTRY_LIST)]
)
more_country_df



Unnamed: 0,Country Name,Capital,Continent
45,Canada,Ottawa,Central America
71,Egypt,Cairo,Africa
103,India,New Delhi,Asia
104,Indonesia,Jakarta,Asia
105,Iran,Tehran,Asia
116,Kenya,Nairobi,Africa
142,Mexico,Mexico City,Central America
158,Nigeria,Abuja,Africa
227,United States,Washington,North America


In [185]:
# Merge the WB-HNP attributes with the capital/continent table.
# The join key is spelled out instead of relying on pandas' implicit
# "all shared columns" default, and `validate` makes pandas raise a
# MergeError if either side has more than one row for a country, which
# would otherwise silently duplicate rows in the dimension table.
country_df = country_df.merge(
    more_country_df,
    on="Country Name",
    how="inner",
    validate="one_to_one",
)

# Rename the columns to the names used in the output file.
country_df = country_df.rename(
    columns={
        "Country Name": "country_name",
        "Country Code": "country_code",
        "Region": "region",
        "Currency Unit": "currency",
        "Income Group": "income_group",
        "System of trade": "system_of_trade",
        "Government Accounting concept": "government_accounting_concept",
        "Balance of Payments Manual in use": "balance_of_payments_manual_in_use",
        "IMF data dissemination standard": "IMF_data_dissemination_standard",
        "Capital": "capital",
        "Continent": "continent",
    }
)

# Label the index as `country_key` (the merge result carries a fresh integer
# index), then put the columns into their final order.
country_df.rename_axis("country_key", inplace=True)
cols = [
    "country_name",
    "country_code",
    "two_alpha_code",
    "capital",
    "region",
    "continent",
    "currency",
    "income_group",
    "system_of_trade",
    "government_accounting_concept",
    "balance_of_payments_manual_in_use",
    "IMF_data_dissemination_standard",
]
country_df = country_df[cols]
country_df

country_key,country_name,country_code,two_alpha_code,capital,region,continent,currency,income_group,system_of_trade,government_accounting_concept,balance_of_payments_manual_in_use,IMF_data_dissemination_standard
0,Canada,CAN,CA,Ottawa,North America,Central America,Canadian dollar,High income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard Plus (SDDS...
1,Egypt,EGY,EG,Cairo,Middle East & North Africa,Africa,Egyptian pound,Lower middle income,General trade system,Budgetary central government,BPM5 (Converted into BPM6 by IMF),Special Data Dissemination Standard (SDDS)
2,Indonesia,IDN,ID,Jakarta,East Asia & Pacific,Asia,Indonesian rupiah,Lower middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
3,India,IND,IN,New Delhi,South Asia,Asia,Indian rupee,Lower middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
4,Iran,IRN,IR,Tehran,Middle East & North Africa,Asia,Iranian rial,Lower middle income,Special trade system,Consolidated central government,BPM5 (Converted into BPM6 by IMF),Enhanced General Data Dissemination System (e-...
5,Kenya,KEN,KE,Nairobi,Sub-Saharan Africa,Africa,Kenyan shilling,Lower middle income,General trade system,Consolidated central government,BPM6,Enhanced General Data Dissemination System (e-...
6,Mexico,MEX,MX,Mexico City,Latin America & Caribbean,Central America,Mexican peso,Upper middle income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard (SDDS)
7,Nigeria,NGA,NG,Abuja,Sub-Saharan Africa,Africa,Nigerian naira,Lower middle income,General trade system,Budgetary central government,BPM6,Enhanced General Data Dissemination System (e-...
8,United States,USA,US,Washington,North America,North America,U.S. dollar,High income,General trade system,Consolidated central government,BPM6,Special Data Dissemination Standard Plus (SDDS...


In [186]:
# Write the dimension table to disk; the index (named `country_key`) is
# written as the first column.
country_df.to_csv(path_or_buf=COUNTRY_DIMENSION_CSV, index=True)