In [25]:
# Import libraries
import numpy as np
import pandas as pd

**Data Upload**

In [26]:
# Load contraints
country_list = pd.read_excel('constraints.xlsx', sheet_name = 'country-code')
years = pd.read_excel('constraints.xlsx', sheet_name = 'time-period')

# Load datasets
gdp_data = pd.read_csv('API_NY.GDP.PCAP.PP.CD_DS2_en_csv_v2_4150885.csv')
acc_balance = pd.read_csv('API_BN.CAB.XOKA.GD.ZS_DS2_en_csv_v2_4150849.csv')

**GDP Per Capita Pre-processing**

In [27]:
# Filter Country List
gdp_data['Country Code'] = gdp_data['Country Code'].apply(lambda x: 'EUZ' if x == 'EMU' else x)
gdp_data_country_filtered = pd.merge(left = country_list['iso_a3'], 
                                     right = gdp_data, 
                                     how = 'left', 
                                     left_on = 'iso_a3', 
                                     right_on = 'Country Code')
# Drop Columns
drop_cols = ['Indicator Name', 'Indicator Code']
gdp_data_country_filtered.drop(drop_cols, axis = 1, inplace = True)

# Filter Timeperiod
keep_cols = ['Country Name', 'Country Code', 'iso_a3']
keep_cols.extend(list(years.year.astype(str)))
gdp_data_year_filtered = gdp_data_country_filtered[keep_cols]

# Wide to Long Format
gdp_post_processed = pd.melt(frame = gdp_data_year_filtered, 
                             id_vars = ['Country Name', 'Country Code', 'iso_a3'], 
                             var_name = 'Year', 
                             value_name = 'GDP per capita')

**Current Account Balance Pre-processing**

In [28]:
# Filter Country List
acc_balance['Country Code'] = acc_balance['Country Code'].apply(lambda x: 'EUZ' if x == 'EMU' else x)
acc_balance_country_filtered = pd.merge(left = country_list['iso_a3'], 
                                        right = acc_balance, 
                                        how = 'left', 
                                        left_on = 'iso_a3', 
                                        right_on = 'Country Code')
# Drop Columns
drop_cols = ['Indicator Name', 'Indicator Code']
acc_balance_country_filtered.drop(drop_cols, axis = 1, inplace = True)

# Filter Timeperiod
keep_cols = ['Country Name', 'Country Code', 'iso_a3']
keep_cols.extend(list(years.year.astype(str)))
acc_balance_year_filtered = acc_balance_country_filtered[keep_cols]

# Wide to Long Format
acc_balance_post_processed = pd.melt(frame = gdp_data_year_filtered, 
                                     id_vars = ['Country Name', 'Country Code', 'iso_a3'], 
                                     var_name = 'Year', 
                                     value_name = 'Curr Acc Balance')

**Merge data**

In [29]:
financial_data = pd.merge(left = gdp_post_processed,
                          right = acc_balance_post_processed[['iso_a3', 'Year', 'Curr Acc Balance']],
                          how = 'inner', 
                          on = ['iso_a3', 'Year'])
financial_data.head()

Unnamed: 0,Country Name,Country Code,iso_a3,Year,GDP per capita,Curr Acc Balance
0,Australia,AUS,AUS,2000,26343.0767,26343.0767
1,Brazil,BRA,BRA,2000,9060.788667,9060.788667
2,United Kingdom,GBR,GBR,2000,26472.19371,26472.19371
3,Canada,CAN,CAN,2000,29362.08416,29362.08416
4,Chile,CHL,CHL,2000,9538.075278,9538.075278
