# **RA1: data**

Dong Gyun Ko <br/>
last Updated: august 22, 2022 <br/>

In [1]:
!pip install finance-datareader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting finance-datareader
  Downloading finance_datareader-0.9.34-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.34 requests-file-1.5.1


In [2]:
import numpy as np
import pandas as pd
import datetime as dt
import pandas_datareader.data as web
from pandas_datareader import wb
import requests # python 3.6

import os
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
os.chdir('/content/gdrive/MyDrive/Colab Notebooks/RA1_data')

## **1. OECD data**

### **1.1. python code**

In [4]:
# import the raw data

var_list = ['oecd_gdp', 'oecd_ggdebt', 'oecd_ggexp', 'oecd_bop', 'oecd_gfcf', 'oecd_ginv', 'oecd_stir', 'oecd_cpi']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv')
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].sort_values(by=['isocode', 'year'])

In [5]:
# merge the raw data 

df_oecd = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['isocode', 'year'])

for var in var_list[2:]:

    df_oecd = pd.merge(df_oecd, globals()['df_{}'.format(var)], how='outer', on=['isocode', 'year'])

In [6]:
# generate temp data

for i in range(1914, 2022):

    df_oecd_isocode = pd.DataFrame({df_oecd['isocode'].unique()[i] for i in range(0,df_oecd['isocode'].unique().shape[0])}).rename(columns={0:'isocode'})
    df_oecd_isocode['year'] = i

    globals()['df_oecd_isocode_{}'.format(i)] = df_oecd_isocode

df_oecd_temp = globals()['df_oecd_isocode_{}'.format(1914)].append(globals()['df_oecd_isocode_{}'.format(1915)])

for i in range(1914+2, 2022):

    df_oecd_temp = df_oecd_temp.append(globals()['df_oecd_isocode_{}'.format(i)])

df_oecd_temp = df_oecd_temp.sort_values(by=['isocode', 'year']).reset_index(drop=True)

df_oecd_temp['value'] = 0

In [7]:
# merge the raw data and temp data

df_oecd = pd.merge(df_oecd, df_oecd_temp, how='outer', on=['isocode', 'year'])
df_oecd = df_oecd.drop(columns=['value']).sort_values(by=['isocode', 'year']).reset_index(drop=True)

In [8]:
# filter

# West Germany
cond0 = (df_oecd['isocode'] == 'DEW')

# countries group
cond1 = (df_oecd['isocode'] == 'EA')
cond2 = (df_oecd['isocode'] == 'EA19')
cond3 = (df_oecd['isocode'] == 'EU')
cond4 = (df_oecd['isocode'] == 'EU27_2020')
cond5 = (df_oecd['isocode'] == 'G-20')
cond6 = (df_oecd['isocode'] == 'G-7')
cond7 = (df_oecd['isocode'] == 'OAVG')
cond8 = (df_oecd['isocode'] == 'OECD')
cond9 = (df_oecd['isocode'] == 'OECDE')
cond10 = (df_oecd['isocode'] == 'EA19')

df_oecd = df_oecd.loc[~cond0 & ~cond1 & ~cond2 & ~cond3 & ~cond4 & ~cond5 & ~cond6 & ~cond7 & ~cond8 & ~cond9 & ~cond10]

In [9]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_oecd = pd.merge(df_wb_isocode, df_oecd, how='right', on=['isocode'])
df_oecd = df_oecd.sort_values(by=['isocode', 'year']).reset_index(drop=True)

In [10]:
# oecd dataset

df_oecd

Unnamed: 0,country,isocode,year,oecd_gdp,oecd_ggdebt,oecd_ggexp,oecd_bop,oecd_gfcf,oecd_ginv,oecd_stir,oecd_cpi
0,Albania,ALB,1914,,,,,,,,
1,Albania,ALB,1915,,,,,,,,
2,Albania,ALB,1916,,,,,,,,
3,Albania,ALB,1917,,,,,,,,
4,Albania,ALB,1918,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
6691,Zambia,ZMB,2017,58735.191,,,,22794.197,,,
6692,Zambia,ZMB,2018,62564.660,,,,21966.533,,,
6693,Zambia,ZMB,2019,64601.642,,,,23099.363,,,
6694,Zambia,ZMB,2020,,,,,,,,


### **1.2. variables summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|gross domestic product|oecd_gdp|million US dollars|annual|GDP|TOT|MLN_USD|https://data.oecd.org/gdp/gross-domestic-product-gdp.htm|
|2|general government debt|oecd_ggdebt|% of GDP|annual|GGDEBT|TOT|PC_GDP|https://data.oecd.org/gga/general-government-debt.htm|
|3|general government spending|oecd_ggexp|% of GDP|annual|GGEXP|TOT|PC_GDP|https://data.oecd.org/gga/general-government-spending.htm|
|4|current account balance|oecd_bop|% of GDP|annual|BOP|TOT|PC_GDP|https://data.oecd.org/trade/current-account-balance.htm|
|5|gross fixed capital formation|oecd_gfcf|million US dollars|annual|GFCF|TOT|MLN_USD|https://data.oecd.org/gdp/investment-gfcf.htm|
|6|investment by sector(government)|oecd_ginv|% of GFCF|annual|GFCFSECTOR|GG|PC_GFGF|https://data.oecd.org/gdp/investment-by-sector.htm|
|7|short-term interest rates|oecd_stir|% per annum|annual|STINT|TOT|PC_PA|https://data.oecd.org/interest/short-term-interest-rates.htm|
|8|consumer price index|oecd_cpi|2015=100|annual|CPI|TOT|IDX2015|https://data.oecd.org/price/inflation-cpi.htm|

### **1.3. data summary**

In [11]:
# number of countries & sample period

print('number of countries:', df_oecd['isocode'].unique().shape[0])
print('sample period: from 1914 to 2021, annual')

number of countries: 62
sample period: from 1914 to 2021, annual


In [12]:
# non-null count by each variables

df_oecd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6696 entries, 0 to 6695
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   country      6696 non-null   object 
 1   isocode      6696 non-null   object 
 2   year         6696 non-null   int64  
 3   oecd_gdp     2483 non-null   float64
 4   oecd_ggdebt  909 non-null    float64
 5   oecd_ggexp   864 non-null    float64
 6   oecd_bop     1292 non-null   float64
 7   oecd_gfcf    2423 non-null   float64
 8   oecd_ginv    1165 non-null   float64
 9   oecd_stir    1375 non-null   float64
 10  oecd_cpi     2555 non-null   float64
dtypes: float64(8), int64(1), object(2)
memory usage: 575.6+ KB


In [13]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_oecd.describe(), 2)

Unnamed: 0,year,oecd_gdp,oecd_ggdebt,oecd_ggexp,oecd_bop,oecd_gfcf,oecd_ginv,oecd_stir,oecd_cpi
count,6696.0,2483.0,909.0,864.0,1292.0,2423.0,1165.0,1375.0,2555.0
mean,1967.5,867966.38,71.28,43.22,-0.09,218639.25,16.14,6.1,51.1
std,31.18,2215186.64,42.35,8.49,5.12,680177.42,4.36,6.04,38.2
min,1914.0,786.78,6.65,17.9,-22.67,225.73,2.58,-0.82,0.0
25%,1940.75,61143.61,43.25,37.92,-3.09,14062.83,13.08,1.75,12.0
50%,1967.5,208937.27,62.18,43.65,-0.55,47827.37,16.04,4.63,52.77
75%,1994.25,694176.59,92.84,49.3,2.66,147838.01,18.72,8.68,86.26
max,2021.0,24313684.92,259.46,64.89,26.28,10305337.02,38.52,45.48,215.52


## **2. BIS data**

### **2.1. python code** <br/>

* **reer** <br/>

In [14]:
# import the raw data

df_bis_reer = pd.read_csv('df_bis_reer.csv').set_index('isocode').transpose()
df_bis_reer = pd.DataFrame(df_bis_reer.stack(level='isocode')).reset_index()
df_bis_reer = df_bis_reer.rename(columns={'level_0':'year', 0:'bis_reer'})
df_bis_reer = df_bis_reer[['isocode', 'year', 'bis_reer']].sort_values(by=['isocode', 'year']).reset_index(drop=True)

df_bis = df_bis_reer

In [15]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_bis = pd.merge(df_wb_isocode, df_bis, how='right', on=['isocode'])
df_bis = df_bis.sort_values(by=['isocode', 'year']).reset_index(drop=True)

In [16]:
# filter

# Taiwan
cond0 = (df_bis['isocode'] == 'TWN')

df_bis = df_bis.loc[~cond0]

In [17]:
# set the datetime

df_bis['year'] = pd.to_datetime(df_bis['year'])
df_bis['year'] = df_bis['year'].dt.year

In [18]:
# bis dataset

df_bis = df_bis.sort_values(by=['isocode', 'year']).reset_index(drop=True)

df_bis

Unnamed: 0,country,isocode,year,bis_reer
0,United Arab Emirates,ARE,1994,74.66
1,United Arab Emirates,ARE,1995,71.24
2,United Arab Emirates,ARE,1996,73.91
3,United Arab Emirates,ARE,1997,79.67
4,United Arab Emirates,ARE,1998,87.11
...,...,...,...,...
1619,South Africa,ZAF,2017,79.53
1620,South Africa,ZAF,2018,80.78
1621,South Africa,ZAF,2019,78.19
1622,South Africa,ZAF,2020,70.36


* **debtsec** <br/>

In [19]:
# import the raw data

df_bis_debtsec = pd.read_csv('df_bis_debtsec.csv').set_index('isocode').transpose()
df_bis_debtsec = pd.DataFrame(df_bis_debtsec.stack(level='isocode')).reset_index()
df_bis_debtsec = df_bis_debtsec.rename(columns={'level_0':'quarter', 0:'bis_debtsec'})
df_bis_debtsec = df_bis_debtsec[['isocode', 'quarter', 'bis_debtsec']].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [20]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_bi_debtsec = pd.merge(df_wb_isocode, df_bis_debtsec, how='right', on=['isocode'])
df_bis_debtsec = df_bis_debtsec.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [21]:
# bis debtsec dataset

df_bis_debtsec

Unnamed: 0,isocode,quarter,bis_debtsec
0,ARG,30/06/1995,58171.00
1,ARG,30/06/1996,68524.00
2,ARG,30/06/1997,73861.00
3,ARG,30/06/1998,78022.00
4,ARG,30/06/1999,85688.00
...,...,...,...
4572,USA,31/12/2017,17599034.00
4573,USA,31/12/2018,18689387.00
4574,USA,31/12/2019,19748045.00
4575,USA,31/12/2020,24154641.00


### **2.2. variables summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|real effective exchange rate, broad(60 economies) indicies|bis_reer|2010=100|annual|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|2|total debt securities (general government)|bis_debtsec|billions of USD|quarter|DEBT_SEC2|-|-|https://stats.bis.org/statx/srs/table/c1|


### **2.3. data summary**

* **reer** <br/>

In [22]:
# number of countries & sample period

print('number of countries:', df_bis['isocode'].unique().shape[0])
print('sample period: from 1994 to 2021, annual')

number of countries: 58
sample period: from 1994 to 2021, annual


In [23]:
# non-null count by each variables

df_bis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1624 entries, 0 to 1623
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   country   1624 non-null   object 
 1   isocode   1624 non-null   object 
 2   year      1624 non-null   int64  
 3   bis_reer  1624 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 50.9+ KB


In [24]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_bis.describe(), 2)

Unnamed: 0,year,bis_reer
count,1624.0,1624.0
mean,2007.5,97.29
std,8.08,17.79
min,1994.0,43.41
25%,2000.75,90.54
50%,2007.5,98.11
75%,2014.25,103.19
max,2021.0,276.38


* **debtsec** <br/>

In [25]:
# number of countries & sample period

print('number of countries:', df_bis_debtsec['isocode'].unique().shape[0])
print('sample period: from 1952 q1 to 2021 q4, quarter')

number of countries: 44
sample period: from 1952 q1 to 2021 q4, quarter


In [26]:
# non-null count by each variables

df_bis_debtsec.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4577 entries, 0 to 4576
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   isocode      4577 non-null   object 
 1   quarter      4577 non-null   object 
 2   bis_debtsec  4577 non-null   float64
dtypes: float64(1), object(2)
memory usage: 107.4+ KB


In [27]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_bis_debtsec.describe(), 2)

Unnamed: 0,bis_debtsec
count,4577.0
mean,766820.24
std,2220776.03
min,0.0
25%,39004.0
50%,127835.0
75%,406924.0
max,25826104.0


## **3. world bank data**

### **3.1. python code** <br/>

In [28]:
# import the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8')

wb_isocode_list = []

for i in range(0,df_wb_isocode['alpha-3'].unique().shape[0]):

    alpha_3 = str(df_wb_isocode['alpha-3'].unique()[i])
    wb_isocode_list.append(alpha_3)

In [29]:
# debug HTTPConnectionPool error

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

retries = Retry(connect=5, read=3, redirect=3)
http_session = requests.Session()
http_session.mount('https://<yourdomain>.slack.com', HTTPAdapter(max_retries=retries))

In [30]:
# import the raw data

indicator_list = ['NY.GDP.MKTP.CD',
                  'NY.GDP.MKTP.KD',
                  'GC.DOD.TOTL.GD.ZS',
                  'NE.CON.GOVT.ZS',
                  'BN.CAB.XOKA.GD.ZS',
                  'NE.GDI.FTOT.CD',
                  'NE.GDI.TOTL.ZS',
                  'FP.CPI.TOTL',
                  'NY.GDP.DEFL.KD.ZG',
                  'PX.REX.REER']

var_list = ['wb_ngdp', 'wb_rgpd', 'wb_cgdebt', 'wb_ggexp', 'wb_bop', 'wb_gfcf', 'wb_gcf', 'wb_cpi', 'wb_gdpd', 'wb_reer']

for i in range(0,len(indicator_list)):

    globals()['df_{}'.format(var_list[i])] = wb.download(indicator=indicator_list[i], country=wb_isocode_list, start=1960, end=2021)
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].reset_index()
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].rename(columns={'country':'country', 'year':'year', indicator_list[i]:var_list[i]})
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])][['country', 'year', var_list[i]]].sort_values(by=['country', 'year']).reset_index(drop=True)
    
    # merge the wb isocode data
    globals()['df_{}'.format(var_list[i])] = pd.merge(globals()['df_{}'.format(var_list[i])], df_wb_isocode, how='outer', on=['country']).rename(columns={'alpha-3':'isocode'})
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])][['country', 'isocode', 'year',  var_list[i]]]



In [31]:
# merge the raw data 

df_wb = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['country', 'isocode', 'year'])

for var in var_list[2:]:

    df_wb = pd.merge(df_wb, globals()['df_{}'.format(var)], how='outer', on=['country', 'isocode', 'year'])

In [32]:
# set the datetime

df_wb['year'] = pd.to_datetime(df_wb['year'])
df_wb['year'] = df_wb['year'].dt.year

In [33]:
# world bank dataset

df_wb = df_wb.sort_values(by=['isocode', 'year']).reset_index(drop=True)

df_wb

Unnamed: 0,country,isocode,year,wb_ngdp,wb_rgpd,wb_cgdebt,wb_ggexp,wb_bop,wb_gfcf,wb_gcf,wb_cpi,wb_gdpd,wb_reer
0,Aruba,ABW,1960,,,,,,,,,,
1,Aruba,ABW,1961,,,,,,,,,,
2,Aruba,ABW,1962,,,,,,,,,,
3,Aruba,ABW,1963,,,,,,,,,,
4,Aruba,ABW,1964,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13449,Zimbabwe,ZWE,2017,17584890936.65,21061283685.95,,21.65,-1.54,1699377692.68,9.70,105.51,2.44,
13450,Zimbabwe,ZWE,2018,18115543790.79,22077324353.29,,11.92,-7.62,1751060497.52,9.69,116.71,59.80,
13451,Zimbabwe,ZWE,2019,19284289739.05,20720841373.13,,6.64,4.77,1427892358.82,7.41,414.68,440.83,
13452,Zimbabwe,ZWE,2020,18051170798.94,19426048165.88,,7.76,6.07,1344952699.72,7.45,2725.31,558.56,


### **3.2. variables summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|GDP (current USD)|wb_ngdp|current USD|annual|NY.GDP.MKTP.CD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.CD&country=|
|2|GDP (constant 2015 USD)|wb_rgdp|constant 2015 USD|annual|NY.GDP.MKTP.KD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.KD&country=|
|3|central government debt, total|wb_gdebt|% of GDP|annual|GC.DOD.TOTL.GD.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=GC.DOD.TOTL.GD.ZS&country=|
|4|general government final consumption expenditure|wb_ggexp|% of GDP|annual|NE.CON.GOVT.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.CON.GOVT.ZS&country=|
|5|current account balance|wb_bop|% of GDP|annual|BN.CAB.XOKA.GD.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=BN.CAB.XOKA.GD.ZS&country=|
|6|gross fixed capital formation (current USD)|wb_gfcf|current USD|annual|NE.GDI.FTOT.CD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.GDI.FTOT.CD&country=|
|7|gross capital formation (% of GDP)|wb_gcf|% of GDP|annual|NE.GDI.TOTL.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.GDI.TOTL.ZS&country=|
|8|consumer price index (2010=100)|wb_cpi|2010=100|annual|FP.CPI.TOTL|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=FP.CPI.TOTL&country=|
|9|gdp deflator|wb_gdpd|annual %|annual|NY.GDP.DEFL.KD.ZG|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.DEFL.KD.ZG&country=|
|10|real effective exchange rate index (2010=100)|wb_reer|2010=100|annual|PX.REX.REER|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=PX.REX.REER&country=|

### **3.3. data summary**

In [34]:
# number of countries & sample period

print('number of countries:', df_wb['isocode'].unique().shape[0])
print('sample period: from 1960 to 2021, annual')

number of countries: 217
sample period: from 1960 to 2021, annual


In [35]:
# non-null count by each variables

df_wb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13454 entries, 0 to 13453
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    13454 non-null  object 
 1   isocode    13454 non-null  object 
 2   year       13454 non-null  int64  
 3   wb_ngdp    10336 non-null  float64
 4   wb_rgpd    9791 non-null   float64
 5   wb_cgdebt  1270 non-null   float64
 6   wb_ggexp   8143 non-null   float64
 7   wb_bop     6864 non-null   float64
 8   wb_gfcf    7587 non-null   float64
 9   wb_gcf     8123 non-null   float64
 10  wb_cpi     8463 non-null   float64
 11  wb_gdpd    9784 non-null   float64
 12  wb_reer    3779 non-null   float64
dtypes: float64(10), int64(1), object(2)
memory usage: 1.3+ MB


In [36]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_wb.describe(), 2)

Unnamed: 0,year,wb_ngdp,wb_rgpd,wb_cgdebt,wb_ggexp,wb_bop,wb_gfcf,wb_gcf,wb_cpi,wb_gdpd,wb_reer
count,13454.0,10336.0,9791.0,1270.0,8143.0,6864.0,7587.0,8123.0,8463.0,9784.0,3779.0
mean,1990.5,184192121208.18,246060250890.96,56.48,16.28,-2.93,58498628436.55,23.14,75.96,29.58,115.02
std,17.9,991080605921.11,1123327823642.51,81.15,8.17,13.07,288038289788.06,8.92,435.0,407.0,105.61
min,1960.0,8824447.74,21561952.31,0.02,0.0,-240.52,-20612328.07,-13.41,0.0,-98.7,18.73
25%,1975.0,1460824271.16,3652104951.13,29.08,11.2,-7.12,695080308.53,17.83,16.46,1.92,93.33
50%,1990.5,7824737791.8,15610486135.64,47.35,15.31,-2.89,3386606655.81,22.48,61.54,5.06,100.56
75%,2006.0,52326788295.34,100386043670.39,68.66,19.42,0.98,22026083272.61,27.49,100.0,11.04,112.92
max,2021.0,22996100000000.0,20338578000000.0,2002.51,147.72,311.76,6240217076281.01,89.38,22570.71,26765.86,3053.7


## **4. IMF data**

## **5. pooled data**

### **5.1. python code** <br/>

In [37]:
# generate the pooled data

df_oecd_bis = pd.merge(df_oecd, df_bis, how='outer', on=['country', 'isocode', 'year'])
df_pooled = pd.merge(df_oecd_bis, df_wb, how='outer', on=['country', 'isocode', 'year'])
df_pooled = df_pooled.sort_values(by=['isocode', 'year']).reset_index(drop=True)

In [38]:
# generate the temp data

df_temp = pd.read_csv('df_temp.csv').set_index(['country', 'alpha-3']).transpose()
df_temp = pd.DataFrame(df_temp.stack(level=['country', 'alpha-3'])).reset_index()
df_temp = df_temp.rename(columns={'level_0':'year', 'alpha-3':'isocode', 0:'value'})
df_temp = df_temp[['country', 'isocode', 'year', 'value']].sort_values(by=['isocode', 'year']).reset_index(drop=True)
df_temp['year'] = pd.to_datetime(df_temp['year']).dt.year

In [39]:
# merge the pooled data with the temp data

df_pooled = pd.merge(df_pooled, df_temp, how='outer', on=['country', 'isocode', 'year']).drop(columns=['value'])

In [40]:
# pooled dataset

df_pooled = df_pooled.sort_values(by=['isocode', 'year']).reset_index(drop=True)

df_pooled

Unnamed: 0,country,isocode,year,oecd_gdp,oecd_ggdebt,oecd_ggexp,oecd_bop,oecd_gfcf,oecd_ginv,oecd_stir,...,wb_ngdp,wb_rgpd,wb_cgdebt,wb_ggexp,wb_bop,wb_gfcf,wb_gcf,wb_cpi,wb_gdpd,wb_reer
0,Aruba,ABW,1914,,,,,,,,...,,,,,,,,,,
1,Aruba,ABW,1915,,,,,,,,...,,,,,,,,,,
2,Aruba,ABW,1916,,,,,,,,...,,,,,,,,,,
3,Aruba,ABW,1917,,,,,,,,...,,,,,,,,,,
4,Aruba,ABW,1918,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23431,Zimbabwe,ZWE,2017,,,,,,,,...,17584890936.65,21061283685.95,,21.65,-1.54,1699377692.68,9.70,105.51,2.44,
23432,Zimbabwe,ZWE,2018,,,,,,,,...,18115543790.79,22077324353.29,,11.92,-7.62,1751060497.52,9.69,116.71,59.80,
23433,Zimbabwe,ZWE,2019,,,,,,,,...,19284289739.05,20720841373.13,,6.64,4.77,1427892358.82,7.41,414.68,440.83,
23434,Zimbabwe,ZWE,2020,,,,,,,,...,18051170798.94,19426048165.88,,7.76,6.07,1344952699.72,7.45,2725.31,558.56,


### **5.2. variables summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|$OECD$|||||||||
|1|gross domestic product|oecd_gdp|million US dollars|annual|GDP|TOT|MLN_USD|https://data.oecd.org/gdp/gross-domestic-product-gdp.htm|
|2|general government debt|oecd_ggdebt|% of GDP|annual|GGDEBT|TOT|PC_GDP|https://data.oecd.org/gga/general-government-debt.htm|
|3|general government spending|oecd_ggexp|% of GDP|annual|GGEXP|TOT|PC_GDP|https://data.oecd.org/gga/general-government-spending.htm|
|4|current account balance|oecd_bop|% of GDP|annual|BOP|TOT|PC_GDP|https://data.oecd.org/trade/current-account-balance.htm|
|5|gross fixed capital formation|oecd_gfcf|million US dollars|annual|GFCF|TOT|MLN_USD|https://data.oecd.org/gdp/investment-gfcf.htm|
|6|investment by sector(government)|oecd_ginv|% of GFCF|annual|GFCFSECTOR|GG|PC_GFGF|https://data.oecd.org/gdp/investment-by-sector.htm|
|7|short-term interest rates|oecd_stir|% per annum|annual|STINT|TOT|PC_PA|https://data.oecd.org/interest/short-term-interest-rates.htm|
|8|consumer price index|oecd_cpi|2015=100|annual|CPI|TOT|IDX2015|https://data.oecd.org/price/inflation-cpi.htm|
|$BIS$|||||||||
|1|real effective exchange rate, broad(60 economies) indicies|bis_reer|2010=100|annual|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|$World$ $Bank$|||||||||
|1|GDP (current USD)|wb_ngdp|current USD|annual|NY.GDP.MKTP.CD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.CD&country=|
|2|GDP (constant 2015 USD)|wb_rgdp|constant 2015 USD|annual|NY.GDP.MKTP.KD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.KD&country=|
|3|central government debt, total|wb_gdebt|% of GDP|annual|GC.DOD.TOTL.GD.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=GC.DOD.TOTL.GD.ZS&country=|
|4|general government final consumption expenditure|wb_ggexp|% of GDP|annual|NE.CON.GOVT.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.CON.GOVT.ZS&country=|
|5|current account balance|wb_bop|% of GDP|annual|BN.CAB.XOKA.GD.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=BN.CAB.XOKA.GD.ZS&country=|
|6|gross fixed capital formation (current USD)|wb_gfcf|current USD|annual|NE.GDI.FTOT.CD|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.GDI.FTOT.CD&country=|
|7|gross capital formation (% of GDP)|wb_gcf|% of GDP|annual|NE.GDI.TOTL.ZS|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NE.GDI.TOTL.ZS&country=|
|8|consumer price index (2010=100)|wb_cpi|2010=100|annual|FP.CPI.TOTL|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=FP.CPI.TOTL&country=|
|9|gdp deflator|wb_gdpd|annual %|annual|NY.GDP.DEFL.KD.ZG|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.DEFL.KD.ZG&country=|
|10|real effective exchange rate index (2010=100)|wb_reer|2010=100|annual|PX.REX.REER|-|-|https://databank.worldbank.org/reports.aspx?source=2&series=PX.REX.REER&country=|

### **5.3. data summary**

In [41]:
# number of countries & sample period

print('number of countries:', df_pooled['isocode'].unique().shape[0])
print('sample period: from 1914 to 2021, annual')

number of countries: 217
sample period: from 1914 to 2021, annual


In [42]:
# non-null count by each variables

df_pooled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23436 entries, 0 to 23435
Data columns (total 22 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   country      23436 non-null  object 
 1   isocode      23436 non-null  object 
 2   year         23436 non-null  int64  
 3   oecd_gdp     2483 non-null   float64
 4   oecd_ggdebt  909 non-null    float64
 5   oecd_ggexp   864 non-null    float64
 6   oecd_bop     1292 non-null   float64
 7   oecd_gfcf    2423 non-null   float64
 8   oecd_ginv    1165 non-null   float64
 9   oecd_stir    1375 non-null   float64
 10  oecd_cpi     2555 non-null   float64
 11  bis_reer     1624 non-null   float64
 12  wb_ngdp      10336 non-null  float64
 13  wb_rgpd      9791 non-null   float64
 14  wb_cgdebt    1270 non-null   float64
 15  wb_ggexp     8143 non-null   float64
 16  wb_bop       6864 non-null   float64
 17  wb_gfcf      7587 non-null   float64
 18  wb_gcf       8123 non-null   float64
 19  wb_c

In [43]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_pooled.describe(), 2)

Unnamed: 0,year,oecd_gdp,oecd_ggdebt,oecd_ggexp,oecd_bop,oecd_gfcf,oecd_ginv,oecd_stir,oecd_cpi,bis_reer,wb_ngdp,wb_rgpd,wb_cgdebt,wb_ggexp,wb_bop,wb_gfcf,wb_gcf,wb_cpi,wb_gdpd,wb_reer
count,23436.0,2483.0,909.0,864.0,1292.0,2423.0,1165.0,1375.0,2555.0,1624.0,10336.0,9791.0,1270.0,8143.0,6864.0,7587.0,8123.0,8463.0,9784.0,3779.0
mean,1967.5,867966.38,71.28,43.22,-0.09,218639.25,16.14,6.1,51.1,97.29,184192121208.18,246060250890.96,56.48,16.28,-2.93,58498628436.55,23.14,75.96,29.58,115.02
std,31.18,2215186.64,42.35,8.49,5.12,680177.42,4.36,6.04,38.2,17.79,991080605921.11,1123327823642.51,81.15,8.17,13.07,288038289788.06,8.92,435.0,407.0,105.61
min,1914.0,786.78,6.65,17.9,-22.67,225.73,2.58,-0.82,0.0,43.41,8824447.74,21561952.31,0.02,0.0,-240.52,-20612328.07,-13.41,0.0,-98.7,18.73
25%,1940.75,61143.61,43.25,37.92,-3.09,14062.83,13.08,1.75,12.0,90.54,1460824271.16,3652104951.13,29.08,11.2,-7.12,695080308.53,17.83,16.46,1.92,93.33
50%,1967.5,208937.27,62.18,43.65,-0.55,47827.37,16.04,4.63,52.77,98.11,7824737791.8,15610486135.64,47.35,15.31,-2.89,3386606655.81,22.48,61.54,5.06,100.56
75%,1994.25,694176.59,92.84,49.3,2.66,147838.01,18.72,8.68,86.26,103.19,52326788295.34,100386043670.39,68.66,19.42,0.98,22026083272.61,27.49,100.0,11.04,112.92
max,2021.0,24313684.92,259.46,64.89,26.28,10305337.02,38.52,45.48,215.52,276.38,22996100000000.0,20338578000000.0,2002.51,147.72,311.76,6240217076281.01,89.38,22570.71,26765.86,3053.7


## **6. export the data into excel file(.xlsx)**

### **6.1. pooled data**

In [44]:
# pooled data

df_pooled.to_excel(excel_writer='df_pooled.xlsx')

### **6.2. debtsec data**

In [46]:
# debtsec data

df_bis_debtsec.to_excel(excel_writer='df_debtsec.xlsx')

## **7. references**

* https://data.oecd.org/ <br/>
* https://stats.bis.org/#ppq=XRU_D_24D;pv=1,2~4~1,0,0~both <br/>
* https://databank.worldbank.org/source/world-development-indicators/preview/on <br/>
* https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm <br/>

