# **RA1: quarterly pooled data**

Dong Gyun Ko <br/>
last updated: september 18, 2022 <br/>

In [67]:
!pip install finance-datareader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [68]:
# import libraries

import numpy as np
import pandas as pd
import datetime as dt
import pandas_datareader as pdr
import pandas_datareader.data as web
from pandas_datareader import wb
from pandas_datareader.data import DataReader
import requests # python 3.6

import os
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [69]:
# set the working directory

os.chdir('/content/gdrive/MyDrive/Colab Notebooks/RA1_data')

In [70]:
# set the pandas display option

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# pd.options.display.max_rows = 100
# pd.options.display.max_columns = 100

## **1. OECD**

### **1.1. python code** <br/>

In [71]:
# debug HTTPConnectionPool error

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

retries = Retry(connect=5, read=3, redirect=3)
http_session = requests.Session()
http_session.mount('https://<yourdomain>.slack.com', HTTPAdapter(max_retries=retries))

In [72]:
# import the raw data object (EO: economic outlook)

start_time = dt.datetime(1960, 1, 1)
end_time = dt.datetime(2022, 4, 1)

df_oecd_eo_q_raw = pdr.oecd.OECDReader('EO', start=start_time, end=end_time, retry_count=3, pause=0.1, timeout=30, session=None, freq=None)
df_oecd_eo_q_raw = df_oecd_eo_q_raw.read()

df_oecd_eo_q_raw.index = pd.to_datetime(df_oecd_eo_q_raw.index)

print(type(df_oecd_eo_q_raw.index))
print(type(df_oecd_eo_q_raw.columns))

<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
<class 'pandas.core.indexes.multi.MultiIndex'>


In [73]:
# import the raw data

var_name_list = ['Government final consumption expenditure, volume',
                 'Government gross fixed capital formation, volume',
                 'Gross domestic product, volume, market prices',
                 'Gross domestic product, volume in USD, constant exchange rates',
                 'Current account balance as a percentage of GDP',
                 'Short-term interest rate']

var_list = ['oecd_q_rggexp', 'oecd_q_gginv', 'oecd_q_rgdp_dcu', 'oecd_q_rgdp_usd', 'oecd_q_ca', 'oecd_q_stir']

for i in range(0, len(var_list)):

    globals()['df_{}'.format(var_list[i])] = df_oecd_eo_q_raw.xs(('Quarterly', var_name_list[i]), 
                                                                 level=('Frequency', 'Variable'), 
                                                                 axis=1)

    globals()['df_{}'.format(var_list[i])] = pd.DataFrame(globals()['df_{}'.format(var_list[i])].stack())
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].reset_index().rename(columns={'Country':'country', 'Time':'quarter', 0:var_list[i]})
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])][['country', 'quarter', var_list[i]]]
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].sort_values(by=['country', 'quarter'])
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].reset_index().drop(columns=['index'])

In [74]:
# merge the raw data 

df_oecd_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['country', 'quarter'])

for var in var_list[2:]:

    df_oecd_q = pd.merge(df_oecd_q, globals()['df_{}'.format(var)], how='outer', on=['country', 'quarter'])

In [75]:
# replace the country

# Korea, Rep.
df_oecd_q = df_oecd_q.replace({'country':'Korea'}, 'Korea, Rep.')

# China
df_oecd_q = df_oecd_q.replace({'country':"China (People's Republic of)"}, 'China')

In [76]:
# filter

# countries group
cond1 = (df_oecd_q['country'] == 'Euro area (17 countries)')
cond2 = (df_oecd_q['country'] == 'OECD - Total')

df_oecd_q = df_oecd_q.loc[~cond1 & ~cond2]

In [77]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_oecd_q = pd.merge(df_wb_isocode, df_oecd_q, how='right', on=['country'])
df_oecd_q = df_oecd_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_oecd_q['quarter'] = pd.to_datetime(df_oecd_q['quarter'])
df_oecd_q['quarter'] = df_oecd_q['quarter'].dt.to_period('Q')

In [78]:
# oecd quarterly dataset

df_oecd_q

Unnamed: 0,country,isocode,quarter,oecd_q_rggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir
0,Australia,AUS,1976Q1,43967071452.75,7237854162.98,286482058161.78,194434328706.11,0.58,
1,Australia,AUS,1976Q2,43504819741.98,6735119073.18,291983390482.06,198168062900.65,-0.01,
2,Australia,AUS,1976Q3,43852448262.02,7263380222.04,294351054788.99,199774988036.12,-2.83,
3,Australia,AUS,1976Q4,44871660543.24,6835646541.80,299551496567.17,203304508916.42,-1.26,
4,Australia,AUS,1977Q1,46293421844.68,7111847308.83,306245590359.03,207847765974.36,-1.16,
...,...,...,...,...,...,...,...,...,...
5336,South Africa,ZAF,2021Q2,,,,,,8.60
5337,South Africa,ZAF,2021Q3,,,,,,9.16
5338,South Africa,ZAF,2021Q4,,,,,,10.16
5339,South Africa,ZAF,2022Q1,,,,,,10.26


In [79]:
# export the oecd quarterly data

# df_oecd_q.to_excel(excel_writer='df_oecd_q.xlsx')

### **1.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|government final consumption expenditure, volume|oecd_q_rggexp|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|2|goverment gross fixed capital formation, volume|oecd_q_gginv|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|3|gross domestic product, volume, market prices|oecd_q_rgdp_dcu|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|4|gross domestic product, volume in USD, constant exchange rates|oecd_q_rgdp_usd|USD, 2015|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|5|current account balance as a percentage of GDP|oecd_q_ca|% of GDP|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|6|short-term interest rate|oecd_q_stir|% per annum|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|

### **1.3. data summary**

In [80]:
# number of countries & sample period

print('number of countries:', df_oecd_q['isocode'].unique().shape[0])
print('start:', np.min(df_oecd_q['quarter'].unique()))
print('end:', np.max(df_oecd_q['quarter'].unique()))

number of countries: 43
start: 1976Q1
end: 2022Q2


In [81]:
# non-null count by each variables

df_oecd_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5341 entries, 0 to 5340
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          5341 non-null   object       
 1   isocode          5341 non-null   object       
 2   quarter          5341 non-null   period[Q-DEC]
 3   oecd_q_rggexp    4596 non-null   float64      
 4   oecd_q_gginv     2574 non-null   float64      
 5   oecd_q_rgdp_dcu  4712 non-null   float64      
 6   oecd_q_rgdp_usd  4712 non-null   float64      
 7   oecd_q_ca        3710 non-null   float64      
 8   oecd_q_stir      4324 non-null   float64      
dtypes: float64(6), object(2), period[Q-DEC](1)
memory usage: 375.7+ KB


In [82]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_oecd_q.describe(), 2)

Unnamed: 0,oecd_q_rggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir
count,4596.0,2574.0,4712.0,4712.0,3710.0,4324.0
mean,6242188094073.38,4110268804224.7,43267557294382.57,877952832737.61,-0.84,8.69
std,20778616834903.2,12179893287988.37,143658966097430.34,1950298072701.03,4.48,6.33
min,1365709967.41,-44992000000.0,8075649110.82,2478559424.72,-32.73,0.05
25%,38324020752.54,13771695535.06,200299275061.1,106562043365.17,-3.17,4.48
50%,170953346178.88,42408000000.0,820200027039.0,242521326068.86,-0.73,7.09
75%,477074229248.64,109569253028.5,2335227000000.0,759272947642.64,1.57,11.45
max,180434000000000.0,73184000000000.0,1332547200000000.0,16533342932403.3,18.93,57.84


## **2. BIS**

### **2.1. python code** <br/>

In [83]:
# import the raw data

var_list = ['bis_q_ggdebt_m', 'bis_q_ggdebt_n', 'bis_q_cpi', 'bis_q_cbpr', 'bis_q_reer_b', 'bis_q_reer_n']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv').set_index('isocode').transpose()
    globals()['df_{}'.format(var)] = pd.DataFrame(globals()['df_{}'.format(var)].stack(level='isocode')).reset_index()
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].rename(columns={'level_0':'quarter', 0:var})
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)][['isocode', 'quarter', var]].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [84]:
# merge the raw data 

df_bis_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['isocode', 'quarter'])

for var in var_list[2:]:

    df_bis_q = pd.merge(df_bis_q, globals()['df_{}'.format(var)], how='outer', on=['isocode', 'quarter'])

In [85]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_bis_q = pd.merge(df_wb_isocode, df_bis_q, how='right', on=['isocode'])
df_bis_q = df_bis_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_bis_q['quarter'] = pd.to_datetime(df_bis_q['quarter'])
df_bis_q['quarter'] = df_bis_q['quarter'].dt.to_period('Q')

In [86]:
# bis quarterly dataset

df_bis_q

Unnamed: 0,country,isocode,quarter,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_cbpr,bis_q_reer_b,bis_q_reer_n
0,United Arab Emirates,ARE,1994Q2,,,,,75.43,
1,United Arab Emirates,ARE,1995Q2,,,,,69.41,
2,United Arab Emirates,ARE,1996Q2,,,,,73.79,
3,United Arab Emirates,ARE,1997Q2,,,,,77.92,
4,United Arab Emirates,ARE,1998Q2,,,,,87.56,
...,...,...,...,...,...,...,...,...,...
15007,South Africa,ZAF,2017Q4,,50.30,147.58,6.75,76.44,
15008,South Africa,ZAF,2018Q4,,53.20,154.85,6.75,77.77,
15009,South Africa,ZAF,2019Q4,,57.80,160.64,6.5,77.58,
15010,South Africa,ZAF,2020Q4,,71.00,165.74,3.5,72.31,


### **2.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|total credit to the government sector at market value (core debt)|bis_q_ggdebt_m|% of GDP|quarterly|CRE|-|market value|https://stats.bis.org/statx/srs/table/f5.1|
|2|total credit to the government sector at nominal value (core debt)|bis_q_ggdebt_n|% of GDP|quarterly|CRE|-|nominal value|https://stats.bis.org/statx/srs/table/f5.4|
|3|consumer price index - BIS spliced, not seasonally adjusted|bis_q_cpi|2010=100|quarterly|CPI|-|-|https://stats.bis.org/statx/srs/table/k1|
|4|central bank policy rates|bis_q_cbpr|%|quarterly|CBP|-|-|https://stats.bis.org/statx/srs/table/l1|
|5|real effective exchange rate, broad(60 economies) indicies|bis_q_reer_b|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|6|real effective exchange rate, narrow(27 economies) indicies|bis_q_reer_n|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|

### **2.3. data summary**

In [87]:
# number of countries & sample period

print('number of countries:', df_bis_q['isocode'].unique().shape[0])
print('start:', np.min(df_bis_q['quarter'].unique()))
print('end:', np.max(df_bis_q['quarter'].unique()))

number of countries: 60
start: 1913Q1
end: 2022Q2


In [88]:
# non-null count by each variables

df_bis_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15012 entries, 0 to 15011
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype        
---  ------          --------------  -----        
 0   country         15012 non-null  object       
 1   isocode         15012 non-null  object       
 2   quarter         15012 non-null  period[Q-DEC]
 3   bis_q_ggdebt_m  3193 non-null   float64      
 4   bis_q_ggdebt_n  4837 non-null   float64      
 5   bis_q_cpi       14629 non-null  float64      
 6   bis_q_cbpr      5616 non-null   object       
 7   bis_q_reer_b    6612 non-null   float64      
 8   bis_q_reer_n    5850 non-null   float64      
dtypes: float64(5), object(3), period[Q-DEC](1)
memory usage: 1.0+ MB


In [89]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_bis_q.describe(), 2)

Unnamed: 0,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_reer_b,bis_q_reer_n
count,3193.0,4837.0,14629.0,6612.0,5850.0
mean,65.47,59.03,53.98,97.27,100.15
std,39.45,34.31,56.99,17.94,17.21
min,4.6,1.6,0.0,41.04,45.28
25%,38.8,34.4,7.87,90.25,90.45
50%,57.6,51.2,49.86,97.99,98.45
75%,84.2,74.1,91.91,103.22,106.97
max,238.2,226.9,2031.98,281.92,248.02


## **3. World Bank**

### **3.1. python code** <br/>

In [90]:
# import the raw data

var_list = ['wb_q_ggdebt_d1', 'wb_q_ggdebt_d2', 'wb_q_ggdebt_d3', 'wb_q_ggdebt_d4']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv').set_index('isocode').transpose()
    globals()['df_{}'.format(var)] = pd.DataFrame(globals()['df_{}'.format(var)].stack(level='isocode')).reset_index()
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].rename(columns={'level_0':'quarter', 0:var})
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)][['isocode', 'quarter', var]].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [91]:
# merge the raw data 

df_wb_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['isocode', 'quarter'])

for var in var_list[2:]:

    df_wb_q = pd.merge(df_wb_q, globals()['df_{}'.format(var)], how='outer', on=['isocode', 'quarter'])

In [92]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_wb_q = pd.merge(df_wb_isocode, df_wb_q, how='right', on=['isocode'])
df_wb_q = df_wb_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

def datetime_func(x):

    result = str(x) # numerical type, not string type -> indexing is not applied"
    return result[0:6]

df_wb_q['quarter'] = pd.DataFrame(df_wb_q['quarter'].apply(datetime_func))
df_wb_q['quarter'] = pd.to_datetime(df_wb_q['quarter'])
df_wb_q['quarter'] = df_wb_q['quarter'].dt.to_period('Q')

In [93]:
# replace the value into NaN

# '..'
df_wb_q = df_wb_q.replace('..', np.nan)

# 0
df_wb_q = df_wb_q.replace(0, np.nan)

In [94]:
# set the astype

df_wb_q = df_wb_q.astype({'wb_q_ggdebt_d1':'float', 'wb_q_ggdebt_d2':'float', 'wb_q_ggdebt_d3':'float', 'wb_q_ggdebt_d4':'float'})

In [95]:
# wb quarterly dataset

df_wb_q

Unnamed: 0,country,isocode,quarter,wb_q_ggdebt_d1,wb_q_ggdebt_d2,wb_q_ggdebt_d3,wb_q_ggdebt_d4
0,Albania,ALB,1995Q1,,,,
1,Albania,ALB,1995Q2,,,,
2,Albania,ALB,1995Q3,,,,
3,Albania,ALB,1995Q4,,,,
4,Albania,ALB,1996Q1,,,,
...,...,...,...,...,...,...,...
11222,South Africa,ZAF,2021Q1,,,,
11223,South Africa,ZAF,2021Q2,,,,
11224,South Africa,ZAF,2021Q3,,,,
11225,South Africa,ZAF,2021Q4,,,,


### **3.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|Gross PSD, General Gov.-D1, All maturities, Debt securities + loans, Nominal Value, % of GDP|wb_q_ggdebt_d1|% of GDP|quarterly|DP.DOD.DLD1.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|2|Gross PSD, General Gov.-D2, All maturities, D1+ SDRs + currency and deposits, Nominal Value, % of GDP|wb_q_ggdebt_d2|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|3|Gross PSD, General Gov.-D3, All maturities, D2+other accounts payable, Nominal Value, % of GDP|wb_q_ggdebt_d3|% of GDP|quarterly|DP.DOD.DLD3.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|4|Gross PSD, General Gov.-D4, All maturities, D3+insurance, pensions, and standardized guarantees, Nominal Value, % of GDP|wb_q_ggdebt_d4|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|

### **3.3. data summary**

In [96]:
# number of countries & sample period

print('number of countries:', df_wb_q['isocode'].unique().shape[0])
print('start:', np.min(df_wb_q['quarter'].unique()))
print('end:', np.max(df_wb_q['quarter'].unique()))

number of countries: 103
start: 1995Q1
end: 2022Q1


In [97]:
# non-null count by each variables

df_wb_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11227 entries, 0 to 11226
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype        
---  ------          --------------  -----        
 0   country         11227 non-null  object       
 1   isocode         11227 non-null  object       
 2   quarter         11227 non-null  period[Q-DEC]
 3   wb_q_ggdebt_d1  3907 non-null   float64      
 4   wb_q_ggdebt_d2  3759 non-null   float64      
 5   wb_q_ggdebt_d3  2730 non-null   float64      
 6   wb_q_ggdebt_d4  651 non-null    float64      
dtypes: float64(4), object(2), period[Q-DEC](1)
memory usage: 614.1+ KB


In [98]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_wb_q.describe(), 2)

Unnamed: 0,wb_q_ggdebt_d1,wb_q_ggdebt_d2,wb_q_ggdebt_d3,wb_q_ggdebt_d4
count,3907.0,3759.0,2730.0,651.0
mean,53.02,44.04,49.76,78.62
std,34.12,35.29,35.34,40.5
min,0.0,0.0,0.0,0.0
25%,28.66,13.18,23.15,44.68
50%,46.63,41.15,47.66,76.16
75%,67.68,66.0,70.23,107.62
max,232.1,159.34,146.42,171.92


## **4. Eurostat**

### **4.1. python code** <br/>

In [99]:
# import the raw data

var_list = ['eustat_q_ggdebt', 'eustat_q_nggexp', 'eustat_q_gginv']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv')
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].sort_values(by=['alpha-2', 'quarter']).reset_index(drop=True)

In [100]:
# merge the raw data 

df_eustat_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['alpha-2', 'quarter'])

for var in var_list[2:]:

    df_eustat_q = pd.merge(df_eustat_q, globals()['df_{}'.format(var)], how='outer', on=['alpha-2', 'quarter'])

In [101]:
# replace the alpha-2

# Greece
df_eustat_q = df_eustat_q.replace({'alpha-2':'EL'}, 'GR')

In [102]:
# update the wb isocode data

df_isocode = pd.read_csv('df_isocode.csv', encoding='utf-8').drop(columns=['country', 'numeric'])
df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8')

df_wb_isocode = pd.merge(df_wb_isocode, df_isocode, how='left', on=['alpha-3'])
df_wb_isocode = df_wb_isocode[['country', 'alpha-3', 'alpha-2']].rename(columns={'alpha-3':'isocode'})

In [103]:
# merge the wb isocode data

df_eustat_q = pd.merge(df_wb_isocode, df_eustat_q, how='right', on=['alpha-2']).drop(columns=['alpha-2'])
df_eustat_q = df_eustat_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_eustat_q['quarter'] = pd.to_datetime(df_eustat_q['quarter'])
df_eustat_q['quarter'] = df_eustat_q['quarter'].dt.to_period('Q')

In [104]:
# eustat quarterly dataset

df_eustat_q

Unnamed: 0,country,isocode,quarter,eustat_q_ggdebt,eustat_q_nggexp,eustat_q_gginv
0,Austria,AUT,2000Q1,70.20,,
1,Austria,AUT,2000Q2,70.80,,
2,Austria,AUT,2000Q3,71.40,,
3,Austria,AUT,2000Q4,66.10,,
4,Austria,AUT,2001Q1,69.90,18.70,1.90
...,...,...,...,...,...,...
2972,Sweden,SWE,2021Q1,39.20,26.00,4.00
2973,Sweden,SWE,2021Q2,37.60,25.70,4.60
2974,Sweden,SWE,2021Q3,35.90,26.20,4.50
2975,Sweden,SWE,2021Q4,36.30,25.90,5.70


### **4.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|government consolidated gross debt [S13: general government]|eustat_q_ggdebt|% of GDP|quarterly|GOV_10Q_GGDEBT [GD]|-|PC_GDP|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGDEBT__custom_3260679/default/table?lang=en|
|2|final consumption expenditure [S13: general government]|eustat_q_nggexp|% of GDP|quarterly|GOV_10Q_GGNFA [P3]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263185/default/table?lang=en|
|3|gross fixed capital formation [S13: general government]|eustat_q_gginv|% of GDP|quarterly|GOV_10Q_GGNFA [P51G]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263870/default/table?lang=en|

### **4.3. data summary** <br/>

In [105]:
# number of countries & sample period

print('number of countries:', df_eustat_q['isocode'].unique().shape[0])
print('start:', np.min(df_eustat_q['quarter'].unique()))
print('end:', np.max(df_eustat_q['quarter'].unique()))

number of countries: 30
start: 1980Q1
end: 2022Q1


In [106]:
# non-null count by each variables

df_eustat_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2977 entries, 0 to 2976
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          2977 non-null   object       
 1   isocode          2977 non-null   object       
 2   quarter          2977 non-null   period[Q-DEC]
 3   eustat_q_ggdebt  2578 non-null   float64      
 4   eustat_q_nggexp  2878 non-null   float64      
 5   eustat_q_gginv   2878 non-null   float64      
dtypes: float64(3), object(2), period[Q-DEC](1)
memory usage: 139.7+ KB


In [107]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_eustat_q.describe(), 2)

Unnamed: 0,eustat_q_ggdebt,eustat_q_nggexp,eustat_q_gginv
count,2578.0,2878.0,2878.0
mean,59.2,19.79,3.73
std,35.16,3.68,1.43
min,3.4,2.0,0.5
25%,35.9,17.7,2.8
50%,52.4,19.7,3.6
75%,75.6,22.4,4.4
max,209.3,29.7,17.1


## **5. IMF**

### **5.1. python code** <br/>

In [108]:
# import the raw data

df_imf_q_1 = pd.read_csv('df_imf_q_1.csv').sort_values(by=['country', 'quarter']).reset_index(drop=True)
df_imf_q_2 = pd.read_csv('df_imf_q_2.csv').sort_values(by=['country', 'quarter']).reset_index(drop=True)
df_imf_q_3 = pd.read_csv('df_imf_q_3.csv').sort_values(by=['country', 'quarter']).reset_index(drop=True)

In [109]:
# merge the raw data

df_imf_q = pd.merge(df_imf_q_1, df_imf_q_2, how='outer', on=['country', 'quarter'])
df_imf_q = pd.merge(df_imf_q, df_imf_q_3, how='outer', on=['country', 'quarter'])
df_imf_q = df_imf_q.sort_values(by=['country', 'quarter']).reset_index(drop=True)

In [110]:
# filter

cond1 = (df_imf_q['country'] == 'Advanced Economies')
cond2 = (df_imf_q['country'] == 'Anguilla')
cond3 = (df_imf_q['country'] == 'Cura?ao, Kingdom of the Netherlands')
cond4 = (df_imf_q['country'] == 'Czechoslovakia')
cond5 = (df_imf_q['country'] == 'Emerging and Developing Asia')
cond6 = (df_imf_q['country'] == 'Emerging and Developing Countries')
cond7 = (df_imf_q['country'] == 'Euro Area')
cond8 = (df_imf_q['country'] == 'Netherlands Antilles')
cond9 = (df_imf_q['country'] == 'Sub-Saharan Africa')
cond10 = (df_imf_q['country'] == 'Taiwan Province of China')
cond11 = (df_imf_q['country'] == 'West African Economic and Monetary Union (WAEMU)')
cond12 = (df_imf_q['country'] == 'West Bank and Gaza')
cond13 = (df_imf_q['country'] == 'Western Hemisphere')
cond14 = (df_imf_q['country'] == 'World')
cond15 = (df_imf_q['country'] == 'Yugoslavia')
cond16 = (df_imf_q['country'] == 'Montserrat')
cond17 = (df_imf_q['country'] == 'Eritrea, The State of')
cond18 = (df_imf_q['country'] == 'Cura?ao and Sint Maarten')
cond19 = (df_imf_q['country'] == 'Eastern Caribbean Currency Union (ECCU)')
cond20 = (df_imf_q['country'] == 'Central African Economic and Monetary Community')
cond21 = (df_imf_q['country'] == 'Guernsey')
cond22 = (df_imf_q['country'] == 'Jersey')
cond23 = (df_imf_q['country'] == 'Reunion')
cond24 = (df_imf_q['country'] == "Yemen, People's Dem. Rep. of")
cond25 = (df_imf_q['country'] == 'Yemen Arab Rep.')

df_imf_q = df_imf_q.loc[~cond1 & ~cond2 & ~cond3 & ~cond4 & ~cond5 & ~cond6 & ~cond7 & ~cond8 & ~cond9 & ~cond10 & ~cond11 & ~cond12 & ~cond13 & ~cond14 & ~cond15 & ~cond16 & ~cond17 & ~cond18 & ~cond19 & ~cond20 & ~cond21 & ~cond22 & ~cond23 & ~cond24 & ~cond25]

In [111]:
# replace the country

df_imf_q = df_imf_q.replace({'country':'Afghanistan, Islamic Rep. of'}, 'Afghanistan')
df_imf_q = df_imf_q.replace({'country':'Armenia, Rep. of'}, 'Armenia')
df_imf_q = df_imf_q.replace({'country':'Aruba, Kingdom of the Netherlands'}, 'Aruba')
df_imf_q = df_imf_q.replace({'country':'Azerbaijan, Rep. of'}, 'Azerbaijan')
df_imf_q = df_imf_q.replace({'country':'Bahrain, Kingdom of'}, 'Bahrain')
df_imf_q = df_imf_q.replace({'country':'Belarus, Rep. of'}, 'Belarus')
df_imf_q = df_imf_q.replace({'country':"C?te d'Ivoire"}, "Cote d'Ivoire")
df_imf_q = df_imf_q.replace({'country':'Central African Rep.'}, 'Central African Republic')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Hong Kong'}, 'Hong Kong SAR, China')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Mainland'}, 'China')
df_imf_q = df_imf_q.replace({'country':'Comoros, Union of the'}, 'Comoros')
df_imf_q = df_imf_q.replace({'country':'Congo, Dem. Rep. of the'}, 'Congo, Dem. Rep.')
df_imf_q = df_imf_q.replace({'country':'Congo, Rep. of'}, 'Congo, Rep.')
df_imf_q = df_imf_q.replace({'country':'Croatia, Rep. of'}, 'Croatia')
df_imf_q = df_imf_q.replace({'country':'Czech Rep.'}, 'Czech Republic')
df_imf_q = df_imf_q.replace({'country':'Dominican Rep.'}, 'Dominican Republic')
df_imf_q = df_imf_q.replace({'country':'Egypt, Arab Rep. of'}, 'Egypt, Arab Rep.')
df_imf_q = df_imf_q.replace({'country':'Equatorial Guinea, Rep. of'}, 'Equatorial Guinea')
df_imf_q = df_imf_q.replace({'country':'Estonia, Rep. of'}, 'Estonia')
df_imf_q = df_imf_q.replace({'country':'Eswatini, Kingdom of'}, 'Eswatini')
df_imf_q = df_imf_q.replace({'country':'Ethiopia, The Federal Dem. Rep. of'}, 'Ethiopia')
df_imf_q = df_imf_q.replace({'country':'Fiji, Rep. of'}, 'Fiji')
df_imf_q = df_imf_q.replace({'country':'Iran, Islamic Rep. of'}, 'Iran, Islamic Rep.')
df_imf_q = df_imf_q.replace({'country':'Kazakhstan, Rep. of'}, 'Kazakhstan')
df_imf_q = df_imf_q.replace({'country':'Korea, Rep. of'}, 'Korea, Rep.')
df_imf_q = df_imf_q.replace({'country':'Kosovo, Rep. of'}, 'Kosovo')
df_imf_q = df_imf_q.replace({'country':'Kyrgyz Rep.'}, 'Kyrgyz Republic')
df_imf_q = df_imf_q.replace({'country':"Lao People's Dem. Rep."}, 'Lao PDR')
df_imf_q = df_imf_q.replace({'country':'Lesotho, Kingdom of'}, 'Lesotho')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Macao'}, 'Macao SAR, China')
df_imf_q = df_imf_q.replace({'country':'Madagascar, Rep. of'}, 'Madagascar')
df_imf_q = df_imf_q.replace({'country':'Mauritania, Islamic Rep. of'}, 'Mauritania')
df_imf_q = df_imf_q.replace({'country':'Micronesia, Federated States of'}, 'Micronesia, Fed. Sts.')
df_imf_q = df_imf_q.replace({'country':'Moldova, Rep. of'}, 'Moldova')
df_imf_q = df_imf_q.replace({'country':'Mozambique, Rep. of'}, 'Mozambique')
df_imf_q = df_imf_q.replace({'country':'Nauru, Rep. of'}, 'Nauru')
df_imf_q = df_imf_q.replace({'country':'Netherlands, The'}, 'Netherlands')
df_imf_q = df_imf_q.replace({'country':'North Macedonia, Republic of'}, 'North Macedonia')
df_imf_q = df_imf_q.replace({'country':'Palau, Rep. of'}, 'Palau')
df_imf_q = df_imf_q.replace({'country':'Poland, Rep. of'}, 'Poland')
df_imf_q = df_imf_q.replace({'country':'S?o Tom? and Pr?ncipe, Dem. Rep. of'}, 'Sao Tome and Principe')
df_imf_q = df_imf_q.replace({'country':'San Marino, Rep. of'}, 'San Marino')
df_imf_q = df_imf_q.replace({'country':'Serbia, Rep. of'}, 'Serbia')
df_imf_q = df_imf_q.replace({'country':'Sint Maarten, Kingdom of the Netherlands'}, 'Sint Maarten (Dutch part)')
df_imf_q = df_imf_q.replace({'country':'Slovak Rep.'}, 'Slovak Republic')
df_imf_q = df_imf_q.replace({'country':'Slovenia, Rep. of'}, 'Slovenia')
df_imf_q = df_imf_q.replace({'country':'South Sudan, Rep. of'}, 'South Sudan')
df_imf_q = df_imf_q.replace({'country':'Syrian Arab Rep.'}, 'Syrian Arab Republic')
df_imf_q = df_imf_q.replace({'country':'Tajikistan, Rep. of'}, 'Tajikistan')
df_imf_q = df_imf_q.replace({'country':'Tanzania, United Rep. of'}, 'Tanzania')
df_imf_q = df_imf_q.replace({'country':'Timor-Leste, Dem. Rep. of'}, 'Timor-Leste')
df_imf_q = df_imf_q.replace({'country':'Turkey'}, 'Turkiye')
df_imf_q = df_imf_q.replace({'country':'Uzbekistan, Rep. of'}, 'Uzbekistan')
df_imf_q = df_imf_q.replace({'country':'Venezuela, Rep. Bolivariana de'}, 'Venezuela, RB')
df_imf_q = df_imf_q.replace({'country':'Yemen, Rep. of'}, 'Yemen, Rep.')
df_imf_q = df_imf_q.replace({'country':'Andorra, Principality of'}, 'Andorra')

In [112]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_imf_q = df_imf_q[['country', 'quarter', 'imf_q_ggdebt', 'imf_q_nggexp', 'imf_q_rggexp', 'imf_q_ngdp', 'imf_q_rgdp', 'imf_q_exp', 'imf_q_pexp', 'imf_q_gcf', 'imf_q_gfcf', 'imf_q_cpi', 'imf_q_gdpd', 'imf_q_neer', 'imf_q_reer', 'imf_q_ca', 'imf_q_nebgs', 'imf_q_rebgs', 'imf_q_stir_mm', 'imf_q_stir_3mtb', 'imf_q_stir_mp', 'imf_q_USD_DCU_ep', 'imf_q_USD_DCU_pa']]
df_imf_q = pd.merge(df_wb_isocode, df_imf_q, how='right', on=['country'])
df_imf_q = df_imf_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_imf_q['quarter'] = pd.to_datetime(df_imf_q['quarter'])
df_imf_q['quarter'] = df_imf_q['quarter'].dt.to_period('Q')

In [113]:
# imf quarterly dataset

df_imf_q

Unnamed: 0,country,isocode,quarter,imf_q_ggdebt,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_exp,imf_q_pexp,...,imf_q_neer,imf_q_reer,imf_q_ca,imf_q_nebgs,imf_q_rebgs,imf_q_stir_mm,imf_q_stir_3mtb,imf_q_stir_mp,imf_q_USD_DCU_ep,imf_q_USD_DCU_pa
0,Aruba,ABW,1986Q1,,,,,,,,...,,,6759776.54,,,6.60,,,0.56,0.56
1,Aruba,ABW,1986Q2,,,,,,,,...,,,1173184.36,,,5.87,,,0.56,0.56
2,Aruba,ABW,1986Q3,,,,,,,,...,,,-14972067.04,,,4.93,,,0.56,0.56
3,Aruba,ABW,1986Q4,,,,,,,,...,,,-11620111.73,,,4.60,,,0.56,0.56
4,Aruba,ABW,1987Q1,,,,,,,,...,,,20279329.61,,,4.97,,,0.56,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47466,Zimbabwe,ZWE,2021Q1,,,,,,,,...,,,,,,,,,0.01,0.01
47467,Zimbabwe,ZWE,2021Q2,,,,,,,,...,,,,,,,,,0.01,0.01
47468,Zimbabwe,ZWE,2021Q3,,,,,,,,...,,,,,,,,,0.01,0.01
47469,Zimbabwe,ZWE,2021Q4,,,,,,,,...,,,,,,,,,0.01,0.01


### **5.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|fiscal, general government, assets and liabilities, debt|imf_q_ggdebt|DCU|quarterly|GG_GALM_G01_XDC|-|market val.|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|2|general government final consumption expenditure, nominal|imf_q_nggexp|DCU|quarterly|NCGG_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|3|general government final consumption expenditure, real|imf_q_rggexp|DCU|quarterly|NCGG_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|4|gross domestic product, nominal|imf_q_ngdp|DCU|quarterly|NGDP_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|5|gross domestic product, real|imf_q_rgdp|DCU|quarterly|NGDP_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|6|final consumption expenditure, real|imf_q_exp|DCU|quarterly|NC_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|7|private sector final consumption expenditure, real|imf_q_pexp|DCU|quarterly|NCP_R_SA_XDC|-|real, SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|8|gross capital formation, real|imf_q_gcf|DCU|quarterly|NI_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|9|gross fixed capital formation, real|imf_q_gfcf|DCU|quarterly|NFI_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|10|prices, consumer price index, all items|imf_q_cpi|index|quarterly|PCPI_IX|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|11|gross domestic product, deflator|imf_q_gdpd|index|quarterly|NGDP_D_SA_IX|-|SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|12|exchange rates, nominal effective exchange rate|imf_q_neer|index|quarterly|ENEER_IX|-|nominal|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|13|exchange rates, real effective exchange rate based on consumer price index|imf_q_reer|index|quarterly|EREER_IX|-|real|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|14|supplementary items, current account, net (excluding exceptional financing)|imf_q_ca|USD|quarterly|BCAXF_BP6_USD|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|15|external balance of goods and services, nominal|imf_q_nebgs|DCU|quarterly|NNXGS_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|16|external balance of goods and services, real|imf_q_rebgs|DCU|quarterly|NNXGS_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|17|financial, interest rates, money market|imf_q_stir_mm|percent per annum|quarterly|FIMM_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|18|financial, interest rates, government securities, treasury bills, 3-month|imf_q_stir_3mtb|percent per annum|quarterly|FITB_3M_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|19|financial, interest rates, monetary policy-related interest rate|imf_q_stir_mp|percent per annum|quarterly|FPOLM_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|20|exchange rates, US dollar per domestic currency, end of period|imf_q_USD_DCU_ep|-|quarterly|EDNE_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|21|exchange rates, US dollar per domestic currency, period average, rate|imf_q_USD_DCU_pa|-|quarterly|EDNA_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|

### **5.3. data summary** <br/>

In [114]:
# number of countries & sample period

print('number of countries:', df_imf_q['isocode'].unique().shape[0])
print('start:', np.min(df_imf_q['quarter'].unique()))
print('end:', np.max(df_imf_q['quarter'].unique()))

number of countries: 200
start: 1920Q1
end: 2022Q2


In [115]:
# non-null count by each variables

df_imf_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47471 entries, 0 to 47470
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype        
---  ------            --------------  -----        
 0   country           47471 non-null  object       
 1   isocode           47471 non-null  object       
 2   quarter           47471 non-null  period[Q-DEC]
 3   imf_q_ggdebt      2735 non-null   float64      
 4   imf_q_nggexp      6672 non-null   float64      
 5   imf_q_rggexp      6639 non-null   float64      
 6   imf_q_ngdp        6801 non-null   float64      
 7   imf_q_rgdp        7101 non-null   float64      
 8   imf_q_exp         5973 non-null   float64      
 9   imf_q_pexp        6459 non-null   float64      
 10  imf_q_gcf         5481 non-null   float64      
 11  imf_q_gfcf        6630 non-null   float64      
 12  imf_q_cpi         34397 non-null  float64      
 13  imf_q_gdpd        6644 non-null   float64      
 14  imf_q_neer        16528 non-null  floa

In [116]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_imf_q.describe(), 2)

Unnamed: 0,imf_q_ggdebt,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_exp,imf_q_pexp,imf_q_gcf,imf_q_gfcf,imf_q_cpi,...,imf_q_neer,imf_q_reer,imf_q_ca,imf_q_nebgs,imf_q_rebgs,imf_q_stir_mm,imf_q_stir_3mtb,imf_q_stir_mp,imf_q_USD_DCU_ep,imf_q_USD_DCU_pa
count,2735.0,6672.0,6639.0,6801.0,7101.0,5973.0,6459.0,5481.0,6630.0,34397.0,...,16528.0,15051.0,16363.0,6394.0,2672.0,11563.0,12017.0,8628.0,45018.0,44843.0
mean,3585117967751.37,4289190913952.62,4061062584518.53,38973240316535.39,35120839096718.32,23651434209245.7,21906141104607.45,11936602640133.93,11282065037987.46,77.07,...,3817252747.92,114.72,-112562775.17,510296879682.83,1144155253742.51,21.03,9.07,8.23,15224232419.96,13964915904.98
std,44538464010221.6,26775376009081.16,20126290540732.3,283967799503787.9,222870148437244.0,144884769261328.28,130623365611321.06,78689853213966.86,72768852235715.89,479.12,...,134425254762.94,116.51,13269079748.31,6441353818015.24,9873062859847.3,625.4,12.83,10.13,484756522739.47,427808076362.11
min,417000000.0,39453000.0,261872261.9,267985000.0,81160000.0,84740000.0,813595200.9,-40600000000.0,148800891.2,0.0,...,0.41,15.38,-255000000000.0,-64057300000000.0,-29503000000000.0,-0.68,-1.01,-0.75,0.0,0.0
25%,24876000000.0,7665770778.75,10200000000.0,32967000000.0,46860100000.0,42600000000.0,30700000000.0,10559000000.0,11778725000.0,13.73,...,94.57,92.93,-613000000.0,-3939144601.75,-36986450000.0,3.17,3.53,3.25,0.01,0.01
50%,319000000000.0,39424850000.0,51500000000.0,182000000000.0,267000000000.0,217000000000.0,164000000000.0,67700000000.0,53360800000.0,60.4,...,102.34,100.62,-88856646.03,29500000.0,1274000000.0,5.53,6.29,5.75,0.19,0.19
75%,1310000000000.0,172357250000.0,156000000000.0,868000000000.0,820000000000.0,745000000000.0,599500000000.0,227000000000.0,188000000000.0,99.84,...,125.37,112.45,173871838.4,6909525000.0,12698825000.0,9.9,10.93,10.0,1.0,1.0
max,1020000000000000.0,393307000000000.0,230000000000000.0,4900000000000000.0,2920000000000000.0,1800000000000000.0,1600000000000000.0,939000000000000.0,911000000000000.0,29407.15,...,8470000000000.0,5988.81,133000000000.0,212207000000000.0,167532000000000.0,61846.25,374.5,350.53,40801200000000.0,35189600000000.0


## **6. FRED**

### **6.1. python code** <br/>

In [117]:
# import the raw data (ggexp)

start_time = dt.datetime(1960, 1, 1)
end_time = dt.datetime(2022, 4, 1)

var_name_list = ['NCGGRSAXDCJPQ', 'NCGGRSAXDCDEQ', 'NCGGRSAXDCAUQ', 'NCGGRSAXDCFRQ', 'NCGGRSAXDCGBQ', 
                 'NCGGRSAXDCCAQ', 'NCGGRSAXDCITQ', 'NCGGRSAXDCPLQ', 'NCGGRSAXDCESQ', 'NCGGRSAXDCKRQ', 
                 'NCGGRSAXDCUSQ', 'NCGGRSAXDCBRQ', 'NCGGRSAXDCTRQ', 'NCGGRSAXDCARQ', 'NCGGRSAXDCMXQ', 
                 'NCGGRSAXDCIDQ', 'NCGGRSAXDCZAQ']

var_list = ['JPN', 'DEU', 'AUS', 'FRA', 'GBR', 
            'CAN', 'ITA', 'POL', 'EST', 'KOR', 
            'USA', 'BRA', 'TUR', 'ARG', 'MEX', 
            'IDN', 'ZAF']

for i in range(0, len(var_list)):

    globals()['df_{}'.format(var_list[i])] = DataReader(var_name_list[i], 'fred', start=start_time, end=end_time)[[var_name_list[i]]].reset_index().rename(columns={var_name_list[i]: var_list[i], 'DATE':'quarter'})

In [118]:
# merge the raw data (ggexp)

df_fred_q_ggexp = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['quarter'])

for var in var_list[2:]:

    df_fred_q_ggexp = pd.merge(df_fred_q_ggexp, globals()['df_{}'.format(var)], how='outer', on=['quarter'])

df_fred_q_ggexp = df_fred_q_ggexp.sort_values(by=['quarter']).reset_index(drop=True)
df_fred_q_ggexp = df_fred_q_ggexp.set_index('quarter').transpose()
df_fred_q_ggexp = pd.DataFrame(df_fred_q_ggexp.stack()).reset_index().rename(columns={'level_0':'isocode', 0:'fred_q_rggexp'})

In [119]:
# import the raw data (ca)

start_time = dt.datetime(1960, 1, 1)
end_time = dt.datetime(2022, 4, 1)

var_name_list = ['USAB6BLTT02STSAQ', 'DEUB6BLTT02STSAQ', 'TURB6BLTT02STSAQ', 'CHNB6BLTT02STSAQ', 'JPNB6BLTT02STSAQ', 
                 'GRCB6BLTT02STSAQ', 'INDB6BLTT02STSAQ', 'ZAFB6BLTT02STSAQ', 'GBRB6BLTT02STSAQ', 'ESTB6BLTT02STSAQ',
                 'DNKB6BLTT02STSAQ', 'ISLB6BLTT02STSAQ', 'IDNB6BLTT02STSAQ', 'RUSB6BLTT02STSAQ', 'PRTB6BLTT02STSAQ',
                 'LUXB6BLTT02STSAQ', 'LTUB6BLTT02STSAQ', 'FRAB6BLTT02STSAQ', 'CANB6BLTT02STSAQ', 'AUSB6BLTT02STSAQ',
                 'COLB6BLTT02STSAQ', 'ITAB6BLTT02STSAQ', 'FINB6BLTT02STSAQ', 'LVAB6BLTT02STSAQ', 'NLDB6BLTT02STSAQ',
                 'MEXB6BLTT02STSAQ', 'IRLB6BLTT02STSAQ', 'SVNB6BLTT02STSAQ', 'POLB6BLTT02STSAQ', 'ISRB6BLTT02STSAQ',
                 'ESPB6BLTT02STSAQ', 'HUNB6BLTT02STSAQ', 'BELB6BLTT02STSAQ', 'ARGB6BLTT02STSAQ', 'SWEB6BLTT02STSAQ',
                 'AUTB6BLTT02STSAQ', 'CHLB6BLTT02STSAQ', 'CHEB6BLTT02STSAQ', 'NORB6BLTT02STSAQ', 'NZLB6BLTT02STSAQ',
                 'KORB6BLTT02STSAQ', 'CRIB6BLTT02STSAQ', 'SVKB6BLTT02STSAQ', 'CZEB6BLTT02STSAQ']
                 # ISRB6BLTT02STSAQ: not seasonally-adjusted

var_list = ['USA', 'DEU', 'TUR', 'CHN', 'JPN', 
            'GRC', 'IND', 'ZAF', 'GBR', 'EST',
            'DNK', 'ISL', 'IDN', 'RUS', 'PRT',
            'LUX', 'LTU', 'FRA', 'CAN', 'AUS',
            'COL', 'ITA', 'FIN', 'LVA', 'NLD',
            'MEX', 'IRL', 'SVN', 'POL', 'ISR',
            'ESP', 'HUN', 'BEL', 'ARG', 'SWE',
            'AUT', 'CHL', 'CHE', 'NOR', 'NZL',
            'KOR', 'CRI', 'SVK', 'CZE']
            # ISR: not seasonally-adjusted

for i in range(0, len(var_list)):

    globals()['df_{}'.format(var_list[i])] = DataReader(var_name_list[i], 'fred', start=start_time, end=end_time)[[var_name_list[i]]].reset_index().rename(columns={var_name_list[i]: var_list[i], 'DATE':'quarter'})

In [120]:
# merge the raw data (ca)

df_fred_q_ca = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['quarter'])

for var in var_list[2:]:

    df_fred_q_ca = pd.merge(df_fred_q_ca, globals()['df_{}'.format(var)], how='outer', on=['quarter'])

df_fred_q_ca = df_fred_q_ca.sort_values(by=['quarter']).reset_index(drop=True)
df_fred_q_ca = df_fred_q_ca.set_index('quarter').transpose()
df_fred_q_ca = pd.DataFrame(df_fred_q_ca.stack()).reset_index().rename(columns={'level_0':'isocode', 0:'fred_q_ca'})

In [121]:
# merge the data

df_fred_q = pd.merge(df_fred_q_ggexp, df_fred_q_ca, how='outer', on=['isocode', 'quarter'])
df_fred_q = df_fred_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [122]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_fred_q = pd.merge(df_wb_isocode, df_fred_q, how='right', on=['isocode'])
df_fred_q = df_fred_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_fred_q['quarter'] = pd.to_datetime(df_fred_q['quarter'])
df_fred_q['quarter'] = df_fred_q['quarter'].dt.to_period('Q')

In [123]:
# fred quarterly dataset

df_fred_q

Unnamed: 0,country,isocode,quarter,fred_q_rggexp,fred_q_ca
0,Argentina,ARG,2004Q1,13129.30,
1,Argentina,ARG,2004Q2,13231.40,
2,Argentina,ARG,2004Q3,13624.90,
3,Argentina,ARG,2004Q4,13933.20,
4,Argentina,ARG,2005Q1,14412.30,
...,...,...,...,...,...
5502,South Africa,ZAF,2021Q1,222069.40,3.84
5503,South Africa,ZAF,2021Q2,222850.40,5.58
5504,South Africa,ZAF,2021Q3,224008.70,2.99
5505,South Africa,ZAF,2021Q4,224479.80,2.30


### **6.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|real general government final consumption expenditure|fred_q_rggexp|DCU|quarterly|NCGGRSAXDC + isocode [alpha-2] + Q|-|real / SA|https://fred.stlouisfed.org/searchresults?st=real+general+government+final+consumption+expenditure|
|2|balance of payments BPM6: current account balance: total: total balance|fred_q_ca|% of GDP|quarterly|isocode [alpha-2] + B6BLTT02STSA + Q|-|SA|https://fred.stlouisfed.org/searchresults?st=BPM6%3A+current+account+balance+%25&pageID=1|

### **6.3. data summary**

In [124]:
# number of countries & sample period

print('number of countries:', df_fred_q['isocode'].unique().shape[0])
print('start:', np.min(df_fred_q['quarter'].unique()))
print('end:', np.max(df_fred_q['quarter'].unique()))

number of countries: 45
start: 1960Q1
end: 2022Q2


In [125]:
# non-null count by each variables

df_fred_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5507 entries, 0 to 5506
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype        
---  ------         --------------  -----        
 0   country        5507 non-null   object       
 1   isocode        5507 non-null   object       
 2   quarter        5507 non-null   period[Q-DEC]
 3   fred_q_rggexp  2409 non-null   float64      
 4   fred_q_ca      5041 non-null   float64      
dtypes: float64(2), object(2), period[Q-DEC](1)
memory usage: 215.2+ KB


In [126]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_fred_q.describe(), 2)

Unnamed: 0,fred_q_rggexp,fred_q_ca
count,2409.0,5041.0
mean,9786801.38,-0.19
std,32277310.05,5.42
min,9907.0,-56.27
25%,53232.64,-3.2
50%,83537.0,-0.52
75%,416138.4,2.71
max,230259300.0,44.17


## **7. pooled data**

### **7.1. python code** <br/>

In [127]:
# generate the quarterly pooled data

df_q_pooled = pd.merge(df_oecd_q, df_bis_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_wb_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_eustat_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_imf_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_fred_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = df_q_pooled.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [128]:
# generate the quarterly temp data

df_q_temp = pd.read_csv('df_q_temp.csv').set_index(['country', 'alpha-3']).transpose()
df_q_temp = pd.DataFrame(df_q_temp.stack(level=['country', 'alpha-3'])).reset_index()
df_q_temp = df_q_temp.rename(columns={'level_0':'quarter', 'alpha-3':'isocode', 0:'value'})
df_q_temp = df_q_temp[['country', 'isocode', 'quarter', 'value']].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_q_temp['quarter'] = pd.to_datetime(df_q_temp['quarter'])
df_q_temp['quarter'] = df_q_temp['quarter'].dt.to_period('Q')

In [129]:
# merge the quarterly pooled data with the quarterly temp data

df_q_pooled = pd.merge(df_q_pooled, df_q_temp, how='outer', on=['country', 'isocode', 'quarter']).drop(columns=['value'])

In [130]:
# quarterly pooled dataset

df_q_pooled = df_q_pooled.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

df_q_pooled

Unnamed: 0,country,isocode,quarter,oecd_q_rggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,...,imf_q_ca,imf_q_nebgs,imf_q_rebgs,imf_q_stir_mm,imf_q_stir_3mtb,imf_q_stir_mp,imf_q_USD_DCU_ep,imf_q_USD_DCU_pa,fred_q_rggexp,fred_q_ca
0,Aruba,ABW,1913Q1,,,,,,,,...,,,,,,,,,,
1,Aruba,ABW,1913Q2,,,,,,,,...,,,,,,,,,,
2,Aruba,ABW,1913Q3,,,,,,,,...,,,,,,,,,,
3,Aruba,ABW,1913Q4,,,,,,,,...,,,,,,,,,,
4,Aruba,ABW,1914Q1,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95041,Zimbabwe,ZWE,2021Q2,,,,,,,,...,,,,,,,0.01,0.01,,
95042,Zimbabwe,ZWE,2021Q3,,,,,,,,...,,,,,,,0.01,0.01,,
95043,Zimbabwe,ZWE,2021Q4,,,,,,,,...,,,,,,,0.01,0.01,,
95044,Zimbabwe,ZWE,2022Q1,,,,,,,,...,,,,,,,0.01,0.01,,


### **7.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|$OECD$|||||||||
|1|government final consumption expenditure, volume|oecd_q_rggexp|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|2|goverment gross fixed capital formation, volume|oecd_q_gginv|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|3|gross domestic product, volume, market prices|oecd_q_rgdp_dcu|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|4|gross domestic product, volume in USD, constant exchange rates|oecd_q_rgdp_usd|USD, 2015|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|5|current account balance as a percentage of GDP|oecd_q_ca|% of GDP|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|6|short-term interest rate|oecd_q_stir|% per annum|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|$BIS$|||||||||
|1|total credit to the government sector at market value (core debt)|bis_q_ggdebt_m|% of GDP|quarterly|CRE|-|market value|https://stats.bis.org/statx/srs/table/f5.1|
|2|total credit to the government sector at nominal value (core debt)|bis_q_ggdebt_n|% of GDP|quarterly|CRE|-|nominal value|https://stats.bis.org/statx/srs/table/f5.4|
|3|consumer price index - BIS spliced, not seasonally adjusted|bis_q_cpi|2010=100|quarterly|CPI|-|-|https://stats.bis.org/statx/srs/table/k1|
|4|central bank policy rates|bis_q_cbpr|%|quarterly|CBP|-|-|https://stats.bis.org/statx/srs/table/l1|
|5|real effective exchange rate, broad(60 economies) indicies|bis_q_reer_b|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|6|real effective exchange rate, narrow(27 economies) indicies|bis_q_reer_n|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|$World Bank$|||||||||
|1|Gross PSD, General Gov.-D1, All maturities, Debt securities + loans, Nominal Value, % of GDP|wb_q_ggdebt_d1|% of GDP|quarterly|DP.DOD.DLD1.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|2|Gross PSD, General Gov.-D2, All maturities, D1+ SDRs + currency and deposits, Nominal Value, % of GDP|wb_q_ggdebt_d2|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|3|Gross PSD, General Gov.-D3, All maturities, D2+other accounts payable, Nominal Value, % of GDP|wb_q_ggdebt_d3|% of GDP|quarterly|DP.DOD.DLD3.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|4|Gross PSD, General Gov.-D4, All maturities, D3+insurance, pensions, and standardized guarantees, Nominal Value, % of GDP|wb_q_ggdebt_d4|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|$Eurostat$|||||||||
|1|government consolidated gross debt [S13: general government]|eustat_q_ggdebt|% of GDP|quarterly|GOV_10Q_GGDEBT [GD]|-|PC_GDP|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGDEBT__custom_3260679/default/table?lang=en|
|2|final consumption expenditure [S13: general government]|eustat_q_nggexp|% of GDP|quarterly|GOV_10Q_GGNFA [P3]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263185/default/table?lang=en|
|3|gross fixed capital formation [S13: general government]|eustat_q_gginv|% of GDP|quarterly|GOV_10Q_GGNFA [P51G]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263870/default/table?lang=en|
|$IMF$|||||||||
|1|fiscal, general government, assets and liabilities, debt|imf_q_ggdebt|DCU|quarterly|GG_GALM_G01_XDC|-|market val.|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|2|general government final consumption expenditure, nominal|imf_q_nggexp|DCU|quarterly|NCGG_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|3|general government final consumption expenditure, real|imf_q_rggexp|DCU|quarterly|NCGG_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|4|gross domestic product, nominal|imf_q_ngdp|DCU|quarterly|NGDP_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|5|gross domestic product, real|imf_q_rgdp|DCU|quarterly|NGDP_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|6|final consumption expenditure, real|imf_q_exp|DCU|quarterly|NC_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|7|private sector final consumption expenditure, real|imf_q_pexp|DCU|quarterly|NCP_R_SA_XDC|-|real, SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|8|gross capital formation, real|imf_q_gcf|DCU|quarterly|NI_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|9|gross fixed capital formation, real|imf_q_gfcf|DCU|quarterly|NFI_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|10|prices, consumer price index, all items|imf_q_cpi|index|quarterly|PCPI_IX|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|11|gross domestic product, deflator|imf_q_gdpd|index|quarterly|NGDP_D_SA_IX|-|SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|12|exchange rates, nominal effective exchange rate|imf_q_neer|index|quarterly|ENEER_IX|-|nominal|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|13|exchange rates, real effective exchange rate based on consumer price index|imf_q_reer|index|quarterly|EREER_IX|-|real|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|14|supplementary items, current account, net (excluding exceptional financing)|imf_q_ca|USD|quarterly|BCAXF_BP6_USD|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|15|external balance of goods and services, nominal|imf_q_nebgs|DCU|quarterly|NNXGS_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|16|external balance of goods and services, real|imf_q_rebgs|DCU|quarterly|NNXGS_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|17|financial, interest rates, money market|imf_q_stir_mm|percent per annum|quarterly|FIMM_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|18|financial, interest rates, government securities, treasury bills, 3-month|imf_q_stir_3mtb|percent per annum|quarterly|FITB_3M_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|19|financial, interest rates, monetary policy-related interest rate|imf_q_stir_mp|percent per annum|quarterly|FPOLM_PA|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|20|exchange rates, US dollar per domestic currency, end of period|imf_q_USD_DCU_ep|-|quarterly|EDNE_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|21|exchange rates, US dollar per domestic currency, period average, rate|imf_q_USD_DCU_pa|-|quarterly|EDNA_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|$FRED$|||||||||
|1|real general government final consumption expenditure|fred_q_rggexp|DCU|quarterly|NCGGRSAXDC + isocode [alpha-2] + Q|-|real / SA|https://fred.stlouisfed.org/searchresults?st=real+general+government+final+consumption+expenditure|
|2|balance of payments BPM6: current account balance: total: total balance|fred_q_ca|% of GDP|quarterly|isocode [alpha-2] + B6BLTT02STSA + Q|-|SA|https://fred.stlouisfed.org/searchresults?st=BPM6%3A+current+account+balance+%25&pageID=1|

### **7.3. data summary**

In [131]:
# number of countries & sample period

print('number of countries:', df_q_pooled['isocode'].unique().shape[0])
print('start:', np.min(df_q_pooled['quarter'].unique()))
print('end:', np.max(df_q_pooled['quarter'].unique()))

number of countries: 217
start: 1913Q1
end: 2022Q2


In [132]:
# non-null count by each variables

df_q_pooled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95046 entries, 0 to 95045
Data columns (total 45 columns):
 #   Column            Non-Null Count  Dtype        
---  ------            --------------  -----        
 0   country           95046 non-null  object       
 1   isocode           95046 non-null  object       
 2   quarter           95046 non-null  period[Q-DEC]
 3   oecd_q_rggexp     4596 non-null   float64      
 4   oecd_q_gginv      2574 non-null   float64      
 5   oecd_q_rgdp_dcu   4712 non-null   float64      
 6   oecd_q_rgdp_usd   4712 non-null   float64      
 7   oecd_q_ca         3710 non-null   float64      
 8   oecd_q_stir       4324 non-null   float64      
 9   bis_q_ggdebt_m    3193 non-null   float64      
 10  bis_q_ggdebt_n    4837 non-null   float64      
 11  bis_q_cpi         14629 non-null  float64      
 12  bis_q_cbpr        5616 non-null   object       
 13  bis_q_reer_b      6612 non-null   float64      
 14  bis_q_reer_n      5850 non-null   floa

In [133]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_q_pooled.describe(), 2)

Unnamed: 0,oecd_q_rggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_reer_b,...,imf_q_ca,imf_q_nebgs,imf_q_rebgs,imf_q_stir_mm,imf_q_stir_3mtb,imf_q_stir_mp,imf_q_USD_DCU_ep,imf_q_USD_DCU_pa,fred_q_rggexp,fred_q_ca
count,4596.0,2574.0,4712.0,4712.0,3710.0,4324.0,3193.0,4837.0,14629.0,6612.0,...,16363.0,6394.0,2672.0,11563.0,12017.0,8628.0,45018.0,44843.0,2409.0,5041.0
mean,6242188094073.38,4110268804224.7,43267557294382.57,877952832737.61,-0.84,8.69,65.47,59.03,53.98,97.27,...,-112562775.17,510296879682.83,1144155253742.51,21.03,9.07,8.23,15224232419.96,13964915904.98,9786801.38,-0.19
std,20778616834903.2,12179893287988.37,143658966097430.34,1950298072701.03,4.48,6.33,39.45,34.31,56.99,17.94,...,13269079748.31,6441353818015.24,9873062859847.3,625.4,12.83,10.13,484756522739.47,427808076362.11,32277310.05,5.42
min,1365709967.41,-44992000000.0,8075649110.82,2478559424.72,-32.73,0.05,4.6,1.6,0.0,41.04,...,-255000000000.0,-64057300000000.0,-29503000000000.0,-0.68,-1.01,-0.75,0.0,0.0,9907.0,-56.27
25%,38324020752.54,13771695535.06,200299275061.1,106562043365.17,-3.17,4.48,38.8,34.4,7.87,90.25,...,-613000000.0,-3939144601.75,-36986450000.0,3.17,3.53,3.25,0.01,0.01,53232.64,-3.2
50%,170953346178.88,42408000000.0,820200027039.0,242521326068.86,-0.73,7.09,57.6,51.2,49.86,97.99,...,-88856646.03,29500000.0,1274000000.0,5.53,6.29,5.75,0.19,0.19,83537.0,-0.52
75%,477074229248.64,109569253028.5,2335227000000.0,759272947642.64,1.57,11.45,84.2,74.1,91.91,103.22,...,173871838.4,6909525000.0,12698825000.0,9.9,10.93,10.0,1.0,1.0,416138.4,2.71
max,180434000000000.0,73184000000000.0,1332547200000000.0,16533342932403.3,18.93,57.84,238.2,226.9,2031.98,281.92,...,133000000000.0,212207000000000.0,167532000000000.0,61846.25,374.5,350.53,40801200000000.0,35189600000000.0,230259300.0,44.17


## **8. export the data**

In [134]:
# quarterly pooled data

df_q_pooled.to_excel(excel_writer='df_q_pooled.xlsx')

## **9. references**

* https://data.oecd.org/ <br/>
* https://stats.bis.org/#ppq=XRU_D_24D;pv=1,2~4~1,0,0~both <br/>
* https://databank.worldbank.org/source/world-development-indicators/preview/on <br/>
* https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm <br/>
* https://ec.europa.eu/eurostat/en/web/main/data/database <br/>
* https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976 <br/>
* https://fred.stlouisfed.org/ <br/>