# **RA1: quarterly data**

Dong Gyun Ko <br/>
last updated: august 29, 2022 <br/>

In [7]:
!pip install finance-datareader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
# import libraries

import numpy as np
import pandas as pd
import datetime as dt
import pandas_datareader as pdr
import pandas_datareader.data as web
from pandas_datareader import wb
import requests # python 3.6

import os
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [9]:
# set the working directory

os.chdir('/content/gdrive/MyDrive/Colab Notebooks/RA1_data')

In [10]:
# set the pandas display option

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# pd.options.display.max_rows = 100
# pd.options.display.max_columns = 100

## **1. OECD**

### **1.1. python code** <br/>

In [11]:
# debug HTTPConnectionPool error

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

retries = Retry(connect=5, read=3, redirect=3)
http_session = requests.Session()
http_session.mount('https://<yourdomain>.slack.com', HTTPAdapter(max_retries=retries))

In [12]:
# import the raw data object (EO: economic outlook)

start_time = dt.datetime(1960, 1, 1)
end_time = dt.datetime(2022, 4, 1)

df_oecd_eo_q_raw = pdr.oecd.OECDReader('EO', start=start_time, end=end_time, retry_count=3, pause=0.1, timeout=30, session=None, freq=None)
df_oecd_eo_q_raw = df_oecd_eo_q_raw.read()

df_oecd_eo_q_raw.index = pd.to_datetime(df_oecd_eo_q_raw.index)

print(type(df_oecd_eo_q_raw.index))
print(type(df_oecd_eo_q_raw.columns))

<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
<class 'pandas.core.indexes.multi.MultiIndex'>


In [13]:
# import the raw data

var_name_list = ['Government final consumption expenditure, volume',
                 'Government gross fixed capital formation, volume',
                 'Gross domestic product, volume, market prices',
                 'Gross domestic product, volume in USD, constant exchange rates',
                 'Current account balance as a percentage of GDP',
                 'Short-term interest rate']

var_list = ['oecd_q_ggexp', 'oecd_q_gginv', 'oecd_q_rgdp_dcu', 'oecd_q_rgdp_usd', 'oecd_q_ca', 'oecd_q_stir']

for i in range(0, len(var_list)):

    globals()['df_{}'.format(var_list[i])] = df_oecd_eo_q_raw.xs(('Quarterly', var_name_list[i]), 
                                                                 level=('Frequency', 'Variable'), 
                                                                 axis=1)

    globals()['df_{}'.format(var_list[i])] = pd.DataFrame(globals()['df_{}'.format(var_list[i])].stack())
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].reset_index().rename(columns={'Country':'country', 'Time':'quarter', 0:var_list[i]})
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])][['country', 'quarter', var_list[i]]]
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].sort_values(by=['country', 'quarter'])
    globals()['df_{}'.format(var_list[i])] = globals()['df_{}'.format(var_list[i])].reset_index().drop(columns=['index'])

In [14]:
# merge the raw data 

df_oecd_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['country', 'quarter'])

for var in var_list[2:]:

    df_oecd_q = pd.merge(df_oecd_q, globals()['df_{}'.format(var)], how='outer', on=['country', 'quarter'])

In [15]:
# replace the country

# Korea, Rep.
df_oecd_q = df_oecd_q.replace({'country':'Korea'}, 'Korea, Rep.')

# China
df_oecd_q = df_oecd_q.replace({'country':"China (People's Republic of)"}, 'China')

In [16]:
# filter

# countries group
cond1 = (df_oecd_q['country'] == 'Euro area (17 countries)')
cond2 = (df_oecd_q['country'] == 'OECD - Total')

df_oecd_q = df_oecd_q.loc[~cond1 & ~cond2]

In [17]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_oecd_q = pd.merge(df_wb_isocode, df_oecd_q, how='right', on=['country'])
df_oecd_q = df_oecd_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_oecd_q['quarter'] = pd.to_datetime(df_oecd_q['quarter'])
df_oecd_q['quarter'] = df_oecd_q['quarter'].dt.to_period('Q')

In [18]:
# oecd quarterly dataset

df_oecd_q

Unnamed: 0,country,isocode,quarter,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir
0,Australia,AUS,1976Q1,4.396707e+10,7.237854e+09,2.864821e+11,1.944343e+11,0.582957,
1,Australia,AUS,1976Q2,4.350482e+10,6.735119e+09,2.919834e+11,1.981681e+11,-0.008483,
2,Australia,AUS,1976Q3,4.385245e+10,7.263380e+09,2.943511e+11,1.997750e+11,-2.828399,
3,Australia,AUS,1976Q4,4.487166e+10,6.835647e+09,2.995515e+11,2.033045e+11,-1.257119,
4,Australia,AUS,1977Q1,4.629342e+10,7.111847e+09,3.062456e+11,2.078478e+11,-1.161298,
...,...,...,...,...,...,...,...,...,...
5336,South Africa,ZAF,2021Q2,,,,,,8.600000
5337,South Africa,ZAF,2021Q3,,,,,,9.156667
5338,South Africa,ZAF,2021Q4,,,,,,10.156667
5339,South Africa,ZAF,2022Q1,,,,,,10.263333


In [19]:
# export the oecd quarterly data

# df_oecd_q.to_excel(excel_writer='df_oecd_q.xlsx')

### **1.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|government final consumption expenditure, volume|oecd_q_ggexp|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|2|goverment gross fixed capital formation, volume|oecd_q_gginv|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|3|gross domestic product, volume, market prices|oecd_q_rgdp_dcu|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|4|gross domestic product, volume in USD, constant exchange rates|oecd_q_rgdp_usd|USD, 2015|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|5|current account balance as a percentage of GDP|oecd_q_ca|% of GDP|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|6|short-term interest rate|oecd_q_stir|% per quarter|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|

### **1.3. data summary**

In [20]:
# number of countries & sample period

print('number of countries:', df_oecd_q['isocode'].unique().shape[0])
print('start:', np.min(df_oecd_q['quarter'].unique()))
print('end:', np.max(df_oecd_q['quarter'].unique()))

number of countries: 43
start: 1976Q1
end: 2022Q2


In [21]:
# non-null count by each variables

df_oecd_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5341 entries, 0 to 5340
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          5341 non-null   object       
 1   isocode          5341 non-null   object       
 2   quarter          5341 non-null   period[Q-DEC]
 3   oecd_q_ggexp     4596 non-null   float64      
 4   oecd_q_gginv     2574 non-null   float64      
 5   oecd_q_rgdp_dcu  4712 non-null   float64      
 6   oecd_q_rgdp_usd  4712 non-null   float64      
 7   oecd_q_ca        3710 non-null   float64      
 8   oecd_q_stir      4324 non-null   float64      
dtypes: float64(6), object(2), period[Q-DEC](1)
memory usage: 375.7+ KB


In [22]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_oecd_q.describe(), 2)

Unnamed: 0,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir
count,4596.0,2574.0,4712.0,4712.0,3710.0,4324.0
mean,6242188094073.38,4110268804224.7,43267557294382.57,877952832737.61,-0.84,8.69
std,20778616834903.2,12179893287988.37,143658966097430.34,1950298072701.03,4.48,6.33
min,1365709967.41,-44992000000.0,8075649110.82,2478559424.72,-32.73,0.05
25%,38324020752.54,13771695535.06,200299275061.1,106562043365.17,-3.17,4.48
50%,170953346178.88,42408000000.0,820200027039.0,242521326068.86,-0.73,7.09
75%,477074229248.64,109569253028.5,2335227000000.0,759272947642.64,1.57,11.45
max,180434000000000.0,73184000000000.0,1332547200000000.0,16533342932403.3,18.93,57.84


## **2. BIS**

### **2.1. python code** <br/>

In [23]:
# import the raw data

var_list = ['bis_q_ggdebt_m', 'bis_q_ggdebt_n', 'bis_q_cpi', 'bis_q_cbpr', 'bis_q_reer_b', 'bis_q_reer_n']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv').set_index('isocode').transpose()
    globals()['df_{}'.format(var)] = pd.DataFrame(globals()['df_{}'.format(var)].stack(level='isocode')).reset_index()
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].rename(columns={'level_0':'quarter', 0:var})
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)][['isocode', 'quarter', var]].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [24]:
# merge the raw data 

df_bis_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['isocode', 'quarter'])

for var in var_list[2:]:

    df_bis_q = pd.merge(df_bis_q, globals()['df_{}'.format(var)], how='outer', on=['isocode', 'quarter'])

In [25]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_bis_q = pd.merge(df_wb_isocode, df_bis_q, how='right', on=['isocode'])
df_bis_q = df_bis_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_bis_q['quarter'] = pd.to_datetime(df_bis_q['quarter'])
df_bis_q['quarter'] = df_bis_q['quarter'].dt.to_period('Q')

In [26]:
# bis quarterly dataset

df_bis_q

Unnamed: 0,country,isocode,quarter,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_cbpr,bis_q_reer_b,bis_q_reer_n
0,United Arab Emirates,ARE,1994Q2,,,,,75.43,
1,United Arab Emirates,ARE,1995Q2,,,,,69.41,
2,United Arab Emirates,ARE,1996Q2,,,,,73.79,
3,United Arab Emirates,ARE,1997Q2,,,,,77.92,
4,United Arab Emirates,ARE,1998Q2,,,,,87.56,
...,...,...,...,...,...,...,...,...,...
15007,South Africa,ZAF,2017Q4,,50.30,147.58,6.75,76.44,
15008,South Africa,ZAF,2018Q4,,53.20,154.85,6.75,77.77,
15009,South Africa,ZAF,2019Q4,,57.80,160.64,6.5,77.58,
15010,South Africa,ZAF,2020Q4,,71.00,165.74,3.5,72.31,


### **2.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|total credit to the government sector at market value (core debt)|bis_q_ggdebt_m|% of GDP|quarterly|CRE|-|market value|https://stats.bis.org/statx/srs/table/f5.1|
|2|total credit to the government sector at nominal value (core debt)|bis_q_ggdebt_n|% of GDP|quarterly|CRE|-|nominal value|https://stats.bis.org/statx/srs/table/f5.4|
|3|consumer price index - BIS spliced, not seasonally adjusted|bis_q_cpi|2010=100|quarterly|CPI|-|-|https://stats.bis.org/statx/srs/table/k1|
|4|central bank policy rates|bis_q_cbpr|%|quarterly|CBP|-|-|https://stats.bis.org/statx/srs/table/l1|
|5|real effective exchange rate, broad(60 economies) indicies|bis_q_reer_b|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|6|real effective exchange rate, narrow(27 economies) indicies|bis_q_reer_n|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|

### **2.3. data summary**

In [27]:
# number of countries & sample period

print('number of countries:', df_bis_q['isocode'].unique().shape[0])
print('start:', np.min(df_bis_q['quarter'].unique()))
print('end:', np.max(df_bis_q['quarter'].unique()))

number of countries: 60
start: 1913Q1
end: 2022Q2


In [28]:
# non-null count by each variables

df_bis_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15012 entries, 0 to 15011
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype        
---  ------          --------------  -----        
 0   country         15012 non-null  object       
 1   isocode         15012 non-null  object       
 2   quarter         15012 non-null  period[Q-DEC]
 3   bis_q_ggdebt_m  3193 non-null   float64      
 4   bis_q_ggdebt_n  4837 non-null   float64      
 5   bis_q_cpi       14629 non-null  float64      
 6   bis_q_cbpr      5616 non-null   object       
 7   bis_q_reer_b    6612 non-null   float64      
 8   bis_q_reer_n    5850 non-null   float64      
dtypes: float64(5), object(3), period[Q-DEC](1)
memory usage: 1.0+ MB


In [29]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_bis_q.describe(), 2)

Unnamed: 0,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_reer_b,bis_q_reer_n
count,3193.0,4837.0,14629.0,6612.0,5850.0
mean,65.47,59.03,53.98,97.27,100.15
std,39.45,34.31,56.99,17.94,17.21
min,4.6,1.6,0.0,41.04,45.28
25%,38.8,34.4,7.87,90.25,90.45
50%,57.6,51.2,49.86,97.99,98.45
75%,84.2,74.1,91.91,103.22,106.97
max,238.2,226.9,2031.98,281.92,248.02


## **3. World Bank**

### **3.1. python code** <br/>

In [30]:
# import the raw data

var_list = ['wb_q_ggdebt_d1', 'wb_q_ggdebt_d2', 'wb_q_ggdebt_d3', 'wb_q_ggdebt_d4']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv').set_index('isocode').transpose()
    globals()['df_{}'.format(var)] = pd.DataFrame(globals()['df_{}'.format(var)].stack(level='isocode')).reset_index()
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].rename(columns={'level_0':'quarter', 0:var})
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)][['isocode', 'quarter', var]].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [31]:
# merge the raw data 

df_wb_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['isocode', 'quarter'])

for var in var_list[2:]:

    df_wb_q = pd.merge(df_wb_q, globals()['df_{}'.format(var)], how='outer', on=['isocode', 'quarter'])

In [32]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_wb_q = pd.merge(df_wb_isocode, df_wb_q, how='right', on=['isocode'])
df_wb_q = df_wb_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

def datetime_func(x):

    result = str(x) # numerical type, not string type -> indexing is not applied"
    return result[0:6]

df_wb_q['quarter'] = pd.DataFrame(df_wb_q['quarter'].apply(datetime_func))
df_wb_q['quarter'] = pd.to_datetime(df_wb_q['quarter'])
df_wb_q['quarter'] = df_wb_q['quarter'].dt.to_period('Q')

In [33]:
# replace the value into NaN

# '..'
df_wb_q = df_wb_q.replace('..', np.nan)

# 0
df_wb_q = df_wb_q.replace(0, np.nan)

In [34]:
# set the astype

df_wb_q = df_wb_q.astype({'wb_q_ggdebt_d1':'float', 'wb_q_ggdebt_d2':'float', 'wb_q_ggdebt_d3':'float', 'wb_q_ggdebt_d4':'float'})

In [35]:
# wb quarterly dataset

df_wb_q

Unnamed: 0,country,isocode,quarter,wb_q_ggdebt_d1,wb_q_ggdebt_d2,wb_q_ggdebt_d3,wb_q_ggdebt_d4
0,Albania,ALB,1995Q1,,,,
1,Albania,ALB,1995Q2,,,,
2,Albania,ALB,1995Q3,,,,
3,Albania,ALB,1995Q4,,,,
4,Albania,ALB,1996Q1,,,,
...,...,...,...,...,...,...,...
11222,South Africa,ZAF,2021Q1,,,,
11223,South Africa,ZAF,2021Q2,,,,
11224,South Africa,ZAF,2021Q3,,,,
11225,South Africa,ZAF,2021Q4,,,,


### **3.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|Gross PSD, General Gov.-D1, All maturities, Debt securities + loans, Nominal Value, % of GDP|wb_q_ggdebt_d1|% of GDP|quarterly|DP.DOD.DLD1.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|2|Gross PSD, General Gov.-D2, All maturities, D1+ SDRs + currency and deposits, Nominal Value, % of GDP|wb_q_ggdebt_d2|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|3|Gross PSD, General Gov.-D3, All maturities, D2+other accounts payable, Nominal Value, % of GDP|wb_q_ggdebt_d3|% of GDP|quarterly|DP.DOD.DLD3.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|4|Gross PSD, General Gov.-D4, All maturities, D3+insurance, pensions, and standardized guarantees, Nominal Value, % of GDP|wb_q_ggdebt_d4|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|

### **3.3. data summary**

In [36]:
# number of countries & sample period

print('number of countries:', df_wb_q['isocode'].unique().shape[0])
print('start:', np.min(df_wb_q['quarter'].unique()))
print('end:', np.max(df_wb_q['quarter'].unique()))

number of countries: 103
start: 1995Q1
end: 2022Q1


In [37]:
# non-null count by each variables

df_wb_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11227 entries, 0 to 11226
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype        
---  ------          --------------  -----        
 0   country         11227 non-null  object       
 1   isocode         11227 non-null  object       
 2   quarter         11227 non-null  period[Q-DEC]
 3   wb_q_ggdebt_d1  3907 non-null   float64      
 4   wb_q_ggdebt_d2  3759 non-null   float64      
 5   wb_q_ggdebt_d3  2730 non-null   float64      
 6   wb_q_ggdebt_d4  651 non-null    float64      
dtypes: float64(4), object(2), period[Q-DEC](1)
memory usage: 614.1+ KB


In [38]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_wb_q.describe(), 2)

Unnamed: 0,wb_q_ggdebt_d1,wb_q_ggdebt_d2,wb_q_ggdebt_d3,wb_q_ggdebt_d4
count,3907.0,3759.0,2730.0,651.0
mean,53.02,44.04,49.76,78.62
std,34.12,35.29,35.34,40.5
min,0.0,0.0,0.0,0.0
25%,28.66,13.18,23.15,44.68
50%,46.63,41.15,47.66,76.16
75%,67.68,66.0,70.23,107.62
max,232.1,159.34,146.42,171.92


## **4. Eurostat**

### **4.1. python code** <br/>

In [39]:
# import the raw data

var_list = ['eustat_q_ggdebt', 'eustat_q_ggexp', 'eustat_q_gginv']

for var in var_list:

    globals()['df_{}'.format(var)] = pd.read_csv('df_' + var + '.csv')
    globals()['df_{}'.format(var)] = globals()['df_{}'.format(var)].sort_values(by=['alpha-2', 'quarter']).reset_index(drop=True)

In [40]:
# merge the raw data 

df_eustat_q = pd.merge(globals()['df_{}'.format(var_list[0])], globals()['df_{}'.format(var_list[1])], how='outer', on=['alpha-2', 'quarter'])

for var in var_list[2:]:

    df_eustat_q = pd.merge(df_eustat_q, globals()['df_{}'.format(var)], how='outer', on=['alpha-2', 'quarter'])

In [41]:
# replace the alpha-2

# Greece
df_eustat_q = df_eustat_q.replace({'alpha-2':'EL'}, 'GR')

In [42]:
# update the wb isocode data

df_isocode = pd.read_csv('df_isocode.csv', encoding='utf-8').drop(columns=['country', 'numeric'])
df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8')

df_wb_isocode = pd.merge(df_wb_isocode, df_isocode, how='left', on=['alpha-3'])
df_wb_isocode = df_wb_isocode[['country', 'alpha-3', 'alpha-2']].rename(columns={'alpha-3':'isocode'})

In [43]:
# merge the wb isocode data

df_eustat_q = pd.merge(df_wb_isocode, df_eustat_q, how='right', on=['alpha-2']).drop(columns=['alpha-2'])
df_eustat_q = df_eustat_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_eustat_q['quarter'] = pd.to_datetime(df_eustat_q['quarter'])
df_eustat_q['quarter'] = df_eustat_q['quarter'].dt.to_period('Q')

In [44]:
# eustat quarterly dataset

df_eustat_q

Unnamed: 0,country,isocode,quarter,eustat_q_ggdebt,eustat_q_ggexp,eustat_q_gginv
0,Austria,AUT,2000Q1,70.20,,
1,Austria,AUT,2000Q2,70.80,,
2,Austria,AUT,2000Q3,71.40,,
3,Austria,AUT,2000Q4,66.10,,
4,Austria,AUT,2001Q1,69.90,18.70,1.90
...,...,...,...,...,...,...
2972,Sweden,SWE,2021Q1,39.20,26.00,4.00
2973,Sweden,SWE,2021Q2,37.60,25.70,4.60
2974,Sweden,SWE,2021Q3,35.90,26.20,4.50
2975,Sweden,SWE,2021Q4,36.30,25.90,5.70


### **4.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|government consolidated gross debt [S13: general government]|eustat_q_ggdebt|% of GDP|quarterly|GOV_10Q_GGDEBT [GD]|-|PC_GDP|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGDEBT__custom_3260679/default/table?lang=en|
|2|final consumption expenditure [S13: general government]|eustat_q_ggexp|% of GDP|quarterly|GOV_10Q_GGNFA [P3]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263185/default/table?lang=en|
|3|gross fixed capital formation [S13: general government]|eustat_q_gginv|% of GDP|quarterly|GOV_10Q_GGNFA [P51G]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263870/default/table?lang=en|

### **4.3. data summary** <br/>

In [45]:
# number of countries & sample period

print('number of countries:', df_eustat_q['isocode'].unique().shape[0])
print('start:', np.min(df_eustat_q['quarter'].unique()))
print('end:', np.max(df_eustat_q['quarter'].unique()))

number of countries: 30
start: 1980Q1
end: 2022Q1


In [46]:
# non-null count by each variables

df_eustat_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2977 entries, 0 to 2976
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          2977 non-null   object       
 1   isocode          2977 non-null   object       
 2   quarter          2977 non-null   period[Q-DEC]
 3   eustat_q_ggdebt  2578 non-null   float64      
 4   eustat_q_ggexp   2878 non-null   float64      
 5   eustat_q_gginv   2878 non-null   float64      
dtypes: float64(3), object(2), period[Q-DEC](1)
memory usage: 139.7+ KB


In [47]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_eustat_q.describe(), 2)

Unnamed: 0,eustat_q_ggdebt,eustat_q_ggexp,eustat_q_gginv
count,2578.0,2878.0,2878.0
mean,59.2,19.79,3.73
std,35.16,3.68,1.43
min,3.4,2.0,0.5
25%,35.9,17.7,2.8
50%,52.4,19.7,3.6
75%,75.6,22.4,4.4
max,209.3,29.7,17.1


## **5. IMF**

### **5.1. python code** <br/>

In [48]:
# import the raw data

df_imf_q = pd.read_csv('df_imf_q.csv').sort_values(by=['country', 'quarter']).reset_index(drop=True)

In [49]:
# filter

cond1 = (df_imf_q['country'] == 'Advanced Economies')
cond2 = (df_imf_q['country'] == 'Anguilla')
cond3 = (df_imf_q['country'] == 'Cura?ao, Kingdom of the Netherlands')
cond4 = (df_imf_q['country'] == 'Czechoslovakia')
cond5 = (df_imf_q['country'] == 'Emerging and Developing Asia')
cond6 = (df_imf_q['country'] == 'Emerging and Developing Countries')
cond7 = (df_imf_q['country'] == 'Euro Area')
cond8 = (df_imf_q['country'] == 'Netherlands Antilles')
cond9 = (df_imf_q['country'] == 'Sub-Saharan Africa')
cond10 = (df_imf_q['country'] == 'Taiwan Province of China')
cond11 = (df_imf_q['country'] == 'West African Economic and Monetary Union (WAEMU)')
cond12 = (df_imf_q['country'] == 'West Bank and Gaza')
cond13 = (df_imf_q['country'] == 'Western Hemisphere')
cond14 = (df_imf_q['country'] == 'World')
cond15 = (df_imf_q['country'] == 'Yugoslavia')
cond16 = (df_imf_q['country'] == 'Montserrat')

df_imf_q = df_imf_q.loc[~cond1 & ~cond2 & ~cond3 & ~cond4 & ~cond5 & ~cond6 & ~cond7 & ~cond8 & ~cond9 & ~cond10 & ~cond11 & ~cond12 & ~cond13 & ~cond14 & ~cond15 & ~cond16]

In [50]:
# replace the country

df_imf_q = df_imf_q.replace({'country':'Afghanistan, Islamic Rep. of'}, 'Afghanistan')
df_imf_q = df_imf_q.replace({'country':'Armenia, Rep. of'}, 'Armenia')
df_imf_q = df_imf_q.replace({'country':'Aruba, Kingdom of the Netherlands'}, 'Aruba')
df_imf_q = df_imf_q.replace({'country':'Azerbaijan, Rep. of'}, 'Azerbaijan')
df_imf_q = df_imf_q.replace({'country':'Bahrain, Kingdom of'}, 'Bahrain')
df_imf_q = df_imf_q.replace({'country':'Belarus, Rep. of'}, 'Belarus')
df_imf_q = df_imf_q.replace({'country':"C?te d'Ivoire"}, "Cote d'Ivoire")
df_imf_q = df_imf_q.replace({'country':'Central African Rep.'}, 'Central African Republic')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Hong Kong'}, 'Hong Kong SAR, China')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Mainland'}, 'China')
df_imf_q = df_imf_q.replace({'country':'Comoros, Union of the'}, 'Comoros')
df_imf_q = df_imf_q.replace({'country':'Congo, Dem. Rep. of the'}, 'Congo, Dem. Rep.')
df_imf_q = df_imf_q.replace({'country':'Congo, Rep. of'}, 'Congo, Rep.')
df_imf_q = df_imf_q.replace({'country':'Croatia, Rep. of'}, 'Croatia')
df_imf_q = df_imf_q.replace({'country':'Czech Rep.'}, 'Czech Republic')
df_imf_q = df_imf_q.replace({'country':'Dominican Rep.'}, 'Dominican Republic')
df_imf_q = df_imf_q.replace({'country':'Egypt, Arab Rep. of'}, 'Egypt, Arab Rep.')
df_imf_q = df_imf_q.replace({'country':'Equatorial Guinea, Rep. of'}, 'Equatorial Guinea')
df_imf_q = df_imf_q.replace({'country':'Estonia, Rep. of'}, 'Estonia')
df_imf_q = df_imf_q.replace({'country':'Eswatini, Kingdom of'}, 'Eswatini')
df_imf_q = df_imf_q.replace({'country':'Ethiopia, The Federal Dem. Rep. of'}, 'Ethiopia')
df_imf_q = df_imf_q.replace({'country':'Fiji, Rep. of'}, 'Fiji')
df_imf_q = df_imf_q.replace({'country':'Iran, Islamic Rep. of'}, 'Iran, Islamic Rep.')
df_imf_q = df_imf_q.replace({'country':'Kazakhstan, Rep. of'}, 'Kazakhstan')
df_imf_q = df_imf_q.replace({'country':'Korea, Rep. of'}, 'Korea, Rep.')
df_imf_q = df_imf_q.replace({'country':'Kosovo, Rep. of'}, 'Kosovo')
df_imf_q = df_imf_q.replace({'country':'Kyrgyz Rep.'}, 'Kyrgyz Republic')
df_imf_q = df_imf_q.replace({'country':"Lao People's Dem. Rep."}, 'Lao PDR')
df_imf_q = df_imf_q.replace({'country':'Lesotho, Kingdom of'}, 'Lesotho')
df_imf_q = df_imf_q.replace({'country':'China, P.R.: Macao'}, 'Macao SAR, China')
df_imf_q = df_imf_q.replace({'country':'Madagascar, Rep. of'}, 'Madagascar')
df_imf_q = df_imf_q.replace({'country':'Mauritania, Islamic Rep. of'}, 'Mauritania')
df_imf_q = df_imf_q.replace({'country':'Micronesia, Federated States of'}, 'Micronesia, Fed. Sts.')
df_imf_q = df_imf_q.replace({'country':'Moldova, Rep. of'}, 'Moldova')
df_imf_q = df_imf_q.replace({'country':'Mozambique, Rep. of'}, 'Mozambique')
df_imf_q = df_imf_q.replace({'country':'Nauru, Rep. of'}, 'Nauru')
df_imf_q = df_imf_q.replace({'country':'Netherlands, The'}, 'Netherlands')
df_imf_q = df_imf_q.replace({'country':'North Macedonia, Republic of'}, 'North Macedonia')
df_imf_q = df_imf_q.replace({'country':'Palau, Rep. of'}, 'Palau')
df_imf_q = df_imf_q.replace({'country':'Poland, Rep. of'}, 'Poland')
df_imf_q = df_imf_q.replace({'country':'S?o Tom? and Pr?ncipe, Dem. Rep. of'}, 'Sao Tome and Principe')
df_imf_q = df_imf_q.replace({'country':'San Marino, Rep. of'}, 'San Marino')
df_imf_q = df_imf_q.replace({'country':'Serbia, Rep. of'}, 'Serbia')
df_imf_q = df_imf_q.replace({'country':'Sint Maarten, Kingdom of the Netherlands'}, 'Sint Maarten (Dutch part)')
df_imf_q = df_imf_q.replace({'country':'Slovak Rep.'}, 'Slovak Republic')
df_imf_q = df_imf_q.replace({'country':'Slovenia, Rep. of'}, 'Slovenia')
df_imf_q = df_imf_q.replace({'country':'South Sudan, Rep. of'}, 'South Sudan')
df_imf_q = df_imf_q.replace({'country':'Syrian Arab Rep.'}, 'Syrian Arab Republic')
df_imf_q = df_imf_q.replace({'country':'Tajikistan, Rep. of'}, 'Tajikistan')
df_imf_q = df_imf_q.replace({'country':'Tanzania, United Rep. of'}, 'Tanzania')
df_imf_q = df_imf_q.replace({'country':'Timor-Leste, Dem. Rep. of'}, 'Timor-Leste')
df_imf_q = df_imf_q.replace({'country':'Turkey'}, 'Turkiye')
df_imf_q = df_imf_q.replace({'country':'Uzbekistan, Rep. of'}, 'Uzbekistan')
df_imf_q = df_imf_q.replace({'country':'Venezuela, Rep. Bolivariana de'}, 'Venezuela, RB')
df_imf_q = df_imf_q.replace({'country':'Yemen, Rep. of'}, 'Yemen, Rep.')

In [51]:
# merge the wb isocode data

df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8').rename(columns={'alpha-3':'isocode'})
df_imf_q = df_imf_q[['country', 'quarter', 'imf_q_nggexp', 'imf_q_rggexp', 'imf_q_ngdp', 'imf_q_rgdp', 'imf_q_cpi', 'imf_q_gdpd', 'imf_q_nca', 'imf_q_rca', 'imf_q_neer', 'imf_q_reer']]
df_imf_q = pd.merge(df_wb_isocode, df_imf_q, how='right', on=['country'])
df_imf_q = df_imf_q.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_imf_q['quarter'] = pd.to_datetime(df_imf_q['quarter'])
df_imf_q['quarter'] = df_imf_q['quarter'].dt.to_period('Q')

In [52]:
# imf quarterly dataset

df_imf_q

Unnamed: 0,country,isocode,quarter,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
0,Aruba,ABW,1986Q1,,,,,41.61,,,,,
1,Aruba,ABW,1986Q2,,,,,41.71,,,,,
2,Aruba,ABW,1986Q3,,,,,42.03,,,,,
3,Aruba,ABW,1986Q4,,,,,42.29,,,,,
4,Aruba,ABW,1987Q1,,,,,43.00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35726,Zimbabwe,ZWE,2020Q4,,,,,4113.56,,,,,
35727,Zimbabwe,ZWE,2021Q1,,,,,4641.16,,,,,
35728,Zimbabwe,ZWE,2021Q2,,,,,4984.78,,,,,
35729,Zimbabwe,ZWE,2021Q3,,,,,5520.55,,,,,


### **5.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|1|general government final consumption expenditure, norminal|df_imf_q_nggexp|DCU|quarterly|NCGG_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|2|general government final consumption expenditure, real|df_imf_q_rggexp|DCU|quarterly|NCGG_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|3|gross domestic product, norminal|df_imf_q_ngdp|DCU|quarterly|NGDP_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|4|gross domestic product, real|df_imf_q_rgdp|DCU|quarterly|NGDP_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|5|prices, consumer price index, all items|df_imf_q_cpi|index|quarterly|PCPI_IX|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|6|gross domestic product, deflator|df_imf_q_gdpd|index|quarterly|NGDP_D_SA_IX|-|SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|7|external balance of goods and services, norminal|df_imf_q_nca|DCU|quarterly|NNXGS_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|8|external balance of goods and services, real|df_imf_q_rca|DCU|quarterly|NNXGS_R_SA_XDC||real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|9|exchange rates, nominal effective exchange rate|df_imf_q_neer|index|quarterly|ENEER_IX|-|nominal|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|10|exchange rates, real effective exchange rate based on consumer price index|df_imf_q_reer|index|quarterly|EREER_IX|-|real|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|

### **5.3. data summary** <br/>

In [53]:
# number of countries & sample period

print('number of countries:', df_imf_q['isocode'].unique().shape[0])
print('start:', np.min(df_imf_q['quarter'].unique()))
print('end:', np.max(df_imf_q['quarter'].unique()))

number of countries: 190
start: 1950Q1
end: 2022Q2


In [54]:
# non-null count by each variables

df_imf_q.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35731 entries, 0 to 35730
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype        
---  ------        --------------  -----        
 0   country       35731 non-null  object       
 1   isocode       35731 non-null  object       
 2   quarter       35731 non-null  period[Q-DEC]
 3   imf_q_nggexp  6756 non-null   float64      
 4   imf_q_rggexp  6715 non-null   float64      
 5   imf_q_ngdp    6823 non-null   float64      
 6   imf_q_rgdp    7083 non-null   float64      
 7   imf_q_cpi     34150 non-null  float64      
 8   imf_q_gdpd    6630 non-null   float64      
 9   imf_q_nca     6441 non-null   float64      
 10  imf_q_rca     2745 non-null   float64      
 11  imf_q_neer    16528 non-null  float64      
 12  imf_q_reer    15051 non-null  float64      
dtypes: float64(10), object(2), period[Q-DEC](1)
memory usage: 3.5+ MB


In [55]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_imf_q.describe(), 2)

Unnamed: 0,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
count,6756.0,6715.0,6823.0,7083.0,34150.0,6630.0,6441.0,2745.0,16528.0,15051.0
mean,4205204400311.81,4009550560139.0,38817798818308.29,35181397260593.5,77.59,91.19,511021361526.78,1126256476630.97,3817252747.92,114.72
std,26577525156456.94,20014372210197.52,283516440422157.9,223144125304839.1,480.81,102.27,6412817275696.54,9724729285105.64,134425254762.94,116.51
min,39453000.0,261872261.9,267985000.0,81160000.0,0.0,0.86,-64100000000000.0,-31000000000000.0,0.41,15.38
25%,6603421353.75,9632300018.5,31591896826.0,46900000000.0,14.47,67.04,-3793000000.0,-29975000000.0,94.57,92.93
50%,37290795000.0,50055600000.0,180000000000.0,268000000000.0,61.03,91.4,20189000.0,938000000.0,102.34,100.62
75%,164000000000.0,154000000000.0,859000000000.0,819500000000.0,99.98,107.69,6522300000.0,11649300000.0,125.37,112.45
max,393000000000000.0,230000000000000.0,4900000000000000.0,2920000000000000.0,29407.15,3553.02,212000000000000.0,168000000000000.0,8470000000000.0,5988.81


## **6. pooled data**

### **6.1. python code** <br/>

In [56]:
# generate the quarterly pooled data

df_q_pooled = pd.merge(df_oecd_q, df_bis_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_wb_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_eustat_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = pd.merge(df_q_pooled, df_imf_q, how='outer', on=['country', 'isocode', 'quarter'])
df_q_pooled = df_q_pooled.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

In [57]:
# generate the quarterly temp data

df_q_temp = pd.read_csv('df_q_temp.csv').set_index(['country', 'alpha-3']).transpose()
df_q_temp = pd.DataFrame(df_q_temp.stack(level=['country', 'alpha-3'])).reset_index()
df_q_temp = df_q_temp.rename(columns={'level_0':'quarter', 'alpha-3':'isocode', 0:'value'})
df_q_temp = df_q_temp[['country', 'isocode', 'quarter', 'value']].sort_values(by=['isocode', 'quarter']).reset_index(drop=True)
df_q_temp['quarter'] = pd.to_datetime(df_q_temp['quarter'])
df_q_temp['quarter'] = df_q_temp['quarter'].dt.to_period('Q')

In [58]:
# merge the quarterly pooled data with the quarterly temp data

df_q_pooled = pd.merge(df_q_pooled, df_q_temp, how='outer', on=['country', 'isocode', 'quarter']).drop(columns=['value'])

In [59]:
# quarterly pooled dataset

df_q_pooled = df_q_pooled.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

df_q_pooled

Unnamed: 0,country,isocode,quarter,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,...,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
0,Aruba,ABW,1913Q1,,,,,,,,...,,,,,,,,,,
1,Aruba,ABW,1913Q2,,,,,,,,...,,,,,,,,,,
2,Aruba,ABW,1913Q3,,,,,,,,...,,,,,,,,,,
3,Aruba,ABW,1913Q4,,,,,,,,...,,,,,,,,,,
4,Aruba,ABW,1914Q1,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95041,Zimbabwe,ZWE,2021Q2,,,,,,,,...,,,,,4984.78,,,,,
95042,Zimbabwe,ZWE,2021Q3,,,,,,,,...,,,,,5520.55,,,,,
95043,Zimbabwe,ZWE,2021Q4,,,,,,,,...,,,,,6497.51,,,,,
95044,Zimbabwe,ZWE,2022Q1,,,,,,,,...,,,,,,,,,,


### **6.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|$OECD$|||||||||
|1|government final consumption expenditure, volume|oecd_q_ggexp|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|2|goverment gross fixed capital formation, volume|oecd_q_gginv|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|3|gross domestic product, volume, market prices|oecd_q_rgdp_dcu|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|4|gross domestic product, volume in USD, constant exchange rates|oecd_q_rgdp_usd|USD, 2015|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|5|current account balance as a percentage of GDP|oecd_q_ca|% of GDP|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|6|short-term interest rate|oecd_q_stir|% per quarter|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|$BIS$|||||||||
|1|total credit to the government sector at market value (core debt)|bis_q_ggdebt_m|% of GDP|quarterly|CRE|-|market value|https://stats.bis.org/statx/srs/table/f5.1|
|2|total credit to the government sector at nominal value (core debt)|bis_q_ggdebt_n|% of GDP|quarterly|CRE|-|nominal value|https://stats.bis.org/statx/srs/table/f5.4|
|3|consumer price index - BIS spliced, not seasonally adjusted|bis_q_cpi|2010=100|quarterly|CPI|-|-|https://stats.bis.org/statx/srs/table/k1|
|4|central bank policy rates|bis_q_cbpr|%|quarterly|CBP|-|-|https://stats.bis.org/statx/srs/table/l1|
|5|real effective exchange rate, broad(60 economies) indicies|bis_q_reer_b|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|6|real effective exchange rate, narrow(27 economies) indicies|bis_q_reer_n|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|$World Bank$|||||||||
|1|Gross PSD, General Gov.-D1, All maturities, Debt securities + loans, Nominal Value, % of GDP|wb_q_ggdebt_d1|% of GDP|quarterly|DP.DOD.DLD1.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|2|Gross PSD, General Gov.-D2, All maturities, D1+ SDRs + currency and deposits, Nominal Value, % of GDP|wb_q_ggdebt_d2|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|3|Gross PSD, General Gov.-D3, All maturities, D2+other accounts payable, Nominal Value, % of GDP|wb_q_ggdebt_d3|% of GDP|quarterly|DP.DOD.DLD3.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|4|Gross PSD, General Gov.-D4, All maturities, D3+insurance, pensions, and standardized guarantees, Nominal Value, % of GDP|wb_q_ggdebt_d4|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|$Eurostat$|||||||||
|1|government consolidated gross debt [S13: general government]|eustat_q_ggdebt|% of GDP|quarterly|GOV_10Q_GGDEBT [GD]|-|PC_GDP|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGDEBT__custom_3260679/default/table?lang=en|
|2|final consumption expenditure [S13: general government]|eustat_q_ggexp|% of GDP|quarterly|GOV_10Q_GGNFA [P3]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263185/default/table?lang=en|
|3|gross fixed capital formation [S13: general government]|eustat_q_gginv|% of GDP|quarterly|GOV_10Q_GGNFA [P51G]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263870/default/table?lang=en|
|$IMF$|||||||||
|1|general government final consumption expenditure, norminal|df_imf_q_nggexp|DCU|quarterly|NCGG_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|2|general government final consumption expenditure, real|df_imf_q_rggexp|DCU|quarterly|NCGG_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|3|gross domestic product, norminal|df_imf_q_ngdp|DCU|quarterly|NGDP_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|4|gross domestic product, real|df_imf_q_rgdp|DCU|quarterly|NGDP_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|5|prices, consumer price index, all items|df_imf_q_cpi|index|quarterly|PCPI_IX|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|6|gross domestic product, deflator|df_imf_q_gdpd|index|quarterly|NGDP_D_SA_IX|-|SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|7|external balance of goods and services, norminal|df_imf_q_nca|DCU|quarterly|NNXGS_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|8|external balance of goods and services, real|df_imf_q_rca|DCU|quarterly|NNXGS_R_SA_XDC||real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|9|exchange rates, nominal effective exchange rate|df_imf_q_neer|index|quarterly|ENEER_IX|-|nominal|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|
|10|exchange rates, real effective exchange rate based on consumer price index|df_imf_q_reer|index|quarterly|EREER_IX|-|real|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976|

### **6.3. data summary**

In [60]:
# number of countries & sample period

print('number of countries:', df_q_pooled['isocode'].unique().shape[0])
print('start:', np.min(df_q_pooled['quarter'].unique()))
print('end:', np.max(df_q_pooled['quarter'].unique()))

number of countries: 217
start: 1913Q1
end: 2022Q2


In [61]:
# non-null count by each variables

df_q_pooled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95046 entries, 0 to 95045
Data columns (total 32 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          95046 non-null  object       
 1   isocode          95046 non-null  object       
 2   quarter          95046 non-null  period[Q-DEC]
 3   oecd_q_ggexp     4596 non-null   float64      
 4   oecd_q_gginv     2574 non-null   float64      
 5   oecd_q_rgdp_dcu  4712 non-null   float64      
 6   oecd_q_rgdp_usd  4712 non-null   float64      
 7   oecd_q_ca        3710 non-null   float64      
 8   oecd_q_stir      4324 non-null   float64      
 9   bis_q_ggdebt_m   3193 non-null   float64      
 10  bis_q_ggdebt_n   4837 non-null   float64      
 11  bis_q_cpi        14629 non-null  float64      
 12  bis_q_cbpr       5616 non-null   object       
 13  bis_q_reer_b     6612 non-null   float64      
 14  bis_q_reer_n     5850 non-null   float64      
 15  wb

In [62]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_q_pooled.describe(), 2)

Unnamed: 0,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_reer_b,...,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
count,4596.0,2574.0,4712.0,4712.0,3710.0,4324.0,3193.0,4837.0,14629.0,6612.0,...,6756.0,6715.0,6823.0,7083.0,34150.0,6630.0,6441.0,2745.0,16528.0,15051.0
mean,6242188094073.38,4110268804224.7,43267557294382.57,877952832737.61,-0.84,8.69,65.47,59.03,53.98,97.27,...,4205204400311.81,4009550560139.0,38817798818308.29,35181397260593.5,77.59,91.19,511021361526.78,1126256476630.97,3817252747.92,114.72
std,20778616834903.2,12179893287988.37,143658966097430.34,1950298072701.03,4.48,6.33,39.45,34.31,56.99,17.94,...,26577525156456.94,20014372210197.52,283516440422157.94,223144125304839.1,480.81,102.27,6412817275696.54,9724729285105.65,134425254762.94,116.51
min,1365709967.41,-44992000000.0,8075649110.82,2478559424.72,-32.73,0.05,4.6,1.6,0.0,41.04,...,39453000.0,261872261.9,267985000.0,81160000.0,0.0,0.86,-64100000000000.0,-31000000000000.0,0.41,15.38
25%,38324020752.54,13771695535.06,200299275061.1,106562043365.17,-3.17,4.48,38.8,34.4,7.87,90.25,...,6603421353.75,9632300018.5,31591896826.0,46900000000.0,14.47,67.04,-3793000000.0,-29975000000.0,94.57,92.93
50%,170953346178.88,42408000000.0,820200027039.0,242521326068.86,-0.73,7.09,57.6,51.2,49.86,97.99,...,37290795000.0,50055600000.0,180000000000.0,268000000000.0,61.03,91.4,20189000.0,938000000.0,102.34,100.62
75%,477074229248.64,109569253028.5,2335227000000.0,759272947642.64,1.57,11.45,84.2,74.1,91.91,103.22,...,164000000000.0,154000000000.0,859000000000.0,819500000000.0,99.98,107.69,6522300000.0,11649300000.0,125.37,112.45
max,180434000000000.0,73184000000000.0,1332547200000000.0,16533342932403.3,18.93,57.84,238.2,226.9,2031.98,281.92,...,393000000000000.0,230000000000000.0,4900000000000000.0,2920000000000000.0,29407.15,3553.02,212000000000000.0,168000000000000.0,8470000000000.0,5988.81


## **7. panel data**

### **7.1. python code** <br/>

In [63]:
# generate the count matrix

count_df_q_pooled = df_q_pooled.groupby(df_q_pooled['isocode']).count()

In [64]:
# filter

# ggdebt
cond1 = (count_df_q_pooled['bis_q_ggdebt_m'] == 0)
cond2 = (count_df_q_pooled['bis_q_ggdebt_n'] == 0)
cond3 = (count_df_q_pooled['wb_q_ggdebt_d2'] == 0)
cond4 = (count_df_q_pooled['eustat_q_ggdebt'] == 0)

# ggexp
cond5 = (count_df_q_pooled['oecd_q_ggexp'] == 0)
cond6 = (count_df_q_pooled['eustat_q_ggexp'] == 0)

# gginv
cond7 = (count_df_q_pooled['oecd_q_gginv'] == 0)
cond8 = (count_df_q_pooled['eustat_q_gginv'] == 0)

count_df_q_pooled = count_df_q_pooled.loc[(~cond1 | ~cond2 | ~cond3 | ~cond4) & (~cond5 | ~cond6) & (~cond7 | ~cond8)]
count_df_q_pooled = count_df_q_pooled.reset_index()

In [65]:
# generate the panel data

# select the T
cond_T = (df_q_pooled['quarter'].dt.year >= 1990)

df_q_panel = df_q_pooled.loc[cond_T]

# select the N
print(count_df_q_pooled['isocode'].unique().shape[0])
isocode_list = count_df_q_pooled['isocode'].unique()

for i in range(0, count_df_q_pooled['isocode'].unique().shape[0]):
  
    globals()['cond_N{}'.format(i+1)] = (df_q_pooled['isocode'] == isocode_list[i])

df_q_panel = df_q_panel.loc[cond_N1 | cond_N2 | cond_N3 | cond_N4 | cond_N5 | cond_N6 | cond_N7 | cond_N8 | cond_N9 | cond_N10 | cond_N11 | cond_N12 | cond_N13 | cond_N14 | cond_N15 | cond_N16 | cond_N17 | cond_N18 | cond_N19 | cond_N20 | cond_N21 | cond_N22 | cond_N23 | cond_N24 | cond_N25 | cond_N26 | cond_N27 | cond_N28 | cond_N29 | cond_N30 | cond_N31 | cond_N32 | cond_N33 | cond_N34 | cond_N35 | cond_N36]

36


In [66]:
# quarterly panel data

df_q_panel = df_q_panel.sort_values(by=['isocode', 'quarter']).reset_index(drop=True)

df_q_panel

Unnamed: 0,country,isocode,quarter,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,...,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
0,Australia,AUS,1990Q1,100307485056.99,14577358445.89,545325762992.24,370110607742.37,-1.74,8.13,21.90,...,17769000000.00,39191000000.00,102000000000.00,210000000000.00,58.48,56.65,-1921000000.00,7736000000.00,77.16,86.60
1,Australia,AUS,1990Q2,98999365788.29,14245511042.43,546137985036.21,370661859883.23,-0.96,10.04,20.10,...,18442000000.00,40164000000.00,104000000000.00,210000000000.00,59.42,57.66,-655000000.00,9087000000.00,78.71,87.80
2,Australia,AUS,1990Q3,99264190344.01,14277692592.93,550241277104.66,373446749254.43,-1.09,9.45,20.10,...,18410000000.00,39238000000.00,103000000000.00,209000000000.00,59.83,57.64,-829000000.00,9466000000.00,80.07,88.64
3,Australia,AUS,1990Q4,97836958810.71,14431437918.75,555282974866.89,376868531149.70,-1.70,9.34,21.10,...,18631000000.00,39643000000.00,105000000000.00,210000000000.00,61.39,58.00,-536000000.00,10902000000.00,75.08,83.04
4,Australia,AUS,1991Q1,100668164829.69,14760009676.52,553022779965.17,375334545107.85,-2.92,9.32,21.80,...,19217000000.00,40845000000.00,103000000000.00,208000000000.00,61.29,57.96,60000000.00,12299000000.00,76.44,83.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4675,United States,USA,2021Q2,2458666000000.00,651721000000.00,15577779000000.00,16308267226546.70,-5.37,5.32,125.10,...,806000000000.00,674000000000.00,5690000000000.00,4840000000000.00,123.51,122.10,-220000000000.00,-311000000000.00,117.54,113.90
4676,United States,USA,2021Q3,2471120000000.00,653520000000.00,15671605000000.00,16406493005767.10,-4.83,5.42,121.50,...,819000000000.00,675000000000.00,5800000000000.00,4870000000000.00,125.48,123.87,-237000000000.00,-329000000000.00,118.91,116.09
4677,United States,USA,2021Q4,2487844000000.00,661414000000.00,15767146000000.00,16506514206420.30,-4.54,5.02,122.20,...,829000000000.00,672000000000.00,6000000000000.00,4950000000000.00,127.39,126.02,-243000000000.00,-338000000000.00,120.65,118.57
4678,United States,USA,2022Q1,2497180000000.00,659504000000.00,15702906000000.00,16439261802426.60,-4.96,3.23,,...,844000000000.00,668000000000.00,6100000000000.00,4930000000000.00,130.30,128.55,-294000000000.00,-386000000000.00,121.41,120.23


### **7.2. data summary**

In [67]:
# number of countries & sample period

print('number of countries:', df_q_panel['isocode'].unique().shape[0])
print(df_q_panel['isocode'].unique())
print('start:', np.min(df_q_panel['quarter'].unique()))
print('end:', np.max(df_q_panel['quarter'].unique()))

number of countries: 36
['AUS' 'AUT' 'BEL' 'BGR' 'CAN' 'CHE' 'CYP' 'CZE' 'DEU' 'DNK' 'ESP' 'EST'
 'FIN' 'FRA' 'GBR' 'GRC' 'HRV' 'HUN' 'IRL' 'ITA' 'JPN' 'KOR' 'LTU' 'LUX'
 'LVA' 'MLT' 'NLD' 'NOR' 'NZL' 'POL' 'PRT' 'ROU' 'SVK' 'SVN' 'SWE' 'USA']
start: 1990Q1
end: 2022Q2


In [68]:
# non-null count by each variables

df_q_panel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4680 entries, 0 to 4679
Data columns (total 32 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   country          4680 non-null   object       
 1   isocode          4680 non-null   object       
 2   quarter          4680 non-null   period[Q-DEC]
 3   oecd_q_ggexp     3150 non-null   float64      
 4   oecd_q_gginv     1890 non-null   float64      
 5   oecd_q_rgdp_dcu  3150 non-null   float64      
 6   oecd_q_rgdp_usd  3150 non-null   float64      
 7   oecd_q_ca        2859 non-null   float64      
 8   oecd_q_stir      3148 non-null   float64      
 9   bis_q_ggdebt_m   2575 non-null   float64      
 10  bis_q_ggdebt_n   2525 non-null   float64      
 11  bis_q_cpi        4624 non-null   float64      
 12  bis_q_cbpr       1944 non-null   object       
 13  bis_q_reer_b     4104 non-null   float64      
 14  bis_q_reer_n     2860 non-null   float64      
 15  wb_q

In [69]:
# descriptive statistics

pd.options.display.float_format = '{:.2f}'.format
round(df_q_panel.describe(), 2)

Unnamed: 0,oecd_q_ggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_cpi,bis_q_reer_b,...,imf_q_nggexp,imf_q_rggexp,imf_q_ngdp,imf_q_rgdp,imf_q_cpi,imf_q_gdpd,imf_q_nca,imf_q_rca,imf_q_neer,imf_q_reer
count,3150.0,1890.0,3150.0,3150.0,2859.0,3148.0,2575.0,2525.0,4624.0,4104.0,...,4136.0,4077.0,4139.0,4131.0,4644.0,4127.0,4135.0,889.0,4128.0,4099.0
mean,6773479214421.96,5041807238430.76,42726612363989.31,1098234076343.88,-0.58,7.94,69.47,66.92,88.25,96.13,...,1947171439315.93,2170594262815.24,12328351032080.68,13396024065006.0,87.96,92.76,284777921667.98,949830868391.45,285.19,95.75
std,23365691978118.2,13792240539298.37,158653905541748.47,2257776861928.65,4.59,5.54,41.75,39.48,24.08,12.39,...,8948714712734.27,9312742883183.02,56207084461652.48,59160453469239.38,24.58,20.84,2277123414959.53,6698035800574.12,4058.65,13.67
min,2258405872.06,-44992000000.0,9181304800.0,10163703795.61,-20.14,0.05,4.6,6.8,0.03,41.04,...,39453000.0,261872261.9,267985000.0,1293591273.0,0.03,1.91,-8230000000000.0,-19500000000000.0,18.67,32.42
25%,37005815738.31,14751254969.11,180701530000.0,150299991450.03,-3.18,3.96,40.0,37.9,74.76,91.61,...,4535615868.25,6498000000.0,24131721416.0,31906000000.0,74.63,79.44,-1359817500.0,-116000000000.0,93.77,91.31
50%,166379261251.88,47880000000.0,840095667030.1,327530210361.57,-0.64,6.42,61.3,59.0,92.32,98.1,...,22008694547.5,22300000000.0,130000000000.0,150000000000.0,92.25,96.61,325553252.1,186000000.0,99.26,97.77
75%,448563200000.0,123233074457.7,1983191564067.46,1058834583356.66,2.21,10.98,93.05,85.6,106.35,102.16,...,102000000000.0,107000000000.0,486500000000.0,512000000000.0,106.33,106.36,9241599902.0,13795500000.0,103.14,102.47
max,180434000000000.0,73184000000000.0,1332547200000000.0,16533342932403.3,18.93,53.27,238.2,226.9,150.85,149.46,...,98500000000000.0,87600000000000.0,533000000000000.0,492000000000000.0,150.05,172.75,37200000000000.0,39000000000000.0,142373.8,150.9


## **8. export the data**

In [70]:
# quarterly panel data

count_df_q_pooled.to_excel(excel_writer='count_df_q_pooled.xlsx')
df_q_pooled.to_excel(excel_writer='df_q_pooled.xlsx')
df_q_panel.to_excel(excel_writer='df_q_panel.xlsx')

## **9. references**

* https://data.oecd.org/ <br/>
* https://stats.bis.org/#ppq=XRU_D_24D;pv=1,2~4~1,0,0~both <br/>
* https://databank.worldbank.org/source/world-development-indicators/preview/on <br/>
* https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm <br/>
* https://ec.europa.eu/eurostat/en/web/main/data/database <br/>
* https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976 <br/>

