# **RA1: quarterly panel data**

Dong Gyun Ko <br/>
last updated: September 21, 2022 <br/>

In [1]:
# import libraries

# Of the names imported below, only np, pd, os and drive are referenced by the
# cells shown in this notebook; dt, pdr, web, wb, DataReader and requests are
# not used by any visible cell.
import numpy as np
import pandas as pd
import datetime as dt
import pandas_datareader as pdr
import pandas_datareader.data as web
from pandas_datareader import wb
from pandas_datareader.data import DataReader
import requests # python 3.6

import os
from google.colab import drive
# Mount Google Drive so that the data folder under /content/gdrive is reachable
# (this import and call only work inside Google Colab).
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# set the working directory

# Every relative file name read or written below resolves against this folder.
work_dir = '/content/gdrive/MyDrive/Colab Notebooks/RA1_data'
os.chdir(work_dir)

In [3]:
# set the pandas display option

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# pd.options.display.max_rows = 100
# pd.options.display.max_columns = 100

## **1. panel data**

### **1.1. python code** <br/>

In [4]:
# import the pooled data

# Read the pooled quarterly workbook and discard its 'Unnamed: 0' column
# (presumably the index written by an earlier to_excel export - TODO confirm).
df_q_pooled = pd.read_excel('df_q_pooled.xlsx')
df_q_pooled = df_q_pooled.drop(columns=['Unnamed: 0'])

# Parse 'quarter' as timestamps first, then convert to quarterly periods.
quarter_ts = pd.to_datetime(df_q_pooled['quarter'])
df_q_pooled['quarter'] = quarter_ts.dt.to_period('Q')

In [5]:
# import the class data

# Classification table; later cells read its 'isocode', 'region' and 'income' columns.
df_class = pd.read_csv(filepath_or_buffer='df_class.csv')

In [6]:
# merge the wb isocode data

# ISO code table, with its 'alpha-3' column renamed to the shared 'isocode' key.
df_wb_isocode = pd.read_csv('df_wb_isocode.csv', encoding='utf-8')
df_wb_isocode = df_wb_isocode.rename(columns={'alpha-3':'isocode'})

# Left join: every row of the ISO table is kept, classification columns are
# attached where the isocode matches; the result is ordered by isocode.
df_class = (
    df_wb_isocode
    .merge(df_class, how='left', on=['isocode'])
    .sort_values(by=['isocode'])
    .reset_index(drop=True)
)

In [7]:
# generate the indicator variable

# Region label -> integer code stored in 'ind_region'.
REGION_CODES = {
    'East Asia & Pacific': 1,
    'South Asia': 2,
    'Europe & Central Asia': 3,
    'Middle East & North Africa': 4,
    'Sub-Saharan Africa': 5,
    'North America': 6,
    'Latin America & Caribbean': 7,
}

# Income label -> integer code stored in 'ind_income'.
INCOME_CODES = {
    'High income': 1,
    'Upper middle income': 2,
    'Lower middle income': 3,
    'Low income': 4,
}


def _add_class_indicator(panel, class_table, label_col, indicator_col, codes):
    """Add `indicator_col` to `panel`, coded from `class_table[label_col]`.

    Each row of `panel` is matched to `class_table` through 'isocode'. The
    matched label is then replaced by its integer in `codes`. Rows whose
    isocode has no (non-missing) label stay NaN; a label that is not a key of
    `codes` is left as the label text. `panel` is modified in place.

    Unlike positional indexing into `unique()`, this does not depend on how
    many distinct labels exist or on whether NaN is among them.
    """
    labelled = class_table.dropna(subset=['isocode', label_col])
    # map() needs one label per isocode; keep the first row for each isocode.
    label_by_isocode = labelled.drop_duplicates(subset=['isocode']).set_index('isocode')[label_col]

    panel[indicator_col] = panel['isocode'].map(label_by_isocode)

    # Recode label by label with .loc so the column keeps holding plain
    # integers (object dtype) next to NaN for unmatched rows.
    for label, code in codes.items():
        panel.loc[panel[indicator_col] == label, indicator_col] = code


def _add_membership_indicator(panel, indicator_col, members):
    """Set `indicator_col` to 1 where 'isocode' is in `members`, NaN elsewhere (in place)."""
    panel[indicator_col] = np.nan
    panel.loc[panel['isocode'].isin(members), indicator_col] = 1


# region
_add_class_indicator(df_q_pooled, df_class, 'region', 'ind_region', REGION_CODES)

# income
# Missing income classifications are already NaN after the mapping; no extra
# step is needed (an `== np.nan` comparison would never match anyway).
_add_class_indicator(df_q_pooled, df_class, 'income', 'ind_income', INCOME_CODES)

# oecd
oecd_list = ['AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'CHL', 'COL', 'CRI', 'CZE', 'DEU',
             'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR', 'GRC', 'HUN', 'IRL', 'ISL',
             'ISR', 'ITA', 'JPN', 'KOR', 'LTU', 'LUX', 'LVA', 'MEX', 'NLD', 'NOR',
             'NZL', 'POL', 'PRT', 'SVK', 'SVN', 'SWE', 'TUR', 'USA']

_add_membership_indicator(df_q_pooled, 'ind_oecd', oecd_list)

# g7
g7_list = ['CAN', 'DEU', 'FRA', 'GBR', 'ITA', 'JPN', 'USA']

_add_membership_indicator(df_q_pooled, 'ind_g7', g7_list)

In [8]:
# replace var. = NaN if val. == 0

# ggdebt
# In each of the four World Bank debt columns, a value of exactly 0 is
# replaced by NaN.
wb_ggdebt_cols = ('wb_q_ggdebt_d1', 'wb_q_ggdebt_d2', 'wb_q_ggdebt_d3', 'wb_q_ggdebt_d4')

for col in wb_ggdebt_cols:
    is_zero = (df_q_pooled[col] == 0)
    df_q_pooled.loc[is_zero, col] = np.nan

In [9]:
# quarterly pooled dataset

# Order rows by country, then by quarter, and renumber the index from 0.
pooled_sort_keys = ['isocode', 'quarter']
df_q_pooled = df_q_pooled.sort_values(by=pooled_sort_keys)
df_q_pooled = df_q_pooled.reset_index(drop=True)

df_q_pooled

Unnamed: 0,country,isocode,quarter,oecd_q_rggexp,oecd_q_gginv,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,oecd_q_stir,bis_q_ggdebt_m,...,imf_q_stir_3mtb,imf_q_stir_mp,imf_q_USD_DCU_ep,imf_q_USD_DCU_pa,fred_q_rggexp,fred_q_ca,ind_region,ind_income,ind_oecd,ind_g7
0,Aruba,ABW,1913Q1,,,,,,,,...,,,,,,,7,1,,
1,Aruba,ABW,1913Q2,,,,,,,,...,,,,,,,7,1,,
2,Aruba,ABW,1913Q3,,,,,,,,...,,,,,,,7,1,,
3,Aruba,ABW,1913Q4,,,,,,,,...,,,,,,,7,1,,
4,Aruba,ABW,1914Q1,,,,,,,,...,,,,,,,7,1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95041,Zimbabwe,ZWE,2021Q2,,,,,,,,...,,,0.011706,0.011803,,,5,3,,
95042,Zimbabwe,ZWE,2021Q3,,,,,,,,...,,,0.011407,0.011637,,,5,3,,
95043,Zimbabwe,ZWE,2021Q4,,,,,,,,...,,,0.009203,0.009963,,,5,3,,
95044,Zimbabwe,ZWE,2022Q1,,,,,,,,...,,,0.007021,0.008259,,,5,3,,


In [10]:
# generate the count matrix for quarterly pooled data

# Non-null observation counts of every column, per country (N) and per quarter (T).
pooled_by_isocode = df_q_pooled.groupby('isocode')
pooled_by_quarter = df_q_pooled.groupby('quarter')

df_q_pooled_count_N = pooled_by_isocode.count()
df_q_pooled_count_T = pooled_by_quarter.count()

In [11]:
# filter (N)

# A country is kept when at least one of the debt series AND at least one of
# the expenditure series listed below has a non-zero observation count.

# ggdebt
ggdebt_count_cols_N = ['bis_q_ggdebt_m', 'bis_q_ggdebt_n', 'wb_q_ggdebt_d2', 'eustat_q_ggdebt', 'imf_q_ggdebt']

# ggexp
ggexp_count_cols_N = ['oecd_q_rggexp', 'eustat_q_nggexp', 'imf_q_nggexp', 'imf_q_rggexp', 'fred_q_rggexp']

has_ggdebt_N = (df_q_pooled_count_N[ggdebt_count_cols_N] != 0).any(axis=1)
has_ggexp_N = (df_q_pooled_count_N[ggexp_count_cols_N] != 0).any(axis=1)

df_q_pooled_count_N = df_q_pooled_count_N.loc[has_ggdebt_N & has_ggexp_N]
df_q_pooled_count_N = df_q_pooled_count_N.reset_index()

print('countries:', df_q_pooled_count_N['isocode'].unique().shape[0])

countries: 50


In [12]:
# filter (T)

# A quarter is kept when at least one of the debt series AND at least one of
# the expenditure series listed below has a non-zero observation count.

# ggdebt
ggdebt_count_cols_T = ['bis_q_ggdebt_m', 'bis_q_ggdebt_n', 'wb_q_ggdebt_d2', 'eustat_q_ggdebt', 'imf_q_ggdebt']

# ggexp
ggexp_count_cols_T = ['oecd_q_rggexp', 'eustat_q_nggexp', 'imf_q_nggexp', 'imf_q_rggexp', 'fred_q_rggexp']

has_ggdebt_T = (df_q_pooled_count_T[ggdebt_count_cols_T] != 0).any(axis=1)
has_ggexp_T = (df_q_pooled_count_T[ggexp_count_cols_T] != 0).any(axis=1)

df_q_pooled_count_T = df_q_pooled_count_T.loc[has_ggdebt_T & has_ggexp_T]
df_q_pooled_count_T = df_q_pooled_count_T.reset_index()

kept_quarters = df_q_pooled_count_T['quarter']
print('start:', kept_quarters.min())
print('end:', kept_quarters.max())

start: 1950Q1
end: 2022Q1


In [13]:
# generate the panel data

# select the T
# Only a lower bound on the year is applied here; quarters after the last one
# reported by the T filter are not removed.
PANEL_START_YEAR = 1950
cond_T = (df_q_pooled['quarter'].dt.year >= PANEL_START_YEAR)

# select the N
# Countries that passed the N filter. A single isin() mask works for any
# number of countries, instead of one named condition per country.
isocode_list = df_q_pooled_count_N['isocode'].unique()
print(isocode_list.shape[0])
cond_N = df_q_pooled['isocode'].isin(isocode_list)

# Both masks are built on df_q_pooled and applied to df_q_pooled, so the mask
# and the frame always share the same index.
df_q_panel = df_q_pooled.loc[cond_T & cond_N]

50


In [14]:
# select the columns

# Columns kept in the panel, grouped by data source; the resulting column
# order is the order of this list.
panel_id_cols = ['country', 'isocode', 'quarter']
panel_oecd_cols = ['oecd_q_rggexp', 'oecd_q_rgdp_dcu', 'oecd_q_rgdp_usd', 'oecd_q_ca']
panel_bis_cols = ['bis_q_ggdebt_m', 'bis_q_ggdebt_n', 'bis_q_reer_b', 'bis_q_reer_n']
panel_wb_cols = ['wb_q_ggdebt_d1', 'wb_q_ggdebt_d2', 'wb_q_ggdebt_d3', 'wb_q_ggdebt_d4']
panel_eustat_cols = ['eustat_q_ggdebt', 'eustat_q_nggexp']
panel_imf_cols = ['imf_q_nggexp', 'imf_q_rggexp', 'imf_q_ngdp', 'imf_q_rgdp', 'imf_q_cpi', 'imf_q_gdpd',
                  'imf_q_reer', 'imf_q_ca', 'imf_q_nebgs', 'imf_q_rebgs', 'imf_q_USD_DCU_ep', 'imf_q_USD_DCU_pa']
panel_fred_cols = ['fred_q_rggexp', 'fred_q_ca']
panel_ind_cols = ['ind_region', 'ind_income', 'ind_oecd', 'ind_g7']

panel_cols = (panel_id_cols + panel_oecd_cols + panel_bis_cols + panel_wb_cols
              + panel_eustat_cols + panel_imf_cols + panel_fred_cols + panel_ind_cols)

df_q_panel = df_q_panel[panel_cols]

In [15]:
# generate the ca/gdp variables from the IMF IFS data source

imf_ca = df_q_panel['imf_q_ca']
imf_usd_per_dcu = df_q_panel['imf_q_USD_DCU_pa']

# (1) denominator: nominal GDP times the period-average USD/DCU rate
imf_gdp_usd_1 = df_q_panel['imf_q_ngdp'] * imf_usd_per_dcu
df_q_panel['imf_q_ca_gdp_1'] = (imf_ca / imf_gdp_usd_1) * 100

# (2) denominator: (real GDP * CPI / 100) times the same exchange rate
imf_gdp_usd_2 = ((df_q_panel['imf_q_rgdp'] * df_q_panel['imf_q_cpi']) / 100) * imf_usd_per_dcu
df_q_panel['imf_q_ca_gdp_2'] = (imf_ca / imf_gdp_usd_2) * 100

In [16]:
# generate the variables from the pooled data source

# Default source column for each pooled ('pds_') variable; the next cell
# overrides these defaults for selected countries.
pds_default_source = {
    'pds_q_ggdebt': 'bis_q_ggdebt_n',
    'pds_q_rggexp': 'imf_q_rggexp',
    'pds_q_rgdp': 'imf_q_rgdp',
    'pds_q_reer': 'bis_q_reer_b',
    'pds_q_ca': 'fred_q_ca',
}

for pds_col, source_col in pds_default_source.items():
    df_q_panel[pds_col] = df_q_panel[source_col]

In [17]:
# replace the value of pds variables

def _use_source(panel, isocodes, target, source):
    """Overwrite `target` with `source` on the rows whose isocode is listed.

    panel    : DataFrame with an 'isocode' column; modified in place.
    isocodes : list of country codes whose rows are overwritten.
    target   : name of the column that receives the values.
    source   : name of the column to copy from, or a Series indexed like
               `panel` holding the replacement values.

    Rows of other countries are left untouched.
    """
    rows = panel['isocode'].isin(isocodes)
    values = panel[source] if isinstance(source, str) else source
    panel.loc[rows, target] = values.loc[rows]


# ggdebt (1)
bis_q_ggdebt_m_list = ['CHL', 'KOR']
_use_source(df_q_panel, bis_q_ggdebt_m_list, 'pds_q_ggdebt', 'bis_q_ggdebt_m')

# ggdebt (2)
wb_q_ggdebt_d2_list = ['CHE', 'CZE', 'DEU', 'IRL', 'ITA', 'LTU', 'NLD', 'POL', 'PRT', 'SWE']
_use_source(df_q_panel, wb_q_ggdebt_d2_list, 'pds_q_ggdebt', 'wb_q_ggdebt_d2')

# ggdebt (3)
eustat_q_ggdebt_list = ['AUT', 'BGR', 'CYP', 'DNK', 'EST', 'FIN', 'FRA', 'GRC', 'HRV', 'LUX', 'LVA', 'MLT', 'NOR', 'ROU', 'SVK', 'SVN']
_use_source(df_q_panel, eustat_q_ggdebt_list, 'pds_q_ggdebt', 'eustat_q_ggdebt')

# ggexp
# For these countries real expenditure is nominal expenditure divided by CPI, times 100.
imf_q_nggexp_list = ['IDN', 'THA', 'USA']
imf_q_nggexp_deflated = (df_q_panel['imf_q_nggexp'] / df_q_panel['imf_q_cpi']) * 100
_use_source(df_q_panel, imf_q_nggexp_list, 'pds_q_rggexp', imf_q_nggexp_deflated)

fred_q_rggexp_list = ['EST']
_use_source(df_q_panel, fred_q_rggexp_list, 'pds_q_rggexp', 'fred_q_rggexp')

# reer
imf_q_reer_list = ['AUS', 'AUT', 'BEL', 'BGR', 'BRA', 'CAN', 'CHE', 'CHL', 'COL', 'CYP',
                   'CZE', 'DEU', 'DNK', 'ESP', 'FIN', 'FRA', 'GBR', 'GRC', 'HUN', 'IRL',
                   'ISR', 'ITA', 'JPN', 'LUX', 'LVA', 'MEX', 'MLT', 'NLD', 'NOR', 'NZL',
                   'POL', 'PRT', 'ROU', 'RUS', 'SGP', 'SVK', 'SWE', 'USA', 'ZAF']
_use_source(df_q_panel, imf_q_reer_list, 'pds_q_reer', 'imf_q_reer')

# ca (1)
oecd_q_ca_list = ['AUT', 'BEL', 'DNK', 'ESP', 'FIN', 'FRA', 'ITA', 'JPN']
_use_source(df_q_panel, oecd_q_ca_list, 'pds_q_ca', 'oecd_q_ca')

# ca (2)
imf_q_ca_gdp_1_list = ['ARG', 'BGR', 'BRA', 'CAN', 'CHL', 'CYP', 'HRV', 'MEX', 'MLT', 'POL',
                       'ROU', 'THA']
_use_source(df_q_panel, imf_q_ca_gdp_1_list, 'pds_q_ca', 'imf_q_ca_gdp_1')

# ca (3)
imf_q_ca_gdp_2_list = ['HKG', 'SGP']
_use_source(df_q_panel, imf_q_ca_gdp_2_list, 'pds_q_ca', 'imf_q_ca_gdp_2')

In [18]:
# replace the val. with NaN

# 'pds_q_ggdebt' is set to NaN for every (country, quarter) pair formed from
# the two lists below.
# NOTE(review): the notebook does not record why these observations are
# discarded - please add the reason here.
ggdebt_nan_isocodes = ['PRT', 'IRL', 'LTU', 'NLD', 'SVN']
ggdebt_nan_quarters = ['1995Q4', '1996Q4', '1997Q4', '1998Q4']

cond_ggdebt_nan_isocode = df_q_panel['isocode'].isin(ggdebt_nan_isocodes)
# Compare on the string form of the period ('1995Q4') so the quarter labels
# above can be matched directly.
cond_ggdebt_nan_quarter = df_q_panel['quarter'].astype(str).isin(ggdebt_nan_quarters)

df_q_panel.loc[(cond_ggdebt_nan_isocode & cond_ggdebt_nan_quarter), 'pds_q_ggdebt'] = np.nan

In [19]:
# quarterly panel data

# Order rows by country, then by quarter, and renumber the index from 0.
panel_sort_keys = ['isocode', 'quarter']
df_q_panel = df_q_panel.sort_values(by=panel_sort_keys)
df_q_panel = df_q_panel.reset_index(drop=True)

df_q_panel

Unnamed: 0,country,isocode,quarter,oecd_q_rggexp,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_reer_b,...,ind_income,ind_oecd,ind_g7,imf_q_ca_gdp_1,imf_q_ca_gdp_2,pds_q_ggdebt,pds_q_rggexp,pds_q_rgdp,pds_q_reer,pds_q_ca
0,Argentina,ARG,1950Q1,,,,,,,,...,2,,,,,,,,,
1,Argentina,ARG,1950Q2,,,,,,,,...,2,,,,,,,,,
2,Argentina,ARG,1950Q3,,,,,,,,...,2,,,,,,,,,
3,Argentina,ARG,1950Q4,,,,,,,,...,2,,,,,,,,,
4,Argentina,ARG,1951Q1,,,,,,,,...,2,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14495,South Africa,ZAF,2021Q2,,,,,,70.1,79.796667,...,2,,,6.465662,5.191886,70.1,2.230000e+11,1.140000e+12,80.196367,5.584279
14496,South Africa,ZAF,2021Q3,,,,,,70.3,78.690000,...,2,,,2.326532,1.871286,70.3,2.240000e+11,1.120000e+12,78.671305,2.988018
14497,South Africa,ZAF,2021Q4,,,,,,70.9,75.500000,...,2,,,3.387572,2.723933,70.9,2.240000e+11,1.130000e+12,75.720268,2.300206
14498,South Africa,ZAF,2022Q1,,,,,,,76.766667,...,2,,,0.579187,0.465000,,2.270000e+11,1.150000e+12,76.993295,1.534868


### **1.2. var. summary** <br/>

| |variable|abbreviation|unit|frequency|indicator|subject|measure|source|
|-|--------|------------|----|---------|---------|-------|-------|------|
|$OECD$|||||||||
|1|government final consumption expenditure, volume|oecd_q_rggexp|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|2|gross domestic product, volume, market prices|oecd_q_rgdp_dcu|DCU, base year|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|3|gross domestic product, volume in USD, constant exchange rates|oecd_q_rgdp_usd|USD, 2015|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|4|current account balance as a percentage of GDP|oecd_q_ca|% of GDP|quarterly|-|-|-|https://stats.oecd.org/Index.aspx?QueryId=61357#|
|$BIS$|||||||||
|1|total credit to the government sector at market value (core debt)|bis_q_ggdebt_m|% of GDP|quarterly|CRE|-|market value|https://stats.bis.org/statx/srs/table/f5.1|
|2|total credit to the government sector at nominal value (core debt)|bis_q_ggdebt_n|% of GDP|quarterly|CRE|-|nominal value|https://stats.bis.org/statx/srs/table/f5.4|
|3|real effective exchange rate, broad (60 economies) indices|bis_q_reer_b|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|4|real effective exchange rate, narrow (27 economies) indices|bis_q_reer_n|2010=100|quarterly|EER|-|-|https://stats.bis.org/statx/srs/table/i2?m=B|
|$World Bank$|||||||||
|1|Gross PSD, General Gov.-D1, All maturities, Debt securities + loans, Nominal Value, % of GDP|wb_q_ggdebt_d1|% of GDP|quarterly|DP.DOD.DLD1.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|2|Gross PSD, General Gov.-D2, All maturities, D1+ SDRs + currency and deposits, Nominal Value, % of GDP|wb_q_ggdebt_d2|% of GDP|quarterly|DP.DOD.DLD2.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|3|Gross PSD, General Gov.-D3, All maturities, D2+other accounts payable, Nominal Value, % of GDP|wb_q_ggdebt_d3|% of GDP|quarterly|DP.DOD.DLD3.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|4|Gross PSD, General Gov.-D4, All maturities, D3+insurance, pensions, and standardized guarantees, Nominal Value, % of GDP|wb_q_ggdebt_d4|% of GDP|quarterly|DP.DOD.DLD4.CR.GG.Z1|-|nominal value|https://databank.worldbank.org/source/Quarterly-Public-Sector-Debt/#|
|$Eurostat$|||||||||
|1|government consolidated gross debt [S13: general government]|eustat_q_ggdebt|% of GDP|quarterly|GOV_10Q_GGDEBT [GD]|-|PC_GDP|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGDEBT__custom_3260679/default/table?lang=en|
|2|final consumption expenditure [S13: general government]|eustat_q_nggexp|% of GDP|quarterly|GOV_10Q_GGNFA [P3]|-|PC_GDP / NSA|https://ec.europa.eu/eurostat/databrowser/view/GOV_10Q_GGNFA__custom_3263185/default/table?lang=en|
|$IMF$|||||||||
|1|general government final consumption expenditure, nominal|imf_q_nggexp|DCU|quarterly|NCGG_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|2|general government final consumption expenditure, real|imf_q_rggexp|DCU|quarterly|NCGG_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|3|gross domestic product, nominal|imf_q_ngdp|DCU|quarterly|NGDP_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|4|gross domestic product, real|imf_q_rgdp|DCU|quarterly|NGDP_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|5|prices, consumer price index, all items|imf_q_cpi|index|quarterly|PCPI_IX|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|6|gross domestic product, deflator|imf_q_gdpd|index|quarterly|NGDP_D_SA_IX|-|SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|7|exchange rates, real effective exchange rate based on consumer price index|imf_q_reer|index|quarterly|EREER_IX|-|real|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|8|supplementary items, current account, net (excluding exceptional financing)|imf_q_ca|USD|quarterly|BCAXF_BP6_USD|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|9|external balance of goods and services, nominal|imf_q_nebgs|DCU|quarterly|NNXGS_SA_XDC|-|nominal / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|10|external balance of goods and services, real|imf_q_rebgs|DCU|quarterly|NNXGS_R_SA_XDC|-|real / SA|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|11|exchange rates, US dollar per domestic currency, end of period|imf_q_USD_DCU_ep|USD/DCU|quarterly|EDNE_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|12|exchange rates, US dollar per domestic currency, period average, rate|imf_q_USD_DCU_pa|USD/DCU|quarterly|EDNA_USD_XDC_RATE|-|-|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|13|current account balance (1)|imf_q_ca_gdp_1|% of GDP|quarterly|-|-|(imf_q_ca / (imf_q_ngdp * imf_q_USD_DCU_pa)) * 100|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|14|current account balance (2)|imf_q_ca_gdp_2|% of GDP|quarterly|-|-|(imf_q_ca / (((imf_q_rgdp * imf_q_cpi) / 100) * imf_q_USD_DCU_pa)) * 100|https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1390030341854|
|$FRED$|||||||||
|1|real general government final consumption expenditure|fred_q_rggexp|DCU|quarterly|NCGGRSAXDC + isocode [alpha-2] + Q|-|real / SA|https://fred.stlouisfed.org/searchresults?st=real+general+government+final+consumption+expenditure|
|2|balance of payments BPM6: current account balance: total: total balance|fred_q_ca|% of GDP|quarterly|isocode [alpha-2] + B6BLTT02STSA + Q|-|SA|https://fred.stlouisfed.org/searchresults?st=BPM6%3A+current+account+balance+%25&pageID=1|
|$Indicator$|||||||||
|1|region indicator|ind_region|-|-|-|-|EAS: 1, SAS: 2, ECS: 3, MEA: 4, SSF: 5, NAC: 6, LCN: 7|World Bank|
|2|income indicator|ind_income|-|-|-|-|HIC: 1, UMC: 2, LMC: 3, LIC: 4|World Bank|
|3|oecd indicator|ind_oecd|-|-|-|-|OED: 1|World Bank|
|4|g7 indicator|ind_g7|-|-|-|-|G7: 1|World Bank|
|$Pooled\;Data\;Sources$|||||||||
|1|general government debt|pds_q_ggdebt|% of GDP|quarterly|-|-|-|BIS, World Bank, Eurostat|
|2|general government consumption, real|pds_q_rggexp|DCU|quarterly|-|-|real / SA|IMF, FRED|
|3|gross domestic product, real|pds_q_rgdp|DCU|quarterly|-|-|real / SA|IMF|
|4|real effective exchange rate|pds_q_reer|index|quarterly|-|-|real|BIS, IMF|
|5|current account balance|pds_q_ca|% of GDP|quarterly|-|-|-|OECD, IMF, FRED|

### **1.3. data summary**

In [20]:
# number of countries & sample period

# Distinct country codes and quarters present in the panel.
panel_isocodes = df_q_panel['isocode'].unique()
panel_quarters = df_q_panel['quarter'].unique()

print('number of countries:', panel_isocodes.shape[0])
print(panel_isocodes)
print('start:', np.min(panel_quarters))
print('end:', np.max(panel_quarters))

number of countries: 50
['ARG' 'AUS' 'AUT' 'BEL' 'BGR' 'BRA' 'CAN' 'CHE' 'CHL' 'COL' 'CYP' 'CZE'
 'DEU' 'DNK' 'ESP' 'EST' 'FIN' 'FRA' 'GBR' 'GRC' 'HKG' 'HRV' 'HUN' 'IDN'
 'IND' 'IRL' 'ISR' 'ITA' 'JPN' 'KOR' 'LTU' 'LUX' 'LVA' 'MEX' 'MLT' 'NLD'
 'NOR' 'NZL' 'POL' 'PRT' 'ROU' 'RUS' 'SGP' 'SVK' 'SVN' 'SWE' 'THA' 'TUR'
 'USA' 'ZAF']
start: 1950Q1
end: 2022Q2


In [21]:
# non-null count and dtype of each variable in the panel

df_q_panel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14500 entries, 0 to 14499
Data columns (total 42 columns):
 #   Column            Non-Null Count  Dtype        
---  ------            --------------  -----        
 0   country           14500 non-null  object       
 1   isocode           14500 non-null  object       
 2   quarter           14500 non-null  period[Q-DEC]
 3   oecd_q_rggexp     4340 non-null   float64      
 4   oecd_q_rgdp_dcu   4456 non-null   float64      
 5   oecd_q_rgdp_usd   4456 non-null   float64      
 6   oecd_q_ca         3534 non-null   float64      
 7   bis_q_ggdebt_m    3184 non-null   float64      
 8   bis_q_ggdebt_n    4414 non-null   float64      
 9   bis_q_reer_b      5700 non-null   float64      
 10  bis_q_reer_n      5850 non-null   float64      
 11  wb_q_ggdebt_d1    3124 non-null   float64      
 12  wb_q_ggdebt_d2    2877 non-null   float64      
 13  wb_q_ggdebt_d3    2186 non-null   float64      
 14  wb_q_ggdebt_d4    633 non-null    floa

In [22]:
# descriptive statistics

# Global display option: floats are rendered with two decimals from this cell on.
pd.options.display.float_format = '{:.2f}'.format

# Summary statistics of the numeric columns, rounded to two decimals.
df_q_panel.describe().round(2)

Unnamed: 0,oecd_q_rggexp,oecd_q_rgdp_dcu,oecd_q_rgdp_usd,oecd_q_ca,bis_q_ggdebt_m,bis_q_ggdebt_n,bis_q_reer_b,bis_q_reer_n,wb_q_ggdebt_d1,wb_q_ggdebt_d2,...,fred_q_ca,ind_oecd,ind_g7,imf_q_ca_gdp_1,imf_q_ca_gdp_2,pds_q_ggdebt,pds_q_rggexp,pds_q_rgdp,pds_q_reer,pds_q_ca
count,4340.0,4456.0,4456.0,3534.0,3184.0,4414.0,5700.0,5850.0,3124.0,2877.0,...,4745.0,10440.0,2030.0,4111.0,4144.0,5587.0,6442.0,6389.0,7591.0,6086.0
mean,6542472575848.33,45445265002657.84,927588342684.5,-0.63,65.4,60.4,96.15,100.15,57.51,55.33,...,-0.1,1.0,1.0,-1.13,-4.87,55.95,4531295333599.56,38873114840111.78,97.43,-0.09
std,21341904505376.79,147422616429373.9,1994209164892.58,4.3,39.49,34.9,18.08,17.21,35.53,31.1,...,5.32,0.0,0.0,346.55,313.49,34.46,21612178259820.28,234662112287474.72,19.1,6.01
min,1365709967.41,8075649110.82,8956691869.0,-20.14,4.6,3.6,41.04,45.28,3.33,3.44,...,-56.27,1.0,1.0,-3256.93,-3282.51,3.4,29962.28,1293591273.0,32.42,-56.27
25%,36126826394.71,187550515950.61,126901687722.33,-2.93,38.7,35.1,89.56,90.45,33.54,33.22,...,-3.12,1.0,1.0,-4.08,-3.88,31.9,10800000000.0,55400000000.0,89.78,-3.26
50%,158832000000.0,760965321672.18,268975028885.86,-0.63,57.5,53.5,97.58,98.45,50.96,50.16,...,-0.5,1.0,1.0,-1.49,-1.18,48.5,53756900000.0,268000000000.0,97.94,-0.67
75%,460097500000.0,2316546626302.49,828358210890.16,1.71,84.0,75.8,102.43,106.97,74.23,72.41,...,2.72,1.0,1.0,2.35,2.83,71.79,195000000000.0,769000000000.0,103.85,2.55
max,180434000000000.0,1332547200000000.0,16533342932403.3,18.93,238.2,226.9,281.92,248.02,232.1,159.34,...,44.17,1.0,1.0,8618.03,8507.24,226.9,250407348113526.88,2920000000000000.0,281.92,44.17


## **2. export the data**

In [23]:
# generate the count matrix for quarterly panel data

# Non-null observation counts of every column, per country (N) and per quarter (T).
panel_by_isocode = df_q_panel.groupby('isocode')
panel_by_quarter = df_q_panel.groupby('quarter')

df_q_panel_count_N = panel_by_isocode.count()
df_q_panel_count_T = panel_by_quarter.count()

In [24]:
# export the count matrix for quarterly panel data

# Each count table is written to its own workbook in the working directory.
count_exports = [
    (df_q_panel_count_N, 'df_q_panel_count_N.xlsx'),
    (df_q_panel_count_T, 'df_q_panel_count_T.xlsx'),
]

for count_table, file_name in count_exports:
    count_table.to_excel(excel_writer=file_name)

In [25]:
# export the quarterly panel data

# Written to the working directory; the row index is exported as the first column.
panel_file_name = 'df_q_panel.xlsx'
df_q_panel.to_excel(panel_file_name)

## **3. references**

* https://data.oecd.org/ <br/>
* https://stats.bis.org/#ppq=XRU_D_24D;pv=1,2~4~1,0,0~both <br/>
* https://databank.worldbank.org/source/world-development-indicators/preview/on <br/>
* https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm <br/>
* https://ec.europa.eu/eurostat/en/web/main/data/database <br/>
* https://data.imf.org/?sk=4c514d48-b6ba-49ed-8ab9-52b0c1a0179b&sId=1409151240976 <br/>
* https://fred.stlouisfed.org/ <br/>