#  Author: Eero Tölö 

This code performs the data transformations applied to the input data used in the following article:

Eero Tölö, Predicting systemic financial crises with recurrent neural networks,
Journal of Financial Stability, Volume 49, 2020, 100746.
https://doi.org/10.1016/j.jfs.2020.100746

http://www.sciencedirect.com/science/article/pii/S1572308920300243

The newest data file can be downloaded from:

http://www.macrohistory.net/data/

I used version JSTdatasetR3.csv, which is available upon request from me.

In [None]:
import pandas as pd
import numpy as np



Using TensorFlow backend.


## Import data

In [None]:
# Upload the data file when running in Google Colab.
# Outside Colab the google.colab package is not importable; in that case the
# upload is skipped and `uploaded` is set to None, so the cell can be run
# unchanged in a local environment.
try:
  from google.colab import files
except ImportError:
  uploaded = None
else:
  uploaded = files.upload()

Saving JSTdatasetR3.csv to JSTdatasetR3.csv


In [None]:
# Read the raw CSV (JSTdatasetR3.csv, looked up relative to the working
# directory) into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer='JSTdatasetR3.csv')
df.head()

Unnamed: 0,year,country,iso,ifs,pop,rgdpmad,rgdppc,rconpc,gdp,iy,cpi,ca,imports,exports,narrowm,money,stir,ltrate,stocks,debtgdp,revenue,expenditure,xrusd,crisisJST,tloans,tmort,thh,tbus,hpnom
0,1870,Australia,AUS,193,1775.0,3273.239437,13.836157,21.449734,208.78,0.109266,2.708333,-6.147594,36.0,37.0,23.3,54.3,4.88,4.911817,0.304609,0.172568,,,0.366946,0,54.792,1.68,1.68,53.112,0.492253
1,1871,Australia,AUS,193,1675.0,3298.507463,13.936864,19.930801,211.56,0.104579,2.666667,5.260774,34.0,46.0,27.2,59.5,4.6,4.844633,0.280119,0.191799,,,0.369146,0,53.748,1.766,1.766,51.982,0.469877
2,1872,Australia,AUS,193,1722.0,3553.426249,15.044247,21.085006,227.4,0.130438,2.541667,7.867636,38.0,53.0,36.2,68.5,4.6,4.73735,0.323705,0.15492,,,0.369239,0,55.822,1.47,1.47,54.352,0.484794
3,1873,Australia,AUS,193,1769.0,3823.629169,16.219443,23.25491,266.54,0.124986,2.541667,-11.047833,49.0,50.0,38.6,73.7,4.4,4.671958,0.3743,0.142692,,,0.362405,0,65.38,1.364,1.364,64.016,0.469877
4,1874,Australia,AUS,193,1822.0,3834.796926,16.268228,23.45805,287.58,0.14196,2.666667,-5.563959,49.0,54.0,37.9,79.3,4.5,4.653317,0.394924,0.194322,,,0.372223,0,71.478,1.434,1.434,70.044,0.566836


## Function for initializing the data (makes the series stationary, drops NaN values and selects the variables of interest)

In [None]:

def init_data(df,start_year = 1970, y_shift = 2, normalize = False):
  """Derive the stationary model variables from the raw data frame.

  The input frame is not modified; all derived columns are built on a copy.

  Parameters
  ----------
  df : pandas.DataFrame
      Raw data with (at least) the columns 'year', 'iso', 'gdp', 'cpi', 'ca',
      'tloans', 'tmort', 'thh', 'tbus', 'hpnom', 'stocks', 'debtgdp',
      'ltrate', 'stir' and 'crisisJST'. Rows are assumed to be ordered by
      country and then by year, so that the row `y_shift` positions earlier
      is the observation `y_shift` years earlier (not checked here).
  start_year : int
      Rows with 'year' below this value are dropped.
  y_shift : int
      Number of rows (years) over which growth rates are computed.
  normalize : bool
      If True, every explanatory variable is rescaled to
      (x - mean) / std * 10, using the population standard deviation.

  Returns
  -------
  pandas.DataFrame
      Columns 'year', 'iso', the explanatory variables and 'crisisJST', with
      every row that has a missing explanatory variable removed. The original
      row labels are kept (the index is not reset).

  Side effects
  ------------
  Prints the mean and standard deviation of every explanatory variable.
  """
  # Work on a copy so the caller's frame does not silently gain derived columns.
  df2 = df.copy()

  # Ratios to GDP, in percent.
  df2['tloansgdp'] = 100 * df2['tloans'] / df2['gdp']
  df2['ca/gdp'] = 100 * df2['ca'] / df2['gdp']

  # Real series: nominal series divided by the price level.
  real_from_nominal = {
    'rgdp': 'gdp',
    'rhp': 'hpnom',
    'rsp': 'stocks',
    'rtloans': 'tloans',
    'rtmort': 'tmort',
    'rthh': 'thh',
    'rtbus': 'tbus',
  }
  for real_name, nominal_name in real_from_nominal.items():
    df2[real_name] = df2[nominal_name] / df2['cpi']

  # Growth rates over y_shift rows, in percent. A growth rate is only valid
  # when the row y_shift positions earlier belongs to the same country;
  # otherwise (including the first y_shift rows of the frame) it is set to NaN.
  same_country = (df2['iso'] == df2['iso'].shift(y_shift)).to_numpy(dtype=bool)
  growth_sources = ['tloansgdp','cpi','rsp','debtgdp','rgdp','rhp','rtloans','rtmort','rthh','rtbus']
  for name in growth_sources:
    growth = 100 * ( df2[name] / df2[name].shift(y_shift) - 1 )
    df2[name + '_g'] = growth.where(same_country)

  # Keep only a set of variables
  varlist = ['year','iso','cpi_g','rgdp_g','ca/gdp','debtgdp_g','tloansgdp_g','rsp_g','rhp_g','rtloans_g','rtmort_g','rthh_g','rtbus_g','ltrate','stir','crisisJST']
  # Everything between the two identifier columns and the crisis indicator
  # is an explanatory variable.
  model_vars = varlist[2:-1]

  # Drop rows with a missing explanatory variable, then apply the year cut-off.
  df2 = df2[varlist].dropna(subset=model_vars)
  # Explicit copy: the normalisation below assigns into this frame.
  df2 = df2[df2['year'] >= start_year].copy()

  for var in model_vars:
    mean = df2[var].mean()
    std = df2[var].std(ddof=0)  # population standard deviation
    print(var,'mean',mean,'std',std)
    if normalize:
      df2[var] = (df2[var]-mean)/std*10

  return df2


In [None]:
def jst_check(year,country):
  """Tell whether an observation falls into one of the excluded periods.

  Follows Schularick and Taylor's exclusion principle: the function returns
  True for 1914-1919 (WW1) and 1939-1947 (WW2) for every country, and for
  1920-1925 when `country` is 'DEU' (post-WW1 German crisis). All bounds are
  inclusive. Any other combination returns False.
  """
  in_world_war = (1914 <= year <= 1919) or (1939 <= year <= 1947)
  german_post_ww1 = (country == 'DEU') and (1920 <= year <= 1925)
  if in_world_war or german_post_ww1:
    return True
  return False
  

## A somewhat heavyweight function for the simple task of defining the target variable.

In [None]:
def add_precrisis(df2,fcast_horizon=1,postdrop=1, stfilter = False):
  """Add the pre-crisis target variable and drop the rows excluded from the analysis.

  The input frame is left untouched; a new frame is returned.

  Parameters
  ----------
  df2 : pandas.DataFrame
      Must contain the columns 'year', 'iso' and 'crisisJST'. Neighbouring
      rows of the same country are treated as consecutive years (assumed,
      not checked).
  fcast_horizon : int
      How many years (rows) in advance the crisis is to be forecast;
      the default is 1 year.
  postdrop : int
      How many years, starting with the crisis year itself, to exclude from
      the analysis. The default of 1 drops the crisis year only.
  stfilter : bool
      If True, also drop the rows for which jst_check(year, iso) is True
      (1914-19, 1939-47 and Germany 1920-25).

  Returns
  -------
  pandas.DataFrame
      A copy of the input with a fresh 0..n-1 index and two extra columns:
      'precrisis' (1 = a crisis starts exactly `fcast_horizon` rows later,
      0 = otherwise) and 'stfilt' (1 = row matches jst_check, else 0).
      Rows marked for exclusion (internal code -1) are removed.
  """
  # New frame with a clean positional index; the caller's frame is not modified.
  df2 = df2.reset_index(drop=True)
  n = len(df2)

  years = df2['year'].tolist()
  isos = df2['iso'].tolist()
  crises = df2['crisisJST'].tolist()

  precrisis = [0] * n
  stfilt = [0] * n

  # Every row is processed, including row 0, so that a crisis in (or right
  # after) the very first observation is handled like any other.
  for i in range(n):
    if jst_check(years[i], isos[i]):
      stfilt[i] = 1

    # Look ahead: a crisis exactly at the horizon makes this row a target row;
    # a crisis closer than the horizon marks the row for exclusion (-1).
    # Later offsets overwrite earlier ones.
    for j in range(1, fcast_horizon + 1):
      if i + j < n and isos[i] == isos[i + j] and crises[i + j] == 1:
        precrisis[i] = 1 if j == fcast_horizon else -1

    # Look back: exclude the crisis row itself and the postdrop-1 rows after it.
    # This takes precedence over the look-ahead result.
    for j in range(postdrop):
      if i - j >= 0 and isos[i] == isos[i - j] and crises[i - j] == 1:
        precrisis[i] = -1

  df2['precrisis'] = precrisis
  df2['stfilt'] = stfilt

  df2 = df2[df2['precrisis'] != -1]

  if stfilter:
    # Drop 1914-19, 1939-47 and "Germany 1920-25"
    df2 = df2[df2['stfilt'] != 1]

  return df2.reset_index(drop=True)

## Run the following code to confirm that the data processing works.

In [None]:
# Build the model inputs: one-year growth rates from 1970 onwards, normalised.
df2 = init_data(df=df, start_year=1970, y_shift=1, normalize=True)
# Define the target variable with a two-year forecast horizon, dropping the crisis year itself.
df2 = add_precrisis(df2, fcast_horizon=2, postdrop=1)

# Show a truncated view (at most 10 rows) of the processed frame.
pd.set_option('display.max_rows', 10)
display(df2)

# Count the two target classes: precrisis == 1 and precrisis == 0.
number_of_crises = np.sum(df2['precrisis'] == 1)
tranquil_years = np.sum(df2['precrisis'] == 0)
print('Number of crises:',number_of_crises, 'Tranquil years:',tranquil_years)

cpi_g mean 4.279757604657745 std 4.171036287884855
rgdp_g mean 2.448680833040614 std 2.9422529689972694
ca/gdp mean 0.5509198627352472 std 4.398831049450754
debtgdp_g mean 2.6360613964796813 std 10.644301249642599
tloansgdp_g mean 1.7313030811287393 std 4.964514641654108
rsp_g mean 4.795797833879647 std 21.973916576276352
rhp_g mean 2.0862222736706144 std 7.124422012947581
rtloans_g mean 4.2241475112027675 std 5.963996183076989
rtmort_g mean 5.600261726341057 std 6.7851295510877865
rthh_g mean 5.63111138136095 std 7.662861272653756
rtbus_g mean 3.4727506128615033 std 8.292772083636851
ltrate mean 6.857459806288039 std 3.888000183654738
stir mean 5.8580043823505505 std 4.581161240588056


Unnamed: 0,year,iso,cpi_g,rgdp_g,ca/gdp,debtgdp_g,tloansgdp_g,rsp_g,rhp_g,rtloans_g,rtmort_g,rthh_g,rtbus_g,ltrate,stir,crisisJST,precrisis,stfilt
0,1970,AUS,-0.896067,20.351324,-5.598320,-7.267344,-9.671152,-11.194618,5.899360,1.481313,-0.636207,0.245544,1.468672,-0.542163,-1.039775,0,0,0
1,1971,AUS,4.277785,2.342775,-5.156454,-9.621237,-3.644166,-8.977320,6.098612,-1.955827,-3.069745,-2.903995,-0.749086,-0.370696,-0.974289,0,0,0
2,1972,AUS,3.800555,6.031054,0.672938,-7.248748,-2.770068,8.947865,2.784488,0.620657,-0.687525,2.541596,-0.790041,-2.638357,-3.359056,0,0,0
3,1973,AUS,12.433690,-0.986132,0.038422,-12.805070,23.674109,-18.063522,11.070001,19.634154,0.635950,11.952202,15.885908,0.195148,-1.409041,0,0,0
4,1974,AUS,25.965708,9.579452,-9.224532,-14.733649,8.789493,-20.370458,4.026667,12.506612,-0.990719,-0.011019,14.380367,5.604956,7.009494,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683,2012,USA,-5.253802,-1.600300,-7.508619,1.535819,-5.240612,3.303283,-5.885030,-5.254760,-10.354627,-8.952744,2.772825,-13.001439,-12.481561,0,0,0
684,2013,USA,-6.777800,-2.070124,-7.057096,-1.444116,-4.054376,9.323630,5.297013,-4.478920,-7.673589,-7.712022,1.847766,-11.591125,-12.547047,0,0,0
685,2014,USA,-6.337245,0.952402,-5.812026,-0.619840,-1.229142,3.189917,2.359014,-0.576054,-5.846459,-5.461751,5.747503,-11.102434,-12.590704,0,0,0
686,2015,USA,-9.984926,4.788504,-5.941879,-4.661314,3.068058,-2.238979,4.758648,5.052686,1.437869,-0.015742,7.814387,-12.144101,-12.503390,0,0,0


Number of crises: 24 Tranquil years: 664
