In [2]:
# Import necessary libraries
import logging

import os
from datetime import datetime
import tempfile

import matplotlib
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import statsmodels.api as sm
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.neural_network import MLPRegressor
from statsmodels.tsa.api import VAR
#import shap



In [3]:
matplotlib.style.use('ggplot')
# Get the current date
today = datetime.today().date().isoformat()
today_str = today.replace('-', '')

In [4]:
df = pd.read_csv('data/data_input_monthly.csv')
df_dummy = pd.read_csv('data/financialStressDummy.csv')
df['date'] = pd.to_datetime(df['date'])
df_dummy['date'] = pd.to_datetime(df_dummy['date'])
df = pd.merge(df, df_dummy, on = ['iso2','date'], how = 'outer')
df = df.rename(columns={'dsrHouseholds_x':'dsrHouseholds', 'dsrHousehold_y':'dsrNfc'})
df.head()

Unnamed: 0,iso2,date,loansPnfs_yoy,resPropPrice,cpi_yoy,fx,totalCreditPnfs2GDP,totalCreditPnfsLCY,dsrPnfs,dsrHousehold_x,dsrNfc,policyRate,spotRate10year,EAtermSpread,spotRate2year,bankCreditPnfs,UStermSpread,financialStressIndex,financialStressDummy
0,AT,1949-01-01,,,20.999985,,,,,,,,,,,,,,
1,AT,1949-02-01,,,19.899912,,,,,,,,,,,,,,
2,AT,1949-03-01,,,16.699841,,,,,,,,,,,,,,
3,AT,1949-04-01,,,17.400087,,,,,,,,,,,,,,
4,AT,1949-05-01,,,17.599962,,,,,,,,,,,,,,


In [8]:
df['financialStressDummy'].replace(np.nan, 0)

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
25162    0.0
25163    0.0
25164    0.0
25165    0.0
25166    0.0
Name: financialStressDummy, Length: 25167, dtype: float64

In [23]:
print(df['iso2'].drop_duplicates().to_list())

['AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EA', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK', 'US']


In [6]:
eu_iso2 = ['AT', 'BE', 'BG', 'HR', 'CY', 'CZ', 'DK', 'EE', 'FI', 'FR', 'DE', 
           'GR', 'HU', 'IE', 'IT', 'LV', 'LT', 'LU', 'MT', 'NL', 'PL', 'PT', 
           'RO', 'SK', 'SI', 'ES', 'SE']

In [7]:
df.columns

Index(['iso2', 'date', 'loansPnfs_yoy', 'resPropPrice', 'cpi_yoy', 'fx',
       'totalCreditPnfs2GDP', 'totalCreditPnfsLCY', 'dsrPnfs',
       'dsrHousehold_x', 'dsrNfc', 'policyRate', 'spotRate10year',
       'EAtermSpread', 'spotRate2year', 'bankCreditPnfs', 'UStermSpread',
       'financialStressIndex', 'financialStressDummy'],
      dtype='object')

In [30]:
df.loc[df[df['iso2']=='EA'].index,  'EAtermSpread']

6115         NaN
6116         NaN
6117         NaN
6118         NaN
6119         NaN
          ...   
6711   -0.323753
6712   -0.433149
6713   -0.346007
6714   -0.363233
6715   -0.339852
Name: EAtermSpread, Length: 601, dtype: float64

In [13]:
df_country = df[df['iso2'] == 'GR'].copy()
df_country = df_country.dropna(axis=1, how='all')
df_country.dropna()

Unnamed: 0,iso2,date,resPropPrice,cpi_yoy,fx,totalCreditPnfs2GDP,totalCreditPnfsLCY,bankCreditPnfs,financialStressIndex,financialStressDummy
11360,GR,2006-01-01,112.0074,3.246287,0.8263,86.0,175.739,138.628,0.0689,0.0
11363,GR,2006-04-01,113.0191,3.270973,0.815017,88.2,184.244,148.540667,0.0691,0.0
11366,GR,2006-07-01,115.5599,3.844323,0.788455,90.5,192.983,153.740667,0.0669,0.0
11369,GR,2006-10-01,118.6805,2.812318,0.792983,93.2,203.356,162.625,0.0459,0.0
11372,GR,2007-01-01,118.5488,2.726534,0.769353,93.8,207.578,170.045667,0.0591,0.0
11375,GR,2007-04-01,117.6458,2.529338,0.739896,96.4,217.169,181.122,0.0689,0.0
11378,GR,2007-07-01,119.5312,2.52149,0.729123,98.7,226.329,192.549333,0.0369,0.0
11381,GR,2007-10-01,117.2876,3.097095,0.702882,102.1,237.75,204.560667,0.0641,0.0
11384,GR,2008-01-01,116.9995,3.896544,0.679479,105.5,249.707,222.131667,0.0636,0.0
11387,GR,2008-04-01,114.2615,4.433826,0.635246,108.8,259.875,231.956333,0.1727,0.0


In [7]:
# Calculate percentage changes and add lag features
cols2calc = ['m1', 'm3sa', 'ip', 'retailSales']
df[[f'{c}_gr' for c in cols2calc]] = df[cols2calc].pct_change(12, fill_method=None) * 100
df['termSpread'] = df['gvtYld10y'] - df['ShortGovYield']

# Add lag features
df[[c + '_L1' for c in ['cpi_gr']][0]] = df['cpi_gr'].shift(1).copy()
df[[c + '_L2' for c in ['cpi_gr']][0]] = df['cpi_gr'].shift(2).copy()
df[[c + '_F1' for c in ['cpi_gr']][0]] = df['cpi_gr'].shift(-1).copy()
df[[c + '_F3' for c in ['cpi_gr']][0]] = df['cpi_gr'].shift(-3).copy()
df[[c + '_F12' for c in ['cpi_gr']][0]] = df['cpi_gr'].shift(-12).copy()

# Define columns to lag
cols2lag = ['pmiCom', 'policyRate', 'm3sa', 'm1', 'UnempRate',
            'termSpread', 'm1_gr', 'm3sa_gr', 'ip_gr', 'retailSales_gr']

# Create binary target variable based on cpi_gr being above a certain threshold
threshold = df['cpi_gr'].median()
df['cpi_gr_binary'] = (df['cpi_gr'] > threshold).astype(int)

In [None]:
# Define model specifications
mod_1 = ['cpi_gr'] + [f'cpi_gr_{l}' for l in ['L1', 'L2']]
mod_2 = mod_1 + ['pmiCom', 'policyRate', 'UnempRate', 'termSpread', 'm1_gr', 'm3sa_gr', 'ip_gr', 'retailSales_gr']
mod_3 = mod_1 + ['policyRate']
mod_4 = mod_1 + ['termSpread', 'UnempRate']
mod_5 = mod_1 + ['m1_gr']
mod_6 = mod_1 + ['m3sa_gr']
mod_7 = mod_1 + ['retailSales_gr', 'ip_gr']

# Store model specifications in a list
x_plot_ = [mod_1, mod_2, mod_3, mod_4, mod_5, mod_6, mod_7]

# Define the end date for train-test split
end_date = '2022-01-01'
end_date_dt = pd.to_datetime(end_date)

# Define the cycle column (binary target)
cycle = 'cpi_gr_binary'