# 6. STRESS TESTING

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy  as np
import warnings

In [2]:
# Silence library warnings so the rendered notebook output stays readable.
warnings.filterwarnings('ignore')

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and the
# old aliases were removed in 3.8. Use whichever name this installation ships;
# if neither is listed, fall back to the original name so the failure is loud.
_whitegrid_style = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if style_name in plt.style.available
    ),
    'seaborn-whitegrid',
)
plt.style.use(_whitegrid_style)

In [43]:
from utils import get_train_test, load_statistical_clusters, run_model, stress_test

## From now on we use the dataset with imputed missing values

In [4]:
# Read the imputed dataset, then discard its first column
# (presumably a row index written out when the CSV was saved — TODO confirm).
df = pd.read_csv('imputed_MICE_forrest.csv')
df = df.iloc[:, 1:]

## Merge the imputed dataset with the info columns (the first 7 columns of the selected original data)
* note: both dataframes have the same number of observations, in exactly the same order, so we can concatenate them 1:1 without using any merge key

In [5]:
# NOTE(review): unpickling can execute arbitrary code — only load this file
# when it comes from a trusted source.
data = pd.read_pickle('Erasmus_data_stresstesting_2024.pickle')
data = data.iloc[:, 2:20]  # keep the columns at positions 2..19 only

# ISO country codes of the countries covered by the analysis.
countries_of_interest = [
    'FI', 'NL', 'AT', 'BE', 'SE', 'DE', 'NO', 'DK', 'IS', 'IT', 'ES',
    'PT', 'LV', 'RO', 'HR', 'LT', 'BG', 'SK', 'CZ', 'SI', 'HU', 'PL',
]

# Keep only the rows whose country is in the list above.
subset_df = data.loc[data['country_code'].isin(countries_of_interest)]

In [6]:
# Take the first seven columns (the firm "info" fields) and renumber the rows
# 0..n-1, discarding the index left over from the country filter.
info = subset_df.iloc[:, :7].reset_index(drop=True)

In [7]:
# `info` and the imputed frame are glued together side by side with no key,
# relying on both having the same rows in the same order. Verify at least the
# row counts match: otherwise `concat` would silently pad the shorter frame
# with NaN rows instead of failing.
if len(info) != len(df):
    raise ValueError(
        f'Cannot concatenate 1:1: info has {len(info)} rows but the imputed data has {len(df)} rows'
    )
df = pd.concat([info, df], axis = 1)

## Now we obtain the macro and micro variables and merge them with our latest df
* note: this time the macro variables are merged on the keys *country_code* and *status_year*, and WoE_country and WoE_industry on the keys *country_code* and *industry_code*

In [8]:
# Load the WoE-enriched dataset and drop its first two columns.
df2 = pd.read_csv('full_data_woe_rid_-2.csv')
df2 = df2.iloc[:, 2:]

# Everything from column position 61 onward is treated as the macro block
# (presumably the macro-economic variables — TODO confirm the boundary).
macro = df2.iloc[:, 61:]

In [9]:
# Prefix the macro block with the (country, year) key columns used for merging later.
# NOTE: this rebinds `macro`, so running the cell twice prepends the keys twice.
macro = pd.concat(
    [df2.loc[:, ['country_code', 'status_year']], macro],
    axis='columns',
)

In [10]:
# Key columns plus the two Weight-of-Evidence columns.
woe = df2.loc[:, ['country_code', 'industry_code', 'WoE_country', 'WoE_industry']]

In [11]:
# Collapse the firm-level rows to the distinct macro rows (first occurrence kept).
macro_uni = macro.drop_duplicates(keep='first')

In [12]:
# Collapse the firm-level rows to the distinct WoE rows (first occurrence kept).
woe_uni = woe.drop_duplicates(keep='first')

In [13]:
# Attach the WoE columns by (country, industry), then the macro variables by
# (country, year). Both are default inner joins, so rows without a matching
# key in the lookup frame are dropped.
df_new = (
    df
    .merge(woe_uni, on=['country_code', 'industry_code'])
    .merge(macro_uni, on=['country_code', 'status_year'])
)

## The variable *df_new* is the final dataframe with info columns, firm characteristics, WoE columns and macro variables

In [14]:
# Show the merged frame (pandas renders a truncated preview of the rows and columns).
df_new

Unnamed: 0,country_code,industry_code,size_class,status_year,status_date_latest,status_latest,default_indicator,intangible_fixed_assets_0,intangible_fixed_assets_1,tangible_fixed_assets_0,...,EURxTRY,hh_debt,corp_debt,govt_debt,3m_yield,10y_yield,oil,gas,gold,copper
0,DE,G,SME,2020,16991231,non_default,0,2905.0,9624.000000,65007.0,...,9.07982,57.055937,119.453344,67.986011,-0.425150,-0.511024,32.84,3.4902,1898.36,6352.5562
1,DE,G,SME,2020,16991231,non_default,0,1.0,1.000000,266426.0,...,9.07982,57.055937,119.453344,67.986011,-0.425150,-0.511024,32.84,3.4902,1898.36,6352.5562
2,DE,G,SME,2020,16991231,non_default,0,310.0,0.000000,596041.0,...,9.07982,57.055937,119.453344,67.986011,-0.425150,-0.511024,32.84,3.4902,1898.36,6352.5562
3,DE,G,SME,2020,16991231,non_default,0,3269.0,3714.000000,167798.0,...,9.07982,57.055937,119.453344,67.986011,-0.425150,-0.511024,32.84,3.4902,1898.36,6352.5562
4,DE,G,SME,2020,16991231,non_default,0,68.0,262.000000,1053093.0,...,9.07982,57.055937,119.453344,67.986011,-0.425150,-0.511024,32.84,3.4902,1898.36,6352.5562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075921,LV,L,SME,2004,16991231,non_default,0,66871.0,90739.000000,1936418.0,...,1.82091,19.996895,88.332385,13.744445,4.231875,3.875000,234.51,692.6744,438.45,2323.6943
1075922,LV,J,SME,2004,16991231,non_default,0,19037.0,47858.105616,203540.0,...,1.82091,19.996895,88.332385,13.744445,4.231875,3.875000,234.51,692.6744,438.45,2323.6943
1075923,LV,R,SME,2004,16991231,non_default,0,5228.0,47858.105616,2498256.0,...,1.82091,19.996895,88.332385,13.744445,4.231875,3.875000,234.51,692.6744,438.45,2323.6943
1075924,LV,A,SME,2001,16991231,non_default,0,0.0,47858.105616,2271159.0,...,1.29230,7.683075,83.443015,17.459588,6.861667,5.408333,114.44,990.0938,278.95,1678.1713


## In this step we assemble the clusters based on the statistical analysis done in FCS_code2.5_Clustering.ipynb

In [15]:
# Split the full training frame into its statistical clusters using the project helper.
# NOTE(review): the return type is not visible here; later cells use len() and
# integer indexing on it — confirm against utils.load_statistical_clusters.
clusters = load_statistical_clusters(df_new)

In [16]:
# Baseline stress-test data; the first column is discarded.
test = pd.read_csv('full_stress_data_base.csv')
test = test.iloc[:, 1:]

# Second stress-test file; the first column is discarded.
test2 = pd.read_csv('full_stress_data-2.csv')
test2 = test2.iloc[:, 1:]

# Data for the dynamic logit model; the first two columns and `size_class` are discarded.
testD = pd.read_csv('DynamicLogitStress.csv')
testD = testD.iloc[:, 2:].drop(columns=['size_class'])

# Reverse Stress Testing

# War Scenario

We model the case in which a larger military conflict breaks out in Europe. In this scenario we expect government debt to skyrocket, as European countries would have to fund the war; at the same time, we would see a spike in inflation, and GDP growth would plummet. Short-term interest rates would increase as well.

In [17]:
# List the column labels from position 58 onward (WoE and macro variables).
df_new.columns[58:]

Index(['WoE_country', 'WoE_industry', 'gdp_growth', 'inflation_growth',
       'unemployment', 'EURxUSD', 'EURxJPY', 'EURxCNY', 'EURxINR', 'EURxGBP',
       'EURxNOK', 'EURxCHF', 'EURxTRY', 'hh_debt', 'corp_debt', 'govt_debt',
       '3m_yield', '10y_yield', 'oil', 'gas', 'gold', 'copper'],
      dtype='object')

### Stressing Variables

In [18]:
# Government debt: distinct (country, year, value) rows scaled by a per-year multiplier.
govt = test2[['country_code', 'status_year', 'govt_debt']]
govt_df = govt.drop_duplicates().sort_values('country_code')
govt_growth = {2023 : 1.2, 2024 : 1.25, 2025 : 1.3}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
govt_df['govt_stressed'] = govt_df['govt_debt'] * govt_df['status_year'].map(govt_growth).fillna(1)


In [19]:
# Inflation: distinct (country, year, value) rows scaled by a per-year multiplier.
pi = test2[['country_code', 'status_year', 'inflation_growth']]
pi_df = pi.drop_duplicates().sort_values('country_code')
pi_growth = {2023 : 1.1, 2024 : 1.2, 2025 : 1.25}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
pi_df['pi_stressed'] = pi_df['inflation_growth'] * pi_df['status_year'].map(pi_growth).fillna(1)


In [20]:
# GDP growth: distinct (country, year, value) rows scaled by a per-year multiplier.
gdp = test2[['country_code', 'status_year', 'gdp_growth']]
gdp_df = gdp.drop_duplicates().sort_values('country_code')
# NOTE(review): multipliers above 1 amplify whatever sign gdp_growth already has.
# They only make growth "plummet" if the input values are negative — confirm
# against the data in full_stress_data-2.csv.
gdp_growth = {2023 : 1.05, 2024 : 1.1, 2025 : 1.2}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
gdp_df['gdp_stressed'] = gdp_df['gdp_growth'] * gdp_df['status_year'].map(gdp_growth).fillna(1)


In [21]:
# 3-month yield: distinct (country, year, value) rows scaled by a per-year multiplier.
m3 = test2[['country_code', 'status_year', '3m_yield']]
m3_df = m3.drop_duplicates().sort_values('country_code')
m3_growth = {2023 : 1.1, 2024 : 1.2, 2025 : 1.3}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
m3_df['3m_stressed'] = m3_df['3m_yield'] * m3_df['status_year'].map(m3_growth).fillna(1)


In [22]:
# Oil price: distinct (country, year, value) rows scaled by a per-year multiplier.
oil = test2[['country_code', 'status_year', 'oil']]
oil_df = oil.drop_duplicates().sort_values('country_code')
oil_growth = {2023 : 1.8, 2024 : 1.5, 2025 : 1.45}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
oil_df['oil_stressed'] = oil_df['oil'] * oil_df['status_year'].map(oil_growth).fillna(1)


In [23]:
# Gas price: distinct (country, year, value) rows scaled by a per-year multiplier.
gas = test2[['country_code', 'status_year', 'gas']]
gas_df = gas.drop_duplicates().sort_values('country_code')
gas_growth = {2023 : 1.8, 2024 : 1.5, 2025 : 1.45}
# Vectorised lookup instead of a row-wise apply: years missing from the mapping
# become NaN in `map` and are replaced by a neutral multiplier of 1.
gas_df['gas_stressed'] = gas_df['gas'] * gas_df['status_year'].map(gas_growth).fillna(1)

In [24]:
# Start the war-scenario frame from the baseline stress data.
# This is an alias, not a copy: `test_RVT` and `test` refer to the same object
# until `test_RVT` is rebound to a new frame.
test_RVT = test

In [25]:
# Attach the stressed GDP-growth value to each row by (country, year).
# Default inner join: rows whose key is absent from `gdp_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    gdp_df.loc[:, ['country_code', 'status_year', 'gdp_stressed']],
    on=['country_code', 'status_year'],
)

In [26]:
# Attach the stressed inflation value to each row by (country, year).
# Default inner join: rows whose key is absent from `pi_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    pi_df.loc[:, ['country_code', 'status_year', 'pi_stressed']],
    on=['country_code', 'status_year'],
)

In [27]:
# Attach the stressed government-debt value to each row by (country, year).
# Default inner join: rows whose key is absent from `govt_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    govt_df.loc[:, ['country_code', 'status_year', 'govt_stressed']],
    on=['country_code', 'status_year'],
)

In [28]:
# Attach the stressed 3-month-yield value to each row by (country, year).
# Default inner join: rows whose key is absent from `m3_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    m3_df.loc[:, ['country_code', 'status_year', '3m_stressed']],
    on=['country_code', 'status_year'],
)

In [29]:
# Attach the stressed oil-price value to each row by (country, year).
# Default inner join: rows whose key is absent from `oil_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    oil_df.loc[:, ['country_code', 'status_year', 'oil_stressed']],
    on=['country_code', 'status_year'],
)

In [30]:
# Attach the stressed gas-price value to each row by (country, year).
# Default inner join: rows whose key is absent from `gas_df` are dropped.
test_RVT = pd.merge(
    test_RVT,
    gas_df.loc[:, ['country_code', 'status_year', 'gas_stressed']],
    on=['country_code', 'status_year'],
)

In [31]:
# Overwrite the government-debt column with its stressed counterpart (in place).
test_RVT['govt_debt'] = test_RVT['govt_stressed']

In [32]:
# Overwrite the GDP-growth column with its stressed counterpart (in place).
test_RVT['gdp_growth'] = test_RVT['gdp_stressed']

In [33]:
# Overwrite the inflation column with its stressed counterpart (in place).
test_RVT['inflation_growth'] = test_RVT['pi_stressed']

In [34]:
# Overwrite the 3-month-yield column with its stressed counterpart (in place).
test_RVT['3m_yield'] = test_RVT['3m_stressed']

In [35]:
# Overwrite the oil-price column with its stressed counterpart (in place).
test_RVT['oil'] = test_RVT['oil_stressed']

In [36]:
# Overwrite the gas-price column with its stressed counterpart (in place).
test_RVT['gas'] = test_RVT['gas_stressed']

In [37]:
# Remove the six helper columns now that their values have been copied into the
# real macro columns. Dropping by name (rather than "the last six columns")
# keeps this cell safe to re-run: a second run is a no-op instead of silently
# stripping six genuine columns off the end of the frame.
test_RVT = test_RVT.drop(
    columns=[
        'gdp_stressed',
        'pi_stressed',
        'govt_stressed',
        '3m_stressed',
        'oil_stressed',
        'gas_stressed',
    ],
    errors='ignore',
)

In [38]:
# Show the stressed war-scenario frame (pandas renders a truncated preview).
test_RVT

Unnamed: 0,country_code,industry_code,status_year,status_date_latest,status_latest,default_indicator,intangible_fixed_assets_0,intangible_fixed_assets_1,tangible_fixed_assets_0,tangible_fixed_assets_1,...,EURxTRY,hh_debt,corp_debt,govt_debt,3m_yield,10y_yield,oil,gas,gold,copper
0,DE,G,2023,20160401,non_default,0,1.0,1.000000,77347.0,9.290600e+04,...,17.15,59.547265,111.313833,90.008511,0.4653,1.99,370.911713,2426.710477,1070.87,4709.59
1,DE,G,2023,20181030,non_default,0,96855.0,127632.000000,285802.0,3.118150e+05,...,17.15,59.547265,111.313833,90.008511,0.4653,1.99,370.911713,2426.710477,1070.87,4709.59
2,DE,G,2023,16991231,non_default,0,932.0,854302.483872,2139690.0,3.044565e+06,...,17.15,59.547265,111.313833,90.008511,0.4653,1.99,370.911713,2426.710477,1070.87,4709.59
3,DE,G,2023,16991231,non_default,0,5.0,5.000000,272832.0,3.075800e+05,...,17.15,59.547265,111.313833,90.008511,0.4653,1.99,370.911713,2426.710477,1070.87,4709.59
4,DE,G,2023,20160825,non_default,0,21127.0,928298.200777,99691.0,1.573224e+06,...,17.15,59.547265,111.313833,90.008511,0.4653,1.99,370.911713,2426.710477,1070.87,4709.59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178879,LV,B,2025,16991231,non_default,0,320.0,931967.301693,178057.0,9.332394e+05,...,17.15,8.127991,8.582985,49.273954,0.7280,3.65,274.521798,1954.850106,1070.87,4709.59
178880,LV,S,2025,20150408,non_default,0,0.0,0.000000,686745.0,7.517590e+05,...,17.15,8.127991,8.582985,49.273954,0.7280,3.65,274.521798,1954.850106,1070.87,4709.59
178881,LV,S,2025,16991231,non_default,0,479.0,788296.194708,1306865.0,2.056950e+06,...,17.15,8.127991,8.582985,49.273954,0.7280,3.65,274.521798,1954.850106,1070.87,4709.59
178882,LV,S,2025,16991231,non_default,0,358049.0,207365.000000,4862823.0,4.452594e+06,...,17.15,8.127991,8.582985,49.273954,0.7280,3.65,274.521798,1954.850106,1070.87,4709.59


### Results

In [40]:
# Split both stress data sets into the same statistical clusters as the training data.
T_clusters = load_statistical_clusters(test_RVT) # war scenario; already without the WoE columns
T_clusters2 = load_statistical_clusters(test2) # second stress file; still contains the WoE columns

In [494]:
# Fit one random forest per statistical cluster, then score both stress data sets with it.
for cluster in range(len(clusters)):
    cluster_frame = clusters[cluster]

    # Target is the default flag; features are everything after the seven info
    # columns, minus the two WoE columns.
    y = cluster_frame['default_indicator']
    X = cluster_frame.iloc[:, 7:].drop(columns=['WoE_country', 'WoE_industry'])

    # NOTE: `data` is rebound here and therefore no longer refers to the frame
    # loaded from the pickle earlier in the notebook.
    data = get_train_test(X, y, use_SMOTE=True)
    m = run_model(data, type='RF', n_estimators=50, max_depth=3)
    print(f'Cluster {cluster + 1}')

    # First the war scenario, then the second stress file with its WoE columns
    # removed. `prob` ends up holding only the result of the second call.
    prob = stress_test(m, T_clusters[cluster], X, y)
    stress2_features = T_clusters2[cluster].drop(columns=['WoE_country', 'WoE_industry'])
    prob = stress_test(m, stress2_features, X, y)

Results for RF
------------------------------
Confussion Matrx:
 [[70510  1839]
 [  211   317]]

AUC: 0.9039

Feature Importance:

Cluster 1
Total default rate over 3 years: 2.399
Default Rate in the train data: 0.0072451239492512
Stress Test increased PD by multiple of 3.3105510167281604
Number of defaults:
Year 2023 : 393
Year 2024 : 0
Year 2025 : 0
Total default rate over 3 years: 2.386
Default Rate in the train data: 0.0072451239492512
Stress Test increased PD by multiple of 3.2937034288567704
Number of defaults:
Year 2023 : 391
Year 2024 : 0
Year 2025 : 0
Results for RF
------------------------------
Confussion Matrx:
 [[9023  132]
 [  28   14]]

AUC: 0.8669

Feature Importance:

Cluster 2
Total default rate over 3 years: 0.714
Default Rate in the train data: 0.004523410825739947
Stress Test increased PD by multiple of 1.5782812929039345
Number of defaults:
Year 2023 : 12
Year 2024 : 0
Year 2025 : 2
Total default rate over 3 years: 0.51
Default Rate in the train data: 0.0045234108