In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler
from configs.enums import RiskClassifications
import numpy as np
import os
import warnings
from configs.enums import Column
from machine_learning.utils import split_data, scale_dataset
from machine_learning.neural_networks.utils import plot_history
from machine_learning.neural_networks.fnn import fnn_model, train_fnn_model
from configs.data import MACHINE_LEARNING_DATASET_PATH
import pandas as pd
from tensorflow.keras.models import load_model
warnings.simplefilter(action='ignore', category=FutureWarning)

## 1. Loading the dataset

In [None]:
# Read the complete machine-learning dataset from the configured Excel file.
df = pd.read_excel(MACHINE_LEARNING_DATASET_PATH)

# The two input features used for the model and for the scatter-plot axes.
x_feature, y_feature = Column.GOV_INSTABILITY, Column.GDP_PC

# Restrict the frame to the two features plus the risk label.
selected_columns = [x_feature, y_feature, Column.COUNTRY_RISK]
df_2_features = df[selected_columns]

# Only the third value returned by split_data is kept (used as the test set).
# NOTE(review): train_fnn_model receives df_2_features and presumably splits
# it again internally; confirm both splits are deterministic and identical.
_, _, test = split_data(df_2_features)

# Scale the test split without oversampling; x_test / test_labels are reused
# by the tuning and plotting cells further down.
scaled_test, x_test, test_labels = scale_dataset(test, oversample=False)

## 2. FNN

In [None]:
def subplot_scatter(ax, data: pd.DataFrame, risk_col: Column | str, title: str) -> None:
    """Draw one risk-coloured scatter panel on ``ax``.

    Rows of ``data`` are grouped by the value found in ``risk_col``: rows
    equal to 1 are drawn in green ("low"), 2 in yellow ("medium") and 3 in
    red ("high"). Rows holding any other value are not drawn.

    The plotted columns are the notebook-level ``x_feature`` and
    ``y_feature``; their ``get_description()`` text labels the axes.

    Args:
        ax: Matplotlib axes to draw on.
        data: Frame containing ``x_feature``, ``y_feature`` and ``risk_col``.
        risk_col: Column whose values select the colour group.
        title: Title text for the panel.
    """
    # (risk value, marker colour) in legend order: low, medium, high.
    palette = ((1, "green"), (2, "yellow"), (3, "red"))

    handles = []
    for risk_value, colour in palette:
        subset = data[data[risk_col] == risk_value]
        handles.append(ax.scatter(subset[x_feature], subset[y_feature], color=colour))

    ax.legend(handles, ("low", "medium", "high"), loc="best")
    ax.grid(True)
    ax.set_xlabel(x_feature.get_description())
    ax.set_ylabel(y_feature.get_description())
    ax.title.set_text(title)

In [None]:
def tune_fnn_model(df, layers, units, dropout_rates, learning_rates):
    """Grid-search FNN hyper-parameters and keep the lowest-loss model on disk.

    Every combination of ``layers`` x ``units`` x ``dropout_rates`` x
    ``learning_rates`` is trained through ``train_fnn_model`` (``epochs=100``,
    ``patience=20``, with saving, history plotting and report printing
    disabled) and then scored with ``model.evaluate`` on the notebook-level
    ``x_test`` / ``test_labels``. Each time a combination beats the best loss
    seen so far, its model is written to
    ``$OUTPUT_PATH/Risk_factor_dnn_model_per_feature.keras``, replacing the
    previous best. The best parameters and loss are printed at the end.

    Args:
        df: Dataset passed unchanged to ``train_fnn_model``.
        layers: Layer counts to try.
        units: Unit counts to try.
        dropout_rates: Dropout rates to try.
        learning_rates: Learning rates to try.

    Raises:
        KeyError: If the ``OUTPUT_PATH`` environment variable is not set.
            This is checked before any training starts.
    """
    # Flatten the grid once. The iteration order is the same as four nested
    # loops (learning rate varies fastest) and the length gives the total
    # shown in the progress prefix.
    grid = [
        (n_layers, n_units, dropout, rate)
        for n_layers in layers
        for n_units in units
        for dropout in dropout_rates
        for rate in learning_rates
    ]
    total = len(grid)

    # Resolve the destination up front so a missing OUTPUT_PATH fails
    # immediately instead of after the first (expensive) training run.
    best_model_path = os.path.join(
        os.environ["OUTPUT_PATH"], "Risk_factor_dnn_model_per_feature.keras"
    )

    least_val_loss = float('inf')
    least_val_loss_params = []

    for i, (n_layers, n_units, dropout, rate) in enumerate(grid, start=1):
        print(f"[{i}/{total}] Layers: {n_layers}; Units: {n_units}; Dropout rate: {dropout}; Learning rate: {rate};")

        model, _, _ = train_fnn_model(
            df,
            epochs=100,
            patience=20,
            layers=n_layers,
            units=n_units,
            dropout_rate=dropout,
            learning_rate=rate,
            verbose=0,
            disable_save=True,
            disable_plot_history=True,
            disable_print_report=True)

        # NOTE(review): despite the "val_" names, this loss is computed on the
        # notebook-level x_test / test_labels, so model selection uses the
        # same data the plotting cell later reports on. Consider a separate
        # validation split.
        val_loss, val_acc = model.evaluate(x_test, test_labels)
        print(f"Loss: {val_loss}; Accuracy: {val_acc};")

        if val_loss < least_val_loss:
            model.save(best_model_path)
            least_val_loss = val_loss
            least_val_loss_params = [n_layers, n_units, dropout, rate]

    print(least_val_loss_params)
    print(least_val_loss)

### 2.1 Tuning

In [None]:
# Search space for the automated grid search. The trailing comments keep the
# alternative value sets noted by the author (presumably from earlier rounds).
search_space = {
    "layers": [2, 3, 4, 5],  # 1, 2, 3
    "units": [32, 64, 96, 128],  # 8, 16, 32, 64
    "dropout_rates": [0.2],
    "learning_rates": [0.0015, 0.00175],  # 0.001, 0.0015, 0.002
}

tune_fnn_model(df=df_2_features, **search_space)

### 2.2 Manual tuning

In [None]:
# Hand-picked configuration for a single training run on the two-feature
# dataset. Saving, history plot and report are left at train_fnn_model's
# defaults (none of the disable_* flags are passed here).
manual_config = {
    "epochs": 100,
    "patience": 20,
    "layers": 3,
    "units": 128,
    "dropout_rate": 0.2,
    "learning_rate": 0.0015,
    "verbose": 2,
}

train_fnn_model(df_2_features, **manual_config)

### 2.3 Plotting

In [None]:
# File name of the saved model inside OUTPUT_PATH. The commented names are
# alternative saved models that can be swapped in.
model_file = "fnn_model.keras"
# "gov-inst_gdp-pc_3_128_0.2_0.00175.keras"
# "pol_gdp-pc_4_128_0.2_0.0015.keras"
model = load_model(os.path.join(os.environ["OUTPUT_PATH"], model_file))

# Predicted class = index of the highest model output for each test row.
y_pred = model.predict(x_test).argmax(axis=1)
print(classification_report(test_labels, y_pred))

# Attach the predictions to a new frame. Assigning the column through an
# alias of `test` would modify `test` itself, leaving hidden state behind for
# other cells and possibly triggering SettingWithCopyWarning.
# NOTE(review): argmax yields 0-based indices, while subplot_scatter selects
# rows whose risk value is 1, 2 or 3. Confirm the label encoding lines up,
# otherwise the "Predicted" panels are shifted by one class.
result = test.assign(predicted_country_risk=y_pred)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10))

# Top row: every test sample, coloured by predicted vs. actual risk.
subplot_scatter(ax1, result, "predicted_country_risk", "Predicted")
subplot_scatter(ax2, result, "country_risk", "Actual")

# Bottom row: only the misclassified samples, same two colourings.
is_wrong = y_pred != test_labels
wrong = result[is_wrong]
subplot_scatter(ax3, wrong, "predicted_country_risk", "Wrong - Predicted")
subplot_scatter(ax4, wrong, "country_risk", "Wrong - Actual")

plt.show()