In [None]:
import pandas as pd
from sklearn.metrics import classification_report
import os
import warnings
from configs.data import MACHINE_LEARNING_DATASET_PATH
from machine_learning.utils import split_data, scale_dataset
from machine_learning.neural_networks.utils import get_tensorflow_version, plot_history
from machine_learning.neural_networks.fnn import train_fnn_model
from machine_learning.neural_networks.ann import train_ann_model
from tensorflow.keras.models import load_model
from configs.enums import Column, RiskClassifications
warnings.simplefilter(action='ignore', category=FutureWarning)

## 1. Loading the dataset

In [None]:
# Load the full dataset from the Excel file configured in MACHINE_LEARNING_DATASET_PATH.
df = pd.read_excel(MACHINE_LEARNING_DATASET_PATH)
# split_data yields three partitions; only `test` is processed further in this cell.
train, valid, test = split_data(df)
# Scale the test partition without oversampling. `x_test` and `test_labels`
# are read as globals by tune_fnn_model, tune_ann_model and the evaluation
# cells below, so this cell has to run before any of them.
test, x_test, test_labels = scale_dataset(test, oversample=False)

## 2. FNN

In [None]:
def tune_fnn_model(df, layers, units, dropout_rates, learning_rates, patience=[10, 20]):
    """Grid-search FNN hyper-parameters and keep the model with the lowest loss.

    Every combination of the given values is trained with ``train_fnn_model``
    (``epochs=200``, saving/plotting/reporting disabled) and scored with
    ``model.evaluate`` on the notebook-level ``x_test`` / ``test_labels``.
    Each new best model is checkpointed under ``OUTPUT_PATH``; once the search
    is over the checkpoint is renamed to a file name that encodes the winning
    hyper-parameters, and the training history plot and classification report
    of that same model are shown.

    NOTE(review): candidates are ranked on the test split, so the final
    report is presumably optimistic -- consider ranking on the validation
    split instead.

    Args:
        df: Dataset forwarded unchanged to ``train_fnn_model``.
        layers: Candidate values for the ``layers`` argument.
        units: Candidate values for the ``units`` argument.
        dropout_rates: Candidate values for the ``dropout_rate`` argument.
        learning_rates: Candidate values for the ``learning_rate`` argument.
        patience: Candidate values for the ``patience`` argument. The default
            list is only iterated, never mutated.

    Returns:
        ``(model, history, num_classes)`` of the best-scoring combination,
        exactly as returned by ``train_fnn_model`` for that combination.

    Raises:
        ValueError: If the grid is empty, or if no combination produced a loss
            below infinity (for example when every loss was NaN).
        KeyError: If the ``OUTPUT_PATH`` environment variable is not set.
    """
    import time
    from datetime import timedelta
    from itertools import product

    def to_hh_mm_ss(seconds):
        # str(timedelta) looks like "H:MM:SS.ffffff"; keep the part before the dot.
        return str(timedelta(seconds=seconds)).rsplit(".")[0]

    epochs = 200

    # Resolve the output directory up front so that a missing OUTPUT_PATH
    # fails before any training time has been spent.
    output_dir = os.environ["OUTPUT_PATH"]
    checkpoint_path = os.path.join(output_dir, "tuning_least_val_loss.fnn.keras")

    # Same visiting order as nested loops over
    # layers > units > dropout_rates > learning_rates > patience.
    grid = list(product(layers, units, dropout_rates, learning_rates, patience))
    total = len(grid)
    if total == 0:
        raise ValueError("Nothing to tune: every hyper-parameter list needs at least one value.")

    least_val_loss = float('inf')
    best_params = None
    best_model = best_history = best_num_classes = None
    time_past = 0

    print("[prev: N/A] [eta: TBD]")

    for i, (n_layers, n_units, dr, lr, pt) in enumerate(grid, start=1):
        start_time = time.time()

        print(f"[{i}/{total}] Layers: {n_layers}; Units: {n_units}; Dropout rate: {dr}; Learning rate: {lr}; Patience: {pt};")

        # TODO: add batch_size param to tune
        model, history, num_classes = train_fnn_model(
            df,
            epochs=epochs,
            patience=pt,
            layers=n_layers,
            units=n_units,
            dropout_rate=dr,
            learning_rate=lr,
            verbose=0,
            disable_save=True,
            disable_plot_history=True,
            disable_print_report=True)

        val_loss, val_acc = model.evaluate(x_test, test_labels)
        print(f"Loss: {val_loss}; Accuracy: {val_acc};")
        if val_loss < least_val_loss:
            model.save(checkpoint_path)
            least_val_loss = val_loss
            best_params = (n_layers, n_units, dr, lr, pt)
            # Remember the winning objects: the plot, the report and the return
            # value must describe the saved model, not the last one trained.
            best_model, best_history, best_num_classes = model, history, num_classes

        duration = time.time() - start_time
        time_past += duration
        avg_duration = time_past / i
        # Projected total run time: elapsed so far plus the average for each remaining run.
        eta = time_past + avg_duration * (total - i)

        print(f"\n[eta: {to_hh_mm_ss(time_past)}/{to_hh_mm_ss(eta)}] [prev: {to_hh_mm_ss(duration)}] [avg: {to_hh_mm_ss(avg_duration)}]")

    if best_params is None:
        # `val_loss < inf` is never true for NaN or infinite losses.
        raise ValueError("No configuration produced a usable loss (all losses were NaN or infinite).")

    n_layers, n_units, dr, lr, pt = best_params
    print("\nLeast validation loss:")
    print(f"\tParams:\t {{Layers: {n_layers}; Units: {n_units}; Dropout rate: {dr}; Learning rate: {lr}; Patience: {pt};}}")
    print("\tLoss:\t", least_val_loss)

    best_model_file_name = f"tf-{get_tensorflow_version()}_Adam_{n_layers}_{n_units}_{dr}_{lr}_{epochs}_{pt}.fnn.keras"
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows when the target already exists.
    os.replace(checkpoint_path, os.path.join(output_dir, best_model_file_name))
    print(f"\nModel has been saved as '{best_model_file_name}'")

    plot_history(best_history, best_num_classes)

    # Predicted class = index of the largest model output per row.
    y_pred = best_model.predict(x_test).argmax(axis=1)
    print(classification_report(test_labels, y_pred))

    return best_model, best_history, best_num_classes


### 2.1 Tuning

In [None]:
# Only the lists passed below are searched. The "other values" comments carry
# over the values noted next to each argument, presumably from earlier sweeps
# (TODO confirm).
tune_fnn_model(
    df=df,
    # other values: 1, 2, 3, 4, 5, 6, 7
    layers=[7, 8, 9, 10],
    # other values: 8, 16, 32, 64, 96, 128, 160, 192
    units=[224, 256, 288, 320, 352],
    # other values: 0.2, 0.3, 0.4, 0.5
    dropout_rates=[0.2],
    # other values: 0.0001, 0.0005, 0.00075, 0.001, 0.00125, 0.0015, 0.00175, 0.002
    learning_rates=[0.00125, 0.0015, 0.00175],
    patience=[10, 20],
)

# Least validation loss:
# 	Params:  [8, 256, 0.2, 0.0015, 20]
# 	Loss:  0.1532142609357834

### 2.2 Loading from a file

In [None]:
# File name (inside OUTPUT_PATH) of the saved model to evaluate; the trailing
# comments keep alternative file names at hand.
model_file = "Risk_factor_fnn_model.keras" # "Adam_6_192_0.2_0.00175_0.1515021026134491_100.keras" # "Risk_factor_dnn_model.keras"
# `model` is a notebook global: shapify() reads whichever model was assigned last.
model = load_model(os.path.join(os.environ["OUTPUT_PATH"], model_file))

# Predicted class = index of the largest model output per row.
y_pred = model.predict(x_test).argmax(axis=1)
print(classification_report(test_labels, y_pred))

### 2.3 Manual tuning

In [None]:
# train_fnn_model is unpacked into (model, history, num_classes) inside
# tune_fnn_model, so a two-name target here would raise
# "ValueError: too many values to unpack". Only the model is needed, and the
# starred target absorbs however many extra values come back.
model, *_ = train_fnn_model(
    df,
    epochs=100,
    patience=20,
    layers=6,
    units=192,
    dropout_rate=0.2,
    learning_rate=0.00175,
)

### 2.4 SHAP

In [None]:
import sys

# The first whitespace-delimited token of sys.version is the bare version
# number for both "3.11.6 (main, ...)" and "3.11.6 | packaged by ..." banners;
# splitting on "|" only worked for the latter.
python_version = sys.version.split()[0]

# Gate on whether shap can actually be imported instead of on an exact
# interpreter patch version, which was only a proxy for "shap is installed".
try:
    import shap
except ImportError:
    print("Shap could not be installed on this docker image ('tensorflow-2150'), build 'tensorflow-2140' from docker compose.")
else:
    print("Shap imported.")

In [None]:
def shapify(data: pd.DataFrame):
    """Compute SHAP values for the notebook-level ``model`` on a slice of data.

    ``data`` is split with ``split_data`` and each partition is scaled with
    ``scale_dataset`` (only the training partition is oversampled). The scaled
    training features serve as background data for ``shap.KernelExplainer``
    around ``model.predict``; SHAP values are then computed for the scaled
    test features.

    Args:
        data: Frame handed to ``split_data``.

    Returns:
        ``(explainer, shap_values, x_test)``: the explainer, the values it
        produced, and the scaled test features they refer to.
    """
    train_part, valid_part, test_part = split_data(data)

    _, background, _ = scale_dataset(train_part, oversample=True)
    # The validation partition is scaled as well, although its result is unused.
    _, _, _ = scale_dataset(valid_part, oversample=False)
    _, features_to_explain, _ = scale_dataset(test_part, oversample=False)

    kernel_explainer = shap.KernelExplainer(model.predict, background)
    # nsamples=100 instead of shap's default (original note: 2 * X.shape[1] + 2048 = 2066).
    values = kernel_explainer.shap_values(features_to_explain, nsamples=100)

    return kernel_explainer, values, features_to_explain

In [None]:
# Explain only the first 1000 rows of the dataset (presumably to keep the
# KernelExplainer run time manageable -- TODO confirm).
explainer, shap_values, shap_x_test = shapify(df[:1000])
# NOTE(review): `df.columns[Column.COUNTRY_RISK]` indexes the column index with
# a single enum member; confirm that it yields one name per feature column of
# `shap_x_test`, which is what `feature_names` is expected to hold.
shap.summary_plot(shap_values, shap_x_test, feature_names=df.columns[Column.COUNTRY_RISK], class_names=RiskClassifications.get_names())

## 3. Artificial Neural Network (ANN)

In [None]:
def tune_ann_model(df, units, dropout_rates, learning_rates):
    """Grid-search ANN hyper-parameters and save the lowest-loss model.

    Each (units, dropout rate, learning rate) combination is trained with
    ``train_ann_model`` (``epochs=100``, ``patience=20``, saving/plotting/
    reporting disabled) and scored with ``model.evaluate`` on the
    notebook-level ``x_test`` / ``test_labels``. Whenever a combination beats
    the best loss so far, its model is written to
    ``Risk_factor_ann_model.keras`` under ``OUTPUT_PATH``.

    Args:
        df: Dataset forwarded unchanged to ``train_ann_model``.
        units: Candidate values for the ``units`` argument.
        dropout_rates: Candidate values for the ``dropout_rate`` argument.
        learning_rates: Candidate values for the ``learning_rate`` argument.

    Returns:
        None. The best parameters and the best loss are printed.
    """
    total_runs = len(units) * len(dropout_rates) * len(learning_rates)

    # Lazily yields combinations in the order units > dropout rate > learning rate.
    combinations = (
        (n_units, rate, step)
        for n_units in units
        for rate in dropout_rates
        for step in learning_rates
    )

    best_loss = float('inf')
    best_params = []

    for run, (n_units, rate, step) in enumerate(combinations, start=1):
        print(f"[{run}/{total_runs}] Units: {n_units}; Dropout rate: {rate}; Learning rate: {step};")

        candidate, _ = train_ann_model(
            df,
            epochs=100,
            patience=20,
            units=n_units,
            dropout_rate=rate,
            learning_rate=step,
            verbose=0,
            disable_save=True,
            disable_plot_history=True,
            disable_print_report=True,
        )

        loss, accuracy = candidate.evaluate(x_test, test_labels)
        print(f"Loss: {loss}; Accuracy: {accuracy};")

        if loss < best_loss:
            # Each new best overwrites the same file.
            candidate.save(os.path.join(os.environ["OUTPUT_PATH"], "Risk_factor_ann_model.keras"))
            best_loss = loss
            best_params = [n_units, rate, step]

    print(best_params)
    print(best_loss)

### 3.1 Tuning

In [None]:
# Single-point search: one candidate value per hyper-parameter.
tune_ann_model(
    df=df,
    units=[320],
    dropout_rates=[0.2],
    learning_rates=[0.002],
)

In [None]:
### 3.2 Manual tuning

In [None]:
# One ANN training run with fixed hyper-parameters. This rebinds the notebook
# global `model`, which shapify() reads.
model, _ = train_ann_model(
    df,
    epochs=100,
    patience=10,
    units=320,
    dropout_rate=0.2,
    learning_rate=0.002,
    verbose=2,
    disable_save=True,
)

# Score the freshly trained model on the scaled test split from section 1.
val_loss, val_acc = model.evaluate(x_test, test_labels)
print(f"Loss: {val_loss}; Accuracy: {val_acc};")