In [17]:
import os
import sys
import time
import pathlib
import types

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix, log_loss, jaccard_score, matthews_corrcoef, precision_recall_fscore_support, hamming_loss, cohen_kappa_score, roc_auc_score, log_loss

In [None]:
# Performance metrics computed by default. Every entry is a name that
# model_evalution checks for when deciding which scores to calculate.
global_metrics = [
    # Sample count and overall accuracy.
    "Support",
    "Accuracy",
    # Averaging modes; each one yields a precision / recall / F1 triple.
    "binary",
    "macro",
    "micro",
    "weighted",
    # Log loss (model_evalution drops it for 1D predictions), followed by
    # the remaining label-based scores.
    "Log_Loss",
    "Jaccard_Score",
    "Metrics_Matthews_Corrcoef",
    "Cohen_Kappa_Score",
    "Hamming_Loss",
]



def model_evalution(model_name: str, label_: np.ndarray, pred_: np.ndarray, metrics_: list[str]=None) -> dict:
    """Compute classification metrics for one set of predictions.

    Args:
        model_name: Name of the evaluated model. Not used in the computation;
            kept so existing callers keep working.
        label_: Ground-truth targets. Either 1D class labels or a 2D array
            that is reduced to class indices with ``argmax`` over axis 1.
        pred_: Predictions. Either 1D class labels or a 2D per-class score
            array, reduced the same way. ``"Log_Loss"`` needs the 2D form.
        metrics_: Names of the metrics to compute. ``None`` or an empty list
            selects ``global_metrics``. The passed list is never modified.

    Returns:
        Dict mapping result names to values. ``"binary"``, ``"macro"``,
        ``"micro"`` and ``"weighted"`` each contribute a precision, recall
        and F1 entry; the other metric names map to a single entry.
    """
    label_ = np.asarray(label_)
    pred_ = np.asarray(pred_)

    # Keep the raw scores for log loss and reduce both inputs to class
    # indices independently, so one-hot labels also work with 1D predictions.
    pred_cat = pred_ if pred_.ndim == 2 else None
    pred = np.argmax(pred_, axis=1) if pred_.ndim == 2 else pred_
    label = np.argmax(label_, axis=1) if label_.ndim == 2 else label_

    # The task counts as binary when only the classes 0/1 occur and the
    # score matrix (if any) has no more than two columns.
    observed = np.unique(np.concatenate([np.ravel(label), np.ravel(pred)]))
    is_binary = bool(np.isin(observed, (0, 1)).all()) and (
        pred_cat is None or pred_cat.shape[1] <= 2
    )

    # Work on a copy: filtering must not alter the caller's list or the
    # module-level defaults, otherwise later calls see a shrunken list.
    metrics = list(metrics_ or global_metrics)

    if "binary" in metrics and not is_binary:
        metrics = [name for name in metrics if name != "binary"]
        if metrics_:
            print("Warning: binary is removed from metrics, as it is only applicable to binary classification")
    if "Log_Loss" in metrics and pred_cat is None:
        metrics = [name for name in metrics if name != "Log_Loss"]
        if metrics_:
            print("Warning: Log_Loss is removed from metrics, as it is only applicable if pred_ is 2D")

    results = {}
    if "Support" in metrics:
        results["Support"] = len(label)
    if "Accuracy" in metrics:
        results["Accuracy"] = accuracy_score(label, pred)

    for average in ("binary", "macro", "micro", "weighted"):
        if average not in metrics:
            continue
        # The fourth returned value (support) is None for averaged scores.
        precision, recall, f_score, _ = precision_recall_fscore_support(
            label, pred, average=average
        )
        suffix = "" if average == "binary" else f"-{average}"
        results[f"Precision{suffix}"] = precision
        results[f"Recall{suffix}"] = recall
        results[f"F1_Score{suffix}"] = f_score

    if "Log_Loss" in metrics:
        # Passing the full label set keeps the column count aligned with
        # pred_cat even when some class never occurs in the labels.
        results["Log_Loss"] = log_loss(
            label, pred_cat, labels=np.arange(pred_cat.shape[1])
        )
    if "Jaccard_Score" in metrics:
        # The default average="binary" raises on multiclass targets, so
        # fall back to the macro average there.
        results["Jaccard_Score"] = jaccard_score(
            label, pred, average="binary" if is_binary else "macro"
        )
    if "Metrics_Matthews_Corrcoef" in metrics:
        results["Metrics_Matthews_Corrcoef"] = matthews_corrcoef(label, pred)
    if "Cohen_Kappa_Score" in metrics:
        results["Cohen_Kappa_Score"] = cohen_kappa_score(label, pred)
    if "Hamming_Loss" in metrics:
        results["Hamming_Loss"] = hamming_loss(label, pred)
    if "ROC_AUC_Score" in metrics:
        # NOTE(review): scored on hard class labels, so this only works for
        # binary targets; multiclass input makes roc_auc_score raise.
        results["ROC_AUC_Score"] = roc_auc_score(label, pred)

    return results

def create_model_evalution_df(model_name: str, label_: np.ndarray,  pred_: np.ndarray, metrics_: list[str]=None) -> pd.DataFrame:
    results = model_evalution(model_name, label_, pred_)
    
