In [1]:
import os
import sys
# Make the project's source directory importable from this notebook.
# NOTE: both paths are relative, so they resolve against the kernel's current
# working directory.
PROJECT_PATH = ".."
SOURCE_PATH = os.path.join(PROJECT_PATH, "bayes-conf-mat")

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if SOURCE_PATH not in sys.path:
    sys.path.append(SOURCE_PATH)

In [2]:
import numpy as np
import sklearn
import sklearn.metrics

import aggregation
import metrics.binary
import metrics.multiclass
from conf_mat_io import load_preds_file, pred_target_to_confusion_matrix
from confusion_matrix import BayesianConfusionMatrix

# Cross-check the metrics derived from a BayesianConfusionMatrix against the
# scikit-learn reference implementations, one predictions file at a time.
# For every file the number of disagreeing metrics is counted and reported.


def _render_scalar(value):
    """Render a scalar metric with six decimal places."""
    return f"{value:.6f}"


def _render_mean(value):
    """Render the mean of ``value`` with six decimal places."""
    return f"{np.mean(value):.6f}"


def _render_vector(value):
    """Render a per-class metric using its default string form."""
    return f"{value}"


def _render_matrix(value):
    """Render a matrix with exactly six decimals per entry.

    NumPy arrays with one or more dimensions reject a ``:.6f`` format spec
    (TypeError), so the matrix is formatted through ``np.array2string``.
    """
    return np.array2string(np.asarray(value), precision=6, floatmode="fixed")


def _count_mismatch(label, reference, candidate, render=_render_scalar):
    """Compare two metric values and report them when they disagree.

    Args:
        label: Name of the metric, used as the prefix of the printed line.
        reference: Value computed by scikit-learn.
        candidate: Value computed by the project code.
        render: Callable turning a value into the text that is printed.

    Returns:
        0 when ``np.allclose(reference, candidate)`` holds, otherwise 1
        (after printing ``"<label>: <reference>, <candidate>"``).
    """
    if np.allclose(reference, candidate):
        return 0
    print(f"{label}: {render(reference)}, {render(candidate)}")
    return 1


_, _, files = next(os.walk("./confusion_matrices/"))

for fp in files:
    failed_tests = 0

    pred_target = load_preds_file(fp="./confusion_matrices/" + fp, structure="pred_target")
    confusion_matrix = pred_target_to_confusion_matrix(pred_target)

    # Column 0 is passed to scikit-learn as the predictions, column 1 as the
    # targets.
    y_pred = pred_target[:, 0]
    y_true = pred_target[:, 1]

    print(f"\n\n>>> {fp} <<<\n\n")

    bayes_conf_mat = BayesianConfusionMatrix(confusion_matrix)
    # NOTE(review): presumably wraps the observed matrix as a single
    # pseudo-sample so that the point estimates can be compared - confirm.
    pseudo_conf_mat_samples = bayes_conf_mat._use_input_as_sample()

    sklearn_confusion_matrix = sklearn.metrics.confusion_matrix(
        y_true=y_true,
        y_pred=y_pred,
        normalize="all",
    )
    bcm_confusion_matrix = pseudo_conf_mat_samples.norm_confusion_matrix[0]

    # Reported on two lines (one matrix per line block) rather than through
    # _count_mismatch, whose output is a single comma-separated line.
    if not np.allclose(sklearn_confusion_matrix, bcm_confusion_matrix):
        failed_tests += 1
        print(
            "Confusion Matrix:\n"
            f"{_render_matrix(sklearn_confusion_matrix)}\n"
            f"{_render_matrix(bcm_confusion_matrix)}"
        )

    # Multiclass Metrics ===========================================================
    sklearn_acc = sklearn.metrics.accuracy_score(y_true=y_true, y_pred=y_pred)
    bcm_acc = metrics.multiclass.compute_accuracy(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch("Acc", sklearn_acc, bcm_acc)

    sklearn_ba = sklearn.metrics.balanced_accuracy_score(y_true=y_true, y_pred=y_pred)
    bcm_ba = metrics.multiclass.compute_balanced_accuracy(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch("BA", sklearn_ba, bcm_ba)

    sklearn_ba_adj = sklearn.metrics.balanced_accuracy_score(
        y_true=y_true,
        y_pred=y_pred,
        adjusted=True,
    )
    bcm_ba_adj = metrics.multiclass.compute_adjusted_balanced_accuracy(
        pseudo_conf_mat_samples
    )[0]
    failed_tests += _count_mismatch("BA Adj.", sklearn_ba_adj, bcm_ba_adj)

    sklearn_kappa = sklearn.metrics.cohen_kappa_score(y1=y_true, y2=y_pred)
    bcm_kappa = metrics.multiclass.compute_cohens_kappa(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch("Cohen's Kappa", sklearn_kappa, bcm_kappa)

    sklearn_mcc = sklearn.metrics.matthews_corrcoef(y_true=y_true, y_pred=y_pred)
    bcm_mcc = metrics.multiclass.compute_mcc(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch("MCC", sklearn_mcc, bcm_mcc)

    # Binary metrics ===============================================================
    sklearn_precision = sklearn.metrics.precision_score(
        y_true=y_true,
        y_pred=y_pred,
        average=None,
    )
    # NaN entries are mapped to 0 before comparing against scikit-learn.
    bcm_precision = np.nan_to_num(
        pseudo_conf_mat_samples.positive_predictive_value[0], nan=0
    )
    failed_tests += _count_mismatch(
        "Precision", sklearn_precision, bcm_precision, render=_render_vector
    )

    sklearn_recall = sklearn.metrics.recall_score(
        y_true=y_true,
        y_pred=y_pred,
        average=None,
    )
    bcm_recall = pseudo_conf_mat_samples.true_positive_rate[0]
    failed_tests += _count_mismatch(
        "Recall", sklearn_recall, bcm_recall, render=_render_vector
    )

    sklearn_f1 = sklearn.metrics.f1_score(
        y_true=y_true,
        y_pred=y_pred,
        average=None,
    )
    bcm_f1 = metrics.binary.compute_f1(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch("F1", sklearn_f1, bcm_f1, render=_render_vector)

    sklearn_f1_macro = sklearn.metrics.f1_score(
        y_true=y_true,
        y_pred=y_pred,
        average='macro',
    )
    bcm_f1_macro = aggregation.numpy_batched_arithmetic_mean(
        metrics.binary.compute_f1(pseudo_conf_mat_samples)[0],
    )[0]
    failed_tests += _count_mismatch("F1 Macro", sklearn_f1_macro, bcm_f1_macro)

    sklearn_f1_weighted = sklearn.metrics.f1_score(
        y_true=y_true,
        y_pred=y_pred,
        average='weighted',
    )
    # The per-class F1 values are combined using p_condition as weights.
    # (An earlier variant weighted by the normalised diag_mass; its result was
    # overwritten before use, so that dead computation has been dropped.)
    bcm_f1_weighted = aggregation.numpy_batched_convex_combination(
        metrics.binary.compute_f1(pseudo_conf_mat_samples)[0],
        pseudo_conf_mat_samples.p_condition,
    )[0]
    failed_tests += _count_mismatch(
        "F1 Weighted", sklearn_f1_weighted, bcm_f1_weighted, render=_render_mean
    )

    sklearn_f3 = sklearn.metrics.fbeta_score(
        y_true=y_true,
        y_pred=y_pred,
        beta=3,
        average=None,
    )
    bcm_f3 = metrics.binary.compute_fbeta(pseudo_conf_mat_samples, beta=3)[0]
    failed_tests += _count_mismatch("F3", sklearn_f3, bcm_f3, render=_render_vector)

    sklearn_jaccard = sklearn.metrics.jaccard_score(
        y_true=y_true,
        y_pred=y_pred,
        average=None,
    )
    bcm_jaccard = metrics.binary.compute_jaccard_index(pseudo_conf_mat_samples)[0]
    failed_tests += _count_mismatch(
        "Jaccard", sklearn_jaccard, bcm_jaccard, render=_render_vector
    )

    if failed_tests > 0:
        print(f"Failed {failed_tests} tests")
    else:
        print("Passed all tests")




>>> sklearn_face_classification.csv <<<


Passed all tests


>>> sklearn_text_documents.csv <<<


Passed all tests


>>> sklearn_2.csv <<<


Passed all tests


>>> sklearn_1.csv <<<


Passed all tests


  self.norm_confusion_matrix / self.p_pred[:, np.newaxis, :]
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
# Reload the predictions from the last file returned by the directory walk and
# rebuild its confusion matrix.
# NOTE(review): `files` comes from an earlier cell, and which file is last
# depends on the order os.walk listed the directory in.
last_preds_path = "./confusion_matrices/" + files[-1]
pred_target = load_preds_file(
    fp=last_preds_path,
    structure="pred_target",
)
confusion_matrix = pred_target_to_confusion_matrix(pred_target)

In [20]:
# Zero out the entry at row 1, column 2 of the confusion matrix, in place.
# NOTE(review): this mutates the object built in an earlier cell, so the result
# depends on which cells ran before it; the execution counts in this notebook
# are non-sequential.
confusion_matrix[1, 2] = 0

In [27]:
# Hand-written 3x3 confusion matrix whose middle row and middle column are all
# zeros; it replaces whatever `confusion_matrix` held before.
_row_0 = [100, 0, 0]
_row_1 = [0, 0, 0]
_row_2 = [10, 0, 200]
confusion_matrix = np.array([_row_0, _row_1, _row_2])

In [28]:
# Sampling configuration: a fixed seed so that the draws are repeatable.
num_samples = 10000
rng = np.random.default_rng(seed=942)

# Wrap the current `confusion_matrix` and draw `num_samples` samples through
# its sample_posterior method.
bayes_conf_mat = BayesianConfusionMatrix(confusion_matrix)
samples = bayes_conf_mat.sample_posterior(num_samples=num_samples, rng=rng)