In [None]:
# Fetch the project sources; its `src/` folder is appended to sys.path in the
# next cell so that the project modules can be imported.
# NOTE(review): the clone is not pinned to a commit or tag, so a re-run may
# pick up different code than the one that produced the saved outputs.
!git clone https://github.com/andreazenotto/mesotheliomaSemiSupervisedClassification.git

In [None]:
import os
from tqdm import tqdm
import sys
import shutil
import numpy as np
import tensorflow as tf

sys.path.append('mesotheliomaSemiSupervisedClassification/src')

from simclr import *
from attention_mil import *

## Threshold fine-tuning and OOD detection

In [4]:
# Load the saved backbone model; it is handed to `extract_features` below.
# NOTE(review): despite its name, `backbone_model_dir` holds the path of the
# .h5 file itself, not of a directory.
backbone_model_dir = '/kaggle/input/camel-backbone-model/tensorflow2/default/1/best_backbone.h5'
backbone = tf.keras.models.load_model(backbone_model_dir)

In [15]:
# Build the evaluation dataset from the patches stored under `xpatches_dir`.
# `extract_features` and `generate_dataset` come from the star-imported project
# modules (simclr / attention_mil) - presumably they run the backbone over the
# patches and wrap the result in a tf.data pipeline; verify against the repo.
xpatches_dir = '/kaggle/input/xcamel-dataset'
features, labels = extract_features(xpatches_dir, backbone, batch_size=256)
# batch_size=1: the evaluation code below reads `y[0]` / `pred[0]`, i.e. it
# expects exactly one sample per dataset element. That code also treats an
# all-zero label vector as an out-of-distribution ('ood') sample.
dataset = generate_dataset(features, labels, num_classes=3, batch_size=1)

Processing ood: 100%|██████████| 6/6 [00:00<00:00, 189.80it/s]
Processing sarcomatoid: 100%|██████████| 2/2 [00:00<00:00, 252.50it/s]
Processing epithelioid: 100%|██████████| 2/2 [00:00<00:00, 124.95it/s]
Processing biphasic: 100%|██████████| 2/2 [00:00<00:00, 169.62it/s]
Extracting features: 100%|██████████| 12/12 [04:35<00:00, 22.98s/it]


In [6]:
# Load the saved classifier whose `predict` output is thresholded in the cells
# below (presumably the attention-MIL model, judging by the file name - confirm).
# NOTE(review): `attention_model_dir` is the path of the .keras file, not a directory.
attention_model_dir = '/kaggle/input/camel-mil-model/keras/default/1/best_mil.keras'
model = tf.keras.models.load_model(attention_model_dir)

In [7]:
def hashMapPredOOD(x):
    """Translate a class index into its label name.

    Indices 0, 1 and 2 are the in-distribution classes; index 3 is the
    out-of-distribution pseudo-class ``'ood'``.

    Args:
        x: Class index. May be a Python int, a NumPy integer scalar (e.g. the
            result of ``np.argmax``) or a 0-d integer array such as
            ``np.array(3)``.

    Returns:
        str: One of ``'epithelioid'``, ``'sarcomatoid'``, ``'biphasic'``,
        ``'ood'``.

    Raises:
        IndexError: If ``x`` is outside the range of the label list.
    """
    labels = ['epithelioid', 'sarcomatoid', 'biphasic', 'ood']
    # The previous ``x.astype(np.int64)`` discarded its result (astype returns
    # a copy) and crashed on plain Python ints; int() covers every input kind.
    return labels[int(x)]

def evaluate_thresholds(Ts, Te, Tb, dataset, predict_fn=None):
    """Score a triplet of per-class rejection thresholds on a labelled dataset.

    Decision rule being evaluated (the same one the final report cell applies):
    a sample is rejected as out-of-distribution when its *smallest* class
    probability is below the threshold of the class that probability belongs
    to; otherwise the sample is assigned to the arg-max class.

    Args:
        Ts: Threshold for the 'sarcomatoid' class.
        Te: Threshold for the 'epithelioid' class.
        Tb: Threshold for the 'biphasic' class.
        dataset: Iterable of ``(x, y)`` pairs holding one sample each;
            ``y[0]`` is the label vector, where an all-zero vector marks an
            OOD sample and otherwise the arg-max is the class index.
        predict_fn: Optional callable mapping ``x`` to a batch of class
            probabilities. Defaults to ``model.predict(x, verbose=0)`` on the
            notebook-level ``model``.

    Returns:
        tuple: ``(tar, trr)`` where ``tar`` (true accept rate) is the fraction
        of in-distribution samples that are not rejected *and* correctly
        classified, and ``trr`` (true reject rate) is the fraction of OOD
        samples that are rejected. A rate is 0 when its denominator is 0.
    """
    # Order must match the output order of the classifier.
    class_names = ('epithelioid', 'sarcomatoid', 'biphasic')
    thresholds = {
        'sarcomatoid': Ts,
        'epithelioid': Te,
        'biphasic': Tb
    }

    if predict_fn is None:
        predict_fn = lambda batch: model.predict(batch, verbose=0)

    correct_rejects = 0
    correct_accepts = 0
    total_ood = 0
    total_id = 0

    for x, y in dataset:
        pred = predict_fn(x)[0]
        target = y[0]

        # Rejection test: lowest class probability vs. that class' threshold.
        weakest = int(np.argmin(pred))
        rejected = bool(pred[weakest] < thresholds[class_names[weakest]])

        if np.all(target == 0):
            # OOD sample: the only correct outcome is a rejection.
            total_ood += 1
            if rejected:
                correct_rejects += 1
        else:
            # In-distribution sample: it must survive the very same rejection
            # test and be assigned to its true class. Checking
            # ``pred[true] >= T[true]`` instead (as done previously) is always
            # true for thresholds <= 1/3 once arg-max is right, which made TAR
            # insensitive to the thresholds being tuned.
            total_id += 1
            if not rejected and int(np.argmax(pred)) == int(np.argmax(target)):
                correct_accepts += 1

    # Compute metrics
    tar = correct_accepts / total_id if total_id > 0 else 0  # True Accept Rate
    trr = correct_rejects / total_ood if total_ood > 0 else 0  # True Reject Rate

    return tar, trr

In [28]:
# Exhaustive grid search over the three rejection thresholds.
# The objective is a weighted mix of true-reject rate (weight alpha) and
# true-accept rate (weight 1 - alpha).
alpha = 0.7
best_score = -np.inf
history = []  # ((Te, Ts, Tb), score) for every evaluated combination

ts_grid = np.linspace(0.01, 0.1, 10)  # Ts was observed to be critical for OOD detection
te_grid = np.linspace(0.05, 0.2, 10)  # Te is allowed to be higher than Ts
tb_grid = np.linspace(0.05, 0.1, 2)   # Tb was observed not to affect OOD identification (always high)

for Ts in ts_grid:
    for Te in te_grid:
        for Tb in tb_grid:
            tar, trr = evaluate_thresholds(Ts, Te, Tb, dataset)
            score = alpha * trr + (1 - alpha) * tar
            # Stored in (Te, Ts, Tb) order so that index i matches class index i.
            triplet = (Te, Ts, Tb)
            history.append((triplet, score))
            # ">=" keeps the last of several equally good triplets.
            if score >= best_score:
                best_score, best_triplet = score, triplet

best_te, best_ts, best_tb = best_triplet
print(f"Best Thresholds: Te = {best_te:.2f}, Ts = {best_ts:.2f}, Tb = {best_tb:.2f}")
print(f"Best Score (α = {alpha}): {best_score*100:.2f}%")

Best Thresholds: Te = 0.20, Ts = 0.10, Tb = 0.10
Best Score (α = 0.7): 90.00%


In [27]:
# Per-label count of misclassified samples, keyed by the true label.
wrong = dict.fromkeys(('epithelioid', 'sarcomatoid', 'biphasic', 'ood'), 0)

def hashMapPred(x):
    """Translate a class index into its label name.

    Uses the same index-to-label table as ``hashMapPredOOD``: indices 0-2 are
    the in-distribution classes and index 3 is ``'ood'``.

    Args:
        x: Class index as a Python int, NumPy integer scalar or 0-d integer
            array.

    Returns:
        str: The label name for ``x``.

    Raises:
        IndexError: If ``x`` is outside the range of the label list.
    """
    labels = ['epithelioid', 'sarcomatoid', 'biphasic', 'ood']
    # ``x.astype(np.int64)`` used to be called here, but its result was thrown
    # away and it failed on plain Python ints; int() handles all index kinds.
    return labels[int(x)]

# Final report: apply the selected thresholds to every sample and print the outcome.
correct = 0
total = 0

for x, y in dataset:
    pred = model.predict(x, verbose=0)[0]

    # True label: an all-zero label vector means 'ood' (index 3); otherwise
    # np.argmax(y[0]) is the class index (0 for [1, 0, 0], 1 for [0, 1, 0], ...).
    true_index = np.array(3) if np.all(y[0] == 0) else np.argmax(y[0])
    label_y = hashMapPredOOD(true_index)

    # Reject as 'ood' when the lowest class probability is below the threshold
    # of that class; best_triplet is indexed in class order (Te, Ts, Tb).
    weakest = np.argmin(pred)
    if np.min(pred) < best_triplet[weakest]:
        prediction = 'ood'
    else:
        prediction = hashMapPred(np.argmax(pred))

    if prediction == label_y:
        correct += 1
        # only for output format: pad the short 'ood' strings with a tab
        if label_y == 'ood':
            prediction, label_y = f"{prediction}\t", f"{label_y}\t"
        print(f"✅ Prediction = {prediction}\t label = {label_y}\t Probabilities = {pred}")
    else:
        wrong[label_y] += 1
        print(f"❌ Prediction = {prediction}\t label = {label_y}\t Probabilities = {pred}")
    total += 1

print(f"\n----------------------\nTotal accuracy: {correct/total:.2%}\n----------------------\n")
print(f"Total miss-classifications per subtype = {wrong}")

✅ Prediction = ood		 label = ood		 Probabilities = [0.58288497 0.06646728 0.35064772]
✅ Prediction = epithelioid	 label = epithelioid	 Probabilities = [0.5352522  0.1802229  0.28452483]
✅ Prediction = ood		 label = ood		 Probabilities = [0.580088   0.04424533 0.37566665]
✅ Prediction = ood		 label = ood		 Probabilities = [0.34199208 0.0247878  0.63322014]
✅ Prediction = epithelioid	 label = epithelioid	 Probabilities = [0.38952363 0.3545914  0.25588495]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.26508182 0.3542476  0.38067052]
✅ Prediction = ood		 label = ood		 Probabilities = [0.44722882 0.02935561 0.5234156 ]
❌ Prediction = biphasic	 label = sarcomatoid	 Probabilities = [0.22811866 0.38188115 0.3900002 ]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.42026716 0.15324026 0.4264926 ]
✅ Prediction = ood		 label = ood		 Probabilities = [0.39846134 0.03297584 0.56856287]
❌ Prediction = epithelioid	 label = sarcomatoid	 Probabilities = [0.42846343 0.274691