In [None]:
import os
from tqdm import tqdm
import sys
import shutil
import numpy as np
import tensorflow as tf

# Fix the global random seed (10) and ask TensorFlow for deterministic ops,
# so that repeated runs of this notebook are comparable.
tf.keras.utils.set_random_seed(10)
tf.config.experimental.enable_op_determinism()

# Fetch the project sources into the working directory.
# NOTE(review): the clone is not pinned to a commit or tag, so the code that
# gets imported can change between runs — consider pinning a revision.
!git clone https://github.com/andreazenotto/tempProjectRepo.git

# Make the project's src directory importable so the `simclr` and
# `attention_mil` modules imported below can be resolved.
sys.path.append('tempProjectRepo/mesothelioma_project/src')

from simclr import *
from attention_mil import *

# Inference
We evaluate the performance of our proposed model against a standard CNN baseline (the same architecture as our backbone).

## camel-inference-dataset
This dataset consists of 8 whole-slide images (WSIs): 4 epithelioid and 4 biphasic, used for testing purposes.

### Our Proposal

In [None]:
# Root folder of the inference dataset passed to extract_features() below.
test_patches_dir = "/kaggle/input/camel-inference-dataset"
# Despite the `_dir` suffix this is the path of a single weights file
# (best_backbone.weights.h5), consumed by backbone.load_weights().
backbone_weights_dir = '/kaggle/input/camel-backbone-weights/tensorflow2/t05/1/best_backbone.weights.h5'

In [None]:
# Build a ResNet50 without its classification top and with global average
# pooling; weights=None skips any pre-trained initialisation because the
# weights are loaded from the checkpoint file on the next line.
# NOTE(review): ResNet50 is not imported explicitly in this notebook —
# presumably it arrives through one of the star imports; confirm.
backbone = ResNet50(include_top=False, weights=None, pooling="avg")
backbone.load_weights(backbone_weights_dir)

In [None]:
# Run the backbone over the test patches to obtain features and labels.
featuresTest, labelsTest = extract_features(test_patches_dir, backbone, batch_size=256)
# num_classes=3 is used even though the test set only holds two subtypes
# (epithelioid and biphasic); batch_size=1 so the evaluation loop below
# receives one element per iteration.
datasetTest = generate_dataset(featuresTest, labelsTest, num_classes=3, batch_size=1)

Processing epithelioid: 100%|██████████| 4/4 [00:00<00:00,  9.35it/s]
Processing biphasic: 100%|██████████| 4/4 [00:00<00:00, 14.27it/s]
Extracting features:   0%|          | 0/8 [00:00<?, ?it/s]I0000 00:00:1751804015.576505     109 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1751804015.576576     106 cuda_dnn.cc:529] Loaded cuDNN version 90300
Extracting features: 100%|██████████| 8/8 [04:04<00:00, 30.60s/it]


In [None]:
# Despite the `_dir` suffix this is the path of a single saved-model file
# (best_attention_mil.h5).
attention_model_dir = "/kaggle/input/camel-mil-weights/tensorflow2/default/1/best_attention_mil.h5"

# Load the full attention-MIL model (architecture + weights) from that file.
mhaMILTest = tf.keras.models.load_model(attention_model_dir)

In [31]:
# Per-subtype counter of misclassified slides, all starting at zero.
wrong = dict.fromkeys(('epithelioid', 'sarcomatoid', 'biphasic'), 0)

def hashMapPred(x):
    """Translate a class index into its mesothelioma subtype name.

    Args:
        x: Class index (0, 1 or 2). Accepts a plain Python int as well as a
            NumPy integer scalar such as the value returned by ``np.argmax``.

    Returns:
        str: 'epithelioid' for 0, 'sarcomatoid' for 1, 'biphasic' for 2.

    Raises:
        IndexError: If the index is outside the range of known labels.
    """
    labels = ['epithelioid', 'sarcomatoid', 'biphasic']
    # The previous `x.astype(np.int64)` discarded its result (a no-op) and
    # crashed on plain ints; convert explicitly instead.
    return labels[int(x)]

correct = 0
total = 0

for x, y in datasetTest:
    # Keep the first (only) row of the model output for this element.
    pred = mhaMILTest.predict(x, verbose=0)[0]

    # Current label: np.argmax(y[0]) yields index 0 for (1, 0, 0),
    # index 1 for (0, 1, 0), and so on.
    label_y = hashMapPred(np.argmax(y[0]))
    prediction = hashMapPred(np.argmax(pred))

    # A single branch handles both the hit counter and the per-subtype misses.
    if prediction == label_y:
        correct += 1
        print(f"✅ Prediction = {prediction}\t label = {label_y}\t Probabilities = {pred}")
    else:
        wrong[label_y] += 1
        print(f"❌ Prediction = {prediction}\t label = {label_y}\t Probabilities = {pred}")
    total += 1

print(f"\n-------------------\nTotal accuracy: {correct/total:.2%}\n-------------------\n")
print(f"Total miss-classifications per subtype = {wrong}")

❌ Prediction = biphasic	 label = epithelioid	 Probabilities = [0.23667729 0.28411353 0.4792092 ]
✅ Prediction = epithelioid	 label = epithelioid	 Probabilities = [0.4369059  0.1610397  0.40205443]
✅ Prediction = epithelioid	 label = epithelioid	 Probabilities = [0.5301123  0.29685363 0.17303403]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.33682933 0.32479516 0.3383755 ]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.3959599  0.20552082 0.39851925]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.3405206  0.19309033 0.46638906]
✅ Prediction = biphasic	 label = biphasic	 Probabilities = [0.28208977 0.32840988 0.38950038]
✅ Prediction = epithelioid	 label = epithelioid	 Probabilities = [0.5088232  0.20911068 0.2820661 ]

-------------------
Total accuracy: 87.50%
-------------------

Total miss-classifications per subtype = {'epithelioid': 1, 'sarcomatoid': 0, 'biphasic': 0}


### Standard CNN
- ResNet50 pre-trained on ImageNet; the final WSI prediction is the mean of the per-patch predictions.

In [None]:
# Baseline feature extractor: same ResNet50 configuration as the backbone
# (no classification top, global average pooling) but initialised with
# ImageNet weights instead of the project's own checkpoint.
standardCNN = ResNet50(include_top=False, weights='imagenet', pooling="avg")

In [None]:
# Same extraction pipeline as for the proposed model, but with the
# ImageNet-initialised network, so both approaches see the same test patches.
featuresCNN, labelsCNN = extract_features(test_patches_dir, standardCNN, batch_size=256)
datasetCNN = generate_dataset(featuresCNN, labelsCNN, num_classes=3, batch_size=1)

In [None]:
# Classification head applied to each 2048-dimensional feature vector:
# one hidden ReLU layer of 128 units followed by a 3-way softmax.
# NOTE(review): no weights are loaded and no fit() call appears in the visible
# cells, so unless training happens elsewhere this head runs with its random
# initialisation and the accuracy computed with it is not a meaningful baseline.
standardClassifier = tf.keras.Sequential([
    tf.keras.Input(shape=(2048,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

In [None]:
def classify_wsi(wsi_features, classifier_model):
    """Produce one slide-level prediction by averaging per-patch predictions.

    Args:
        wsi_features: Batch of feature vectors for a single slide, passed
            unchanged to ``classifier_model.predict``.
        classifier_model: Object exposing ``predict(inputs, verbose=...)``.

    Returns:
        The mean of the model's predictions taken along axis 0.
    """
    patch_predictions = classifier_model.predict(wsi_features, verbose=0)
    slide_prediction = np.mean(patch_predictions, axis=0)
    return slide_prediction

In [19]:
# Slide-level evaluation of the baseline: classify every patch, average the
# patch predictions, and compare the arg-max class with the true label.
correct = 0
total = 0
for x, y in datasetCNN:
    # Flatten the element to (n_patches, feature_dim). A bare tf.squeeze(x)
    # removes *every* size-1 axis, so a slide with a single patch would be
    # collapsed to a 1-D tensor and rejected by predict().
    # assumes x is (1, n_patches, feature_dim) as produced with batch_size=1 — TODO confirm
    x = tf.reshape(x, (-1, x.shape[-1]))
    pred = classify_wsi(x, standardClassifier)
    pred_label = np.argmax(pred)
    # y holds a single one-hot row, so the flattened arg-max is the class index.
    true_label = np.argmax(y)
    if pred_label == true_label:
        correct += 1
    total += 1

# Avoid a ZeroDivisionError when the dataset yields no slides.
accuracy = correct / total if total else float("nan")
print(f"WSI classification accuracy = {accuracy:.2%}")

WSI classification accuracy = 50.00%
