In [None]:
!git clone https://github.com/andreazenotto/mesotheliomaSemiSupervisedClassification.git

In [None]:
import os
from tqdm import tqdm
import sys
import shutil
import numpy as np
import tensorflow as tf

from tensorflow.keras.applications.resnet50 import (
    ResNet50,
    preprocess_input,
)

sys.path.append('mesotheliomaSemiSupervisedClassification/src')

from simclr import *
from attention_mil import *

In [3]:
patches_dir = "/kaggle/input/camel-dataset"

## Training

In [None]:
def get_patch_dataset(patches_dir):
    images = []
    labels = []

    mapping = {
        "epithelioid": [1, 0, 0],
        "sarcomatoid": [0, 1, 0],
        "biphasic": [0, 0, 1]
    }

    for class_dir in os.listdir(patches_dir):
        class_path = os.path.join(patches_dir, class_dir)
        class_name = class_dir.split('_')[1].lower()
        if os.path.isdir(class_path):
            for wsi_dir in os.listdir(class_path):
                wsi_path = os.path.join(class_path, wsi_dir)
                if os.path.isdir(wsi_path):
                    for img_name in os.listdir(wsi_path):
                        img_path = os.path.join(wsi_path, img_name)
                        images.append(img_path)
                        labels.append(mapping[class_name])

    return images, labels


def build_dataset(image_paths, labels, batch_size=128, shuffle=True):
    path_ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    ds = path_ds.map(lambda x, y: (load_and_augment(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds


def build_model():
    backbone = ResNet50(include_top=False, weights='imagenet', pooling='avg')
    
    classifier = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(2048,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')
    ])
    
    inputs = tf.keras.layers.Input(shape=(224, 224, 3))
    features = backbone(inputs, training=False)
    outputs = classifier(features)
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    return model

In [5]:
image_paths, labels = get_patch_dataset(patches_dir)
train_ds = build_dataset(image_paths, labels)

model = build_model()

model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(train_ds, epochs=10)

model.save("resnet_classifier.keras")

I0000 00:00:1752496800.955263      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1752496800.955966      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10


I0000 00:00:1752496868.825953      71 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2143s[0m 2s/step - accuracy: 0.9956 - loss: 0.0359
Epoch 2/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2091s[0m 2s/step - accuracy: 0.9475 - loss: 0.3666
Epoch 3/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2079s[0m 2s/step - accuracy: 0.9365 - loss: 0.3536
Epoch 4/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2077s[0m 2s/step - accuracy: 0.9020 - loss: 0.3497
Epoch 5/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2080s[0m 2s/step - accuracy: 0.8947 - loss: 0.4110
Epoch 6/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2079s[0m 2s/step - accuracy: 0.8675 - loss: 0.4361
Epoch 7/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2073s[0m 2s/step - accuracy: 0.9244 - loss: 0.3883
Epoch 8/10
[1m854/854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2069s[0m 2s/step - accuracy: 0.9049 - loss: 0.3752
Epoch 9/10
[1m854/854[0m [32m━━━

## Inference

In [None]:
def classify_wsi(patches_dir, model_dir=None, batch_size=256):
    if model_dir:
        model = tf.keras.models.load_model(model_dir)
    else:
        model = build_model()
    wsi_list, labels = get_images(patches_dir)
    y_pred = []
    y_score = []
    y_true = []

    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        @tf.function
        def predict_step(batch):
            return model(batch, training=False)

        for wsi_images, label in tqdm(zip(wsi_list, labels), total=len(wsi_list), desc="Classifying WSIs"):
            preds_list = []

            path_ds = tf.data.Dataset.from_tensor_slices(wsi_images)
            image_ds = path_ds.map(load_and_augment, num_parallel_calls=tf.data.AUTOTUNE)
            image_ds = image_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

            dist_ds = strategy.experimental_distribute_dataset(image_ds)

            for dist_batch in dist_ds:
                per_replica_preds = strategy.run(predict_step, args=(dist_batch,))
                batch_preds = tf.concat(strategy.gather(per_replica_preds, axis=0), axis=0)
                preds_list.extend(batch_preds.numpy())

            preds_array = np.array(preds_list)

            avg_score = np.mean(preds_array, axis=0)
            pred_class = np.argmax(avg_score)
            true_class = np.argmax(label)
            
            y_pred.append(pred_class)
            y_true.append(true_class)

    return np.array(y_pred), np.array(y_true)

In [7]:
test_patches_dir = "/kaggle/input/camel-inference-dataset"
model_dir = "resnet_classifier.keras"

y_pred, y_true = classify_wsi(test_patches_dir, model_dir)

correct = np.sum(y_pred == y_true)

print(f"Accuracy: {correct/len(y_pred):.2%}")

Processing epithelioid: 100%|██████████| 4/4 [00:00<00:00,  8.68it/s]
Processing biphasic: 100%|██████████| 4/4 [00:00<00:00, 14.74it/s]
Classifying WSIs:   0%|          | 0/8 [00:00<?, ?it/s]I0000 00:00:1752517651.355594      70 cuda_dnn.cc:529] Loaded cuDNN version 90300
Classifying WSIs: 100%|██████████| 8/8 [04:11<00:00, 31.46s/it]

Accuracy: 50.00%



