In [2]:
import numpy as np
from keras import Sequential
from keras.layers import Conv2D, GlobalAveragePooling2D, Dense, Flatten

# Init model

model = Sequential([
  Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=(224, 224, 3), padding='same'),
  GlobalAveragePooling2D(),
  Flatten(),
  Dense(1, activation='sigmoid'),
])

model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics = ['accuracy', 'precision', 'recall'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
import re
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf

# === 1. Load labels CSV ===
labels = pd.read_csv('/content/drive/My Drive/case_grade_match.csv')  # Adjust path if needed

# === 2. Group patches by case number ===
def group_patches(patch_dir):
    case_patches = {}
    for filename in os.listdir(patch_dir):
        match = re.search(r"case_(\d+)", filename)
        if match:
            case_num = int(match.group(1))
            case_patches.setdefault(case_num, []).append(os.path.join(patch_dir, filename))
    return case_patches

image_folder_path = '/content/drive/My Drive/filtered_patches'
patches = group_patches(image_folder_path)

# === 3. Filter out class == 2 ===
case_nums = list(patches.keys())
dataset = labels.loc[[(int(x)-1) for x in case_nums]]  # Adjusting for 0-indexing?

filtered = dataset[dataset['Class'] != 2.0]
X = filtered['Case'].reset_index(drop=True)
y = filtered['Class'].reset_index(drop=True)

# === 4. Train-test-validation split ===
train_X, test_X, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
train_X, val_X, y_train, y_val = train_test_split(train_X, y_train, test_size=0.2, stratify=y_train, random_state=40)

# === 5. Helper to create image-label pairs ===
def make_image_label_list(case_ids, patch_dict, labels_df):
    image_paths = []
    image_labels = []

    for case_num in case_ids:
        label_row = labels_df[labels_df['Case'] == case_num]
        if label_row.empty:
            continue
        label = label_row['Class'].values[0]
        label = 0 if label == 1 else 1  # Re-labeling logic: 1 -> 0 (benign), others -> 1 (high-grade CMIL)
        for path in patch_dict[int(case_num)]:
            image_paths.append(path)
            image_labels.append(label)

    return image_paths, image_labels

train_paths, train_labels = make_image_label_list(train_X, patches, labels)
val_paths, val_labels = make_image_label_list(val_X, patches, labels)
test_paths, test_labels = make_image_label_list(test_X, patches, labels)

# === 6. Function to create a tf.data.Dataset ===
def load_dataset(image_paths, labels, batch_size=32, shuffle=True):
    def preprocess(image_path, label):
        img = tf.io.read_file(image_path)
        img = tf.image.decode_png(img, channels=3)
        img = tf.image.resize(img, [256, 256])
        img = tf.image.central_crop(img, central_fraction=224/256)
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.per_image_standardization(img)
        return img, label

    path_ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    ds = path_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        ds = ds.shuffle(buffer_size=len(image_paths))
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_ds = load_dataset(train_paths, train_labels)
val_ds = load_dataset(val_paths, val_labels)
test_ds = load_dataset(test_paths, test_labels, shuffle=False)

In [5]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3
)

Epoch 1/3
[1m284/284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13832s[0m 18s/step - accuracy: 0.6055 - loss: 0.6550 - precision: 0.7264 - recall: 0.7031 - val_accuracy: 0.8139 - val_loss: 0.4955 - val_precision: 0.8139 - val_recall: 1.0000
Epoch 2/3
[1m284/284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 651ms/step - accuracy: 0.7413 - loss: 0.5657 - precision: 0.7413 - recall: 1.0000 - val_accuracy: 0.8139 - val_loss: 0.4884 - val_precision: 0.8139 - val_recall: 1.0000
Epoch 3/3
[1m284/284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 656ms/step - accuracy: 0.7414 - loss: 0.5663 - precision: 0.7414 - recall: 1.0000 - val_accuracy: 0.8137 - val_loss: 0.4850 - val_precision: 0.8138 - val_recall: 0.9998


In [6]:
loss, acc, prec, rec = model.evaluate(test_ds)
print(f"Loss: {loss}, Accuracy: {acc}, Precision: {prec}, Recall: {rec}")

[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2489s[0m 30s/step - accuracy: 0.7834 - loss: 0.5487 - precision: 0.7834 - recall: 1.0000
Loss: 0.5914034247398376, Accuracy: 0.7358490824699402, Precision: 0.7358490824699402, Recall: 1.0


In [9]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix

y_true = np.concatenate([y.numpy() for x, y in test_ds], axis=0)

y_pred_probs = model.predict(test_ds)
y_pred = (y_pred_probs > 0.5).astype("int32").flatten()

cm = confusion_matrix(y_true, y_pred)

cm

[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 369ms/step


array([[   0,  686],
       [   0, 1911]])