In [None]:
import numpy as np
from keras import Input, Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.metrics import Precision, Recall

# AlexNet-style binary classifier: five convolutional stages followed by two
# 4096-unit dense layers and a single sigmoid output unit.
model = Sequential()
input_shape = (224, 224, 3)
num_classes = 1

# Declare the input with an explicit Input layer rather than passing
# `input_shape=` to the first Conv2D.
model.add(Input(shape=input_shape))

# Layer 1
model.add(Conv2D(96, kernel_size=(11, 11), strides=4, padding='valid'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

# Layer 2
model.add(Conv2D(256, kernel_size=(5, 5), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

# Layer 3
model.add(Conv2D(384, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))

# Layer 4
model.add(Conv2D(384, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))

# Layer 5
model.add(Conv2D(256, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

model.add(Flatten())

# Fully Connected Layers
model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# Single sigmoid unit -> probability of the positive class.
model.add(Dense(num_classes, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # Explicit metric objects instead of the strings 'precision'/'recall':
    # string lookup of these two names is not available in every Keras
    # release, whereas the classes are. The names are pinned so the keys in
    # the training history stay 'precision' and 'recall'.
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

model.summary()


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV and patch folder read in the
# following cell are addressed under '/content/drive/My Drive'.
drive.mount('/content/drive')

In [None]:
import os
import re
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf

# === 1. Load labels CSV ===
# The resulting frame is read through its 'Case' and 'Class' columns below.
labels = pd.read_csv(
    '/content/drive/My Drive/case_grade_match.csv',  # Adjust path if needed
)

# === 2. Group patches by case number ===
def group_patches(patch_dir):
    """Group the files in ``patch_dir`` by the case number in their name.

    A file is included when its name contains ``case_<digits>``; files
    without that pattern are ignored.

    Args:
        patch_dir: Directory whose entries are scanned (not recursive).

    Returns:
        dict mapping ``int`` case number -> list of full paths
        (``patch_dir`` joined with the file name). Keys are in ascending
        numeric order and each list is sorted by file name, so the result
        does not depend on the arbitrary order returned by ``os.listdir``.
    """
    case_patches = {}
    # os.listdir gives no ordering guarantee; sort so downstream steps that
    # depend on ordering (e.g. a seeded train/test split) are reproducible.
    for filename in sorted(os.listdir(patch_dir)):
        match = re.search(r"case_(\d+)", filename)
        if match:
            case_num = int(match.group(1))
            case_patches.setdefault(case_num, []).append(os.path.join(patch_dir, filename))
    # Rebuild the dict with numerically ordered keys (insertion order above
    # is lexicographic by file name, e.g. case_10 before case_2).
    return dict(sorted(case_patches.items()))

image_folder_path = '/content/drive/My Drive/filtered_patches'
patches = group_patches(image_folder_path)

# === 3. Filter out class == 2 ===
case_nums = list(patches.keys())
# Pick the label rows whose 'Case' value has patches on disk. Matching on the
# column (rather than on row position `case - 1`) stays correct when the CSV
# is unsorted or has gaps, and cannot raise KeyError for a case that has
# patches but no label row. to_numeric tolerates a 'Case' column that was
# parsed as strings or floats; unparsable values become NaN and never match.
case_column = pd.to_numeric(labels['Case'], errors='coerce')
dataset = labels[case_column.isin(case_nums)]

filtered = dataset[dataset['Class'] != 2.0]
X = filtered['Case'].reset_index(drop=True)
y = filtered['Class'].reset_index(drop=True)

# === 4. Train-test-validation split ===
# The split is done on case numbers, so all patches of one case end up in the
# same subset. 20% of cases go to test, then 20% of the remainder to validation.
train_X, test_X, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
train_X, val_X, y_train, y_val = train_test_split(train_X, y_train, test_size=0.2, stratify=y_train, random_state=40)
# === 5. Helper to create image-label pairs ===
def make_image_label_list(case_ids, patch_dict, labels_df):
    """Expand case numbers into parallel lists of patch paths and binary labels.

    Args:
        case_ids: Iterable of case numbers to include.
        patch_dict: Mapping of ``int`` case number -> list of patch paths.
        labels_df: DataFrame with a 'Case' column and a 'Class' column.

    Returns:
        ``(image_paths, image_labels)``: two lists of equal length. Every
        patch of a case receives that case's label: 0 when the case's
        'Class' equals 1, otherwise 1.

    Cases with no row in ``labels_df`` or no entry in ``patch_dict`` are
    skipped. When a case has several rows, the first one is used.
    """
    image_paths = []
    image_labels = []

    for case_num in case_ids:
        label_row = labels_df[labels_df['Case'] == case_num]
        if label_row.empty:
            continue
        # A labelled case may have no patches on disk; skip it instead of
        # failing with KeyError, mirroring the missing-label case above.
        case_paths = patch_dict.get(int(case_num), [])
        label = label_row['Class'].values[0]
        label = 0 if label == 1 else 1  # Re-labeling logic: 1 -> 0 (benign), others -> 1 (high-grade CMIL)
        image_paths.extend(case_paths)
        image_labels.extend([label] * len(case_paths))

    return image_paths, image_labels

# Build the (paths, labels) pair for each split in train / val / test order.
(train_paths, train_labels), (val_paths, val_labels), (test_paths, test_labels) = (
    make_image_label_list(split_cases, patches, labels)
    for split_cases in (train_X, val_X, test_X)
)

# === 6. Function to create a tf.data.Dataset ===
def load_dataset(image_paths, labels, batch_size=32, shuffle=True):
    """Build a batched, prefetched ``tf.data.Dataset`` of (image, label) pairs.

    Each image file is read, decoded as a 3-channel PNG, resized to 256x256,
    center-cropped to the central 224/256 fraction, divided by 255 and then
    standardized per image.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``image_paths``.
        batch_size: Number of examples per batch.
        shuffle: When True, the examples are shuffled with a buffer covering
            the whole dataset.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    def preprocess(image_path, label):
        img = tf.io.read_file(image_path)
        img = tf.image.decode_png(img, channels=3)
        img = tf.image.resize(img, [256, 256])
        img = tf.image.central_crop(img, central_fraction=224/256)
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.per_image_standardization(img)
        return img, label

    ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    if shuffle:
        # Shuffle the lightweight (path, label) pairs *before* decoding.
        # Shuffling after `map` with a full-size buffer would keep every
        # decoded float32 image in memory at once.
        ds = ds.shuffle(buffer_size=len(image_paths))
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Only the training stream is shuffled. Validation and test metrics do not
# depend on example order, so those two are left in their original order.
train_ds = load_dataset(train_paths, train_labels)
val_ds = load_dataset(val_paths, val_labels, shuffle=False)
test_ds = load_dataset(test_paths, test_labels, shuffle=False)


In [None]:
from keras.callbacks import EarlyStopping

# Stop when validation loss has not improved for 3 consecutive epochs and
# roll the model back to the weights of the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Train for at most 5 epochs, evaluating on the validation set after each one.
history = model.fit(train_ds, epochs=5, validation_data=val_ds, callbacks=[early_stopping])

In [None]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix

# Collect ground-truth labels batch by batch in dataset order. test_ds is
# built with shuffle=False, so this order matches the predictions below.
y_true = np.concatenate([batch_labels.numpy() for _, batch_labels in test_ds], axis=0)

# Sigmoid outputs -> hard 0/1 predictions at a 0.5 threshold.
y_pred_probs = model.predict(test_ds)
y_pred = (y_pred_probs > 0.5).astype("int32").flatten()

accuracy = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

# A notebook cell only displays its final expression, so the scalar metrics
# are printed explicitly; the confusion matrix remains the cell's output.
print(f"accuracy: {accuracy:.4f}")
print(f"recall:   {recall:.4f}")
print(f"f1:       {f1:.4f}")
cm