In [None]:
!pip install medmnist

In [None]:
# %% [markdown]
# # Training ResNet‑50 (28) on PathMNIST with TensorFlow and CUDA
#
# This notebook demonstrates how to download and preprocess the PathMNIST dataset, define a ResNet‑50 architecture adapted for 28×28 input images, train the model using GPU acceleration with CUDA, evaluate its performance on the test set, and visualize the results using a confusion matrix.

# %% [code]
# Imports and GPU Setup
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Uncomment the following line if medmnist is not installed:
# !pip install medmnist

import medmnist
from medmnist import PathMNIST
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, add, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

# Enable GPU memory growth for every visible GPU so TensorFlow grows its
# allocation as needed rather than configuring a fixed reservation.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPUs detected. Optimized for CUDA.")
    except RuntimeError as err:
        # Memory growth must be configured before the GPUs are initialized.
        # Re-running this cell in a live kernel lands here; report it instead
        # of aborting the notebook.
        print(f"GPUs detected, but memory growth could not be set: {err}")
else:
    print("No GPU found. Running on CPU.")

# %% [markdown]
# ## 1. Data Acquisition & Loading
# This section downloads the PathMNIST dataset, normalizes the images, converts them to 3 channels if needed, converts labels to one-hot encoding, and creates TensorFlow datasets for training, validation, and testing.
#
# Note: We set `num_classes = 9` as PathMNIST does not provide this attribute.

# %% [code]
# Download and load the PathMNIST dataset
train_dataset = PathMNIST(split='train', download=True)
val_dataset   = PathMNIST(split='val', download=True)
test_dataset  = PathMNIST(split='test', download=True)

# Extract images and labels
X_train, y_train = train_dataset.imgs, train_dataset.labels
X_val,   y_val   = val_dataset.imgs,   val_dataset.labels
X_test,  y_test  = test_dataset.imgs,  test_dataset.labels

# Normalize pixel values to [0, 1]
X_train = X_train.astype('float32') / 255.
X_val   = X_val.astype('float32')   / 255.
X_test  = X_test.astype('float32')  / 255.


def _ensure_three_channels(images):
    """Return `images` as an (N, H, W, 3) array.

    An (N, H, W) array first gains a trailing channel axis; a single trailing
    channel is then repeated three times. Arrays that already have three
    channels are returned unchanged.

    Raises:
        ValueError: if the result is not a 4-D array with 3 channels.
    """
    if images.ndim == 3:
        images = images[..., np.newaxis]
    if images.shape[-1] == 1:
        # Repeat the existing channel axis; appending another axis here would
        # yield a 5-D (N, H, W, 1, 3) array.
        images = np.repeat(images, 3, axis=-1)
    if images.ndim != 4 or images.shape[-1] != 3:
        raise ValueError(f"Cannot convert images of shape {images.shape} to 3 channels")
    return images


# Ensure images have 3 channels (each split is checked on its own)
X_train = _ensure_three_channels(X_train)
X_val   = _ensure_three_channels(X_val)
X_test  = _ensure_three_channels(X_test)

# Since PathMNIST does not have a num_classes attribute, we explicitly set it to 9.
num_classes = 9

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, num_classes)
y_val   = to_categorical(y_val, num_classes)
y_test  = to_categorical(y_test, num_classes)

print("Training data:", X_train.shape, y_train.shape)
print("Validation data:", X_val.shape, y_val.shape)
print("Test data:", X_test.shape, y_test.shape)

# Create tf.data.Dataset objects for efficient loading.
# Only the training split is shuffled; validation and test keep their order.
# NOTE(review): a shuffle buffer of 1000 only mixes samples locally relative to
# the size of the training split - consider a larger buffer if memory allows.
batch_size = 64
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_ds   = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_ds  = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# %% [markdown]
# ## 2. Model Definition – ResNet‑50 (28)
# In this section, we define a ResNet‑50 model adapted for 28×28 images. Because of the small input size, the initial convolution uses a 3×3 kernel with stride 1 and no max pooling.
#
# We define two building blocks:
# - **identity_block:** Uses three convolution layers where the input is added back to the output.
# - **conv_block:** Similar to the identity block but includes a convolution on the shortcut to match dimensions.
#
# These blocks are then stacked to create the ResNet‑50 (28) architecture.

# %% [code]
# Define an identity block
def identity_block(input_tensor, filters, kernel_size=3):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Main path: 1x1 conv -> BN -> ReLU -> kxk conv ('same' padding) -> BN ->
    ReLU -> 1x1 conv -> BN. The main-path output is added to `input_tensor`
    and the sum goes through a final ReLU.

    Args:
        input_tensor: Keras tensor entering the block. It is added to the main
            path as-is, so its shape has to match the main-path output.
        filters: three filter counts, one per convolution, in order.
        kernel_size: side length of the middle convolution's square kernel.

    Returns:
        The output tensor of the block.
    """
    reduce_filters, spatial_filters, expand_filters = filters
    window = (kernel_size, kernel_size)

    # First 1x1 convolution
    branch = Conv2D(reduce_filters, (1, 1), kernel_initializer='he_normal')(input_tensor)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    # Spatial convolution; 'same' padding keeps height and width unchanged
    branch = Conv2D(spatial_filters, window, padding='same', kernel_initializer='he_normal')(branch)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    # Final 1x1 convolution (no activation before the residual sum)
    branch = Conv2D(expand_filters, (1, 1), kernel_initializer='he_normal')(branch)
    branch = BatchNormalization()(branch)

    merged = add([branch, input_tensor])
    return Activation('relu')(merged)

# Define a convolutional block with a shortcut path
def conv_block(input_tensor, filters, kernel_size=3, strides=(2,2)):
    """Bottleneck residual block with a convolutional (projection) shortcut.

    Main path: strided 1x1 conv -> BN -> ReLU -> kxk conv ('same' padding) ->
    BN -> ReLU -> 1x1 conv -> BN. The shortcut applies its own 1x1 conv with
    the same `strides` and the last filter count, followed by BN, so both
    paths can be added. The sum goes through a final ReLU.

    Args:
        input_tensor: Keras tensor entering the block.
        filters: three filter counts, one per main-path convolution, in order;
            the last one is also used for the shortcut convolution.
        kernel_size: side length of the middle convolution's square kernel.
        strides: strides of the first main-path conv and of the shortcut conv.

    Returns:
        The output tensor of the block.
    """
    reduce_filters, spatial_filters, expand_filters = filters
    window = (kernel_size, kernel_size)

    # Main path: the first 1x1 convolution carries the stride
    branch = Conv2D(reduce_filters, (1, 1), strides=strides, kernel_initializer='he_normal')(input_tensor)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(spatial_filters, window, padding='same', kernel_initializer='he_normal')(branch)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(expand_filters, (1, 1), kernel_initializer='he_normal')(branch)
    branch = BatchNormalization()(branch)

    # Shortcut path: project the input with the same stride and filter count
    projected = Conv2D(expand_filters, (1, 1), strides=strides, kernel_initializer='he_normal')(input_tensor)
    projected = BatchNormalization()(projected)

    merged = add([branch, projected])
    return Activation('relu')(merged)

# Assemble the ResNet-50 (28) model
def ResNet50_28(input_shape=(28,28,3), num_classes=9):
    """Build the ResNet-50 variant used here for small images.

    The stem is a single 3x3, stride-1 convolution with BN and ReLU and no
    max pooling. Four stages follow, each made of one conv_block and a number
    of identity_blocks (3, 4, 6 and 3 bottleneck blocks in total per stage),
    then global average pooling and a softmax classifier.

    Args:
        input_shape: shape of one input image, without the batch axis.
        num_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled Keras Model mapping images to class probabilities.
    """
    # (bottleneck filters, strides of the stage's conv_block, identity blocks after it)
    stage_specs = [
        ([64, 64, 256],    (1, 1), 2),   # stride 1 keeps the input resolution
        ([128, 128, 512],  (2, 2), 3),
        ([256, 256, 1024], (2, 2), 5),
        ([512, 512, 2048], (2, 2), 2),
    ]

    img_input = Input(shape=input_shape)

    # Stem: small kernel and stride, no max pooling, because the input is small
    features = Conv2D(64, (3, 3), strides=(1, 1), padding='same', kernel_initializer='he_normal')(img_input)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)

    for stage_filters, stage_strides, identity_count in stage_specs:
        features = conv_block(features, filters=stage_filters, strides=stage_strides)
        for _ in range(identity_count):
            features = identity_block(features, filters=stage_filters)

    pooled = GlobalAveragePooling2D()(features)
    outputs = Dense(num_classes, activation='softmax')(pooled)

    return Model(inputs=img_input, outputs=outputs)

# Build the ResNet-50 (28) network for 28x28 three-channel inputs with one
# output unit per class, then print its layer-by-layer summary.
model = ResNet50_28(input_shape=(28,28,3), num_classes=num_classes)
model.summary()

# %% [markdown]
# ## 3. Training
# We compile the model with the Adam optimizer and categorical crossentropy loss, and then train it for 10 epochs while monitoring performance on the validation dataset.

# %% [code]
# Configure training: Adam at a 1e-3 learning rate, categorical cross-entropy
# as the loss, and accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

# Fit for a fixed number of epochs, validating on val_ds during training
epochs = 10
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

# %% [markdown]
# ## 4. Evaluation
# After training, we evaluate the model’s performance on the test dataset.

# %% [code]
# Score the trained model on the test split. evaluate() returns the loss
# followed by the metrics given to compile() (accuracy only), hence two values.
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# %% [markdown]
# ## 5. Confusion Matrix
# In this final section, we generate predictions on the test set, compute the confusion matrix using scikit-learn, and visualize it with a heatmap.

# %% [code]
from sklearn.metrics import confusion_matrix

# Retrieve true labels and predicted probabilities from the test dataset.
# test_ds is built without shuffling, so the two passes over it (one for the
# labels, one inside predict) see the samples in the same order.
y_true = np.concatenate([labels for _, labels in test_ds], axis=0)
y_true_labels = np.argmax(y_true, axis=1)
y_pred_probs  = model.predict(test_ds)
y_pred_labels = np.argmax(y_pred_probs, axis=1)

# Compute the confusion matrix. Passing the full label set pins the matrix to
# num_classes x num_classes, so row/column i always refers to class i even if
# a class is absent from both the true labels and the predictions.
cm = confusion_matrix(y_true_labels, y_pred_labels, labels=np.arange(num_classes))

# Plot the confusion matrix
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt="d", cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

No GPU found. Running on CPU.
Training data: (89996, 28, 28, 3) (89996, 9)
Validation data: (10004, 28, 28, 3) (10004, 9)
Test data: (7180, 28, 28, 3) (7180, 9)


Epoch 1/10
[1m 189/1407[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m27:48[0m 1s/step - accuracy: 0.4866 - loss: 1.8257

In [None]:
# %% [code]
# Imports and GPU Setup
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from medmnist import PathMNIST
from sklearn.metrics import confusion_matrix, classification_report

# Pick the compute device once (CUDA when PyTorch can see a GPU, otherwise the
# CPU) and report which one was chosen.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPUs detected. Optimized for CUDA." if device.type == "cuda"
      else "No GPU found. Running on CPU.")

# Define a top-level function to ensure images have 3 channels.
from PIL import Image
import numpy as np

def ensure_three_channels(img):
    """Return `img` with three colour channels.

    - A PIL image is converted to RGB mode.
    - Anything else is treated as a NumPy-style array: a 2-D (H, W) array, or
      a 3-D array whose last axis has length 1, is replicated to three
      channels along the last axis.
    - Every other input is returned unchanged.
    """
    if isinstance(img, Image.Image):
        return img.convert("RGB")

    # Non-PIL inputs are assumed to expose `ndim` / `shape` like a NumPy array.
    dims = img.ndim
    if dims == 2:
        # (H, W) grayscale: three copies stacked along a new trailing axis.
        return np.stack((img, img, img), axis=-1)
    if dims == 3 and img.shape[-1] == 1:
        # (H, W, 1): repeat the single channel three times.
        return np.repeat(img, 3, axis=-1)
    return img

# Preprocessing applied to every sample: force three channels with the named
# function above (used instead of a lambda), then convert to a tensor.
transform = transforms.Compose([
    transforms.Lambda(ensure_three_channels),
    transforms.ToTensor()  # Converts the image (PIL image or numpy array) to a tensor and scales pixels to [0,1]
])

# %% [code]
# 1. Data Acquisition & Loading
# Define a transform that ensures images have 3 channels and are normalized.
# transform = transforms.Compose([
#     # If the image is 2D, add a channel dimension and repeat to get 3 channels.
#     transforms.Lambda(lambda img: img if img.ndim == 3 else np.repeat(img[..., None], 3, axis=-1)),
#     transforms.ToTensor()  # Converts to tensor and scales pixel values to [0, 1]
# ])

# Download (if needed) and open the three PathMNIST splits; every split uses
# the same preprocessing transform
train_dataset, val_dataset, test_dataset = (
    PathMNIST(split=split_name, transform=transform, download=True)
    for split_name in ('train', 'val', 'test')
)

# Number of target classes, set by hand
num_classes = 9

print("Training data samples:", len(train_dataset))
print("Validation data samples:", len(val_dataset))
print("Test data samples:", len(test_dataset))

# Wrap each split in a DataLoader; only the training split is shuffled, and
# loading happens in the main process (num_workers=0)
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader, test_loader = (
    DataLoader(split_data, batch_size=batch_size, shuffle=False, num_workers=0)
    for split_data in (val_dataset, test_dataset)
)



# %% [code]
# 2. Model Definition – ResNet‑50 (28)
# Define the basic building blocks: IdentityBlock and ConvBlock

class IdentityBlock(nn.Module):
    """Bottleneck residual block whose skip connection is the untouched input.

    Main path: 1x1 conv -> BN -> ReLU -> kxk conv -> BN -> ReLU -> 1x1 conv -> BN.
    The input is added to the main-path output before the final ReLU, so the
    last filter count must equal `in_channels`.
    """

    def __init__(self, in_channels, filters, kernel_size=3):
        """
        :param in_channels: Number of channels of the incoming tensor.
        :param filters: Three integers (list/tuple): output channels of the
            first, second and third convolution.
        :param kernel_size: Kernel size of the middle convolution; it is padded
            with kernel_size // 2.
        """
        super().__init__()
        reduce_ch, mid_ch, out_ch = filters
        pad = kernel_size // 2

        # All convolutions are bias-free; each is followed by a BatchNorm layer.
        self.conv1 = nn.Conv2d(in_channels, reduce_ch, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(reduce_ch)

        self.conv2 = nn.Conv2d(reduce_ch, mid_ch, kernel_size=kernel_size, padding=pad, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_ch)

        self.conv3 = nn.Conv2d(mid_ch, out_ch, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_ch)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the three conv/BN stages, add the input back, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Residual connection: the input itself is the shortcut.
        out += x
        return self.relu(out)

class ConvBlock(nn.Module):
    """Bottleneck residual block with a projection (1x1 conv + BN) shortcut.

    Main path: strided 1x1 conv -> BN -> ReLU -> kxk conv -> BN -> ReLU ->
    1x1 conv -> BN. The shortcut projects the input with a 1x1 convolution
    using the same stride and the last filter count, so both paths can be
    summed before the final ReLU.
    """

    def __init__(self, in_channels, filters, kernel_size=3, stride=2):
        """
        :param in_channels: Number of channels of the incoming tensor.
        :param filters: Three integers (list/tuple): output channels of the
            three main-path convolutions; the last is also the shortcut width.
        :param kernel_size: Kernel size of the middle convolution; it is padded
            with kernel_size // 2.
        :param stride: Stride shared by the first convolution and the shortcut
            convolution.
        """
        super().__init__()
        reduce_ch, mid_ch, out_ch = filters
        pad = kernel_size // 2

        # Main path (bias-free convolutions, each followed by BatchNorm).
        self.conv1 = nn.Conv2d(in_channels, reduce_ch, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(reduce_ch)

        self.conv2 = nn.Conv2d(reduce_ch, mid_ch, kernel_size=kernel_size, padding=pad, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_ch)

        self.conv3 = nn.Conv2d(mid_ch, out_ch, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_ch)

        # Shortcut path: matches the main path's stride and channel count.
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_channels, out_ch, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Sum the projected shortcut with the main path, then apply ReLU."""
        projected = self.shortcut(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += projected
        return self.relu(out)

# Assemble the ResNet‑50 (28) model using the blocks defined above.
# Note: For 28x28 images, the initial conv uses a 3x3 kernel with stride 1 (and no max pooling).

class ResNet50_28(nn.Module):
    """ResNet-50 style classifier for small three-channel images.

    The stem is a 3x3, stride-1 convolution with BN and ReLU (no max pooling).
    Four stages follow, each a ConvBlock plus 2, 3, 5 and 2 IdentityBlocks
    respectively, then adaptive average pooling to 1x1 and a linear layer that
    outputs raw class scores (no softmax).
    """

    def __init__(self, num_classes=9):
        """
        :param num_classes: Number of outputs of the final linear layer.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Stage 2 keeps the resolution (stride 1); stages 3-5 use stride 2.
        self.layer2 = self._stage(64, [64, 64, 256], stride=1, identity_blocks=2)
        self.layer3 = self._stage(256, [128, 128, 512], stride=2, identity_blocks=3)
        self.layer4 = self._stage(512, [256, 256, 1024], stride=2, identity_blocks=5)
        self.layer5 = self._stage(1024, [512, 512, 2048], stride=2, identity_blocks=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)

    @staticmethod
    def _stage(in_channels, filters, stride, identity_blocks):
        """Return one stage: a ConvBlock followed by `identity_blocks` IdentityBlocks."""
        stage_width = filters[-1]
        blocks = [ConvBlock(in_channels, filters, kernel_size=3, stride=stride)]
        blocks.extend(
            IdentityBlock(stage_width, filters, kernel_size=3)
            for _ in range(identity_blocks)
        )
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Map a batch of images to per-class scores of shape (batch, num_classes)."""
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)

        x = self.avgpool(x)
        return self.fc(torch.flatten(x, 1))

# Build the network, move it to the selected device, and print the module
# tree.
model = ResNet50_28(num_classes=num_classes).to(device)
print(model)

# %% [code]
# 3. Training
criterion = nn.CrossEntropyLoss()  # Note: expects raw logits and integer labels
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training phase: one optimizer step per batch ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        # Flatten labels to shape (batch,). reshape(-1) is used instead of a
        # bare squeeze() because squeeze() also drops the batch axis when a
        # batch holds a single sample, leaving a 0-d tensor that breaks the
        # loss and labels.size(0). CrossEntropyLoss takes int64 class indices,
        # hence the .long() cast.
        labels = labels.reshape(-1).long().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even with a short last batch.
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    # ---- Validation phase: eval mode, no gradient tracking ----
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.reshape(-1).long().to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_loss /= val_total
    val_acc = val_correct / val_total

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# %% [code]
# 4. Evaluation on the Test Set
# Evaluate in eval mode without gradient tracking, collecting every prediction
# and label for the confusion matrix / report that follow.
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        # Flatten labels to shape (batch,). A bare squeeze() would also drop
        # the batch axis for a single-sample batch and yield a 0-d tensor,
        # which breaks the loss, labels.size(0) and the extend() calls below.
        # .long() gives the int64 class indices CrossEntropyLoss takes.
        labels = labels.reshape(-1).long().to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Weight the batch-mean loss by the batch size for a per-sample average.
        test_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
test_loss /= test_total
test_acc = test_correct / test_total
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# %% [code]
# 5. Confusion Matrix
# Fix the label set explicitly: the matrix is then always
# num_classes x num_classes, and the report's target_names stay aligned with
# the class ids even if a class never occurs in the labels or predictions
# (without `labels`, a mismatch between the classes found in the data and the
# number of target_names makes classification_report fail).
class_ids = list(range(num_classes))

cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Optionally, print a classification report
print(classification_report(all_labels, all_preds, labels=class_ids,
                            target_names=[f"Class {i}" for i in class_ids]))

No GPU found. Running on CPU.
Training data samples: 89996
Validation data samples: 10004
Test data samples: 7180
ResNet50_28(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer2): Sequential(
    (0): ConvBlock(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), b

AttributeError: 'Image' object has no attribute 'ndim'