In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import tensorflow_datasets as tfds
import numpy as np
from torch.optim.lr_scheduler import CosineAnnealingLR

# Custom Dataset to convert TensorFlow dataset to PyTorch
class PatchCamelyonDataset(Dataset):
    """Map-style PyTorch dataset backed by a TensorFlow Datasets split.

    Each stored example is a dict with an ``'image'`` array laid out as
    (H, W, C) and a scalar ``'label'``. Items are returned as
    ``(image, label)`` where ``image`` is a float32 tensor in (C, H, W)
    order scaled to [0, 1] (before ``transform``) and ``label`` is a
    NumPy float32 scalar.
    """

    def __init__(self, tfds_dataset, transform=None):
        """Store the split and materialise all of its examples in memory.

        Args:
            tfds_dataset: TFDS dataset yielding dict examples.
            transform: Optional callable applied to the image tensor.
        """
        self.dataset = tfds_dataset
        self.transform = transform
        # Random access by index needs a concrete list, so the whole split
        # is pulled into memory as NumPy records here.
        self.data = list(tfds.as_numpy(self.dataset))

    def __len__(self):
        """Return the number of materialised examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        record = self.data[idx]
        # Scale pixel values to [0, 1]
        pixels = record['image'].astype(np.float32) / 255.0
        # Float label, as required by BCEWithLogitsLoss
        target = record['label'].astype(np.float32)
        # Channels-last (H, W, C) -> channels-first (C, H, W), then to tensor
        tensor = torch.from_numpy(np.transpose(pixels, (2, 0, 1)))
        if not self.transform:
            return tensor, target
        return self.transform(tensor), target

# Debug data loading
def debug_dataset(dataset, num_samples=5):
    """Print shape, label and value range for the first ``num_samples`` items.

    Args:
        dataset: Indexable object returning ``(image, label)`` pairs, where
            ``image`` exposes ``shape``, ``min()`` and ``max()``.
        num_samples: How many leading items (indices 0..num_samples-1) to show.
    """
    for sample_idx in range(num_samples):
        sample = dataset[sample_idx]
        image, label = sample
        summary = (
            f"Sample {sample_idx}: Image shape: {image.shape}, Label: {label}, "
            f"Image min: {image.min()}, Image max: {image.max()}"
        )
        print(summary)

# Define the Vision Transformer components
class PatchEmbedding(nn.Module):
    """Split an image into non-overlapping patches and embed each one.

    A convolution whose kernel size and stride both equal ``patch_size``
    projects every patch to an ``embed_dim``-sized vector.
    """

    def __init__(self, img_size, patch_size, in_channels, embed_dim):
        """
        Args:
            img_size: Side length of the (square) input image.
            patch_size: Side length of each square patch.
            in_channels: Number of channels in the input image.
            embed_dim: Size of the embedding produced for each patch.
        """
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        patches_per_side = img_size // patch_size
        self.n_patches = patches_per_side * patches_per_side
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        """Map images (B, C, H, W) to patch tokens (B, n_patches, embed_dim)."""
        feature_map = self.proj(x)  # (B, embed_dim, H/patch_size, W/patch_size)
        # Collapse the spatial grid into one token axis, then put tokens before features
        return feature_map.flatten(start_dim=2).transpose(1, 2)

class PositionalEncoding(nn.Module):
    """Prepend a learnable CLS token and add learnable position embeddings.

    Both parameters start at zero and are learned during training.
    """

    def __init__(self, n_patches, embed_dim):
        """
        Args:
            n_patches: Number of patch tokens per image (CLS token excluded).
            embed_dim: Size of each token embedding.
        """
        super().__init__()
        # One position slot per patch plus one for the CLS token
        self.pos_embed = nn.Parameter(torch.zeros(1, n_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))

    def forward(self, x):
        """Map (B, n_patches, embed_dim) to (B, n_patches + 1, embed_dim)."""
        batch = x.size(0)
        # Share the single CLS parameter across the batch without copying it
        cls = self.cls_token.expand(batch, -1, -1)
        with_cls = torch.cat((cls, x), dim=1)
        return with_cls + self.pos_embed

class TransformerEncoder(nn.Module):
    """Stack of standard transformer encoder layers over batch-first tokens.

    Input and output are both shaped (B, seq_len, embed_dim).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, num_layers, dropout):
        """
        Args:
            embed_dim: Token embedding size (``d_model``).
            num_heads: Number of attention heads; must divide ``embed_dim``.
            ff_dim: Hidden width of each layer's feed-forward network.
            num_layers: Number of stacked encoder layers.
            dropout: Dropout probability used inside each layer.
        """
        super(TransformerEncoder, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            activation='gelu',
            # The tokens fed to this module are (B, seq_len, embed_dim).
            # PyTorch's default is sequence-first, which would make attention
            # run across the images of a batch instead of across the patches
            # of one image, so each output would ignore its own image content.
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    def forward(self, x):
        """Encode tokens of shape (B, seq_len, embed_dim); the shape is preserved."""
        return self.transformer(x)

class VisionTransformer(nn.Module):
    """ViT-style binary classifier that emits one raw logit per image.

    Pipeline: patch embedding -> CLS token + positional embedding ->
    transformer encoder -> LayerNorm + Linear head applied to the CLS token.
    """

    def __init__(self, img_size, patch_size, in_channels, embed_dim, num_heads, ff_dim, num_layers, dropout):
        """
        Args:
            img_size: Side length of the square input image.
            patch_size: Side length of each square patch.
            in_channels: Number of image channels.
            embed_dim: Token embedding size used throughout the model.
            num_heads: Attention heads per encoder layer.
            ff_dim: Feed-forward hidden width per encoder layer.
            num_layers: Number of encoder layers.
            dropout: Dropout probability inside the encoder layers.
        """
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        self.pos_embed = PositionalEncoding(self.patch_embed.n_patches, embed_dim)
        self.transformer = TransformerEncoder(embed_dim, num_heads, ff_dim, num_layers, dropout)
        # Single output unit: the sigmoid is left to BCEWithLogitsLoss
        head_norm = nn.LayerNorm(embed_dim)
        head_proj = nn.Linear(embed_dim, 1)
        self.mlp_head = nn.Sequential(head_norm, head_proj)

    def forward(self, x):
        """Return logits of shape (B, 1) for a batch of images."""
        tokens = self.pos_embed(self.patch_embed(x))
        encoded = self.transformer(tokens)
        cls_state = encoded[:, 0]  # The CLS token sits at sequence index 0
        return self.mlp_head(cls_state)

# Hyperparameters
img_size = 96  # Side length of the square input images, in pixels
patch_size = 16  # 96 // 16 = 6 patches per side, i.e. 36 patch tokens per image
in_channels = 3
embed_dim = 384  # Smaller for faster convergence; 384 / 6 heads = 64 dims per head
num_heads = 6
ff_dim = 1536  # Feed-forward hidden width, 4 * embed_dim
num_layers = 6  # Simpler model to avoid overfitting
dropout = 0.1
batch_size = 64  # Conservative for Kaggle
learning_rate = 1e-4
num_epochs = 10  # Also used as T_max of the cosine learning-rate schedule

# Device configuration
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data preprocessing
# Every pipeline ends with the same per-channel scaling from [0, 1] to [-1, 1]
norm_stats = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# Random flips and rotation are applied to the training images only
train_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),
    transforms.Normalize(**norm_stats),
]
transform = transforms.Compose(train_augmentations)

# Load Patch Camelyon dataset
train_ds, train_info = tfds.load('patch_camelyon', split='train', as_supervised=False, with_info=True)
val_ds, test_ds = (
    tfds.load('patch_camelyon', split=split_name, as_supervised=False)
    for split_name in ('validation', 'test')
)

# Wrap TensorFlow datasets in PyTorch Dataset
train_dataset = PatchCamelyonDataset(train_ds, transform=transform)
val_dataset = PatchCamelyonDataset(val_ds, transform=transforms.Normalize(**norm_stats))
test_dataset = PatchCamelyonDataset(test_ds, transform=transforms.Normalize(**norm_stats))

# Debug dataset
print("Debugging train dataset:")
debug_dataset(train_dataset)

# Create DataLoaders
# Only the training loader shuffles; all loaders run in the main process
loader_kwargs = dict(batch_size=batch_size, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

# Initialize the model
vit_config = dict(
    img_size=img_size,
    patch_size=patch_size,
    in_channels=in_channels,
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
    num_layers=num_layers,
    dropout=dropout,
)
model = VisionTransformer(**vit_config)
model = model.to(device)

# Loss and optimizer
# The model emits raw logits, so the sigmoid is folded into the loss
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
# Cosine decay of the learning rate over the full run (stepped once per epoch)
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

# Training loop
for epoch in range(num_epochs):
    # --- Optimisation pass over the training split ---
    model.train()
    train_loss, train_correct, train_total = 0.0, 0, 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device).view(-1, 1)  # Column vector matching the (B, 1) logits

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running statistics: summed batch losses and thresholded-prediction hits
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        train_loss += loss.item()
        train_correct += (predicted == labels).sum().item()
        train_total += labels.size(0)

    scheduler.step()  # Learning rate is updated once per epoch
    train_accuracy = train_correct / train_total

    # --- Validation pass: no gradients, dropout disabled via eval() ---
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).view(-1, 1)
            outputs = model(images)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            val_loss += criterion(outputs, labels).item()
            val_correct += (predicted == labels).sum().item()
            val_total += labels.size(0)
    val_accuracy = val_correct / val_total

    # Reported losses are means over batches; accuracies are over samples
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, '
          f'Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss/len(val_loader):.4f}, '
          f'Val Accuracy: {val_accuracy:.4f}')

# Test the model
model.eval()
test_loss, test_correct, test_total = 0.0, 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device).view(-1, 1)  # Column vector matching the (B, 1) logits
        outputs = model(images)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        test_loss += criterion(outputs, labels).item()
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)
test_accuracy = test_correct / test_total

# Loss is averaged over batches; accuracy is over individual samples
print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.4f}')
# Persist only the learned weights (state dict), not the full module object
torch.save(model.state_dict(), '/kaggle/working/my_model.pth')

2025-04-18 19:37:33.230768: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745005053.452390      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745005053.515885      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Downloading and preparing dataset 7.48 GiB (download: 7.48 GiB, generated: Unknown size, total: 7.48 GiB) to /root/tensorflow_datasets/patch_camelyon/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating test examples...:   0%|          | 0/32768 [00:00<?, ? examples/s]

I0000 00:00:1745005249.218179      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Shuffling /root/tensorflow_datasets/patch_camelyon/incomplete.DDKWMA_2.0.0/patch_camelyon-test.tfrecord*...:  …

Generating train examples...:   0%|          | 0/262144 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/patch_camelyon/incomplete.DDKWMA_2.0.0/patch_camelyon-train.tfrecord*...: …

Generating validation examples...:   0%|          | 0/32768 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/patch_camelyon/incomplete.DDKWMA_2.0.0/patch_camelyon-validation.tfrecord*…

Dataset patch_camelyon downloaded and prepared to /root/tensorflow_datasets/patch_camelyon/2.0.0. Subsequent calls will reuse this data.
Debugging train dataset:
Sample 0: Image shape: torch.Size([3, 96, 96]), Label: 0.0, Image min: -1.0, Image max: 1.0
Sample 1: Image shape: torch.Size([3, 96, 96]), Label: 0.0, Image min: -1.0, Image max: 1.0
Sample 2: Image shape: torch.Size([3, 96, 96]), Label: 1.0, Image min: -1.0, Image max: 1.0
Sample 3: Image shape: torch.Size([3, 96, 96]), Label: 1.0, Image min: -1.0, Image max: 1.0
Sample 4: Image shape: torch.Size([3, 96, 96]), Label: 1.0, Image min: -1.0, Image max: 1.0




Epoch 1/10, Train Loss: 0.6956, Train Accuracy: 0.5004, Val Loss: 0.6939, Val Accuracy: 0.4995
Epoch 2/10, Train Loss: 0.6936, Train Accuracy: 0.5018, Val Loss: 0.6932, Val Accuracy: 0.5005
Epoch 3/10, Train Loss: 0.6935, Train Accuracy: 0.5008, Val Loss: 0.6935, Val Accuracy: 0.5005
Epoch 4/10, Train Loss: 0.6934, Train Accuracy: 0.5003, Val Loss: 0.6933, Val Accuracy: 0.4995
Epoch 5/10, Train Loss: 0.6933, Train Accuracy: 0.4993, Val Loss: 0.6932, Val Accuracy: 0.5005
Epoch 6/10, Train Loss: 0.6932, Train Accuracy: 0.4990, Val Loss: 0.6931, Val Accuracy: 0.4995
Epoch 7/10, Train Loss: 0.6932, Train Accuracy: 0.4990, Val Loss: 0.6932, Val Accuracy: 0.4995
Epoch 8/10, Train Loss: 0.6932, Train Accuracy: 0.5002, Val Loss: 0.6932, Val Accuracy: 0.5005
