In [1]:
# Install dependencies
!pip install monai

import h5py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from monai.networks.nets import UNet
from scipy.stats import spearmanr
import random

# Set Seed
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

# Load Data
with h5py.File('/kaggle/input/el-hackathon-2025/elucidata_ai_challenge_data.h5', 'r') as f:
    train_spots = {k: pd.DataFrame(np.array(f[f'spots/Train/{k}'])) for k in f['spots/Train'].keys()}
    train_images = {k: np.array(f[f'images/Train/{k}']) for k in f['images/Train'].keys()}
    test_spots = {k: pd.DataFrame(np.array(f[f'spots/Test/{k}'])) for k in f['spots/Test'].keys()}
    test_images = {k: np.array(f[f'images/Test/{k}']) for k in f['images/Test'].keys()}

# Preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Dataset Class
class TissueDataset(Dataset):
    def __init__(self, spots, images, transform=None, patch_size=55):
        self.spots = spots
        self.images = images
        self.transform = transform
        self.patch_size = patch_size
    
    def __len__(self):
        return sum(len(v) for v in self.spots.values())
    
    def __getitem__(self, idx):
        slide_ids = list(self.spots.keys())
        slide_idx = idx // len(self.spots[slide_ids[0]])
        spot_idx = idx % len(self.spots[slide_ids[slide_idx]])

        slide_id = slide_ids[slide_idx]
        spot = self.spots[slide_id].iloc[spot_idx]
        
        x, y = int(spot['x']), int(spot['y'])
        label = torch.tensor(spot.iloc[2:].values, dtype=torch.float32)

        # Extract the patch around the spot
        half_patch = self.patch_size // 2
        img_h, img_w, _ = self.images[slide_id].shape  # Image dimensions

        # Adjust patch bounds while ensuring valid patch size
        x1, x2 = max(0, x - half_patch), min(img_w, x + half_patch + 1)
        y1, y2 = max(0, y - half_patch), min(img_h, y + half_patch + 1)

        # Ensure patch has minimum valid dimensions
        if x2 - x1 < 1:
            x2 = min(img_w, x1 + 1)  # Ensure at least 1 pixel width
        if y2 - y1 < 1:
            y2 = min(img_h, y1 + 1)  # Ensure at least 1 pixel height

        image = self.images[slide_id][y1:y2, x1:x2, :]

        # Convert to tensor and permute dimensions
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)

        if self.transform:
            image = self.transform(image)

        return image, label



# 2D U-Net Model
class UNet2DPredictor(nn.Module):
    def __init__(self, num_classes=35):
        super(UNet2DPredictor, self).__init__()
        self.unet2d = UNet(
            spatial_dims=2,
            in_channels=3,
            out_channels=num_classes,
            channels=(16, 32, 64, 128, 256),
            strides=(2, 2, 2, 2),
            num_res_units=2,
        )
    
    def forward(self, x):
        x = self.unet2d(x)
        return x.mean(dim=(2, 3))  # Global average pooling

# Loss Function
class HybridLoss(nn.Module):
    def __init__(self, alpha=0.5):
        super(HybridLoss, self).__init__()
        self.mse = nn.MSELoss()
        self.alpha = alpha
    
    def forward(self, preds, targets):
        mse_loss = self.mse(preds, targets)
        rank_diffs = preds.unsqueeze(2) - preds.unsqueeze(1)
        target_diffs = targets.unsqueeze(2) - targets.unsqueeze(1)
        pl_loss = torch.log(1 + torch.exp(-rank_diffs * target_diffs)).mean()
        return self.alpha * mse_loss + (1 - self.alpha) * pl_loss

# Training Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = HybridLoss(alpha=0)

# Train & Evaluate Function
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

def evaluate(model, dataloader):
    model.eval()
    predictions, truths = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            preds = model(images).cpu().numpy()
            predictions.extend(preds)
            truths.extend(labels.numpy())
    return np.mean([spearmanr(p, t).correlation for p, t in zip(predictions, truths)])

# Load data for cross-validation
last_slide = list(train_spots.keys())[-1]
train_spots_cv = {k: v for k, v in train_spots.items() if k != last_slide}
val_spots_cv = {last_slide: train_spots[last_slide]}
train_images_cv = {k: v for k, v in train_images.items() if k != last_slide}
val_images_cv = {last_slide: train_images[last_slide]}

train_dataset = TissueDataset(train_spots_cv, train_images_cv, transform, patch_size=1)  # Starting with 1 pixel
val_dataset = TissueDataset(val_spots_cv, val_images_cv, transform, patch_size=1)  
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Model Training with Patch Size Variations
patch_sizes = [45]

for patch_size in patch_sizes:
    print(f"Training model with patch size {patch_size} pixels.")
    
    # Create a new model for each patch size
    model = UNet2DPredictor().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    
    # Create new dataset with the updated patch size
    train_dataset = TissueDataset(train_spots_cv, train_images_cv, transform, patch_size=patch_size)
    val_dataset = TissueDataset(val_spots_cv, val_images_cv, transform, patch_size=patch_size)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
    
    # Train the model
    best_score = -np.inf
    patience, patience_counter = 10, 0
    for epoch in range(100):
        train_epoch(model, train_loader, optimizer, criterion)
        score = evaluate(model, val_loader)
        print(f"Epoch {epoch+1}, Validation Spearman: {score:.4f}")
        
        if score > best_score:
            best_score = score
            patience_counter = 0
            torch.save(model.state_dict(), f"best_model_patch_{patch_size}.pth")
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break
    
    # Load Best Model for Inference
    model.load_state_dict(torch.load(f"best_model_patch_{patch_size}.pth"))
    
    # Inference and Save Submission
    model.eval()
    test_dataset = TissueDataset(test_spots, test_images, transform, patch_size=patch_size)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)
    predictions = []
    
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(device)
            preds = model(images).cpu().numpy()
            predictions.extend(preds)
    
    # Save Submission File
    submission = pd.DataFrame(predictions, columns=[f'C{i+1}' for i in range(35)])
    submission.insert(0, 'ID', range(len(submission)))
    submission.to_csv(f'submission_patch_{patch_size}.csv', index=False)
    print(f"Submission for patch size {patch_size} saved as submission_patch_{patch_size}.csv")


Collecting monai
  Downloading monai-1.4.0-py3-none-any.whl.metadata (11 kB)
Downloading monai-1.4.0-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: monai
Successfully installed monai-1.4.0
Training model with patch size 45 pixels.
Epoch 1, Validation Spearman: 0.2387
Epoch 2, Validation Spearman: 0.2436
Epoch 3, Validation Spearman: 0.2438
Epoch 4, Validation Spearman: 0.2659
Epoch 5, Validation Spearman: 0.2821
Epoch 6, Validation Spearman: 0.3296
Epoch 7, Validation Spearman: 0.3357
Epoch 8, Validation Spearman: 0.3711
Epoch 9, Validation Spearman: 0.3538
Epoch 10, Validation Spearman: 0.2991
Epoch 11, Validation Spearman: 0.3963
Epoch 12, Validation Spearman: 0.3037
Epoch 13, Validation Spearman: 0.4128
Epoch 14, Validation Spearman: 0.3659
Epoch 15, Validation Spearman: 0.3545
Epoch 16, Validation Spearman: 0.3783
Epoch 17, Validation Spearman: 

  model.load_state_dict(torch.load(f"best_model_patch_{patch_size}.pth"))


Submission for patch size 45 saved as submission_patch_45.csv
