In [1]:
import numpy as np
import pandas as pd
import openslide
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random
import os

import warnings
warnings.filterwarnings('ignore')


In [2]:

# EXTRACT PATCHES
def percentage(mask):
    """Return the share of strictly positive entries of ``mask`` as a percentage.

    Args:
        mask: Numeric array or array-like (anything ``np.asarray`` accepts).

    Returns:
        A value in ``[0, 100]``. An empty mask yields ``0.0`` instead of the
        NaN (plus RuntimeWarning) produced by dividing 0 by 0.
    """
    mask = np.asarray(mask)
    if mask.size == 0:
        # No pixels at all means no tissue; avoid the 0/0 division.
        return 0.0
    return (np.sum(mask > 0) / mask.size) * 100

def extract_patches(im_slide, ms_slide, level, size_mask, num_patches_needed):
    """Randomly sample image patches whose tissue-mask tile is more than 50% positive.

    The mask slide is cut into a grid of ``size_mask`` tiles at pyramid
    ``level``. Tiles are drawn in a fixed pseudo-random order (seed 42); for
    every tile whose mask passes the threshold, the matching RGB region of the
    image slide is kept.

    Args:
        im_slide: Open slide holding the image pixels. Only
            ``level_dimensions`` and ``read_region`` are used.
        ms_slide: Open slide holding the tissue mask. ``level_downsamples``,
            ``level_dimensions`` and ``read_region`` are used.
        level: Pyramid level read from both slides.
        size_mask: ``(width, height)`` of one mask tile, in pixels of ``level``.
        num_patches_needed: Maximum number of patches to return.

    Returns:
        list of ``np.ndarray`` RGB patches. The list is shorter than
        ``num_patches_needed`` (possibly empty) when the slide does not contain
        enough qualifying tiles; the search stops once every tile was tried.
    """
    # read_region() takes its location in level-0 coordinates, so positions
    # expressed in `level` pixels of the mask are scaled back up before reading.
    f = int(ms_slide.level_downsamples[level])
    size_scale = im_slide.level_dimensions[level][0] // ms_slide.level_dimensions[level][0]
    coord_scale = im_slide.level_dimensions[0][0] // ms_slide.level_dimensions[level][0]
    size_image = (size_mask[0] * size_scale, size_mask[1] * size_scale)

    ms_width, ms_height = ms_slide.level_dimensions[level]

    grid = [(x_ms, y_ms)
            for y_ms in range(0, ms_height, size_mask[1])
            for x_ms in range(0, ms_width, size_mask[0])]

    # A private generator gives the same draw sequence as seeding the module
    # RNG with 42, without clobbering the global random state of the caller.
    rng = random.Random(42)
    visited = set()
    image_patches = []

    # Stop as soon as enough patches were found OR every tile has been tried;
    # without the second condition a slide with too little tissue loops forever.
    while len(image_patches) < num_patches_needed and len(visited) < len(grid):
        index = rng.randint(0, len(grid) - 1)
        if index in visited:
            continue
        visited.add(index)

        x_ms, y_ms = grid[index]
        mask_patch = ms_slide.read_region((x_ms * f, y_ms * f), level, size_mask).convert("L")
        if percentage(np.array(mask_patch)) > 50:
            # Only pay for the image read once the mask tile has qualified.
            x_im, y_im = x_ms * coord_scale, y_ms * coord_scale
            image_patch = im_slide.read_region((x_im, y_im), level, size_image).convert("RGB")
            image_patches.append(np.array(image_patch))

    return image_patches


In [3]:

# DATASET
class PatchDataset(Dataset):
    """Dataset yielding, per slide, a stack of tissue patches and their labels.

    Every file found in ``images_dir`` is one sample. Its tissue mask is looked
    up in ``masks_dir`` under the same name with ``.tif`` replaced by
    ``_tissue.tif``; its labels come from the ``event`` and ``follow_up_years``
    columns of the CSV row whose ``case_id`` equals the image file name.

    Args:
        images_dir: Directory holding the image slides.
        masks_dir: Directory holding the tissue-mask slides.
        csv_file: CSV with ``case_id``, ``event`` and ``follow_up_years`` columns.
        num_patches_per_image: Number of patches sampled from each slide.
        level: Pyramid level passed to ``extract_patches``.
        size_mask: ``(width, height)`` of a mask tile passed to ``extract_patches``.
    """

    def __init__(self, images_dir, masks_dir, csv_file, num_patches_per_image, level, size_mask):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.df = pd.read_csv(csv_file)
        self.num_patches_per_image = num_patches_per_image
        self.level = level
        self.size_mask = size_mask
        # Sorted so that an index always maps to the same slide.
        self.image_list = sorted(os.listdir(images_dir))

    def __len__(self):
        """Return the number of slides (one sample per file in ``images_dir``)."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load the patches and labels of slide ``idx``.

        Returns:
            ``(patches, labels)``: ``patches`` is a float32 tensor of shape
            ``(num_patches_per_image, 3, H, W)`` scaled to ``[0, 1]``;
            ``labels`` is a float32 tensor of shape
            ``(num_patches_per_image, 2)`` repeating ``[event, follow_up_years]``.

        Raises:
            IndexError: ``idx`` is outside ``range(len(self))``.
            KeyError: the CSV has no row for this slide.
            ValueError: the slide did not yield the requested number of patches.
        """
        image_file = self.image_list[idx]

        # Resolve the labels first: it is cheap and fails before any slide I/O.
        # NOTE(review): the match is on the file name *including* its extension;
        # confirm that this is how `case_id` is stored in the CSV.
        matches = self.df.loc[self.df['case_id'] == image_file]
        if matches.empty:
            # Not IndexError: the sequence-iteration protocol would read that as
            # "end of dataset" and silently stop instead of reporting the problem.
            raise KeyError(f"no row with case_id == {image_file!r} in the label table")
        label_row = matches.iloc[0]
        event = label_row["event"]
        years = label_row["follow_up_years"]

        impath = os.path.join(self.images_dir, image_file)
        mspath = os.path.join(self.masks_dir, image_file.replace('.tif', '_tissue.tif'))

        # Context managers close both handles even if extraction raises; without
        # them every access leaks two open slides.
        with openslide.OpenSlide(impath) as im_slide, openslide.OpenSlide(mspath) as ms_slide:
            image_patches = extract_patches(
                im_slide, ms_slide, self.level, self.size_mask, self.num_patches_per_image
            )

        if len(image_patches) != self.num_patches_per_image:
            raise ValueError(
                f"expected {self.num_patches_per_image} patches from {image_file!r}, "
                f"got {len(image_patches)}"
            )

        image_patches = np.array(image_patches, dtype=np.float32) / 255.0  # Normalize to [0, 1]
        labels = np.array([[event, years]] * self.num_patches_per_image, dtype=np.float32)

        # (N, H, W, C) -> (N, C, H, W), the channel-first layout Conv2d expects.
        return torch.from_numpy(image_patches.transpose(0, 3, 1, 2)), torch.from_numpy(labels)


In [4]:

# MODEL
class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> batch norm -> ReLU stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. Padding of 1 preserves the spatial size.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # Same layer order as a hand-written Sequential, so parameter names
        # (conv.0, conv.1, conv.3, conv.4) stay the same.
        for stage_in in (in_channels, out_channels):
            stages.extend((
                nn.Conv2d(stage_in, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both stages to ``x`` and return the result."""
        features = self.conv(x)
        return features

class UNet(nn.Module):
    """U-Net style encoder/decoder that regresses two values per patch.

    The decoder output is global-average-pooled and passed through two linear
    layers, so each patch produces a length-2 vector rather than a
    segmentation map.

    Args:
        num_patches: Number of patches per sample; the output is reshaped to
            ``(batch, num_patches, 2)``.
        input_shape: ``(channels, height, width)`` of one patch. Height and
            width must be divisible by 16: four 2x poolings are undone by four
            stride-2 transposed convolutions, and each upsampled map is
            concatenated with the encoder map of the same stage.
        base_channels: Width of the first stage; every encoder stage doubles
            it. The default of 64 gives the 64/128/256/512/1024 layout.
    """

    def __init__(self, num_patches, input_shape, base_channels=64):
        super().__init__()
        self.num_patches = num_patches
        self.input_shape = input_shape

        c1, c2, c3, c4, c5 = (base_channels * 2 ** stage for stage in range(5))

        # Encoder. The first conv takes its channel count from input_shape so
        # the layer always agrees with the reshape done in forward().
        self.inc = DoubleConv(input_shape[0], c1)
        self.down1 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(c1, c2))
        self.down2 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(c2, c3))
        self.down3 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(c3, c4))
        self.down4 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(c4, c5))

        # Decoder: each transposed conv doubles the resolution and halves channels.
        self.up1 = nn.ConvTranspose2d(c5, c4, 2, stride=2)
        self.up2 = nn.ConvTranspose2d(c4, c3, 2, stride=2)
        self.up3 = nn.ConvTranspose2d(c3, c2, 2, stride=2)
        self.up4 = nn.ConvTranspose2d(c2, c1, 2, stride=2)

        # Input width is doubled by the skip-connection concatenation.
        self.conv1 = DoubleConv(c5, c4)
        self.conv2 = DoubleConv(c4, c3)
        self.conv3 = DoubleConv(c3, c2)
        self.conv4 = DoubleConv(c2, c1)

        # Regression head.
        hidden = max(c1 // 2, 1)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(c1, hidden)
        self.fc2 = nn.Linear(hidden, 2)

    def forward(self, x):
        """Map a batch of patch stacks to per-patch predictions.

        Args:
            x: Tensor whose first dimension is the batch and whose elements
                fold into ``(-1, *input_shape)``, e.g.
                ``(batch, num_patches, C, H, W)``.

        Returns:
            Tensor of shape ``(batch, num_patches, 2)``.
        """
        batch_size = x.size(0)
        # reshape (not view) so sliced or permuted, i.e. non-contiguous, batches
        # are accepted; for contiguous input the two are equivalent.
        x = x.reshape(-1, self.input_shape[0], self.input_shape[1], self.input_shape[2])

        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        x = self.up1(x5)
        x = self.conv1(torch.cat([x4, x], dim=1))
        x = self.up2(x)
        x = self.conv2(torch.cat([x3, x], dim=1))
        x = self.up3(x)
        x = self.conv3(torch.cat([x2, x], dim=1))
        x = self.up4(x)
        x = self.conv4(torch.cat([x1, x], dim=1))

        x = self.gap(x)
        x = x.reshape(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)

        # Un-fold the patch dimension that was merged into the batch above.
        x = x.reshape(batch_size, self.num_patches, 2)

        return x


In [5]:
def custom_loss(y_pred, y_true):
    """Sum of the mean-squared errors of the two predicted channels.

    Index 0 of the last dimension is treated as the event value and index 1 as
    the follow-up years; each gets its own mean-squared error and the two are
    added with equal weight.

    Args:
        y_pred: Predictions, indexed as ``[:, :, channel]``.
        y_true: Targets with the same layout as ``y_pred``.

    Returns:
        Scalar tensor ``mse(event) + mse(years)``.
    """
    squared_error = nn.functional.mse_loss
    years_term = squared_error(y_pred.select(2, 1), y_true.select(2, 1))
    event_term = squared_error(y_pred.select(2, 0), y_true.select(2, 0))
    total = event_term + years_term
    return total

# TRAINING
def train_model(model, train_loader, optimizer, device, num_epochs, checkpoint_dir,
                checkpoint_every=200):
    """Train ``model`` with ``custom_loss`` and periodically save its weights.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable of ``(data, target)`` batches, re-iterated once
            per epoch.
        optimizer: Optimizer already bound to the parameters of ``model``.
        device: Device every batch is moved to before the forward pass.
        num_epochs: Number of passes over ``train_loader``.
        checkpoint_dir: Directory for the ``.pth`` checkpoints; created when
            missing.
        checkpoint_every: Save the ``state_dict`` every this many epochs.
            ``0`` or ``None`` disables checkpointing.

    Raises:
        ValueError: ``train_loader`` yields no batches.
    """
    if checkpoint_dir:
        # Fail at start-up rather than hundreds of epochs in, at the first save.
        os.makedirs(checkpoint_dir, exist_ok=True)

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = custom_loss(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        # Average over the batches actually seen, so loaders without __len__ work.
        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

        if checkpoint_every and (epoch + 1) % checkpoint_every == 0:
            checkpoint_path = os.path.join(
                checkpoint_dir, f"epoch{epoch+1:03d}-loss{avg_loss:.4f}.pth"
            )
            # NOTE(review): for a model wrapped in nn.DataParallel the saved keys
            # carry a "module." prefix; load into a wrapped model or strip it.
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Model saved at epoch {epoch + 1}")

In [6]:
# --- Run configuration -------------------------------------------------------
BATCH_SIZE = 16    # slides per batch; each slide contributes NUM_PATCHES patches
NUM_PATCHES = 4
# NOTE(review): patch size is size_mask scaled by the image/mask resolution
# ratio (computed in extract_patches); confirm it really comes out as 512x512.
INPUT_SHAPE = (3, 512, 512)
TOTAL_IMAGES = 55  # not referenced by the cells visible here

# --- Locations ---------------------------------------------------------------
images_dir = "/kaggle/input/dddddddd/images"
masks_dir = "/kaggle/input/dddddddd/masks"
csv_file = "/kaggle/input/dddddddd/training_labels.csv"
checkpoint_dir = "/kaggle/working"

# --- Data --------------------------------------------------------------------
dataset = PatchDataset(
    images_dir=images_dir,
    masks_dir=masks_dir,
    csv_file=csv_file,
    num_patches_per_image=NUM_PATCHES,
    level=1,
    size_mask=(64, 64),
)
train_loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# --- Model and optimiser -----------------------------------------------------
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = UNet(num_patches=NUM_PATCHES, input_shape=INPUT_SHAPE)
# DataParallel splits each batch across the visible GPUs.
model = nn.DataParallel(model).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/dddddddd/training_labels.csv'

In [None]:
# Sanity check: print the shape of every (patches, labels) batch.
# This walks the entire loader, so each slide in the dataset is read once.
for x , y in train_loader:
    print(x.shape , y.shape)
    

In [None]:
# Full training run; train_model prints the mean loss after every epoch and
# writes checkpoints into checkpoint_dir.
num_epochs = 2000
train_model(model, train_loader, optimizer, device, num_epochs, checkpoint_dir)