In [1]:
# run this cell first
!pip install -q gdown transformers timm opencv-python matplotlib
# (torch is preinstalled on Colab; if you want a specific version, install it here)


In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
!unzip "/content/drive/MyDrive/Injury localization dataset (1).zip" -d "/content/localization_dataset"


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/localization_dataset/Injury localization dataset/train/scaled-dicom-184.xml  
  inflating: /content/localization_dataset/__MACOSX/Injury localization dataset/train/._scaled-dicom-184.xml  
  inflating: /content/localization_dataset/Injury localization dataset/train/scaled-dicom-3595.png  
  inflating: /content/localization_dataset/__MACOSX/Injury localization dataset/train/._scaled-dicom-3595.png  
  inflating: /content/localization_dataset/Injury localization dataset/train/scaled-dicom-4946.xml  
  inflating: /content/localization_dataset/__MACOSX/Injury localization dataset/train/._scaled-dicom-4946.xml  
  inflating: /content/localization_dataset/Injury localization dataset/train/predict7_scaled-A0069_frame75.png  
  inflating: /content/localization_dataset/__MACOSX/Injury localization dataset/train/._predict7_scaled-A0069_frame75.png  
  inflating: /content/localization_dataset/Injury localizatio

In [6]:
!unzip "/content/drive/MyDrive/segmentation dataset (1).zip" -d "/content/segmentation_dataset"


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2349.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-235.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2350.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2351.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2352.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2353.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2354.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2355.png  
  inflating: /content/segmentation_dataset/segmentation dataset/train_masks/scaled-dicom-2356.png  
  inflating: /content/segmentation_d

In [7]:
!ls /content/localization_dataset
!ls /content/segmentation_dataset


'Injury localization dataset'   __MACOSX
'segmentation dataset'


In [8]:
import os

for path in ["/content/localization_dataset", "/content/segmentation_dataset"]:
    print(f"\n🔎 Checking {path}")
    for root, dirs, files in os.walk(path):
        print("  ", root, "->", len(files), "files")
        if files[:5]:
            print("     sample:", files[:5])
        # Stop after first two levels
        if root.count(os.sep) - path.count(os.sep) >= 1:
            break



🔎 Checking /content/localization_dataset
   /content/localization_dataset -> 0 files
   /content/localization_dataset/__MACOSX -> 1 files
     sample: ['._Injury localization dataset']

🔎 Checking /content/segmentation_dataset
   /content/segmentation_dataset -> 0 files
   /content/segmentation_dataset/segmentation dataset -> 0 files


In [9]:
train_images = "/content/segmentation_dataset/train/images"
train_masks  = "/content/segmentation_dataset/train/masks"
val_images   = "/content/segmentation_dataset/val/images"
val_masks    = "/content/segmentation_dataset/val/masks"


In [10]:
train_images = "/content/localization_dataset/train/images"
train_annots = "/content/localization_dataset/train/annotations.json"  # or xml, depends on format
val_images   = "/content/localization_dataset/val/images"
val_annots   = "/content/localization_dataset/val/annotations.json"


In [11]:
!pip install -q transformers timm opencv-python albumentations torchmetrics


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/983.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m983.0/983.2 kB[0m [31m33.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [12]:
import os, cv2, torch
from torch.utils.data import Dataset, DataLoader
from transformers import SegformerFeatureExtractor

class SpinalCordSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, feature_extractor, size=256):
        self.images = sorted([os.path.join(images_dir, f) for f in os.listdir(images_dir)])
        self.masks  = sorted([os.path.join(masks_dir, f) for f in os.listdir(masks_dir)])
        self.feature_extractor = feature_extractor
        self.size = size

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = cv2.cvtColor(cv2.imread(self.images[idx]), cv2.COLOR_BGR2RGB)
        mask  = cv2.imread(self.masks[idx], cv2.IMREAD_UNCHANGED)
        mask  = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
        mask  = torch.tensor(mask, dtype=torch.long)

        inputs = self.feature_extractor(images=image, return_tensors="pt", size=self.size)
        pixel_values = inputs["pixel_values"].squeeze()
        return pixel_values, mask


In [14]:
feature_extractor = SegformerFeatureExtractor(do_resize=True, size=256)

train_dataset = SpinalCordSegmentationDataset(
    "/content/segmentation_dataset/segmentation dataset/train_images",
    "/content/segmentation_dataset/segmentation dataset/train_masks",
    feature_extractor
)

val_dataset = SpinalCordSegmentationDataset(
    "/content/segmentation_dataset/segmentation dataset/val_images",
    "/content/segmentation_dataset/segmentation dataset/val_masks",
    feature_extractor
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=4)


In [24]:
import numpy as np

class SpinalCordSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, feature_extractor, size=256):
        self.images = sorted([os.path.join(images_dir, f) for f in os.listdir(images_dir)])
        self.masks  = sorted([os.path.join(masks_dir, f) for f in os.listdir(masks_dir)])
        self.feature_extractor = feature_extractor
        self.size = size

        # Map RGB colors → class IDs
        self.color2id = {
            (0, 0, 0): 0,            # background
            (128, 128, 128): 1,      # spinal cord
            (192, 192, 192): 2       # hematoma
        }

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # --- Image ---
        image = cv2.cvtColor(cv2.imread(self.images[idx]), cv2.COLOR_BGR2RGB)

        # --- Mask (RGB to class IDs) ---
        mask_rgb = cv2.cvtColor(cv2.imread(self.masks[idx]), cv2.COLOR_BGR2RGB)
        mask_rgb = cv2.resize(mask_rgb, (self.size, self.size), interpolation=cv2.INTER_NEAREST)

        mask = np.zeros((self.size, self.size), dtype=np.int64)
        for color, idx_class in self.color2id.items():
            mask[(mask_rgb == color).all(axis=-1)] = idx_class

        mask = torch.tensor(mask, dtype=torch.long)  # [H,W]

        # --- Features for image ---
        inputs = self.feature_extractor(images=image, return_tensors="pt", size=self.size)
        pixel_values = inputs["pixel_values"].squeeze()  # [3,H,W]

        return pixel_values, mask


In [25]:
pixel_values, mask = train_dataset[0]
print("pixel_values:", pixel_values.shape)   # [3,256,256]
print("mask:", mask.shape, "dtype:", mask.dtype)  # [256,256], long
print("unique mask values:", torch.unique(mask))  # should be [0,1,2]


pixel_values: torch.Size([3, 256, 256])
mask: torch.Size([256, 256, 3]) dtype: torch.int64
unique mask values: tensor([  0, 128, 192])


In [18]:
class SpinalCordSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, feature_extractor, size=256):
        self.images = sorted([os.path.join(images_dir, f) for f in os.listdir(images_dir)])
        self.masks  = sorted([os.path.join(masks_dir, f) for f in os.listdir(masks_dir)])
        self.feature_extractor = feature_extractor
        self.size = size

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # --- Load image ---
        image = cv2.cvtColor(cv2.imread(self.images[idx]), cv2.COLOR_BGR2RGB)

        # --- Load mask in grayscale (1 channel, integers) ---
        mask = cv2.imread(self.masks[idx], cv2.IMREAD_GRAYSCALE)
        mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
        mask = torch.tensor(mask, dtype=torch.long)   # [H,W]

        # --- Extract features for image ---
        inputs = self.feature_extractor(images=image, return_tensors="pt", size=self.size)
        pixel_values = inputs["pixel_values"].squeeze()  # [3,H,W]

        return pixel_values, mask


In [19]:
pixel_values, mask = train_dataset[0]
print("pixel_values:", pixel_values.shape)  # expect [3,256,256]
print("mask:", mask.shape, "dtype:", mask.dtype)  # expect [256,256], long
print("unique mask values:", torch.unique(mask))  # should show integers like 0,1,2...


pixel_values: torch.Size([3, 256, 256])
mask: torch.Size([256, 256, 3]) dtype: torch.int64
unique mask values: tensor([  0, 128, 192])


In [26]:
import os, cv2, torch
import numpy as np
from torch.utils.data import Dataset

class SpinalCordSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, feature_extractor, size=256):
        self.images = sorted([os.path.join(images_dir, f) for f in os.listdir(images_dir)])
        self.masks  = sorted([os.path.join(masks_dir, f) for f in os.listdir(masks_dir)])
        self.feature_extractor = feature_extractor
        self.size = size

        # Map RGB → class ID
        self.color2id = {
            (0, 0, 0): 0,             # background
            (128, 128, 128): 1,       # spinal cord
            (192, 192, 192): 2        # hematoma
        }

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # --- Image ---
        image = cv2.cvtColor(cv2.imread(self.images[idx]), cv2.COLOR_BGR2RGB)

        # --- Mask (RGB to class IDs) ---
        mask_rgb = cv2.cvtColor(cv2.imread(self.masks[idx]), cv2.COLOR_BGR2RGB)
        mask_rgb = cv2.resize(mask_rgb, (self.size, self.size), interpolation=cv2.INTER_NEAREST)

        # convert each pixel’s RGB to a class index
        mask = np.zeros((self.size, self.size), dtype=np.int64)
        for color, class_id in self.color2id.items():
            mask[(mask_rgb == color).all(axis=-1)] = class_id

        mask = torch.tensor(mask, dtype=torch.long)  # [H,W]

        # --- Features for image ---
        inputs = self.feature_extractor(images=image, return_tensors="pt", size=self.size)
        pixel_values = inputs["pixel_values"].squeeze()  # [3,H,W]

        return pixel_values, mask


In [27]:
from transformers import SegformerFeatureExtractor

feature_extractor = SegformerFeatureExtractor(do_resize=True, size=256)

train_dataset = SpinalCordSegmentationDataset(
    "/content/segmentation_dataset/segmentation dataset/train_images",
    "/content/segmentation_dataset/segmentation dataset/train_masks",
    feature_extractor
)

val_dataset = SpinalCordSegmentationDataset(
    "/content/segmentation_dataset/segmentation dataset/val_images",
    "/content/segmentation_dataset/segmentation dataset/val_masks",
    feature_extractor
)

from torch.utils.data import DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=4)


In [28]:
from transformers import SegformerForSemanticSegmentation
from torch.optim import Adam

device = "cuda" if torch.cuda.is_available() else "cpu"
num_labels = 3  # background, spinal cord, hematoma

model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512",
    num_labels=num_labels,
    ignore_mismatched_sizes=True
).to(device)

optimizer = Adam(model.parameters(), lr=5e-5)

# --- Training ---
epochs = 3
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for pixel_values, labels in train_loader:
        pixel_values, labels = pixel_values.to(device), labels.to(device)
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"[SegFormer] Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([3]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([3, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[SegFormer] Epoch 1, Loss: 0.0992
[SegFormer] Epoch 2, Loss: 0.0176
[SegFormer] Epoch 3, Loss: 0.0138


In [30]:
import torch.nn.functional as F

def evaluate_segmentation(model, dataloader):
    model.eval()
    dice_scores, iou_scores = [], []
    with torch.no_grad():
        for pixel_values, labels in dataloader:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            outputs = model(pixel_values=pixel_values)

            # Resize logits to match labels size
            logits = outputs.logits
            logits = F.interpolate(logits, size=labels.shape[1:], mode="bilinear", align_corners=False)

            preds = torch.argmax(logits, dim=1)  # [B,H,W]

            for c in range(num_labels):
                pred_c = (preds == c).float()
                label_c = (labels == c).float()
                intersection = (pred_c * label_c).sum()
                union = pred_c.sum() + label_c.sum()
                dice = (2. * intersection) / (union + 1e-6)
                iou  = intersection / (pred_c.sum() + label_c.sum() - intersection + 1e-6)
                dice_scores.append(dice.item())
                iou_scores.append(iou.item())

    return np.mean(dice_scores), np.mean(iou_scores)

dice, iou = evaluate_segmentation(model, val_loader)
print(f"[SegFormer] Validation Dice: {dice:.3f}, IoU: {iou:.3f}")


[SegFormer] Validation Dice: 0.523, IoU: 0.495


In [31]:
import torch.nn.functional as F

def compute_accuracy(model, dataloader):
    model.eval()
    total_correct = 0
    total_pixels = 0
    with torch.no_grad():
        for pixel_values, labels in dataloader:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            outputs = model(pixel_values=pixel_values)

            # Resize logits to match label size
            logits = outputs.logits
            logits = F.interpolate(logits, size=labels.shape[1:], mode="bilinear", align_corners=False)

            preds = torch.argmax(logits, dim=1)

            total_correct += (preds == labels).sum().item()
            total_pixels += labels.numel()

    accuracy = total_correct / total_pixels
    return accuracy

# Calculate accuracy on the validation set
accuracy = compute_accuracy(model, val_loader)
print(f"[SegFormer] Validation Accuracy: {accuracy:.4f}")


[SegFormer] Validation Accuracy: 0.9790


In [32]:
import torch.nn.functional as F

def compute_accuracy(model, dataloader):
    model.eval()
    total_correct = 0
    total_pixels = 0
    with torch.no_grad():
        for pixel_values, labels in dataloader:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            outputs = model(pixel_values=pixel_values)

            # Resize logits to match label size
            logits = outputs.logits
            logits = F.interpolate(logits, size=labels.shape[1:], mode="bilinear", align_corners=False)

            preds = torch.argmax(logits, dim=1)

            total_correct += (preds == labels).sum().item()
            total_pixels += labels.numel()

    accuracy = total_correct / total_pixels
    return accuracy

# Run on validation set
accuracy = compute_accuracy(model, val_loader)
print(f"[SegFormer] Validation Accuracy: {accuracy:.4f}")


[SegFormer] Validation Accuracy: 0.9790


LOCALIZATION SSD300

In [34]:
!pip install torch torchvision




In [35]:
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import xml.etree.ElementTree as ET

class SpinalCordLocalizationDataset(Dataset):
    def __init__(self, images_dir, annotations_dir, transforms=None):
        self.images_dir = images_dir
        self.annotations_dir = annotations_dir
        self.transforms = transforms
        self.images = sorted(os.listdir(images_dir))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.images_dir, img_name)

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load annotation
        annot_path = os.path.join(self.annotations_dir, img_name.replace(".jpg", ".xml"))
        boxes, labels = self.parse_voc_xml(annot_path)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        if self.transforms:
            image = self.transforms(image)

        return image, target

    def parse_voc_xml(self, annot_path):
        tree = ET.parse(annot_path)
        root = tree.getroot()
        boxes = []
        labels = []

        for obj in root.findall('object'):
            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)
            boxes.append([xmin, ymin, xmax, ymax])

            label = 1  # assuming one class: hematoma; background is handled separately
            labels.append(label)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        return boxes, labels


In [1]:
!find /content/localization_dataset/ -name "*.json"
!find /content/localization_dataset/ -name "*.csv"


find: ‘/content/localization_dataset/’: No such file or directory
find: ‘/content/localization_dataset/’: No such file or directory
