<a href="https://colab.research.google.com/github/firdowsacige/brain_tumor/blob/main/Untitled17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:

!pip install -q git+https://github.com/huggingface/transformers.git

  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [29]:

from transformers import AutoImageProcessor, DPTForDepthEstimation

# Load the DPT depth-estimation model and its image processor from the same checkpoint.
DPT_CHECKPOINT = "facebook/dpt-dinov2-small-kitti"
image_processor = AutoImageProcessor.from_pretrained(DPT_CHECKPOINT)
model = DPTForDepthEstimation.from_pretrained(DPT_CHECKPOINT)

In [30]:
!pip install -q evaluate

In [None]:
!pip install wget

In [None]:
# Download the Kvasir-SEG dataset
import wget
import zipfile

In [None]:
import os

# Remote location of the Kvasir-SEG archive and the local file it is saved to.
url = "http://datasets.simula.no/downloads/kvasir-seg.zip"
zip_path = "kvasir-seg.zip"

# Skip the download when the archive is already present, so that re-running
# this cell does not fetch the whole dataset again.
if not os.path.exists(zip_path):
    wget.download(url, zip_path)

In [None]:
# Unpack every member of the downloaded archive under /content/kvasir_seg.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path="/content/kvasir_seg")

In [None]:
from datasets import Dataset, DatasetDict, Image
import glob
import os

# Directories created by extracting the Kvasir-SEG archive.
image_dir = "/content/kvasir_seg/Kvasir-SEG/images"
mask_dir = "/content/kvasir_seg/Kvasir-SEG/masks"

# Collect file paths. Both lists are sorted so that position i in one list
# refers to the same sample as position i in the other.
image_paths = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
mask_paths = sorted(glob.glob(os.path.join(mask_dir, "*.jpg")))

# Fail early with a clear message instead of building an empty dataset.
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {image_dir}; was the archive extracted?")


def create_dataset(image_paths, mask_paths):
    """Build a Hugging Face dataset of (image, mask) pairs.

    Args:
        image_paths: list of image file paths.
        mask_paths: list of mask file paths, aligned index-by-index with
            ``image_paths``.

    Returns:
        A ``Dataset`` with an ``"image"`` and a ``"label"`` column, both cast
        to ``Image()``.

    Raises:
        ValueError: if the two lists differ in length, or if an image and the
            mask at the same position do not share a file name.
    """
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Got {len(image_paths)} images but {len(mask_paths)} masks; they must match one-to-one."
        )
    # Pairing is purely positional, so verify that every pair really belongs together.
    for image_path, mask_path in zip(image_paths, mask_paths):
        if os.path.basename(image_path) != os.path.basename(mask_path):
            raise ValueError(f"Image/mask mismatch: {image_path} is paired with {mask_path}")

    dataset = Dataset.from_dict({"image": image_paths, "label": mask_paths})
    # Cast both path columns to Image so the files are handled as images, not strings.
    dataset = dataset.cast_column("image", Image())
    dataset = dataset.cast_column("label", Image())
    return dataset


# Create a dataset for all the data
polyp_dataset_all = create_dataset(image_paths, mask_paths)

# Split into train and validation sets (80%/20%); the fixed seed keeps the split reproducible.
split_dataset = polyp_dataset_all.train_test_split(test_size=0.2, seed=42)
polyp_dataset = DatasetDict({
    "train": split_dataset["train"],
    "validation": split_dataset["test"],
})

# polyp_dataset["train"] and polyp_dataset["validation"] are what the
# SegmentationDataset wrappers are built from.

# You now have a dataset in the same format as before:
# polyp_dataset["train"] and polyp_dataset["validation"] can be used to create your SegmentationDataset wrappers.


In [None]:
def preprocess_mask(example, device=None, threshold=0):
    """Binarize the segmentation mask stored under ``example["label"]``.

    Args:
        example: mapping whose ``"label"`` entry is anything ``np.array``
            accepts (e.g. a PIL image or an array).
        device: torch device for the resulting tensor. When omitted, CUDA is
            used if available, otherwise the CPU.
        threshold: pixel values strictly greater than this become 1.0, all
            others 0.0. The default of 0 keeps the original "any non-zero
            pixel is foreground" rule; a mid-gray value such as 127 is more
            tolerant of compression noise in JPEG masks.

    Returns:
        The same ``example`` object, with ``"label"`` replaced by a float
        tensor that contains only 0s and 1s.
    """
    # Imported locally so the function does not depend on cells that run later.
    import numpy as np
    import torch

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    mask = torch.tensor(np.array(example["label"])).to(device)

    # The comparison yields booleans, so the result holds only 0.0 and 1.0.
    example["label"] = (mask > threshold).float()
    return example


In [None]:
# Display the DatasetDict holding the "train" and "validation" splits.
polyp_dataset

In [None]:
# Take the first training example and show its input image.
example = polyp_dataset["train"][0]
image = example["image"]
# Bare last expression so the notebook renders the value.
image

In [None]:
# Show the mask ("label" column) that belongs to the same example.
segmentation_map = example["label"]
segmentation_map

In [None]:
# Class-id -> class-name mapping: ids follow the order of the names.
id2label = dict(enumerate(("background", "polyp")))
print(id2label)

In [None]:
from torch.utils.data import Dataset
import numpy as np
import torch

class SegmentationDataset(Dataset):
  """Wrap an (image, mask) dataset and turn each item into training tensors.

  Args:
    dataset: indexable collection whose items provide "image" and "label".
    transform: callable invoked as ``transform(image=..., mask=...)`` that
      returns a mapping with "image" (H, W, C) and "mask" entries.
    mask_threshold: mask pixels strictly greater than this value become
      class 1, all others class 0. The default of 127 suits masks stored
      as 0/255 images; pass 0 for masks that are already 0/1.
  """
  def __init__(self, dataset, transform, mask_threshold=127):
    self.dataset = dataset
    self.transform = transform
    self.mask_threshold = mask_threshold

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    """Return (image CHW tensor, class-id HW tensor, original image, original mask)."""
    item = self.dataset[idx]
    original_image = np.array(item["image"])
    original_segmentation_map = np.array(item["label"])

    # Apply transforms
    transformed = self.transform(image=original_image, mask=original_segmentation_map)

    # Convert image from HWC to CHW
    image = torch.tensor(transformed["image"]).permute(2, 0, 1)

    # Masks loaded from image files can carry a channel axis; keep one channel.
    mask = np.asarray(transformed["mask"])
    if mask.ndim == 3:
      mask = mask[..., 0]

    # Binarize: raw pixel values are not class ids, and the loss and metric
    # code downstream expects every target pixel to be 0 or 1.
    target = torch.from_numpy((mask > self.mask_threshold).astype(np.int64))

    return image, target, original_image, original_segmentation_map


In [None]:
import albumentations as A

# Per-channel normalization statistics, rescaled from the 0-255 range to 0-1.
ADE_MEAN = [channel / 255 for channel in (123.675, 116.280, 103.530)]
ADE_STD = [channel / 255 for channel in (58.395, 57.120, 57.375)]

# Training pipeline: resize to 448x448, random horizontal flip, then normalize.
train_transform = A.Compose(
    [
        A.Resize(height=448, width=448),
        A.HorizontalFlip(p=0.5),
        A.Normalize(mean=ADE_MEAN, std=ADE_STD),
    ],
    is_check_shapes=False,
)

# Validation pipeline: the same resize and normalization, without the random flip.
val_transform = A.Compose(
    [
        A.Resize(height=448, width=448),
        A.Normalize(mean=ADE_MEAN, std=ADE_STD),
    ]
)

# Wrap each split with the pipeline that belongs to it.
train_dataset = SegmentationDataset(polyp_dataset["train"], transform=train_transform)
val_dataset = SegmentationDataset(polyp_dataset["validation"], transform=val_transform)

In [None]:
import numpy as np
# Pull one training sample; SegmentationDataset.__getitem__ returns
# (image tensor, target tensor, original image array, original mask array).
pixel_values, target, original_image, original_segmentation_map = train_dataset[3]
# The image tensor is channel-first because __getitem__ permutes it from HWC to CHW.
print(pixel_values.shape)
print(target.shape)

In [None]:
from torch.utils.data import DataLoader

def collate_fn(inputs):
    """Assemble a batch dict from a list of 4-tuples produced by the dataset.

    Positions 0 and 1 of every sample are stacked into tensors along a new
    leading dimension; positions 2 and 3 are kept as plain Python lists.
    """
    def column(position):
        # Gather the element at `position` from every sample, in order.
        return [sample[position] for sample in inputs]

    return {
        "pixel_values": torch.stack(column(0), dim=0),
        "labels": torch.stack(column(1), dim=0),
        "original_images": column(2),
        "original_segmentation_maps": column(3),
    }

# Both loaders build batches of two samples with collate_fn; only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=collate_fn)

In [None]:
# Fetch one training batch and report the shape of every tensor entry.
batch = next(iter(train_dataloader))
for name, value in batch.items():
  if not isinstance(value, torch.Tensor):
    continue
  print(name, value.shape)

In [None]:
# Sanity check on the batch fetched above: list the distinct values present in its labels.
print(batch["labels"].unique())  # Check the unique values in the labels


In [None]:
# Fetch another training batch; report tensor shapes, then the dtypes of inputs and labels.
batch = next(iter(train_dataloader))
for name, value in batch.items():
  if not isinstance(value, torch.Tensor):
    continue
  print(name, value.shape)
print(batch["pixel_values"].dtype)
batch["labels"].dtype

In [None]:
# Show the dtype of the batched input images.
batch["pixel_values"].dtype

In [None]:
import torch
from transformers import Dinov2Model, Dinov2PreTrainedModel
from transformers.modeling_outputs import SemanticSegmenterOutput

class LinearClassifier(torch.nn.Module):
    """Per-token linear head, implemented as a 1x1 convolution over the token grid.

    Args:
        in_channels: size of each token embedding.
        tokenW: number of tokens along the grid width.
        tokenH: number of tokens along the grid height.
        num_labels: number of output channels (one logit map per label).
    """

    def __init__(self, in_channels, tokenW=32, tokenH=32, num_labels=1):
        super().__init__()

        self.in_channels = in_channels
        self.width = tokenW
        self.height = tokenH
        # A 1x1 kernel applies the same linear map to every grid position.
        self.classifier = torch.nn.Conv2d(in_channels, num_labels, kernel_size=1)

    def forward(self, embeddings):
        """Map token embeddings to logits of shape (batch, num_labels, height, width)."""
        # Lay the flat token sequence out as a (batch, height, width, channels) grid ...
        token_grid = embeddings.reshape(-1, self.height, self.width, self.in_channels)
        # ... and move channels first, which is the layout Conv2d expects.
        return self.classifier(token_grid.permute(0, 3, 1, 2))


class Dinov2ForSemanticSegmentation(Dinov2PreTrainedModel):
  """DINOv2 backbone followed by a linear per-patch segmentation head.

  The head is built for a fixed 32x32 grid of patch tokens, so inputs must
  produce exactly 1024 patch tokens (presumably 448x448 images with a patch
  size of 14 -- confirm against the checkpoint config).
  """
  def __init__(self, config):
    super().__init__(config)

    self.dinov2 = Dinov2Model(config)
    self.classifier = LinearClassifier(config.hidden_size, 32, 32, config.num_labels)

  def forward(self, pixel_values, output_hidden_states=False, output_attentions=False, labels=None):
    """Run the backbone and the head; compute a cross-entropy loss when labels are given.

    Args:
      pixel_values: image batch of shape (batch, channels, height, width).
      output_hidden_states: forwarded to the backbone.
      output_attentions: forwarded to the backbone.
      labels: optional class-id map of shape (batch, height, width); a
        singleton channel axis, (batch, 1, height, width), is also accepted.

    Returns:
      SemanticSegmenterOutput whose logits have shape
      (batch, num_labels, height, width) and whose loss is None without labels.

    Raises:
      ValueError: if the backbone yields a patch-token count other than the
        one the head's grid was built for.
    """
    outputs = self.dinov2(pixel_values,
                            output_hidden_states=output_hidden_states,
                            output_attentions=output_attentions)
    # get the patch embeddings - so we exclude the CLS token
    patch_embeddings = outputs.last_hidden_state[:,1:,:]

    # The head reshapes with a leading -1, so a wrong token count would be
    # folded into the batch axis silently instead of failing; check it here.
    expected_tokens = self.classifier.height * self.classifier.width
    if patch_embeddings.shape[1] != expected_tokens:
      raise ValueError(
          f"Expected {expected_tokens} patch tokens "
          f"({self.classifier.height}x{self.classifier.width} grid), "
          f"got {patch_embeddings.shape[1]}; check the input resolution."
      )

    # convert to logits and upsample to the size of the pixel values
    logits = self.classifier(patch_embeddings)
    logits = torch.nn.functional.interpolate(logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False)

    loss = None
    if labels is not None:
      # Drop only an explicit channel axis. A blanket .squeeze() would also
      # remove the batch axis when the batch holds a single sample.
      if labels.ndim == logits.ndim:
        labels = labels.squeeze(1)
      # Class 0 ("background") is a real class here, so it must contribute to
      # the loss; ignoring it would leave only polyp pixels to learn from.
      loss_fct = torch.nn.CrossEntropyLoss()
      loss = loss_fct(logits, labels.long())

    return SemanticSegmenterOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )

In [None]:
# Build the segmentation model on top of the pretrained "facebook/dinov2-base" checkpoint.
# id2label has two entries, so num_labels is 2 and the head emits one logit map per class.
# NOTE(review): this rebinds `model`, replacing the DPT model loaded near the top of the notebook.
model = Dinov2ForSemanticSegmentation.from_pretrained("facebook/dinov2-base", id2label=id2label, num_labels=len(id2label))

In [None]:
# Clamp the labels to ensure they are within [0, 1]
# NOTE(review): this only rewrites the single `batch` currently held in memory;
# batches drawn later from the dataloaders are not affected by it.
batch["labels"] = torch.clamp(batch["labels"], min=0, max=1)

In [None]:
# Freeze every parameter whose name starts with "dinov2"; the rest stay trainable.
for param_name, weight in model.named_parameters():
  if not param_name.startswith("dinov2"):
    continue
  weight.requires_grad = False

In [None]:
# Smoke-test one forward pass on the batch held in memory; passing labels makes
# the model return a loss in addition to the logits.
outputs = model(pixel_values=batch["pixel_values"], labels=batch["labels"])
print(outputs.logits.shape)
print(outputs.loss)

In [None]:
from torch.optim import AdamW
from tqdm.auto import tqdm
import torch
from torch.nn import BCEWithLogitsLoss
import os
import evaluate

# NOTE: the CUDA_LAUNCH_BLOCKING=1 debugging switch that used to be set here was
# removed; it is a debugging aid rather than a training setting.

# Training hyperparameters
learning_rate = 5e-5
epochs = 10

optimizer = AdamW(model.parameters(), lr=learning_rate)

# Put model on GPU (set runtime to GPU in Google Colab)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Put model in training mode
model.train()

# Use BCEWithLogitsLoss for binary classification
loss_fn = BCEWithLogitsLoss()

# Mean-IoU metric used for the periodic report below.
metric = evaluate.load("mean_iou")

for epoch in range(epochs):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Masks may arrive as raw 0-255 pixel values or already as {0, 1};
        # reduce both encodings to {0, 1} class ids.
        labels = (labels > (127 if labels.max() > 1 else 0)).long()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(pixel_values)
        logits = outputs.logits  # (batch, num_labels, height, width)

        # Debugging: Check if logits contain NaNs or Infs
        assert not torch.any(torch.isnan(logits)), "Logits contain NaN values"
        assert not torch.any(torch.isinf(logits)), "Logits contain Inf values"

        # BCE needs exactly one logit per pixel. For a two-class head, the
        # difference (class 1 - class 0) is the log-odds of class 1, so its
        # sigmoid equals the softmax probability of "polyp".
        if logits.shape[1] == 1:
            polyp_logits = logits[:, 0]
        else:
            polyp_logits = logits[:, 1] - logits[:, 0]

        # Compute the binary cross-entropy loss; both tensors are (batch, height, width)
        loss = loss_fn(polyp_logits, labels.float())

        # Backpropagate the loss
        loss.backward()
        optimizer.step()

        # Evaluate
        with torch.no_grad():
            # A logit above 0 is a probability above 0.5
            predictions = (polyp_logits > 0).long()

            # The metric expects predictions and references as same-shaped numpy arrays
            metric.add_batch(predictions=predictions.cpu().numpy(), references=labels.cpu().numpy())

        # Print loss and metrics every 100 batches
        if idx % 100 == 0:
            # ignore_index is set to a value that never occurs in the labels so
            # that the background class (0) is scored as well.
            metrics = metric.compute(num_labels=2,
                                     ignore_index=255,
                                     reduce_labels=False)

            print("Loss:", loss.item())
            print("Mean IoU:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])


In [None]:
from torch.optim import AdamW
from tqdm.auto import tqdm
import torch
from torch.nn import BCEWithLogitsLoss
import evaluate

# Training hyperparameters
learning_rate = 5e-5
epochs = 10

optimizer = AdamW(model.parameters(), lr=learning_rate)

# Put model on GPU (set runtime to GPU in Google Colab)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Put model in training mode
model.train()

# Use BCEWithLogitsLoss for binary classification
loss_fn = BCEWithLogitsLoss()

# Mean-IoU metric used for the periodic report below.
metric = evaluate.load("mean_iou")

for epoch in range(epochs):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Masks may arrive as raw 0-255 pixel values or already as {0, 1};
        # reduce both encodings to {0, 1} class ids.
        labels = (labels > (127 if labels.max() > 1 else 0)).long()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(pixel_values)
        logits = outputs.logits  # (batch, num_labels, height, width)

        # Index the class axis explicitly instead of calling .squeeze(): the
        # head emits one channel per label, and squeeze() would also drop the
        # batch axis for a single-sample batch. The difference
        # (class 1 - class 0) is the log-odds of "polyp".
        if logits.shape[1] == 1:
            polyp_logits = logits[:, 0]
        else:
            polyp_logits = logits[:, 1] - logits[:, 0]

        # Compute the binary cross-entropy loss; both tensors are (batch, height, width)
        loss = loss_fn(polyp_logits, labels.float())

        # Backpropagate the loss
        loss.backward()
        optimizer.step()

        # Evaluate
        with torch.no_grad():
            # A logit above 0 is a probability above 0.5
            predictions = (polyp_logits > 0).long()

            # The metric expects predictions and references as same-shaped numpy arrays
            metric.add_batch(predictions=predictions.cpu().numpy(), references=labels.cpu().numpy())

        # Print loss and metrics every 100 batches
        if idx % 100 == 0:
            # ignore_index is set to a value that never occurs in the labels so
            # that the background class (0) is scored as well.
            metrics = metric.compute(num_labels=2,
                                     ignore_index=255,
                                     reduce_labels=False)

            print("Loss:", loss.item())
            print("Mean IoU:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])


In [None]:
from torch.nn import BCEWithLogitsLoss
import evaluate

# NOTE: this cell reuses `model`, `optimizer`, `device`, `epochs`, `tqdm` and
# `train_dataloader` from cells that must have been run before it.

# Define the loss function for binary classification
loss_fn = BCEWithLogitsLoss()

# Mean-IoU metric used for the periodic report below.
metric = evaluate.load("mean_iou")

for epoch in range(epochs):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Masks may arrive as raw 0-255 pixel values or already as {0, 1};
        # reduce both encodings to {0, 1} class ids.
        labels = (labels > (127 if labels.max() > 1 else 0)).long()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(pixel_values)
        logits = outputs.logits  # (batch, num_labels, height, width)

        # BCE needs exactly one logit per pixel. For a two-class head, the
        # difference (class 1 - class 0) is the log-odds of class 1 ("polyp").
        if logits.shape[1] == 1:
            polyp_logits = logits[:, 0]
        else:
            polyp_logits = logits[:, 1] - logits[:, 0]

        # Compute binary cross-entropy loss; both tensors are (batch, height, width)
        loss = loss_fn(polyp_logits, labels.float())

        # Backpropagate the loss
        loss.backward()
        optimizer.step()

        # Evaluate the predictions
        with torch.no_grad():
            # A logit above 0 is a probability above 0.5
            predictions = (polyp_logits > 0).long()

            # The metric expects predictions and references as same-shaped numpy arrays
            metric.add_batch(predictions=predictions.cpu().numpy(), references=labels.cpu().numpy())

        # Print loss and metrics every 100 batches
        if idx % 100 == 0:
            # Debugging output is limited to the reporting step so it does not
            # flood the cell output on every batch.
            print("Unique labels in batch:", labels.unique())

            # ignore_index is set to a value that never occurs in the labels so
            # that the background class (0) is scored as well.
            metrics = metric.compute(num_labels=2,
                                     ignore_index=255,
                                     reduce_labels=False)

            print("Loss:", loss.item())
            print("Mean IoU:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])


In [None]:
from torch.optim import AdamW
from tqdm.auto import tqdm
import torch
import evaluate

# Training hyperparameters
learning_rate = 5e-5
epochs = 10

optimizer = AdamW(model.parameters(), lr=learning_rate)

# Put model on GPU (set runtime to GPU in Google Colab)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Put model in training mode
model.train()

# Mean-IoU metric used for the periodic report below.
metric = evaluate.load("mean_iou")

for epoch in range(epochs):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Masks may arrive as raw 0-255 pixel values or already as {0, 1};
        # reduce both encodings to {0, 1} class ids. This is applied to the
        # tensor that is actually passed to the model and the metric.
        labels = (labels > (127 if labels.max() > 1 else 0)).long()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass; the loss is computed inside the model's forward
        outputs = model(pixel_values, labels=labels)
        loss = outputs.loss

        # Backpropagate the loss
        loss.backward()
        optimizer.step()

        # Evaluate the predictions
        with torch.no_grad():
            logits = outputs.logits  # (batch, num_labels, height, width)
            # One logit map per class: the predicted class is the arg-max over
            # the class axis, which gives a (batch, height, width) map like the labels.
            predictions = logits.argmax(dim=1)

            # The metric expects predictions and references as same-shaped numpy arrays
            metric.add_batch(predictions=predictions.cpu().numpy(), references=labels.cpu().numpy())

        # Print loss and metrics every 100 batches
        if idx % 100 == 0:
            # ignore_index is set to a value that never occurs in the labels so
            # that the background class (0) is scored as well.
            metrics = metric.compute(num_labels=2,
                                     ignore_index=255,
                                     reduce_labels=False)

            print("Loss:", loss.item())
            print("Mean IoU:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])


In [None]:
from torch.optim import AdamW
from tqdm.auto import tqdm
import torch
from torch.nn import BCEWithLogitsLoss
import evaluate

# Training hyperparameters
learning_rate = 5e-5
epochs = 10

optimizer = AdamW(model.parameters(), lr=learning_rate)

# Put model on GPU (set runtime to GPU in Google Colab)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Put model in training mode
model.train()

# Use BCEWithLogitsLoss for binary classification
loss_fn = BCEWithLogitsLoss()

# Mean-IoU metric used for the periodic report below.
metric = evaluate.load("mean_iou")

for epoch in range(epochs):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        pixel_values = batch["pixel_values"].to(device, dtype=torch.float32)  # Typecast to float32
        labels = batch["labels"].to(device)

        # Masks may arrive as raw 0-255 pixel values or already as {0, 1};
        # reduce both encodings to {0, 1}. Integer class ids are kept for the
        # metric; a float copy is made only for the loss.
        labels = (labels > (127 if labels.max() > 1 else 0)).long()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(pixel_values)
        logits = outputs.logits  # (batch, num_labels, height, width)

        # BCE needs exactly one logit per pixel. For a two-class head, the
        # difference (class 1 - class 0) is the log-odds of class 1 ("polyp").
        if logits.shape[1] == 1:
            polyp_logits = logits[:, 0]
        else:
            polyp_logits = logits[:, 1] - logits[:, 0]

        # Compute the binary cross-entropy loss; both tensors are (batch, height, width)
        loss = loss_fn(polyp_logits, labels.to(torch.float32))

        # Backpropagate the loss
        loss.backward()
        optimizer.step()

        # Evaluate
        with torch.no_grad():
            # A logit above 0 is a probability above 0.5
            predictions = (polyp_logits > 0).long()

            # The metric expects predictions and references as same-shaped numpy arrays
            metric.add_batch(predictions=predictions.cpu().numpy(), references=labels.cpu().numpy())

        # Print loss and metrics every 100 batches
        if idx % 100 == 0:
            # ignore_index is set to a value that never occurs in the labels so
            # that the background class (0) is scored as well.
            metrics = metric.compute(num_labels=2,
                                     ignore_index=255,
                                     reduce_labels=False)

            print("Loss:", loss.item())
            print("Mean IoU:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])
