<a href="https://colab.research.google.com/github/Viratcode/Udayachal_Project_code/blob/Model_Training_Path/segformer_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade transformers

In [None]:
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torch
from torch import nn
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
import os
from PIL import Image
from transformers import SegformerForSemanticSegmentation, SegformerFeatureExtractor
import pandas as pd
import cv2
import numpy as np
import albumentations as aug

In [None]:
from albumentations import Compose, HorizontalFlip, VerticalFlip

# Augmentation pipeline: each flip is applied independently with probability 0.5.
transform = Compose([
    HorizontalFlip(p=0.5),  # Random horizontal flip
    VerticalFlip(p=0.5)   # Random vertical flip (active, not commented out)
])

In [None]:
# Target width/height in pixels. In this notebook these constants are only
# referenced by the commented-out dataset class; the active dataset class
# uses its own `image_size` argument instead.
WIDTH = 256
HEIGHT = 256

In [None]:
# from torch.utils.data import Dataset
# import os
# import cv2
# import albumentations as aug

# class ImageSegmentationDataset(Dataset):
#     """Image segmentation dataset."""

#     def __init__(self, root_dir, feature_extractor, transforms=None, split="train"):
#         """
#         Args:
#             root_dir (string): Root directory of the dataset.
#             feature_extractor (SegFormerFeatureExtractor): Feature extractor.
#             transforms (albumentations.Compose): Data augmentations.
#             split (string): "train", "val", or "test" to indicate the split.
#         """
#         self.root_dir = root_dir
#         self.feature_extractor = feature_extractor
#         self.transforms = transforms
#         self.split = split

#         # Assuming images in 'images/train' and masks in 'mask/train'
#         self.img_dir = os.path.join(self.root_dir, "source")
#         self.ann_dir = os.path.join(self.root_dir, "masks")

#         # Read image and annotation file names
#         self.images = sorted(os.listdir(self.img_dir))
#         self.annotations = sorted(os.listdir(self.ann_dir))

#         assert len(self.images) == len(self.annotations), "Unequal number of images and masks"

#     def __len__(self):
#         return len(self.images)

#     def __getitem__(self, idx):

#         image_path = os.path.join(self.img_dir, self.images[idx])
#         mask_path = os.path.join(self.ann_dir, self.annotations[idx])

#         image = cv2.imread(image_path)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

#         segmentation_map = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
#         image = cv2.resize(image, (WIDTH, HEIGHT), interpolation=cv2.INTER_LINEAR)
#         segmentation_map = cv2.resize(segmentation_map, (WIDTH, HEIGHT), interpolation=cv2.INTER_NEAREST)

#         # Apply transforms based on split
#         if self.split == "train" and self.transforms is not None:
#             augmented = self.transforms(image=image, mask=segmentation_map)
#             image, segmentation_map = augmented['image'], augmented['mask']

#         encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")

#         # Remove batch dimension
#         for k, v in encoded_inputs.items():
#             encoded_inputs[k].squeeze_()

#         return encoded_inputs

In [None]:
from torch.utils.data import Dataset
import os
import cv2
import albumentations as aug

class ImageSegmentationDataset(Dataset):
    """Image segmentation dataset read from ``<root_dir>/source`` and ``<root_dir>/masks``.

    Images and masks are paired by position after sorting the file names of
    each directory, so both directories must sort into the same order.
    """

    def __init__(self, root_dir, feature_extractor, transforms=None, split="train", image_size=256):
        """
        Args:
            root_dir (string): Root directory of the dataset. Must contain a
                'source' sub-directory (images) and a 'masks' sub-directory.
            feature_extractor: Callable invoked as
                ``feature_extractor(image, segmentation_map, return_tensors="pt")``.
            transforms: Optional callable invoked as
                ``transforms(image=..., mask=...)`` that returns a mapping with
                'image' and 'mask' keys. Only applied when ``split == "train"``.
            split (string): "train", "val", or "test" to indicate the split.
            image_size (int): Side length images and masks are resized to (default: 256).

        Raises:
            ValueError: If the two directories hold a different number of files.
        """
        self.root_dir = root_dir
        self.feature_extractor = feature_extractor
        self.transforms = transforms
        self.split = split
        self.image_size = image_size  # Store image size

        # Images live in '<root_dir>/source' and masks in '<root_dir>/masks'
        self.img_dir = os.path.join(self.root_dir, "source")
        self.ann_dir = os.path.join(self.root_dir, "masks")

        # Read image and annotation file names (regular files only, so stray
        # sub-directories such as '.ipynb_checkpoints' do not break the pairing)
        self.images = self._list_files(self.img_dir)
        self.annotations = self._list_files(self.ann_dir)

        # Explicit exception instead of `assert`, which is stripped under `python -O`
        if len(self.images) != len(self.annotations):
            raise ValueError("Unequal number of images and masks")

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of the regular files directly inside *directory*."""
        return sorted(
            name
            for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize, optionally augment and encode the pair at position *idx*.

        Returns:
            The object produced by ``feature_extractor`` with the leading batch
            dimension removed from every tensor it holds.

        Raises:
            OSError: If the image or the mask cannot be read by OpenCV.
        """
        image_path = os.path.join(self.img_dir, self.images[idx])
        mask_path = os.path.join(self.ann_dir, self.annotations[idx])

        # cv2.imread signals failure by returning None rather than raising
        image = cv2.imread(image_path)
        if image is None:
            raise OSError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        segmentation_map = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if segmentation_map is None:
            raise OSError(f"Could not read mask: {mask_path}")

        # Resize to a square of side image_size; nearest-neighbour for the mask
        # so that no new label values are invented by interpolation
        target_size = (self.image_size, self.image_size)
        image = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
        segmentation_map = cv2.resize(segmentation_map, target_size, interpolation=cv2.INTER_NEAREST)

        # Apply transforms based on split
        if self.split == "train" and self.transforms is not None:
            augmented = self.transforms(image=image, mask=segmentation_map)
            image, segmentation_map = augmented['image'], augmented['mask']

        encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")

        # Remove only the leading batch dimension (a bare squeeze_() would also
        # drop any other axis that happens to have length 1)
        for tensor in encoded_inputs.values():
            tensor.squeeze_(0)

        return encoded_inputs

In [None]:
from torch.utils.data import random_split

In [None]:
import torch
from torch.utils.data import random_split, DataLoader, Subset
from albumentations import Compose, HorizontalFlip, VerticalFlip
from transformers import SegformerFeatureExtractor

# Set seed for reproducibility
seed = 42
torch.manual_seed(seed)

# Data augmentation transforms (training only)
transform = Compose([
    HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5)
])

# Define root directory and feature extractor
root_dir = 'your dataset'
feature_extractor = SegformerFeatureExtractor(size=256, align=False, reduce_zero_label=False)

# Full dataset with augmentation enabled; used for the split and for training
dataset = ImageSegmentationDataset(root_dir=root_dir, feature_extractor=feature_extractor, transforms=transform)

# Second view over the same files without augmentation. Validation and test
# samples are drawn from this one so that evaluation is deterministic and is
# not silently flipped (the dataset augments whenever split == "train").
eval_dataset = ImageSegmentationDataset(
    root_dir=root_dir, feature_extractor=feature_extractor, transforms=None, split="val"
)

# Split the dataset with a fixed seed (70% / 15% / remainder)
dataset_size = len(dataset)
train_size = int(0.7 * dataset_size)
val_size = int(0.15 * dataset_size)
test_size = dataset_size - train_size - val_size

generator = torch.Generator().manual_seed(seed)
train_dataset, val_split, test_split = random_split(dataset, [train_size, val_size, test_size], generator=generator)

# Re-use the sampled indices on the non-augmenting view; both dataset objects
# list the same directories in the same sorted order, so indices line up.
val_dataset = Subset(eval_dataset, val_split.indices)
test_dataset = Subset(eval_dataset, test_split.indices)

# Create data loaders (only the training loader is shuffled)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_dataloader = DataLoader(val_dataset, batch_size=4)
test_dataloader = DataLoader(test_dataset, batch_size=4)


In [None]:
# Report how many samples ended up in the training and validation splits.
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))

In [None]:
# Fetch the first training sample to inspect what the dataset returns.
encoded_inputs = train_dataset[0]

In [None]:
# Shape of the encoded image tensor for that sample.
encoded_inputs["pixel_values"].shape

In [None]:
# Shape of the encoded label (mask) tensor for that sample.
encoded_inputs["labels"].shape

In [None]:
# Display the raw label tensor.
encoded_inputs["labels"]

In [None]:
# Distinct label values present in this sample's mask (useful to check
# whether the masks really contain only the expected class ids).
encoded_inputs["labels"].squeeze().unique()

In [None]:
# Convert the label tensor to a NumPy array so it can be plotted.
mask = encoded_inputs["labels"].numpy()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render the label mask as an image.
plt.imshow(mask)

In [None]:
# Pull one batch from the training loader to check the collated output.
batch = next(iter(train_dataloader))

In [None]:
# Print the name and shape of every tensor in the batch.
for k,v in batch.items():
    print(k, v.shape)

In [None]:
# Shape of the batched label tensor.
batch["labels"].shape

In [None]:
# Install or upgrade PyTorch to a version >= 2.6
!pip install --upgrade torch torchvision torchaudio

In [None]:
# Class-id <-> class-name mappings for a two-class problem
# (0 = background, 1 = building). The training cells force every label
# value greater than 1 to 0, so the masks are expected to use exactly these ids.
id2label = {0: "background", 1: "building"}
label2id = {v: k for k, v in id2label.items()}

# Build a SegFormer segmentation model from the "nvidia/mit-b5" checkpoint.
# NOTE(review): `reshape_last_stage` and `image_size` are forwarded as extra
# keyword arguments — confirm they are still honoured by the installed
# transformers version.
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b5",  # Changed from mit-b0 to mit-b5 (or another larger version)
    ignore_mismatched_sizes=True,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    reshape_last_stage=True,
    image_size=256
)

In [None]:
!pip install transformers
from torch.optim import AdamW  # AdamW comes from torch.optim here, not from transformers


In [None]:
# AdamW over all model parameters with a learning rate of 6e-5.
optimizer = AdamW(model.parameters(), lr=0.00006)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model Initialized!")

In [None]:
!pip install torch>=2.0

# Training the model (no need to execute every time)

In [None]:
# Training with mixed precision and gradient accumulation, followed by a
# validation pass at the end of every epoch.
criterion = nn.CrossEntropyLoss()  # pixel-wise cross-entropy on the upsampled logits
accumulation_steps = 8  # mini-batches whose gradients are summed before one optimizer step
# Mixed precision only makes sense on CUDA; elsewhere autocast and the scaler are disabled.
use_amp = device.type == "cuda"
scaler = amp.GradScaler(enabled=use_amp)


def _mean(values):
    """Return the arithmetic mean of *values*, or NaN when the list is empty."""
    return sum(values) / len(values) if values else float("nan")


for epoch in range(1, 11):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    pbar = tqdm(train_dataloader)
    accuracies = []
    losses = []
    val_accuracies = []
    val_losses = []
    num_batches = len(train_dataloader)

    model.train()
    optimizer.zero_grad()
    for idx, batch in enumerate(pbar):
        # get the inputs
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device).long()

        # One-off diagnostic of the raw label range (printing it for every
        # batch floods the progress bar output).
        if epoch == 1 and idx == 0:
            print("Before modification:")
            print("Minimum label value:", labels.min().item())
            print("Maximum label value:", labels.max().item())

        # Ensure labels only contain 0 and 1 (binary segmentation).
        # NOTE(review): if the masks encode the foreground as 255 this maps the
        # foreground to background — confirm the mask encoding.
        labels[labels > 1] = 0

        # Forward pass and loss both run under autocast so that mixed precision
        # is actually applied to the model, not just to the loss.
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(pixel_values=pixel_values)
            # interpolate the logits to the same size as the labels
            upsampled_logits = nn.functional.interpolate(
                outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            loss = criterion(upsampled_logits, labels)

        # Divide before backward so the accumulated gradient is the average
        # over `accumulation_steps` mini-batches.
        scaler.scale(loss / accumulation_steps).backward()

        # Step once per accumulation window, and once more for a trailing
        # partial window so no gradients are thrown away at the end of the epoch.
        if (idx + 1) % accumulation_steps == 0 or (idx + 1) == num_batches:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Pixel-wise accuracy over all pixels. Labels were clamped to {0, 1}
        # above, so there is no 255 "ignore" value left to mask out.
        predicted = upsampled_logits.argmax(dim=1)
        pred_labels = predicted.flatten().detach().cpu().numpy()
        true_labels = labels.flatten().detach().cpu().numpy()
        accuracies.append(accuracy_score(true_labels, pred_labels))
        # Log the real per-batch loss, not the value divided for accumulation.
        losses.append(loss.item())
        pbar.set_postfix({'Batch': idx, 'Pixel-wise accuracy': _mean(accuracies), 'Loss': _mean(losses)})

    # Validation phase
    model.eval()
    with torch.no_grad():
        for batch in valid_dataloader:
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device).long()

            # Ensure labels only contain 0 and 1 for validation as well
            labels[labels > 1] = 0

            # Passing labels makes the model return its own loss, used as the validation loss.
            outputs = model(pixel_values=pixel_values, labels=labels)
            upsampled_logits = nn.functional.interpolate(
                outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            predicted = upsampled_logits.argmax(dim=1)
            pred_labels = predicted.flatten().cpu().numpy()
            true_labels = labels.flatten().cpu().numpy()
            val_accuracies.append(accuracy_score(true_labels, pred_labels))
            val_losses.append(outputs.loss.item())

    print(f"Train Pixel-wise accuracy: {_mean(accuracies)}\
         Train Loss: {_mean(losses)}\
         Val Pixel-wise accuracy: {_mean(val_accuracies)}\
         Val Loss: {_mean(val_losses)}")

In [None]:
# Select the GPU when available (CPU otherwise) and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model Initialized!")

# Training part (no need to execute every time)

In [None]:
# CPU-only training variant using SGD, with IoU tracking for both the
# training and the validation phase.
# NOTE: this cell calls compute_iou, which is defined in a later cell of this
# notebook; run that cell first.
import torch
from torch.cuda.amp import GradScaler, autocast
import torch.optim as optim

# Use CPU as the device
device = torch.device('cpu')
# The model may have been moved to the GPU by an earlier cell; inputs are put
# on `device` below, so the model has to live there too.
model.to(device)

# Disabled scaler kept for parity with the AMP cell; it is not used below
# because there is no mixed precision on CPU.
scaler = GradScaler(enabled=False)  # No AMP on CPU

# Defined here so this cell does not depend on the optional AMP training cell.
criterion = nn.CrossEntropyLoss()

# Define the optimizer (SGD in your case)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


def _mean(values):
    """Return the arithmetic mean of *values*, or NaN when the list is empty."""
    return sum(values) / len(values) if values else float("nan")


for epoch in range(1, 21):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    pbar = tqdm(train_dataloader)
    accuracies = []
    losses = []
    train_ious = []  # IoU values for training batches
    val_accuracies = []
    val_losses = []
    val_ious = []  # IoU values for validation batches
    model.train()

    for idx, batch in enumerate(pbar):
        # get the inputs
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device).long()

        # Ensure labels only contain 0 and 1 (binary segmentation).
        # NOTE(review): if the masks encode the foreground as 255 this maps the
        # foreground to background — confirm the mask encoding.
        labels[labels > 1] = 0

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        outputs = model(pixel_values=pixel_values)

        # interpolate the logits to the same size as the labels
        upsampled_logits = nn.functional.interpolate(
            outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
        )

        # No AMP: direct loss calculation, one optimizer step per batch
        loss = criterion(upsampled_logits, labels)
        loss.backward()
        optimizer.step()

        # Metrics are computed from the logits of the forward pass above.
        # Labels were clamped to {0, 1}, so no 255 "ignore" value needs masking.
        predicted = upsampled_logits.argmax(dim=1)
        pred_labels = predicted.flatten().detach().cpu().numpy()
        true_labels = labels.flatten().detach().cpu().numpy()
        accuracies.append(accuracy_score(true_labels, pred_labels))
        # There is no gradient accumulation in this loop, so the loss is logged unscaled.
        losses.append(loss.item())

        # IoU for this training batch, kept separate from the validation IoUs
        train_ious.append(compute_iou(predicted, labels).item())

        pbar.set_postfix({'Batch': idx, 'Pixel-wise accuracy': _mean(accuracies), 'Loss': _mean(losses)})

    # Validation phase
    model.eval()
    with torch.no_grad():
        for batch in valid_dataloader:
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device).long()

            # Ensure labels only contain 0 and 1 for validation as well
            labels[labels > 1] = 0

            # Passing labels makes the model return its own loss, used as the validation loss.
            outputs = model(pixel_values=pixel_values, labels=labels)
            upsampled_logits = nn.functional.interpolate(
                outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            predicted = upsampled_logits.argmax(dim=1)

            pred_labels = predicted.flatten().cpu().numpy()
            true_labels = labels.flatten().cpu().numpy()
            val_accuracies.append(accuracy_score(true_labels, pred_labels))
            val_losses.append(outputs.loss.item())

            # IoU for the validation batch
            val_ious.append(compute_iou(predicted, labels).item())

    print(f"Train Pixel-wise accuracy: {_mean(accuracies)}\
         Train Loss: {_mean(losses)}\
         Train IoU: {_mean(train_ious)}\
         Val Pixel-wise accuracy: {_mean(val_accuracies)}\
         Val Loss: {_mean(val_losses)}\
         Val IoU: {_mean(val_ious)}")


In [None]:
import torch

def compute_iou(pred, target, num_classes=2):
    """
    Compute the mean Intersection over Union (IoU) of a segmentation prediction.

    The IoU is computed per class id in ``range(num_classes)`` and averaged
    over the classes that occur in the prediction or in the target. A class
    that is absent from both is skipped: it has no defined IoU and must not
    pull the mean towards zero.

    Args:
        pred (Tensor): Predicted class ids, same shape as ``target``.
        target (Tensor): Ground-truth class ids.
        num_classes (int): Number of classes in the segmentation task (default is 2 for binary)

    Returns:
        Tensor: Scalar float tensor with the mean IoU over the classes present;
        0.0 when none of the class ids occurs in either tensor.
    """
    per_class_iou = []

    for cls in range(num_classes):
        # Boolean membership masks for this class
        pred_class = pred == cls
        target_class = target == cls

        union = (pred_class | target_class).sum()
        if union == 0:
            # Class absent from both prediction and target: nothing to score
            continue

        intersection = (pred_class & target_class).sum()
        per_class_iou.append(intersection.float() / union.float())

    if not per_class_iou:
        return torch.zeros((), device=pred.device)

    return torch.stack(per_class_iou).mean()  # Mean IoU over the classes present



In [None]:
# Restore previously saved weights into the already-constructed model.
model_path = "where model is to be saved"
# map_location="cpu" lets a checkpoint written on a GPU runtime load on a
# CPU-only one; load_state_dict then copies the values into the model's own
# parameters. weights_only=True restricts unpickling to tensors and plain
# containers, which is all a state dict needs.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
print(f"Model loaded from {model_path}")


In [None]:
# Show the result of the saved model on one random test image, together with
# the accuracy and loss of the prediction.

import torch
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np


def _to_display_image(pixel_values, mean, std):
    """Undo per-channel normalisation of a CxHxW tensor and return an HxWxC uint8 array.

    Assumes the feature extractor scaled pixels to [0, 1] and then normalised
    them with `mean` / `std` — TODO confirm against the extractor settings.
    """
    image = pixel_values.detach().cpu().numpy().transpose(1, 2, 0)
    image = image * np.asarray(std) + np.asarray(mean)
    return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)


# Relies on model, feature_extractor, device, test_dataset, nn and
# accuracy_score defined by earlier cells.

# Choose a random image from the test dataset
random_image_index = random.randint(0, len(test_dataset) - 1)
encoded_inputs = test_dataset[random_image_index]

# Move inputs to the device
pixel_values = encoded_inputs["pixel_values"].unsqueeze(0).to(device)  # Add batch dimension
labels = encoded_inputs["labels"].unsqueeze(0).to(device)


# Ensure labels only contain 0 and 1 for validation as well
labels = labels.long()
labels[labels > 1] = 0

# Perform inference
with torch.no_grad():
    model.eval()
    outputs = model(pixel_values=pixel_values, labels=labels)
    upsampled_logits = nn.functional.interpolate(
        outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
    )
    predicted = upsampled_logits.argmax(dim=1)

# Calculate accuracy and loss
mask = (labels != 255)
pred_labels = predicted[mask].detach().cpu().numpy()
true_labels = labels[mask].detach().cpu().numpy()
accuracy = accuracy_score(pred_labels, true_labels)
loss = outputs.loss.item()

print(f"Accuracy: {accuracy}")
print(f"Loss: {loss}")

# Display the original image, ground truth mask, and predicted mask.
# The model input is normalised, so it has to be de-normalised before it is
# turned into an 8-bit image; casting it directly to uint8 yields garbage.
original_image = Image.fromarray(
    _to_display_image(pixel_values.squeeze(0), feature_extractor.image_mean, feature_extractor.image_std)
)
ground_truth_mask = Image.fromarray(labels.squeeze(0).cpu().numpy().astype(np.uint8))
predicted_mask = Image.fromarray(predicted.squeeze(0).cpu().numpy().astype(np.uint8))

plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.title("Original Image")
plt.imshow(original_image)

plt.subplot(1, 3, 2)
plt.title("Ground Truth Mask")
plt.imshow(ground_truth_mask, cmap="gray")

plt.subplot(1, 3, 3)
plt.title("Predicted Mask")
plt.imshow(predicted_mask, cmap="gray")

plt.show()


# Try to correct

In [None]:
# Choose a random image from the test dataset
random_image_index = random.randint(0, len(test_dataset) - 1)
encoded_inputs = test_dataset[random_image_index]

# NOTE(review): the panel is titled "Predicted Mask" but the data plotted
# below is the model input image, not a prediction.
plt.subplot(1, 3, 3)
plt.title("Predicted Mask")

# NOTE(review): pixel_values appear to be normalised floats; casting them
# straight to uint8 will not reproduce the original picture — confirm and
# de-normalise first.
plt.imshow(encoded_inputs['pixel_values'].squeeze(0).cpu().numpy().transpose(1, 2, 0).astype('uint8'))

In [None]:
# Evaluate the loaded model on the test loader and save per-batch metrics
# (accuracy, IoU, loss) to an Excel file.

import pandas as pd
from sklearn.metrics import jaccard_score

# Relies on model, device, test_dataloader, nn and accuracy_score from earlier cells.

# Create an empty list to store results
results = []

# Test loop
model.eval()
with torch.no_grad():
    for idx, batch in enumerate(test_dataloader):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Ensure labels only contain 0 and 1 for testing as well
        labels = labels.long()
        labels[labels > 1] = 0

        # Passing labels makes the model return a loss alongside the logits
        outputs = model(pixel_values=pixel_values, labels=labels)
        # Upsample the logits to the label resolution before taking the argmax
        upsampled_logits = nn.functional.interpolate(
            outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
        )
        predicted = upsampled_logits.argmax(dim=1)

        # Calculate accuracy
        # (labels were clamped to 0/1 above, so this mask selects every pixel)
        mask = (labels != 255)
        pred_labels = predicted[mask].detach().cpu().numpy()
        true_labels = labels[mask].detach().cpu().numpy()
        accuracy = accuracy_score(pred_labels, true_labels)

        # Calculate IoU
        iou = jaccard_score(true_labels, pred_labels, average='macro') # or 'weighted'

        loss = outputs.loss.item()

        # One row per batch: 'Image Index' holds the batch index from
        # enumerate(test_dataloader), not an individual image index.
        results.append({
            'Image Index': idx,
            'Accuracy': accuracy,
            'IoU': iou,
            'Loss': loss
        })

# Create a pandas DataFrame from the results
results_df = pd.DataFrame(results)

# Save results to an Excel file
results_df.to_excel('test_results.xlsx', index=False)
print("Test results saved to test_results.xlsx")


In [None]:
# Same evaluation over the test loader as the Excel-export cell, but the
# resulting DataFrame is displayed instead of being written to disk.
# Uses jaccard_score and pd imported by earlier cells.

# Create an empty list to store results
results = []

# Test loop
model.eval()
with torch.no_grad():
    for idx, batch in enumerate(test_dataloader):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Ensure labels only contain 0 and 1 for testing as well
        labels = labels.long()
        labels[labels > 1] = 0

        # Passing labels makes the model return a loss alongside the logits
        outputs = model(pixel_values=pixel_values, labels=labels)
        # Upsample the logits to the label resolution before taking the argmax
        upsampled_logits = nn.functional.interpolate(
            outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
        )
        predicted = upsampled_logits.argmax(dim=1)

        # Calculate accuracy
        # (labels were clamped to 0/1 above, so this mask selects every pixel)
        mask = (labels != 255)
        pred_labels = predicted[mask].detach().cpu().numpy()
        true_labels = labels[mask].detach().cpu().numpy()
        accuracy = accuracy_score(pred_labels, true_labels)

        # Calculate IoU
        iou = jaccard_score(true_labels, pred_labels, average='macro') # or 'weighted'

        loss = outputs.loss.item()

        # One row per batch: 'Image Index' holds the batch index from
        # enumerate(test_dataloader), not an individual image index.
        results.append({
            'Image Index': idx,
            'Accuracy': accuracy,
            'IoU': iou,
            'Loss': loss
        })

# Create a pandas DataFrame from the results
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df


In [None]:
from matplotlib import pyplot as plt
# Histogram of the per-batch test loss stored in results_df.
results_df['Loss'].plot(kind='hist', bins=20, title='Loss')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Histogram of the per-batch test IoU stored in results_df.
results_df['IoU'].plot(kind='hist', bins=20, title='IoU')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from matplotlib import pyplot as plt
# Histogram of the per-batch test accuracy stored in results_df.
results_df['Accuracy'].plot(kind='hist', bins=20, title='Accuracy')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Histogram of the 'Image Index' column (the batch counter recorded by the test loop).
results_df['Image Index'].plot(kind='hist', bins=20, title='Image Index')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Scatter plot of loss against IoU, one point per results_df row.
results_df.plot(kind='scatter', x='IoU', y='Loss', s=32, alpha=.8)
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Scatter plot of IoU against accuracy, one point per results_df row.
results_df.plot(kind='scatter', x='Accuracy', y='IoU', s=32, alpha=.8)
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Scatter plot of accuracy against the recorded batch counter ('Image Index').
results_df.plot(kind='scatter', x='Image Index', y='Accuracy', s=32, alpha=.8)
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Line plot of the test loss in results_df row order.
results_df['Loss'].plot(kind='line', figsize=(8, 4), title='Loss')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Line plot of the test IoU in results_df row order.
results_df['IoU'].plot(kind='line', figsize=(8, 4), title='IoU')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Line plot of the test accuracy in results_df row order.
results_df['Accuracy'].plot(kind='line', figsize=(8, 4), title='Accuracy')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
from matplotlib import pyplot as plt
# Line plot of the 'Image Index' column (the batch counter) in row order.
results_df['Image Index'].plot(kind='line', figsize=(8, 4), title='Image Index')
# Hide the top and right axes borders.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
# Visualise the prediction for one random test image next to its ground truth.
import matplotlib.pyplot as plt
import random


def _to_display_image(pixel_values, mean, std):
    """Undo per-channel normalisation of a CxHxW tensor and return an HxWxC uint8 array.

    Assumes the feature extractor scaled pixels to [0, 1] and then normalised
    them with `mean` / `std` — TODO confirm against the extractor settings.
    """
    image = pixel_values.detach().cpu().numpy().transpose(1, 2, 0)
    image = image * np.asarray(std) + np.asarray(mean)
    return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)


# Relies on model, feature_extractor, device, test_dataset, nn, np and Image
# defined or imported by earlier cells.

# Choose a random image from the test dataset
random_image_index = random.randint(0, len(test_dataset) - 1)
encoded_inputs = test_dataset[random_image_index]

# Move inputs to the device
pixel_values = encoded_inputs["pixel_values"].unsqueeze(0).to(device)  # Add batch dimension
labels = encoded_inputs["labels"].unsqueeze(0).to(device)

# Ensure labels only contain 0 and 1 for testing as well
labels = labels.long()
labels[labels > 1] = 0

# Perform inference
with torch.no_grad():
    model.eval()
    outputs = model(pixel_values=pixel_values, labels=labels)
    upsampled_logits = nn.functional.interpolate(
        outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
    )
    predicted = upsampled_logits.argmax(dim=1)


# Display the original image, ground truth mask, and predicted mask.
# The model input is normalised, so it has to be de-normalised before it is
# turned into an 8-bit image; casting it directly to uint8 yields garbage.
original_image = Image.fromarray(
    _to_display_image(pixel_values.squeeze(0), feature_extractor.image_mean, feature_extractor.image_std)
)
ground_truth_mask = Image.fromarray(labels.squeeze(0).cpu().numpy().astype(np.uint8))
predicted_mask = Image.fromarray(predicted.squeeze(0).cpu().numpy().astype(np.uint8))

plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.title("Original Image")
plt.imshow(original_image)

plt.subplot(1, 3, 2)
plt.title("Ground Truth Mask")
plt.imshow(ground_truth_mask, cmap="gray")

plt.subplot(1, 3, 3)
plt.title("Predicted Mask")
plt.imshow(predicted_mask, cmap="gray")

plt.show()


In [None]:
# Result visualisation: like the previous visualisation cells, but the ground
# truth mask is shown with its original (unclamped) values.
import matplotlib.pyplot as plt
import random


def _to_display_image(pixel_values, mean, std):
    """Undo per-channel normalisation of a CxHxW tensor and return an HxWxC uint8 array.

    Assumes the feature extractor scaled pixels to [0, 1] and then normalised
    them with `mean` / `std` — TODO confirm against the extractor settings.
    """
    image = pixel_values.detach().cpu().numpy().transpose(1, 2, 0)
    image = image * np.asarray(std) + np.asarray(mean)
    return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)


# Relies on model, feature_extractor, device, test_dataset, nn, np and Image
# defined or imported by earlier cells.

# Choose a random image from the test dataset
random_image_index = random.randint(0, len(test_dataset) - 1)
encoded_inputs = test_dataset[random_image_index]

# Move inputs to the device
pixel_values = encoded_inputs["pixel_values"].unsqueeze(0).to(device)  # Add batch dimension
labels = encoded_inputs["labels"].unsqueeze(0).to(device)

# Labels are deliberately left unclamped here so the displayed ground truth
# keeps its original values.
# ***This is where the distortion might happen if your original masks have more than 2 classes***
# labels = labels.long()
# labels[labels > 1] = 0

# Perform inference. The labels are NOT passed to the model: the loss is not
# used in this cell, and unclamped label values outside the model's class
# range could make the loss computation fail.
with torch.no_grad():
    model.eval()
    outputs = model(pixel_values=pixel_values)
    upsampled_logits = nn.functional.interpolate(
        outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
    )
    predicted = upsampled_logits.argmax(dim=1)


# Display the original image, ground truth mask, and predicted mask.
# The model input is normalised, so it has to be de-normalised before it is
# turned into an 8-bit image; casting it directly to uint8 yields garbage.
original_image = Image.fromarray(
    _to_display_image(pixel_values.squeeze(0), feature_extractor.image_mean, feature_extractor.image_std)
)
# ***Modified to keep original mask values***
ground_truth_mask = Image.fromarray(encoded_inputs["labels"].cpu().numpy().astype(np.uint8))
predicted_mask = Image.fromarray(predicted.squeeze(0).cpu().numpy().astype(np.uint8))

plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.title("Original Image")
plt.imshow(original_image)

plt.subplot(1, 3, 2)
plt.title("Ground Truth Mask")
plt.imshow(ground_truth_mask, cmap="gray") # You can adjust the colormap if needed

plt.subplot(1, 3, 3)
plt.title("Predicted Mask")
plt.imshow(predicted_mask, cmap="gray")

plt.show()