In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms.functional as TF
import os
import numpy as np

In [9]:
# Define your custom dataset for semantic segmentation
class CustomDataset(Dataset):
    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform

        self.images = [img for img in os.listdir(image_folder) if img.endswith(".jpg")]
        self.masks = [mask for mask in os.listdir(mask_folder) if mask.endswith(".layers.txt")]
        # Load the corresponding mask from the text file
        mask_path = "path/to/test/mask.txt"  # Replace with the path to your test mask

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_folder, self.images[idx])
        mask_path = os.path.join(self.mask_folder, self.masks[idx])

        image = Image.open(img_path).convert("RGB")
         # mask = Image.open(mask_path).convert("L")
        
        with open(mask_path, 'r') as file:
            lines = file.readlines()
            mask_data = [list(map(int, line.strip().split())) for line in lines]

        # Convert the mask data to a numpy array
        mask_np = np.array(mask_data)
        # Convert the numpy array to a PyTorch tensor
        mask_tensor = torch.from_numpy(mask_np).unsqueeze(0)  # Add batch dimension

        if self.transform:
            image, mask = self.transform(image, mask_tensor)

        return image, mask_tensor


In [3]:
# Define your custom transformation (you might need to adjust it based on your requirements)
class CustomTransform:
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])

    def __call__(self, image, mask):
        image = self.transform(image)
        mask = self.transform(mask)

        return image, mask


In [4]:
# Define the model using ResNet18 as the backbone
class SegmentationModel(nn.Module):
    def __init__(self, num_classes):
        super(SegmentationModel, self).__init__()
        resnet18 = models.resnet18(pretrained=True)
        self.encoder = nn.Sequential(*list(resnet18.children())[:-2])
        self.decoder = nn.Conv2d(512, num_classes, kernel_size=1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x



In [11]:
# Set your data and model paths
image_folder = "./iccv09data/images"
mask_folder = "./iccv09data/labels"
num_classes = 21  # Adjust based on your dataset

# Create dataset and DataLoader
transform = CustomTransform()
dataset = CustomDataset(image_folder, mask_folder, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=False) #originally true shuffle

# Instantiate the model
model = SegmentationModel(num_classes)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    for images, masks in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}")

# Save the trained model
torch.save(model.state_dict(), "semantic_segmentation_model.pth")

TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>

In [None]:
model.load_state_dict(torch.load("semantic_segmentation_model.pth"))
model.eval()  # Set the model to evaluation mode

# # Define a transformation for the input image
# transform = transforms.Compose([
#     transforms.Resize((256, 256)),
#     transforms.ToTensor(),
# ])

# Load a new image for testing
test_image_path = "iccv09Data/images/0000047.jpg"  # Replace with the path to your test image
test_image = Image.open(test_image_path).convert("RGB")
# test_image = transform(test_image).unsqueeze(0)  # Add batch dimension

# Make predictions
with torch.no_grad():
    output = model(test_image)

# Convert the output to probabilities and get the predicted class for each pixel
probabilities = torch.softmax(output, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).squeeze().numpy()

# You can now visualize or further process the predicted segmentation mask
# For example, you can use the predicted_class as an RGB image for visualization
predicted_rgb = np.zeros_like(test_image[0].numpy())
for i in range(num_classes):
    predicted_rgb[predicted_class == i] = np.array([i, i, i])

# Visualize the original image and the predicted segmentation mask
import matplotlib.pyplot as plt

plt.subplot(1, 2, 1)
plt.imshow(np.transpose(test_image[0].numpy(), (1, 2, 0)))
plt.title('Original Image')

plt.subplot(1, 2, 2)
plt.imshow(predicted_rgb)
plt.title('Predicted Segmentation Mask')

plt.show()