In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
from PIL import Image
import os

# Define custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform

        self.images = os.listdir(image_dir)
        self.masks = os.listdir(mask_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        image = Image.open(img_path)
        mask = Image.open(mask_path)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define SAM model
class SAM(nn.Module):
    def __init__(self, in_channels):
        super(SAM, self).__init__()
        
        # Spatial attention block
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # Calculate spatial attention map
        attention_map = self.sigmoid(self.conv1(x))
        
        # Apply attention to the input feature map
        x = x * attention_map
        
        return x, attention_map

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

# Define dataset and dataloader
image_dir = "/kaggle/input/dataset0000/data befor split/images"
mask_dir = "/kaggle/input/dataset0000/data befor split/masks"

dataset = CustomDataset(image_dir, mask_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Initialize SAM model
input_channels = 3  # Assuming RGB images
sam_model = SAM(input_channels).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(sam_model.parameters(), lr=0.001)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    for images, masks in dataloader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs, _ = sam_model(images)
        loss = criterion(outputs, masks.argmax(dim=1))  # Compute loss
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        total_predictions += masks.size(0) * 256 * 256
        correct_predictions += (predicted == masks.argmax(dim=1)).sum().item()

    epoch_accuracy = correct_predictions / total_predictions
    epoch_loss = running_loss / len(dataloader)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

print("Training finished.")


Epoch [1/3], Loss: 1.0890, Accuracy: 0.3586
Epoch [2/3], Loss: 1.0860, Accuracy: 0.3586
Epoch [3/3], Loss: 1.0848, Accuracy: 0.3586
Training finished.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
from PIL import Image
import os
from sklearn.model_selection import train_test_split

# Define custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        image = Image.open(image_path)
        mask = Image.open(mask_path)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define SAM model
class SAM(nn.Module):
    def __init__(self, in_channels):
        super(SAM, self).__init__()
        
        # Spatial attention block
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # Calculate spatial attention map
        attention_map = self.sigmoid(self.conv1(x))
        
        # Apply attention to the input feature map
        x = x * attention_map
        
        return x, attention_map

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

# Define dataset paths
image_dir = "/kaggle/input/dataset0000/data befor split/images"
mask_dir = "/kaggle/input/dataset0000/data befor split/masks"

# Get image and mask paths
image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir)]
mask_paths = [os.path.join(mask_dir, mask) for mask in os.listdir(mask_dir)]

# Split data into train and test sets
train_image_paths, test_image_paths, train_mask_paths, test_mask_paths = train_test_split(
    image_paths, mask_paths, test_size=0.2, random_state=42
)

# Define datasets and dataloaders
train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform=transform)
test_dataset = CustomDataset(test_image_paths, test_mask_paths, transform=transform)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Initialize SAM model
input_channels = 3  # Assuming RGB images
sam_model = SAM(input_channels).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(sam_model.parameters(), lr=0.001)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    sam_model.train()  # Set model to training mode
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    for images, masks in train_dataloader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs, _ = sam_model(images)
        loss = criterion(outputs, masks.argmax(dim=1))  # Compute loss
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        total_predictions += masks.size(0) * 256 * 256
        correct_predictions += (predicted == masks.argmax(dim=1)).sum().item()

    train_accuracy = correct_predictions / total_predictions
    train_loss = running_loss / len(train_dataloader)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")



# Evaluate on test data
sam_model.eval()  # Set model to evaluation mode
test_running_loss = 0.0
test_correct_predictions = 0
test_total_predictions = 0

with torch.no_grad():
    for images, masks in test_dataloader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs, _ = sam_model(images)
        test_loss = criterion(outputs, masks.argmax(dim=1))  # Compute loss
        test_running_loss += test_loss.item()

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        test_total_predictions += masks.size(0) * 256 * 256
        test_correct_predictions += (predicted == masks.argmax(dim=1)).sum().item()

test_accuracy = test_correct_predictions / test_total_predictions
test_loss = test_running_loss / len(test_dataloader)

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Epoch [1/3], Train Loss: 1.0881, Train Accuracy: 0.3604
Epoch [2/3], Train Loss: 1.0859, Train Accuracy: 0.3604
Epoch [3/3], Train Loss: 1.0849, Train Accuracy: 0.3604
Test Loss: 1.0854, Test Accuracy: 0.3513


In [3]:
# Save model weights
torch.save(sam_model.state_dict(), "sam_model_weights.pth")
print("Model weights saved.")

Model weights saved.


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
from PIL import Image
import os

# Define custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform

        self.images = os.listdir(image_dir)
        self.masks = os.listdir(mask_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        image = Image.open(img_path)
        mask = Image.open(mask_path)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define SAM model with U-Net architecture
class SAM_UNet(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(SAM_UNet, self).__init__()
        
        # Encoder (downsampling)
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # SAM block
        self.sam = SAM(64)
        
        # Decoder (upsampling)
        self.decoder = nn.Sequential(
            nn.Conv2d(64, out_channels, kernel_size=3, padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Encoder
        x = self.encoder(x)
        
        # SAM block
        x, attention_map = self.sam(x)
        
        # Decoder
        x = self.decoder(x)
        
        return x, attention_map

# Define SAM model
class SAM(nn.Module):
    def __init__(self, in_channels):
        super(SAM, self).__init__()
        
        # Spatial attention block
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # Calculate spatial attention map
        attention_map = self.sigmoid(self.conv1(x))
        
        # Apply attention to the input feature map
        x = x * attention_map
        
        return x, attention_map

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

# Define dataset and dataloader
image_dir = "/kaggle/input/dataset0000/data befor split/images"
mask_dir = "/kaggle/input/dataset0000/data befor split/masks"

dataset = CustomDataset(image_dir, mask_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Initialize SAM_UNet model
input_channels = 3  # Assuming RGB images
output_channels = 1  # Assuming grayscale masks
sam_unet_model = SAM_UNet(input_channels, output_channels).to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary cross-entropy loss for binary classification task
optimizer = optim.Adam(sam_unet_model.parameters(), lr=0.001)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    running_loss = 0.0
    
    for images, masks in dataloader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs, _ = sam_unet_model(images)
        loss = criterion(outputs, masks)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader)}")

print("Training finished.")


ValueError: Using a target size (torch.Size([4, 3, 256, 256])) that is different to the input size (torch.Size([4, 1, 128, 128])) is deprecated. Please ensure they have the same size.