In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from torchvision.io import read_image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split, DataLoader, ConcatDataset
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
from torchvision.transforms import ToTensor
from PIL import Image
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision 
from torchvision import transforms
from torchinfo import summary
import timm
class CustomImageDataset(Dataset):
    def __init__(self, img_dir, label_dir, resize=None, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.resize = resize
        self.transform = transform
        self.images = os.listdir(self.img_dir)

    def __len__(self):
        return len(self.images)
    def read_mask(self, mask_path):
        image = cv2.imread(mask_path)
        image = cv2.resize(image, self.resize)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # lower boundary RED color range values; Hue (0 - 10)
        lower1 = np.array([0, 100, 20])
        upper1 = np.array([10, 255, 255])
        # upper boundary RED color range values; Hue (160 - 180)
        lower2 = np.array([160,100,20])
        upper2 = np.array([179,255,255])
        lower_mask = cv2.inRange(image, lower1, upper1)
        upper_mask = cv2.inRange(image, lower2, upper2)
        
        red_mask = lower_mask + upper_mask;
        red_mask[red_mask != 0] = 1

        # boundary GREEN color range values; Hue (36 - 70)
        green_mask = cv2.inRange(image, (36, 25, 25), (70, 255, 255))
        green_mask[green_mask != 0] = 2

        full_mask = cv2.bitwise_or(red_mask, green_mask)
        full_mask = np.expand_dims(full_mask, axis=-1) 
        full_mask = full_mask.astype(np.uint8)
        return full_mask

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.images[idx])
        image = cv2.imread(img_path)  # Đọc ảnh dưới dạng BGR
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.read_mask(label_path)  # Đọc nhãn dưới dạng BGR
        image = cv2.resize(image, self.resize)
        if self.transform:
            transformed = self.transform(image=image, mask=label)
            image = transformed['image']
            label = transformed['mask']
        return image, label
    def show_image(self, idx):
        img_path = os.path.join(self.img_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.images[idx])
        image = plt.imread(img_path)
        label = plt.imread(label_path)
        fig, axs = plt.subplots(1, 2, figsize=(10, 5))
        axs[0].imshow(image)
        axs[0].set_title('Image')
        axs[1].imshow(label)
        axs[1].set_title('Label')
        plt.show()

In [3]:
image_path = []
TRAIN_DIR = '/kaggle/input/bkai-igh-neopolyp/train/train'
for root, dirs, files in os.walk(TRAIN_DIR):
    for file in files:
        path = os.path.join(root,file)
        image_path.append(path)
mask_path = []
TRAIN_MASK_DIR = '/kaggle/input/bkai-igh-neopolyp/train_gt/train_gt'
for root, dirs, files in os.walk(TRAIN_MASK_DIR):
    for file in files:
        path = os.path.join(root,file)
        mask_path.append(path)

1000

In [5]:
trainsize = 352
batch_size = 8

dataset = CustomImageDataset(img_dir= TRAIN_DIR,
                             label_dir= TRAIN_MASK_DIR,
                             resize= (trainsize,trainsize),
                             transform = None)
                             total_size = len(dataset)
train_size = int(total_size * 0.9)
valid_size = total_size - train_size

# Split the dataset
train_dataset, val_dataset = random_split(dataset, [train_size, valid_size])

In [1]:
class CustomDataset(CustomImageDataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __getitem__(self, index):
        image, label = self.dataset[index] 
        if self.transform:
            transformed = self.transform(image=image, mask=label)
            image = transformed['image']
            label = transformed['mask']
            label = label.permute(2,0,1)
        return image, label

    def __len__(self):
        return len(self.dataset)
train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomGamma (gamma_limit=(70, 130), always_apply=False, p=0.2),
    A.RGBShift(p=0.3, r_shift_limit=10, g_shift_limit=10, b_shift_limit=10),
    A.OneOf([A.Blur(), A.GaussianBlur(), A.GlassBlur(), A.MotionBlur(), A.GaussNoise(), A.Sharpen(), A.MedianBlur(), A.MultiplicativeNoise()]),
    A.RandomSnow(snow_point_lower=0.1, snow_point_upper=0.15, brightness_coeff=1.5, p=0.09),
    A.RandomShadow(p=0.1),
    A.ShiftScaleRotate(p=0.45, border_mode=cv2.BORDER_CONSTANT, shift_limit=0.15, scale_limit=0.15),
    A.RandomCrop(384, 384),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

train_dataset_not_aug = CustomDataset(train_dataset,
                             transform = val_transform)
train_dataset_aug = CustomDataset(train_dataset,
                             transform = train_transform)
val_dataset = CustomDataset(val_dataset,
                             transform = val_transform)

train_dataset_new = ConcatDataset([train_dataset_not_aug, train_dataset_aug])

train_loader = DataLoader(train_dataset_new, batch_size= batch_size, shuffle= True)
val_loader = DataLoader(val_dataset, batch_size= batch_size, shuffle= False)
print(len(train_dataset_new))

NameError: name 'CustomImageDataset' is not defined

In [9]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, use_bottleneck=False):
        super(ConvBlock, self).__init__()
        if use_bottleneck:
            self.conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * 2, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels * 2),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels * 2, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
        else:
            self.conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )

    def forward(self, x):
        return self.conv(x)

class EncoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(EncoderBlock, self).__init__()
        self.conv_block = ConvBlock(in_channels, out_channels)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        skip_connection = self.conv_block(x)
        downsampled = self.pool(skip_connection)
        return downsampled, skip_connection

class DecoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels, upsample_mode):
        super(DecoderBlock, self).__init__()
        if upsample_mode == 'conv_transpose':
            self.upsample = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        else:
            self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        
        self.conv_block = ConvBlock(in_channels, out_channels)

    def forward(self, x, skip_connection):
        x = self.upsample(x)
        x = torch.cat([x, skip_connection], dim=1)
        return self.conv_block(x)

class PolypSegmentationModel(nn.Module):
    def __init__(self, num_classes=3, upsample_mode='conv_transpose'):
        super(PolypSegmentationModel, self).__init__()
        self.num_classes = num_classes
        self.encoder = timm.create_model("resnet152", pretrained=True, features_only=True)
        
        self.encoder_block1 = EncoderBlock(64, 128)
        self.encoder_block2 = EncoderBlock(256, 512)
        self.encoder_block3 = EncoderBlock(512, 1024)
        self.encoder_block4 = EncoderBlock(1024, 2048)
        
        self.bottleneck = ConvBlock(2048, 1024)
        
        self.decoder_block1 = DecoderBlock(2048, 512, upsample_mode)
        self.decoder_block2 = DecoderBlock(1024, 256, upsample_mode)
        self.decoder_block3 = DecoderBlock(512, 128, upsample_mode)
        self.decoder_block4 = DecoderBlock(256, 64, upsample_mode)
        
        self.final_conv = nn.Conv2d(128, num_classes, kernel_size=1)
        self.final_upsample = nn.Upsample(scale_factor=2, mode='bilinear')

    def forward(self, x):
        encoder_outputs = self.encoder(x)
        x1, x2, x3, x4, x5 = encoder_outputs

        x = self.bottleneck(x5)
        x = self.decoder_block1(x, x4)
        x = self.decoder_block2(x, x3)
        x = self.decoder_block3(x, x2)
        x = self.decoder_block4(x, x1)

        x = self.final_conv(x)
        x = self.final_upsample(x)
        return x

In [11]:
color_dict= {0: (0, 0, 0),
             1: (255, 0, 0),
             2: (0, 255, 0)}
def mask_to_rgb(mask, color_dict):
    output = np.zeros((mask.shape[0], mask.shape[1], 3))
#     print(output.shape)
    for k in color_dict.keys():
        output[mask==k] = color_dict[k]

    return np.uint8(output)    

In [12]:
torch.cuda.empty_cache()
model = PolypModel(3)


model.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

In [13]:
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [14]:
!pip install wandb
!wandb login 'cc8ac86b9aed43b8c570f6a4cceca495499e73f7'

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [15]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mhoanghaiphongngu2[0m ([33mhustt[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [16]:
wandb.init(
    project = 'Polyp',
    config = {
        'learning_rate': 0.0001,
        'architecture': 'Unet',
        'dataset': 'Polyp',
        'epoch': 200
    }
)

In [None]:
# Set the number of training epochs
num_epochs = 200

# Move the model to the device (e.g., GPU)
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
train_loss_array = []
val_loss_array = []
best_val_loss = 999
# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        labels = labels.squeeze(dim=1).long()

        outputs = model(images)

        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()  # Accumulate the loss
    train_loss_epoch = total_loss / len(train_loader)

        
# Perform validation
    model.eval()
    with torch.no_grad():
        val_loss = 0
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            labels = labels.squeeze(dim=1).long()

            # Forward pass
            outputs = model(images)
            val_loss += criterion(outputs.float(),labels.long()).item()
    val_loss_epoch = val_loss/len(val_loader)
    # Print the loss of valid_dataset for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], val_loss: {val_loss/len(val_loader):.10f}")
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = { 
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_path = f'checkpoint_final.pth'
        torch.save(checkpoint, save_path)
        print('SAVE +1')
    # Calculate average loss for the epoch
    
    wandb.log({'Val_loss': val_loss_epoch,
               'Train_loss': train_loss_epoch
              })
    train_loss_array.append(train_loss_epoch)
    val_loss_array.append(val_loss_epoch)

Epoch [1/200], val_loss: 0.2002771566
SAVE +1
Epoch [2/200], val_loss: 0.1171527837
SAVE +1
Epoch [3/200], val_loss: 0.0848681732
SAVE +1
Epoch [4/200], val_loss: 0.0728300231
SAVE +1
Epoch [5/200], val_loss: 0.0582184274
SAVE +1
Epoch [6/200], val_loss: 0.0735967138
Epoch [7/200], val_loss: 0.0495599097
SAVE +1
Epoch [8/200], val_loss: 0.0432764006
SAVE +1
Epoch [9/200], val_loss: 0.0508028328
Epoch [10/200], val_loss: 0.0395696165
SAVE +1
Epoch [11/200], val_loss: 0.0480875465
Epoch [12/200], val_loss: 0.0552731057
Epoch [13/200], val_loss: 0.0446315535
Epoch [14/200], val_loss: 0.0374848564
SAVE +1
Epoch [15/200], val_loss: 0.0412464636
Epoch [16/200], val_loss: 0.0420085890
Epoch [17/200], val_loss: 0.0558841385
Epoch [18/200], val_loss: 0.0353916963
SAVE +1
Epoch [19/200], val_loss: 0.0329898317
SAVE +1
Epoch [20/200], val_loss: 0.0371768039
Epoch [21/200], val_loss: 0.0387581135
Epoch [22/200], val_loss: 0.0353940721
Epoch [23/200], val_loss: 0.0421724975
Epoch [24/200], val_loss