In [1]:
# requirements

!pip install albumentations
!git clone https://github.com/CarnoZhao/Kaggle-UWMGIT && cd Kaggle-UWMGIT && pip install -e .
!pip install ipywidgets
!pip install tqdm
!pip install pandas
!pip install numpy
!pip install opencv-python
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 # Windows
!pip3 install torch torchvision torchaudio # Mac
!pip install segmentation-models-pytorch


fatal: destination path 'Kaggle-UWMGIT' already exists and is not an empty directory.


In [2]:
# Standard library imports
import os

# Third-party libraries for data handling and computation
import numpy as np
import pandas as pd

# Image processing and augmentation libraries
import cv2
from albumentations import Compose, Normalize, Resize
from albumentations.pytorch import ToTensorV2

# PyTorch and related libraries for deep learning
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor

# Libraries for neural network models and progress visualization
import segmentation_models_pytorch as smp
from tqdm import tqdm

# Plotting library
import matplotlib.pyplot as plt

from sklearn.model_selection import GroupKFold



## 3.1 Read csv and extract meta info

In [3]:
import glob  # Import the glob module

In [4]:

df_train = pd.read_csv("data/train_4.csv")
df_train = df_train.sort_values(["id", "class"]).reset_index(drop=True)
df_train["patient"] = df_train.id.apply(lambda x: x.split("_")[0])
df_train["days"] = df_train.id.apply(lambda x: "_".join(x.split("_")[:2]))


def _parse_scan_path(path):
    """Split one scan path into the metadata encoded in its folder and file name.

    The file stem is split on "_" and read from the right: the last four
    fields are size_x, size_y, spacing_x and spacing_y, the field before them
    is the slice number, and everything before the sizes (e.g. "slice_0001")
    is the slice tag. The folder two levels above the file (e.g. "case2_day1")
    is joined with the slice tag to form the row id.

    Args:
        path: Path of a scan PNG as returned by ``glob``.

    Returns:
        dict with the keys ``id``, ``image_files``, ``spacing_x``,
        ``spacing_y``, ``size_x``, ``size_y``, ``slice`` and ``sort_key``.
    """
    # Split on the OS separator so the parsing does not depend on "/" being used.
    parts = os.path.normpath(path).split(os.sep)
    day_dir, file_name = parts[-3], parts[-1]
    fields = os.path.splitext(file_name)[0].split("_")
    return {
        "id": day_dir + "_" + "_".join(fields[:-4]),
        "image_files": path,
        "spacing_x": float(fields[-2]),
        "spacing_y": float(fields[-1]),
        "size_x": int(fields[-4]),
        "size_y": int(fields[-3]),
        "slice": int(fields[-5]),
        "sort_key": day_dir + "_" + file_name,
    }


scan_records = sorted(
    (_parse_scan_path(p) for p in glob.glob("uw-madison-gi-tract-image-segmentation/train_4/*/*/scans/*.png")),
    key=lambda rec: rec["sort_key"],
)
if not scan_records:
    raise FileNotFoundError("No scan PNGs found under uw-madison-gi-tract-image-segmentation/train_4")

# Plain lists kept under their previous names for any cell that still reads them.
all_image_files = [rec["image_files"] for rec in scan_records]
size_x = [rec["size_x"] for rec in scan_records]
size_y = [rec["size_y"] for rec in scan_records]
spacing_x = [rec["spacing_x"] for rec in scan_records]
spacing_y = [rec["spacing_y"] for rec in scan_records]

scan_meta = pd.DataFrame(scan_records).drop(columns="sort_key")

# Attach the metadata by id instead of by position: a missing or extra file
# can then no longer shift every following row onto the wrong image.
unmatched = df_train.loc[~df_train["id"].isin(scan_meta["id"]), "id"].unique()
if len(unmatched):
    raise ValueError(
        f"{len(unmatched)} ids in data/train_4.csv have no scan file, e.g. {unmatched[:3].tolist()}"
    )

# validate="many_to_one" raises if two files resolve to the same id.
df_train = df_train.merge(scan_meta, on="id", how="left", validate="many_to_one")
df_train

Unnamed: 0,id,class,segmentation,patient,days,image_files,spacing_x,spacing_y,size_x,size_y,slice
0,case2_day1_slice_0001,large_bowel,,case2,case2_day1,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,266,266,1
1,case2_day1_slice_0001,small_bowel,,case2,case2_day1,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,266,266,1
2,case2_day1_slice_0001,stomach,,case2,case2_day1,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,266,266,1
3,case2_day1_slice_0002,large_bowel,,case2,case2_day1,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,266,266,2
4,case2_day1_slice_0002,small_bowel,,case2,case2_day1,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,266,266,2
...,...,...,...,...,...,...,...,...,...,...,...
6043,case9_day22_slice_0143,small_bowel,,case9,case9_day22,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,360,310,143
6044,case9_day22_slice_0143,stomach,,case9,case9_day22,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,360,310,143
6045,case9_day22_slice_0144,large_bowel,,case9,case9_day22,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,360,310,144
6046,case9_day22_slice_0144,small_bowel,,case9,case9_day22,uw-madison-gi-tract-image-segmentation/train_4...,1.5,1.5,360,310,144


## 3.2 Make mmseg-format data (2.5D by default)

### Resizing
### Creating Mask
### Verifying the process

In [5]:
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask string into a binary 2-D array.

    Args:
        mask_rle: Whitespace-separated integers read as alternating
            ``start length`` values; starts are 1-based positions in the
            flattened mask.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of the given shape, 1 inside every run and 0
        elsewhere. The flat buffer is reshaped with NumPy's default order.
    """
    tokens = np.array(mask_rle.split(), dtype=int)
    run_starts = tokens[0::2] - 1  # shift the 1-based starts to 0-based indices
    run_ends = run_starts + tokens[1::2]
    height, width = shape
    flat_mask = np.zeros((height * width,), dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)

# Ensure the output directories exist
output_base_path = "./mmseg_train"
for sub_dir in ("images", "labels", "splits"):
    os.makedirs(os.path.join(output_base_path, sub_dir), exist_ok=True)

# Verify df_train is loaded properly
if 'df_train' not in locals():
    print("DataFrame 'df_train' is not defined.")
    # Load or define df_train here
else:
    # One label channel per class, always in the same (sorted) order, so every
    # slice yields the same number of channels even if a class row is missing.
    class_names = sorted(df_train["class"].dropna().unique())

    for day, group in tqdm(df_train.groupby("days")):
        imgs = []
        msks = []

        # sort=False keeps the files in their order of appearance in the frame;
        # grouping once avoids re-filtering the whole group for every file.
        for file_name, segms in group.groupby("image_files", sort=False):
            img = cv2.imread(file_name, cv2.IMREAD_ANYDEPTH)
            if img is None:
                print(f"Failed to read image {file_name}, skipping...")
                continue

            rle_by_class = dict(zip(segms["class"], segms.segmentation))
            channels = []
            for class_name in class_names:
                segm = rle_by_class.get(class_name)
                if pd.isna(segm):
                    # No annotation for this class on this slice -> empty channel
                    channels.append(np.zeros(img.shape[:2], dtype=np.uint8))
                else:
                    channels.append(rle_decode(segm, img.shape[:2]))
            imgs.append(img)
            msks.append(np.stack(channels, -1))

        if imgs and msks:
            imgs = np.stack(imgs, 0)
            msks = np.stack(msks, 0)
            last = imgs.shape[0] - 1
            for i in range(msks.shape[0]):
                # 2.5d data: the slice itself plus the slices two positions
                # before and after it (clamped at the ends) become 3 channels.
                img = imgs[[max(0, i - 2), i, min(last, i + 2)]].transpose(1, 2, 0)
                msk = msks[i]
                new_file_name = f"{day}_{i}.png"
                if not cv2.imwrite(f"{output_base_path}/images/{new_file_name}", img):
                    print(f"Failed to write image file: {output_base_path}/images/{new_file_name}")
                if not cv2.imwrite(f"{output_base_path}/labels/{new_file_name}", msk):
                    print(f"Failed to write label file: {output_base_path}/labels/{new_file_name}")
        else:
            print(f"No images or masks found for day {day}")


100%|██████████| 14/14 [00:11<00:00,  1.17it/s]


## 3.3 Make fold splits

In [None]:
# glob returns files in arbitrary order; sort so the folds are reproducible.
all_image_files = sorted(glob.glob("./mmseg_train/images/*"))
# The patient id is the part of the file name before the first "_".
patients = [os.path.basename(_).split("_")[0] for _ in all_image_files]

# GroupKFold needs at least as many groups as folds, so cap the fold count
# at the number of distinct patients.
n_splits = min(5, len(set(patients)))
if n_splits < 2:
    raise ValueError(
        f"Need images from at least 2 patients to build folds, found {len(set(patients))}"
    )

# Grouping by patient keeps all slices of one patient on the same side of a split.
split = list(GroupKFold(n_splits).split(patients, groups = patients))


def _write_split(path, indices):
    """Write the extension-less file name of every selected image, one per line."""
    with open(path, "w") as f:
        for idx in indices:
            f.write(os.path.splitext(os.path.basename(all_image_files[idx]))[0] + "\n")


for fold, (train_idx, valid_idx) in enumerate(split):
    _write_split(f"./mmseg_train/splits/fold_{fold}.txt", train_idx)
    _write_split(f"./mmseg_train/splits/holdout_{fold}.txt", valid_idx)

## 3.4 Train the segmentation model

In [6]:


class CustomDataset(Dataset):
    """Pairs every ``.png`` in ``image_dir`` with the same-named file in ``mask_dir``.

    Args:
        image_dir: Folder holding the input images.
        mask_dir: Folder holding one label image per input image, same file name.
        augmentation: Optional callable invoked as
            ``augmentation(image=..., mask=...)`` that returns a mapping with
            the keys ``"image"`` and ``"mask"``.

    Each item is ``(image, mask)``: ``image`` is the output of ``ToTensor``
    and ``mask`` is a 2-D ``torch.long`` tensor.
    """

    def __init__(self, image_dir, mask_dir, augmentation=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        # os.listdir() order is arbitrary; sort so an index always maps to the same file.
        self.images = sorted(img for img in os.listdir(image_dir) if img.endswith('.png'))
        self.augmentation = augmentation
        # ToTensor turns an H x W x C array into a C x H x W tensor. It rescales
        # to [0.0, 1.0] only for uint8 input; float arrays pass through unscaled.
        self.to_tensor = ToTensor()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx])

        # cv2.imread returns None instead of raising on a missing/unreadable file.
        # NOTE(review): the default flag yields an 8-bit image; if the stored PNGs
        # are 16-bit this converts them on load — TODO confirm that is intended.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

        # NOTE(review): the preprocessing cell writes each label PNG from an array
        # with one binary channel per class; IMREAD_GRAYSCALE reduces that to a
        # single luminance channel, so the per-class layout is not preserved
        # here — confirm this is the intended training target.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        if self.augmentation:
            augmented = self.augmentation(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        image = self.to_tensor(image)  # H x W x C array -> C x H x W tensor
        mask = torch.from_numpy(mask).long()  # integer targets for the loss

        return image, mask

# Basic configurations
num_classes = 3
data_root = 'mmseg_train'
img_size = 256

# U-Net with an EfficientNet-B0 encoder started from the "imagenet" weights;
# three input channels, one output channel per class.
model = smp.Unet(
    classes=num_classes,
    in_channels=3,
    encoder_weights="imagenet",
    encoder_name="efficientnet-b0",
)

# Image and label folders.
# NOTE(review): validation points at the same folders as training, and
# valid_loader is not consumed by the training code in this cell — confirm
# whether a held-out split was intended.
train_img_dir, train_ann_dir = (os.path.join(data_root, sub) for sub in ('images', 'labels'))
val_img_dir, val_ann_dir = (os.path.join(data_root, sub) for sub in ('images', 'labels'))


def _resize_and_normalize(size):
    """Albumentations pipeline: resize to ``size`` x ``size``, then per-channel normalisation."""
    return Compose([
        Resize(size, size),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0),
    ])


# Training and validation currently share the same deterministic preprocessing.
train_transform = _resize_and_normalize(img_size)
val_transform = _resize_and_normalize(img_size)

# Datasets
train_dataset = CustomDataset(train_img_dir, train_ann_dir, augmentation=train_transform)
valid_dataset = CustomDataset(val_img_dir, val_ann_dir, augmentation=val_transform)

# Loaders: only the training loader shuffles.
_loader_options = dict(batch_size=8, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_options)

# Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
def train_one_epoch(epoch, model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader`` and report the mean loss.

    Args:
        epoch: Epoch number, used only in the printed message.
        model: Module mapping a batch of images to per-class logits.
        train_loader: Iterable of ``(images, masks)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Mean of the per-batch cross-entropy losses (the printed value).

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted here so loaders without __len__ work too
    for images, masks in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = torch.nn.functional.cross_entropy(outputs, masks)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    if num_batches == 0:
        raise ValueError("train_loader yielded no batches; nothing to train on")
    average_loss = total_loss / num_batches
    print(f"Epoch {epoch}, Loss: {average_loss}")
    return average_loss

# Start training: run train_one_epoch once per epoch; each call prints that
# epoch's mean loss. The same model and optimizer objects are updated throughout.
num_epochs = 10
for epoch in range(num_epochs):
    train_one_epoch(epoch, model, train_loader, optimizer)


Epoch 0, Loss: 0.058967831355172196
Epoch 1, Loss: 0.0014153891573345939
Epoch 2, Loss: 0.0005352417700498971
Epoch 3, Loss: 0.0002747559075133823
Epoch 4, Loss: 0.00016843495373789678
Epoch 5, Loss: 0.00011382670391679904


KeyboardInterrupt: 

## 3.4.1 Handling gray image and RGB masks

In [None]:


# Custom dataset class
class CustomDataset(Dataset):
    """Variant that loads the image as grayscale and the label as a colour image.

    Args:
        image_dir: Folder holding the input images.
        mask_dir: Folder holding one label image per input image, same file name.
        augmentation: Optional callable invoked as
            ``augmentation(image=..., mask=...)`` that returns a mapping with
            the keys ``"image"`` and ``"mask"``.

    Each item is ``(image, mask)``: ``image`` is the output of ``ToTensor``
    and ``mask`` is a 2-D ``torch.long`` tensor taken from channel 0 of the
    loaded label.
    """

    def __init__(self, image_dir, mask_dir, augmentation=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        # os.listdir() order is arbitrary; sort so an index always maps to the same file.
        self.images = sorted(img for img in os.listdir(image_dir) if img.endswith('.png'))
        self.augmentation = augmentation
        # ToTensor turns an H x W x C array into a C x H x W tensor. It rescales
        # to [0.0, 1.0] only for uint8 input; float arrays pass through unscaled.
        self.to_tensor = ToTensor()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx])

        # Load image in grayscale and expand to three (identical) channels.
        # cv2.imread returns None instead of raising on a missing/unreadable file.
        # NOTE(review): the preprocessing cell stores three neighbouring slices
        # as the three channels of each image; a grayscale load merges them
        # into one — confirm that dropping the 2.5D context is intended.
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Load mask as a 3-channel image (OpenCV channel order is BGR)
        mask = cv2.imread(mask_path)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        if self.augmentation:
            augmented = self.augmentation(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        image = self.to_tensor(image)  # H x W x C array -> C x H x W tensor
        # Index 0 is the blue channel in OpenCV's BGR order, i.e. the first
        # channel of the array the preprocessing cell wrote.
        # NOTE(review): only that one channel becomes the target; the other two
        # label channels are ignored — confirm this is intended.
        mask = torch.from_numpy(mask[:, :, 0]).long()

        return image, mask

# Basic configurations
num_classes = 3
data_root = 'mmseg_train'
img_size = 256

# U-Net with an EfficientNet-B0 encoder started from the "imagenet" weights;
# three input channels, one output channel per class.
model = smp.Unet(
    classes=num_classes,
    in_channels=3,
    encoder_weights="imagenet",
    encoder_name="efficientnet-b0",
)

# Image and label folders.
# NOTE(review): validation points at the same folders as training, and
# valid_loader is not consumed by the training code in this cell — confirm
# whether a held-out split was intended.
train_img_dir, train_ann_dir = (os.path.join(data_root, sub) for sub in ('images', 'labels'))
val_img_dir, val_ann_dir = (os.path.join(data_root, sub) for sub in ('images', 'labels'))


def _resize_and_normalize(size):
    """Albumentations pipeline: resize to ``size`` x ``size``, then per-channel normalisation."""
    return Compose([
        Resize(size, size),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0),
    ])


# Training and validation currently share the same deterministic preprocessing.
train_transform = _resize_and_normalize(img_size)
val_transform = _resize_and_normalize(img_size)

# Datasets
train_dataset = CustomDataset(train_img_dir, train_ann_dir, augmentation=train_transform)
valid_dataset = CustomDataset(val_img_dir, val_ann_dir, augmentation=val_transform)

# Loaders: only the training loader shuffles.
_loader_options = dict(batch_size=8, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_options)

# Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
def train_one_epoch(epoch, model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader`` and report the mean loss.

    Args:
        epoch: Epoch number, used only in the printed message.
        model: Module mapping a batch of images to per-class logits.
        train_loader: Iterable of ``(images, masks)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Mean of the per-batch cross-entropy losses (the printed value).

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted here so loaders without __len__ work too
    for images, masks in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = torch.nn.functional.cross_entropy(outputs, masks)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    if num_batches == 0:
        raise ValueError("train_loader yielded no batches; nothing to train on")
    average_loss = total_loss / num_batches
    print(f"Epoch {epoch}, Loss: {average_loss}")
    return average_loss

# Start training: run train_one_epoch once per epoch; each call prints that
# epoch's mean loss. The same model and optimizer objects are updated throughout.
num_epochs = 10
for epoch in range(num_epochs):
    train_one_epoch(epoch, model, train_loader, optimizer)


## 3.5 Saving the model

In [None]:
# Everything in this section is written to one folder. torch.save() does not
# create missing parent directories, so make sure the folder exists first.
# NOTE(review): this is a user-specific absolute path; adjust per machine.
model_dir = '/Users/arahjou/Downloads/uw_madison/mmseg_train/model'
os.makedirs(model_dir, exist_ok=True)
full_model_path = os.path.join(model_dir, 'model.pth')
state_dict_path = os.path.join(model_dir, 'model_state_dict.pth')

# Saving the entire model (pickles the whole module object)
torch.save(model, full_model_path)

# Loading the entire model
# A pickled module can only be restored with weights_only=False; PyTorch
# versions that default to weights_only=True refuse it otherwise. Unpickling
# can execute arbitrary code, so do this only for files you wrote yourself.
model = torch.load(full_model_path, weights_only=False)
model.eval()

# Saving the state dictionary (parameters and buffers only)
torch.save(model.state_dict(), state_dict_path)
num_classes = 3
# Loading the state dictionary into a freshly built model of the same architecture
model = smp.Unet(
    encoder_name="efficientnet-b0",
    encoder_weights=None,  # Set to None to not load default pretrained weights
    in_channels=3,
    classes=num_classes
)
model.load_state_dict(torch.load(state_dict_path))
model.eval()

## 3.4 (variant: save a checkpoint whenever the epoch loss improves)

In [None]:
def train_one_epoch(epoch, model, train_loader, optimizer):
    """Train for one pass over ``train_loader`` and return the mean batch loss.

    Args:
        epoch: Epoch number, used only in the printed message.
        model: Module mapping a batch of images to per-class logits.
        train_loader: Sized iterable of ``(images, masks)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        The summed per-batch cross-entropy loss divided by ``len(train_loader)``.
    """
    model.train()
    batch_losses = []
    for images, masks in train_loader:
        optimizer.zero_grad()
        loss = torch.nn.functional.cross_entropy(model(images), masks)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    average_loss = sum(batch_losses) / len(train_loader)
    print(f"Epoch {epoch}, Loss: {average_loss}")
    return average_loss

# Start training and save the weights whenever an epoch improves on the best
# mean training loss seen so far.
num_epochs = 10
best_loss = float('inf')

# torch.save() does not create missing parent directories.
# NOTE(review): this is a user-specific absolute path; adjust per machine.
checkpoint_dir = '/Users/arahjou/Downloads/uw_madison/model'
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    avg_loss = train_one_epoch(epoch, model, train_loader, optimizer)
    if avg_loss < best_loss:
        best_loss = avg_loss
        # Save the best model; the epoch number in the file name means each
        # improvement is kept as its own file.
        torch.save(model.state_dict(), f'{checkpoint_dir}/best_model_epoch_{epoch}.pth')
        print(f"Saved Best Model at Epoch {epoch} with Loss {best_loss}")

## 3.6 Using Segmentation

In [None]:
# Step 1: Load Your Pre-trained Model
num_classes = 3
# The architecture must match the one the weights were saved from.
model = smp.Unet(
    encoder_name="efficientnet-b0",
    encoder_weights=None,  # Assuming you are loading your custom trained weights
    in_channels=3,
    classes=num_classes
)
# Read the state dict from the location the "saving model" section writes it
# to (.../mmseg_train/model/); no cell writes a model_state_dict.pth under
# .../uw_madison/model/.
model.load_state_dict(torch.load('/Users/arahjou/Downloads/uw_madison/mmseg_train/model/model_state_dict.pth'))
model.eval()

In [None]:
# Step 2: Prepare the Image
# Use the same 256 x 256 resolution the model was trained at. The U-Net used
# here expects height and width divisible by 32, which 266 is not.
img_size = 256

# Define the transformation using only albumentations
transform = Compose([
    Resize(height=img_size, width=img_size),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0),
    ToTensorV2()  # H x W x C array -> C x H x W tensor
])

# Load and preprocess the image
def load_and_transform_image(image_path):
    """Read an image file and return it as a model-ready batch of one.

    Args:
        image_path: Path of the image to load.

    Returns:
        The output of the module-level ``transform`` with a leading batch
        dimension added.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"No image found at {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    transformed = transform(image=image)  # Apply the transformations
    image_tensor = transformed['image'].unsqueeze(0)  # Add batch dimension
    return image_tensor

# Example usage
# NOTE(review): this reads a raw scan, whereas training read the files under
# mmseg_train/images that the preprocessing cell produced — confirm the model
# should be fed raw scans here.
image_path = '/Users/arahjou/Downloads/uw_madison/train/case9/case9_day22/scans/slice_0071_360_310_1.50_1.50.png'
image_tensor = load_and_transform_image(image_path)
print(image_tensor.shape)


In [None]:
# Step 3: Perform Inference
# The forward pass runs without autograd bookkeeping; the prediction is, for
# every pixel, the index of the class with the largest output value.
with torch.no_grad():
    output = model(image_tensor)
    prediction = output.argmax(dim=1)

In [None]:
# Step 4: Post-process the Output
# Drop all size-1 dimensions (including the batch dimension), make sure the
# data lives on the CPU, then hand it to NumPy.
_prediction_on_cpu = prediction.squeeze().cpu()
predicted_mask = _prediction_on_cpu.numpy()

In [None]:
# Step 5: Visualize the Results
# Draw the predicted class-index map on its own axes with a gray colour map.
fig, ax = plt.subplots()
ax.imshow(predicted_mask, cmap='gray')
plt.show()