In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF

    
class Down(nn.Module):
    """Encoder stage: two 3x3 convolutions, each followed by BatchNorm and ReLU.

    Both convolutions use stride 1 and padding 1, so height and width are
    unchanged; only the channel count goes from ``in_channels`` to
    ``out_channels``. Pooling is not applied inside this module.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # First pass maps in_channels -> out_channels, second keeps out_channels.
        for channels_in in (in_channels, out_channels):
            stages += [
                # No conv bias: the BatchNorm that follows has its own learnable shift.
                nn.Conv2d(
                    channels_in,
                    out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.down_layer = nn.Sequential(*stages)

    def forward(self, X):
        """Apply the double-convolution stack to ``X`` and return the result."""
        return self.down_layer(X)
    
class BottleNeck(nn.Module):
    """Double 3x3 convolution block (Conv -> BatchNorm -> ReLU, twice).

    Spatial size is preserved (stride 1, padding 1); channels go from
    ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        conv_options = {"kernel_size": 3, "stride": 1, "padding": 1, "bias": False}
        first_conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        first_norm = nn.BatchNorm2d(out_channels)
        second_conv = nn.Conv2d(out_channels, out_channels, **conv_options)
        second_norm = nn.BatchNorm2d(out_channels)
        self.down_layer = nn.Sequential(
            first_conv,
            first_norm,
            nn.ReLU(inplace=True),
            second_conv,
            second_norm,
            nn.ReLU(inplace=True),
        )

    def forward(self, X):
        """Run ``X`` through the two conv/norm/activation stages."""
        return self.down_layer(X)
    
class Up(nn.Module):
    """Decoder stage: upsample, merge with the encoder skip tensor, double conv.

    With ``in_channels = N`` and ``out_channels = N/2`` (e.g. 1024 and 512):
      * the transposed convolution maps N -> N/2 channels (kernel 2, stride 2)
      * concatenating a skip tensor that carries N/2 channels gives N again,
        which is why the first convolution also takes ``in_channels``
      * the double convolution maps N -> N/2, then N/2 -> N/2
    The next decoder stage then continues from N/2 -> N/4.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up_conv = nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size=2, stride=2
        )
        conv_options = {"kernel_size": 3, "stride": 1, "padding": 1, "bias": False}
        self.DoubleConv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, **conv_options),   # N   -> N/2
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, **conv_options),  # N/2 -> N/2
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, X, skip_connection):
        """Upsample ``X``, concatenate ``skip_connection`` on dim 1, and convolve."""
        upsampled = self.up_conv(X)

        if upsampled.shape != skip_connection.shape:
            # Max pooling floors odd sizes on the way down, so the upsampled
            # map can differ from the skip tensor; resize to the skip's H x W.
            target_hw = skip_connection.shape[2:]
            upsampled = TF.resize(upsampled, target_hw)

        merged = torch.cat((upsampled, skip_connection), dim=1)
        return self.DoubleConv(merged)

class FinalConv(nn.Module):
    """Output head: a single 1x1 convolution from ``in_channels`` to ``out_channels``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # A 1x1 kernel with stride 1 leaves height and width unchanged and
        # only remaps the channels at each pixel.
        self.finalConv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
        )

    def forward(self, X):
        """Return the 1x1 convolution of ``X`` (no activation is applied)."""
        projected = self.finalConv(X)
        return projected
    
class UNet(nn.Module):
    """U-Net: four encoder stages, a bottleneck, four decoder stages, 1x1 head.

    Args:
        in_channels: number of channels of the input tensor (default 3).
        out_channels: number of channels produced by the final 1x1
            convolution (default 1).

    ``forward`` returns the raw output of the final convolution; no sigmoid
    or softmax is applied inside the model.
    """

    def __init__(self, in_channels = 3, out_channels = 1):
        super(UNet, self).__init__()
        # One parameter-free pooling layer is shared by all encoder stages.
        self.max_pool = nn.MaxPool2d(2, 2)
        self.down1 = Down(in_channels, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        self.down4 = Down(256, 512)

        # Use the dedicated BottleNeck block; it has the same layers and the
        # same ``down_layer`` attribute as Down, so state-dict keys are unchanged.
        self.bottleNeck = BottleNeck(512, 1024)

        self.up1 = Up(1024, 512)
        self.up2 = Up(512, 256)
        self.up3 = Up(256, 128)
        self.up4 = Up(128, 64)

        self.finalConv = FinalConv(64, out_channels)

    def forward(self, X):
        """Run the encoder/decoder and return the final 1x1 convolution output."""

        ### DownSampling (keep each pre-pool activation for the skip connection)
        x1_skip = self.down1(X)          # in_channels --> 64
        x1 = self.max_pool(x1_skip)

        x2_skip = self.down2(x1)         # 64 --> 128
        x2 = self.max_pool(x2_skip)

        x3_skip = self.down3(x2)         # 128 --> 256
        x3 = self.max_pool(x3_skip)

        x4_skip = self.down4(x3)         # 256 --> 512
        x4 = self.max_pool(x4_skip)

        ### BottleNeck Layer
        x5 = self.bottleNeck(x4)         # 512 --> 1024

        ### UpSampling: up_conv halves the channels, the skip restores them,
        ### and the double conv halves them again.
        x = self.up1(x5, x4_skip)        # [1024 -> 512] + skip 512 --> 512
        x = self.up2(x, x3_skip)         # [ 512 -> 256] + skip 256 --> 256
        x = self.up3(x, x2_skip)         # [ 256 -> 128] + skip 128 --> 128
        x = self.up4(x, x1_skip)         # [ 128 ->  64] + skip  64 --> 64

        x = self.finalConv(x)            # 64 --> out_channels

        return x


# def test():
#     x = torch.randn(3, 1, 572, 572)
#     model = UNet(in_channels=1, out_channels=1)
#     preds = model(x)
#     print(f"Preds shape: {preds.shape}")
#     print(x.shape == preds.shape)

# if __name__ == "__main__":
#     test()



In [None]:
import os
from PIL import Image
import pathlib

from torch.utils.data import Dataset
import numpy as np

class CarvanaDataset(Dataset):
    """Image/mask pairs read from two directories.

    The mask for an image named ``<name>.jpg`` is looked up in ``mask_dir``
    as ``<name>_mask.gif``.

    Args:
        image_dir: directory containing the input images.
        mask_dir: directory containing the masks.
        transform: optional callable invoked as
            ``transform(image=image, mask=mask)`` that returns a mapping with
            ``"image"`` and ``"mask"`` keys (the Albumentations convention).
    """

    def __init__(self, image_dir, mask_dir, transform = None):
        super().__init__()
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.images = sorted(os.listdir(image_dir))
        # Not used for lookup (masks are located by image name); kept as an
        # attribute for callers that inspect it.
        self.masks = sorted(os.listdir(mask_dir))

    def __len__(self):
        """Return the number of entries found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, key: int):
        """Load and return ``(image, mask)`` for index ``key``.

        Without a transform, ``image`` is the RGB image as a NumPy array and
        ``mask`` is a float32 NumPy array in which 255 has been mapped to 1.0.
        With a transform, both are whatever the transform returns.
        """
        image_name = self.images[key]
        image_path = os.path.join(self.image_dir, image_name)
        mask_path = os.path.join(self.mask_dir, image_name.replace(".jpg", "_mask.gif"))

        # Context managers close the underlying files right away; without
        # them the handles stay open until the Image objects are collected.
        # NumPy arrays are used because Albumentations operates on arrays.
        with Image.open(image_path) as image_file:
            image = np.array(image_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)

        # Binarise: foreground is stored as 255 in the mask files.
        mask[mask == 255.0] = 1.0

        if self.transform:
            augmentations = self.transform(image = image, mask = mask)
            image = augmentations["image"]
            mask = augmentations["mask"]
        return image, mask

In [None]:
import torch
# from dataset import CarvanaDataset
from torch.utils.data import DataLoader
import torchvision

def get_loaders(
    train_dir,
    train_maskdir,
    val_dir, 
    val_maskdir,
    train_transform,
    val_transform,
    batch_size,
    num_workers,
    pin_memory = True
):
    """Build the training and validation ``DataLoader`` objects.

    Each loader wraps a ``CarvanaDataset`` created from the matching
    image/mask directories and transform. The training loader shuffles;
    the validation loader does not. ``batch_size``, ``num_workers`` and
    ``pin_memory`` are applied to both.

    Returns:
        ``(train_dataloader, val_dataloader)``
    """
    train_data = CarvanaDataset(
        image_dir=train_dir, mask_dir=train_maskdir, transform=train_transform
    )
    val_data = CarvanaDataset(
        image_dir=val_dir, mask_dir=val_maskdir, transform=val_transform
    )

    # Options shared by both loaders; only ``shuffle`` differs.
    shared_options = {
        "batch_size": batch_size,
        "num_workers": num_workers,
        "pin_memory": pin_memory,
    }
    train_dataloader = DataLoader(dataset=train_data, shuffle=True, **shared_options)
    val_dataloader = DataLoader(dataset=val_data, shuffle=False, **shared_options)

    return train_dataloader, val_dataloader

def save_checkpoint(state, filename = "my_checkpoint.pth.tar"):
    """Announce the save on stdout, then serialize ``state`` to ``filename``."""
    print("==> Saving CheckPoint")
    torch.save(obj=state, f=filename)

def load_checkpoint(checkpoint, model):
    """Copy the weights stored under ``checkpoint["state_dict"]`` into ``model``."""
    print("==> Loading CheckPoint")
    weights = checkpoint["state_dict"]
    model.load_state_dict(weights)
    
def check_accuracy(loader, model, device):
    """Print pixel accuracy and the mean per-batch Dice score over ``loader``.

    Model outputs are passed through a sigmoid and thresholded at 0.5 before
    being compared with the targets, which get a channel axis added at dim 1.
    The model is put in eval mode for the pass and switched back to train
    mode before returning. Nothing is returned.
    """
    correct_pixels = 0
    total_pixels = 0
    dice_total = 0

    model.eval()
    with torch.inference_mode():
        for images, masks in loader:
            images = images.to(device)
            masks = masks.to(device).unsqueeze(1)

            probabilities = torch.sigmoid(model(images))
            binary = (probabilities > 0.5).float()

            correct_pixels += (binary == masks).sum()
            total_pixels += binary.numel()

            # Dice = 2*|A n B| / (|A| + |B|); the epsilon guards against 0/0.
            overlap = (binary * masks).sum()
            dice_total += (2 * overlap) / ((binary + masks).sum() + 1e-8)

    accuracy_pct = (correct_pixels / total_pixels) * 100
    print(f"Got {correct_pixels}/{total_pixels} with accuracy {accuracy_pct: .3f}")
    print(f"Dice score: {dice_total/len(loader)}")
    model.train()
    
def save_predictions_as_imgs(loader, model,device, folder_dir = "saved_images/"):
    """Write thresholded predictions and their targets as PNGs, one pair per batch.

    For batch index ``idx`` two files are written inside ``folder_dir``:
    ``pred_<idx>.png`` (sigmoid output thresholded at 0.5) and ``<idx>.png``
    (the target with a channel axis added at dim 1).

    Args:
        loader: iterable yielding ``(X, y)`` batches.
        model: network to evaluate; it is put in eval mode for the pass and
            switched back to train mode afterwards.
        device: device the batches are moved to.
        folder_dir: output directory, with or without a trailing separator.
            It is created if it does not exist.
    """
    import os  # local import keeps this helper self-contained

    # save_image does not create missing directories.
    if folder_dir:
        os.makedirs(folder_dir, exist_ok=True)

    model.eval()

    for idx, (X, y) in enumerate(loader):
        X, y = X.to(device), y.to(device)
        with torch.inference_mode():
            preds = torch.sigmoid(model(X))
            preds = (preds > 0.5).float()

        # Build both paths the same way so the files land inside folder_dir
        # whether or not it ends with a separator.
        torchvision.utils.save_image(preds, os.path.join(folder_dir, f"pred_{idx}.png"))
        torchvision.utils.save_image(y.unsqueeze(1), os.path.join(folder_dir, f"{idx}.png"))

    model.train()

In [None]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim
# from u_net_model import UNet
# # from model import UNET as UNet
# from torch.utils.data import DataLoader
# from dataset import CarvanaDataset

# from utils import(
#     load_checkpoint,
#     save_checkpoint,
#     get_loaders,
#     check_accuracy,
#     save_predictions_as_imgs
# )

# --- Optimisation ---
LEARNING_RATE = 1e-6
BATCH_SIZE = 32
EPOCHS = 12

# --- Runtime ---
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False  # when True, weights are restored from a checkpoint before training

# --- Input size fed to the network (images are resized to this) ---
IMAGE_HEIGHT = 160
IMAGE_WIDTH = 240

# --- Dataset locations ---
_DATA_ROOT = "/kaggle/input/unetcarvanadataset/data"
TRAIN_IMG_DIR = f"{_DATA_ROOT}/train"
TRAIN_MASK_DIR = f"{_DATA_ROOT}/train_masks"
VAL_IMG_DIR = f"{_DATA_ROOT}/val"
VAL_MASK_DIR = f"{_DATA_ROOT}/val_masks"

def train_fn(loader, model, optimizer, loss_fn, scaler):
    """Train ``model`` for one pass over ``loader``.

    Batches are moved to the module-level ``DEVICE``; targets are cast to
    float and given a channel axis at dim 1 before the loss is computed.
    The running loss is shown on the tqdm progress bar.

    Args:
        loader: iterable yielding ``(data, targets)`` batches.
        model: network being trained.
        optimizer: optimizer stepped once per batch.
        loss_fn: callable invoked as ``loss_fn(predictions, targets)``.
        scaler: gradient scaler for mixed-precision training. When it is
            ``None`` or reports itself disabled, plain full-precision
            training is used.
    """
    # The scaler used to be accepted but ignored; honour it when it is active.
    use_amp = scaler is not None and scaler.is_enabled()

    # Nothing inside the loop changes the mode, so set it once up front.
    model.train()
    loop = tqdm(loader)

    for data, targets in loop:
        data = data.to(device = DEVICE)
        targets = targets.float().unsqueeze(1).to(device = DEVICE)

        # forward (autocast is a no-op when use_amp is False)
        with torch.autocast(device_type=data.device.type, enabled=use_amp):
            predictions = model(data)
            loss = loss_fn(predictions, targets)

        # backward
        optimizer.zero_grad()
        if use_amp:
            # Scale the loss so low-precision gradients do not underflow;
            # step() unscales them and skips the update on inf/NaN.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        loop.set_postfix(loss = loss.item())


def _resize_to_model_input():
    """Return a fresh resize step targeting IMAGE_HEIGHT x IMAGE_WIDTH."""
    return A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH)


def _scale_pixels():
    """Return a fresh normalisation step (mean 0, std 1, max pixel value 255)."""
    return A.Normalize(
        mean=[0.0, 0.0, 0.0],
        std = [1.0, 1.0, 1.0],
        max_pixel_value=255.0
    )


# Training pipeline: resize, random geometric augmentation, normalise, to tensor.
train_transform = A.Compose(
    [
        _resize_to_model_input(),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        _scale_pixels(),
        ToTensorV2()
    ]
)

# Validation pipeline: same resize/normalise/to-tensor steps, no augmentation.
val_transform = A.Compose(
    [
        _resize_to_model_input(),
        _scale_pixels(),
        ToTensorV2()
    ]
)

model = UNet(in_channels=3, out_channels=1).to(device=DEVICE)
# "WithLogits" because the model returns raw scores: no sigmoid follows finalConv.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

train_loader, val_loader = get_loaders(train_dir=TRAIN_IMG_DIR,
                                        train_maskdir=TRAIN_MASK_DIR,
                                        val_dir=VAL_IMG_DIR,
                                        val_maskdir=VAL_MASK_DIR,
                                        train_transform=train_transform,
                                        val_transform=val_transform,
                                        batch_size=BATCH_SIZE,
                                        num_workers=NUM_WORKERS,
                                        pin_memory=PIN_MEMORY)

if LOAD_MODEL:
    # map_location lets a checkpoint written on a GPU session be restored
    # on whatever device this session is using (including CPU-only).
    load_checkpoint(torch.load("my_checkpoint.pth.tar", map_location=DEVICE), model)

# Gradient scaling only applies on CUDA; disable it explicitly elsewhere
# instead of relying on the scaler to warn and turn itself off.
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

for epoch in range(EPOCHS):
    print(f"------Epoch: {epoch}------")

    train_fn(train_loader, model, optimizer, loss_fn, scaler)

    # save the model (overwrites the previous epoch's checkpoint file)
    checkpoint = {
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict()
    }
    save_checkpoint(checkpoint)

    # check accuracy
    check_accuracy(val_loader, model, device=DEVICE)

    # print some examples to a folder
    save_predictions_as_imgs(
        val_loader,
        model,
        folder_dir="/kaggle/working/",
        device=DEVICE
    )
