In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip "/content/drive/MyDrive/실용적딥러닝/buildings.zip" -d "/content/"
%cd /content/

Archive:  /content/drive/MyDrive/실용적딥러닝/buildings.zip
   creating: /content/buildings/
  inflating: /content/__MACOSX/._buildings  
  inflating: /content/buildings/.DS_Store  
  inflating: /content/__MACOSX/buildings/._.DS_Store  
   creating: /content/buildings/train_images/
   creating: /content/buildings/train_masks/
  inflating: /content/buildings/train_images/48.png  
  inflating: /content/buildings/train_images/49.png  
  inflating: /content/buildings/train_images/8.png  
  inflating: /content/buildings/train_images/9.png  
  inflating: /content/buildings/train_images/28.png  
  inflating: /content/buildings/train_images/15.png  
  inflating: /content/buildings/train_images/17.png  
  inflating: /content/buildings/train_images/39.png  
  inflating: /content/buildings/train_images/11.png  
  inflating: /content/buildings/train_images/10.png  
  inflating: /content/buildings/train_images/38.png  
  inflating: /content/__MACOSX/buildings/train_images/._38.png  
  inflatin

In [3]:
import os

print(os.getcwd())

/content


In [4]:
pip install torch torchvision segmentation-models-pytorch albumentations

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)


In [5]:
pip install albumentations



In [6]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [7]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import segmentation_models_pytorch as smp
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt


In [47]:
class BuildingsDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None, patch_size=256, stride=256):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.images = os.listdir(images_dir)
        self.transform = transform
        self.patch_size = patch_size
        self.stride = stride
        self.image_patches = []
        self.mask_patches = []

        for image_name in self.images:
            img_path = os.path.join(self.images_dir, image_name)
            mask_path = os.path.join(self.masks_dir, image_name.replace('.jpg', '.png'))
            image = np.array(Image.open(img_path).convert("RGB"))
            mask = np.array(Image.open(mask_path), dtype=np.float32)
            mask = np.expand_dims(mask, axis=-1)

            img_patches, mask_patches = self.crop_to_patches(image, mask, self.patch_size, self.stride)
            self.image_patches.extend(img_patches)
            self.mask_patches.extend(mask_patches)

    def __len__(self):
        return len(self.image_patches)

    def __getitem__(self, idx):
        image = self.image_patches[idx]
        mask = self.mask_patches[idx]

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

    def crop_to_patches(self, image, mask, patch_size, stride):
        patches_img = []
        patches_mask = []
        img_height, img_width = image.shape[:2]
        for y in range(0, img_height, stride):
            for x in range(0, img_width, stride):
                patch_img = image[y:y+patch_size, x:x+patch_size]
                patch_mask = mask[y:y+patch_size, x:x+patch_size]
                if patch_img.shape[0] == patch_size and patch_img.shape[1] == patch_size:
                    patches_img.append(patch_img)
                    patches_mask.append(patch_mask)
        return patches_img, patches_mask

In [48]:
def get_training_augmentation():
    train_transform = A.Compose([
        # A.HorizontalFlip(p=0.5),
        # A.VerticalFlip(p=0.5),
        A.Normalize(),
        ToTensorV2()
    ])
    return train_transform


In [49]:
from torch import nn

In [50]:
class DiceLoss(nn.Module):
    def __init__(self, weight=None, size_average=True):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):

        # comment out if your model contains a sigmoid or equivalent activation layer
        inputs = torch.sigmoid(inputs)

        # flatten label and prediction tensors
        inputs = inputs.view(-1)
        targets = targets.view(-1)

        intersection = (inputs * targets).sum()
        dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)

        return 1 - dice

In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

dataset = BuildingsDataset(
    images_dir='buildings/train_images',
    masks_dir='buildings/train_masks',
    transform=get_training_augmentation()
)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)



In [52]:
print(len(dataset))

1960


In [53]:
import torchvision

resnet = torchvision.models.resnet.resnet50(pretrained=True)

class ConvBlock(nn.Module):
    """
    Helper module that consists of a Conv -> BN -> ReLU
    """

    def __init__(self, in_channels, out_channels, padding=1, kernel_size=3, stride=1, with_nonlinearity=True):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, padding=padding, kernel_size=kernel_size, stride=stride)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.with_nonlinearity = with_nonlinearity

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.with_nonlinearity:
            x = self.relu(x)
        return x


class Bridge(nn.Module):
    """
    This is the middle layer of the UNet which just consists of some
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.bridge = nn.Sequential(
            ConvBlock(in_channels, out_channels),
            ConvBlock(out_channels, out_channels)
        )

    def forward(self, x):
        return self.bridge(x)


class UpBlockForUNetWithResNet50(nn.Module):
    """
    Up block that encapsulates one up-sampling step which consists of Upsample -> ConvBlock -> ConvBlock
    """

    def __init__(self, in_channels, out_channels, up_conv_in_channels=None, up_conv_out_channels=None,
                 upsampling_method="conv_transpose"):
        super().__init__()

        if up_conv_in_channels == None:
            up_conv_in_channels = in_channels
        if up_conv_out_channels == None:
            up_conv_out_channels = out_channels

        if upsampling_method == "conv_transpose":
            self.upsample = nn.ConvTranspose2d(up_conv_in_channels, up_conv_out_channels, kernel_size=2, stride=2)
        elif upsampling_method == "bilinear":
            self.upsample = nn.Sequential(
                nn.Upsample(mode='bilinear', scale_factor=2),
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1)
            )
        self.conv_block_1 = ConvBlock(in_channels, out_channels)
        self.conv_block_2 = ConvBlock(out_channels, out_channels)

    def forward(self, up_x, down_x):
        """

        :param up_x: this is the output from the previous up block
        :param down_x: this is the output from the down block
        :return: upsampled feature map
        """
        x = self.upsample(up_x)
        x = torch.cat([x, down_x], 1)
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return x


class UNetWithResnet50Encoder(nn.Module):
    DEPTH = 6

    def __init__(self, n_classes=1):
        super().__init__()
        resnet = torchvision.models.resnet.resnet50(pretrained=True)
        down_blocks = []
        up_blocks = []
        self.input_block = nn.Sequential(*list(resnet.children()))[:3]
        self.input_pool = list(resnet.children())[3]
        for bottleneck in list(resnet.children()):
            if isinstance(bottleneck, nn.Sequential):
                down_blocks.append(bottleneck)
        self.down_blocks = nn.ModuleList(down_blocks)
        self.bridge = Bridge(2048, 2048)
        up_blocks.append(UpBlockForUNetWithResNet50(2048, 1024))
        up_blocks.append(UpBlockForUNetWithResNet50(1024, 512))
        up_blocks.append(UpBlockForUNetWithResNet50(512, 256))
        up_blocks.append(UpBlockForUNetWithResNet50(in_channels=128 + 64, out_channels=128,
                                                    up_conv_in_channels=256, up_conv_out_channels=128))
        up_blocks.append(UpBlockForUNetWithResNet50(in_channels=64 + 3, out_channels=64,
                                                    up_conv_in_channels=128, up_conv_out_channels=64))

        self.up_blocks = nn.ModuleList(up_blocks)

        self.out = nn.Conv2d(64, n_classes, kernel_size=1, stride=1)

    def forward(self, x, with_output_feature_map=False):
        pre_pools = dict()
        pre_pools[f"layer_0"] = x
        x = self.input_block(x)
        pre_pools[f"layer_1"] = x
        x = self.input_pool(x)

        for i, block in enumerate(self.down_blocks, 2):
            x = block(x)
            if i == (UNetWithResnet50Encoder.DEPTH - 1):
                continue
            pre_pools[f"layer_{i}"] = x

        x = self.bridge(x)

        for i, block in enumerate(self.up_blocks, 1):
            key = f"layer_{UNetWithResnet50Encoder.DEPTH - 1 - i}"
            x = block(x, pre_pools[key])
        output_feature_map = x
        x = self.out(x)
        del pre_pools
        if with_output_feature_map:
            return x, output_feature_map
        else:
            return x



In [55]:
from tqdm import tqdm
# 하이퍼파라미터
epoch_num = 10
learning_rate = 0.001
patience = 4
early_stopping_counter = 0  # Early stopping counter
best_val_loss = float('inf')

# model 초기화
model = UNetWithResnet50Encoder().to(device)

# loss function과 optimizer 정의
criterion = DiceLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=patience)


# training loop
for epoch in range(epoch_num):  # 30 에폭 동안 학습합니다.
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Train Epoch {epoch+1}, Loss: {1+epoch_loss/len(dataloader)}')


100%|██████████| 62/62 [00:53<00:00,  1.16it/s]


Train Epoch 1, Loss: 0.18984179342946694


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 2, Loss: 0.06973429841379963


100%|██████████| 62/62 [00:53<00:00,  1.16it/s]


Train Epoch 3, Loss: 0.059140268833406506


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 4, Loss: 0.04434964926012097


100%|██████████| 62/62 [00:53<00:00,  1.16it/s]


Train Epoch 5, Loss: 0.0377617132279181


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 6, Loss: 0.035733061452065784


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 7, Loss: 0.03469276812768751


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 8, Loss: 0.03396014436598749


100%|██████████| 62/62 [00:53<00:00,  1.17it/s]


Train Epoch 9, Loss: 0.03240152135972052


100%|██████████| 62/62 [00:53<00:00,  1.16it/s]

Train Epoch 10, Loss: 0.033913625824836036



