## Import

In [11]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

#압축파일 풀 곳
!mkdir data_2023

#데이터있는 주소
!unzip -qq drive/MyDrive/open.zip -d./data_2023

%cd data_2023

Mounted at /content/drive
/content/data_2023


In [2]:
%pip install segmentation-models-pytorch
%pip install adamp

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm==0.9.2 (from segmentation-models-pytorch)
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segmen

In [3]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from typing import List, Union
from joblib import Parallel, delayed

import segmentation_models_pytorch as smp
import argparse

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Utils

In [4]:

# RLE 디코딩 함수
def rle_decode(mask_rle, shape):
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE 인코딩 함수
def rle_encode(mask):
    pixels = mask.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

## Custom dataset

In [5]:
class SatelliteDataset(Dataset):
    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

## Data Loader

In [20]:
transform = A.Compose(
    [
        #A.CenterCrop(224, 224, p=0.7),
        A.RandomCrop(224, 224, p=1),

        #A.HorizontalFlip(p = 0.5),
        #A.HorizontalFlip(p = 0.5),

        #A.IAAAdditiveGaussianNoise(p=0.2),
        #A.IAAPerspective(p=0.5),

        #A.OneOf([
            #A.CLAHE(p=1),
            #A.RandomBrightness(p = 1),
            #A.RandomGamma(p = 1)
        #], p = 0.5),

        #A.OneOf([
            #A.IAASharpen(p = 1),
            #A.Blur(blur_limit=3, p=1),
            #A.GaussianBlur(p = 1),
            #A.MotionBlur(blur_limit=3, p=1),
            #A.GaussNoise(p = 1)
        #], p = 0.5),

        #A.OneOf([
            #A.RandomContrast(p=1),
            #A.HueSaturationValue(p=1),
        #], p = 0.5),

        A.Resize(224, 224),

        A.Normalize(),
        ToTensorV2()
    ]
)

transform_test = A.Compose(
    [
        A.Resize(224, 224),

        A.Normalize(),
        ToTensorV2()
    ]
)

##Define Model

In [6]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

ENCODER = 'efficientnet-b7'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'sigmoid'
DEVICE = 'cuda'

model = smp.Unet(
    encoder_name = ENCODER,
    encoder_weights = ENCODER_WEIGHTS,
    in_channels = 3,
    classes = 1,
    activation = ACTIVATION,
)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b7-dcc49843.pth
100%|██████████| 254M/254M [00:03<00:00, 73.7MB/s]


##CutMix

In [None]:
def cutmix(batch, alpha=1.0, p=0):
    '''
    alpha 값을 1.0으로 설정하여 beta 분포가 uniform 분포가 되도록 함으로써,
    두 이미지를 랜덤하게 combine하는 Cutmix
    '''

    data, targets = batch

    # cutmix 확률 설정
    if np.random.random() > p:
        return data, (targets, torch.zeros_like(targets), 1.0)

    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]
    lam = np.random.beta(alpha, alpha)

    image_h, image_w = data.shape[2:]
    cx = np.random.uniform(0, image_w)
    cy = np.random.uniform(0, image_h)
    w = image_w * np.sqrt(1 - lam)
    h = image_h * np.sqrt(1 - lam)
    x0 = int(np.round(max(cx - w / 2, 0)))
    x1 = int(np.round(min(cx + w / 2, image_w)))
    y0 = int(np.round(max(cy - h / 2, 0)))
    y1 = int(np.round(min(cy + h / 2, image_h)))

    data[:, :, y0:y1, x0:x1] = shuffled_data[:, :, y0:y1, x0:x1]
    targets = (targets, shuffled_targets, lam)

    return data, targets


class CutMixCollator:
    def __init__(self, alpha, p):
        self.alpha = alpha
        self.p = p

    def __call__(self, batch):
        batch = torch.utils.data.dataloader.default_collate(batch)
        batch = cutmix(batch, self.alpha, self.p)
        return batch

In [None]:
import segmentation_models_pytorch.utils

dice = smp.utils.losses.DiceLoss().to(device)
bce = smp.utils.losses.BCELoss().to(device)
criterion = dice + bce

class CutMixCriterion:
    def __init__(self):
        self.criterion = criterion

    def __call__(self, preds, targets):
        targets1, targets2, lam = targets
        targets1 = targets1.unsqueeze(1)
        targets2 = targets2.unsqueeze(1)
        return lam * self.criterion(
            preds, targets1) + (1 - lam) * self.criterion(preds, targets2)

In [None]:
collator = CutMixCollator(alpha=1.0, p=0.5)

In [None]:
train_ds = SatelliteDataset(csv_file='train.csv', transform=transform)
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2, collate_fn = collator)

In [None]:
train_dl2 = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)

##Model Train

In [None]:


# loss function과 optimizer 정의
#dice = smp.utils.losses.DiceLoss()
#bce = smp.utils.losses.BCELoss()
#criterion = dice + bce

train_criterion = CutMixCriterion()

#optimizer = torch.optim.Adam([dict(params=model.parameters(), lr=0.0001),])
optimizer = torch.optim.AdamW([dict(params=model.parameters(), lr=0.0001, weight_decay = 0.001),])
#optimizer = AdamP(params=model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=1e-4)

# training loop
for epoch in range(12):  # 10 에폭 동안 학습합니다.
    model.train()
    model.to('cuda')
    epoch_loss = 0
    for images, masks in tqdm(train_dl):
        images = images.float().to(device)
        targets1, targets2, lam = masks
        masks = (targets1.float().to(device), targets2.float().to(device), lam)

        optimizer.zero_grad()
        outputs = model(images)
        loss = train_criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(train_dl)}')

100%|██████████| 447/447 [06:35<00:00,  1.13it/s]


Epoch 1, Loss: 0.47684793257606645


100%|██████████| 447/447 [06:35<00:00,  1.13it/s]


Epoch 2, Loss: 0.46629898013417886


100%|██████████| 447/447 [06:36<00:00,  1.13it/s]


Epoch 3, Loss: 0.4696188758416997


100%|██████████| 447/447 [06:34<00:00,  1.13it/s]


Epoch 4, Loss: 0.46042601827540386


100%|██████████| 447/447 [06:35<00:00,  1.13it/s]


Epoch 5, Loss: 0.46407618942010054


100%|██████████| 447/447 [06:36<00:00,  1.13it/s]


Epoch 6, Loss: 0.45874292686814966


100%|██████████| 447/447 [06:36<00:00,  1.13it/s]


Epoch 7, Loss: 0.4844204377741355


100%|██████████| 447/447 [06:36<00:00,  1.13it/s]


Epoch 8, Loss: 0.4412586076780987


100%|██████████| 447/447 [06:36<00:00,  1.13it/s]


Epoch 9, Loss: 0.4649586028667371


100%|██████████| 447/447 [06:35<00:00,  1.13it/s]


Epoch 10, Loss: 0.4499515781566601


100%|██████████| 447/447 [06:34<00:00,  1.13it/s]


Epoch 11, Loss: 0.46342988278908487


100%|██████████| 447/447 [06:35<00:00,  1.13it/s]

Epoch 12, Loss: 0.45735307176174467





In [None]:
torch.save(model.state_dict(), '../drive/MyDrive/ef7+dicebce+adamW+rand224_1+cutmix.pth')

##Inference

In [12]:
model.load_state_dict(torch.load('../drive/MyDrive/ef7+dicebce+adamW+rand224_1+cutmix.pth'))

<All keys matched successfully>

In [34]:
def apply_densecrf(image, mask):
    # Convert mask to 1D label array
    mask = mask.cpu().numpy()
    mask = np.argmax(mask, axis=1)

    # Convert image and mask to 1D arrays
    image = image.cpu().numpy()
    image = np.transpose(image.squeeze(), (1, 2, 0))  # [C, H, W] -> [H, W, C]
    mask = mask.squeeze()

    # Create a dense CRF object
    d = dcrf.DenseCRF2D(image.shape[1], image.shape[0], num_classes)
    U = -np.log(mask)

    # Set unary potentials (neg log probability)
    d.setUnaryEnergy(U)

    # Add pairwise potentials (image-dependent features)
    d.addPairwiseBilateral(sxy=(80, 80), srgb=(13, 13, 13), rgbim=np.copy(image), compat=10)

    # Run inference
    Q = d.inference(5)

    # Convert the results to 1-hot encoding
    one_hot = np.zeros((num_classes, image.shape[0], image.shape[1]), dtype=np.uint8)
    for i in range(num_classes):
        one_hot[i, :, :] = (Q == i).reshape((image.shape[0], image.shape[1]))

    # Convert back to the original shape [C, H, W]
    one_hot = torch.from_numpy(np.expand_dims(one_hot, axis=0)).float()
    return one_hot

In [28]:
pip install ttach



In [15]:
import ttach as tta
transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        #tta.Rotate90(angles=[0, 90]),
        #tta.Scale(scales=[1,2,4])
    ]
)

In [21]:
test_dataset = SatelliteDataset(csv_file='test.csv', transform=transform_test, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

In [37]:
with torch.no_grad():
    model.eval()

    tta_model = tta.SegmentationTTAWrapper(model, transforms)
    tta_model.to(device)
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = tta_model(images)
        print(images.size())
        output_with_crf = apply_densecrf(images, outputs)
        masks = output_with_crf.cpu().numpy()
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # 예측된 건물 픽셀이 아예 없는 경우 -1
                result.append(-1)
            else:
                result.append(mask_rle)

  0%|          | 0/3790 [00:00<?, ?it/s]

torch.Size([16, 3, 224, 224])


  0%|          | 0/3790 [00:00<?, ?it/s]


ValueError: ignored

In [None]:
with torch.no_grad():
    model.eval()
    model.to(device)
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        masks = outputs.cpu().numpy()
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # 예측된 건물 픽셀이 아예 없는 경우 -1
                result.append(-1)
            else:
                result.append(mask_rle)

100%|██████████| 3790/3790 [14:52<00:00,  4.25it/s]


##Submission

In [17]:
submit = pd.read_csv('sample_submission.csv')
submit['mask_rle'] = result

NameError: ignored

In [18]:
submit.to_csv('../drive/MyDrive/densecrf_try1.csv', index=False)

In [None]:
test_data = pd.read_csv('test.csv')
submit1 = pd.read_csv('../drive/MyDrive/ef7+dicebce+adamW+rand224_1+cutmix_tta.csv')
submit2 = pd.read_csv('../drive/MyDrive/densecrf_try1.csv')

import matplotlib.pyplot as plt

for i in range(10050,10100):
    test_image_path = test_data['img_path'][i]
    test_image = cv2.imread(test_image_path)
    test_mask1 = rle_decode(submit1['mask_rle'][i], (224,224))
    test_mask2 = rle_decode(submit2['mask_rle'][i], (224,224))

    plt.figure(figsize=(10,10))
    plt.subplot(131)
    plt.imshow(test_image)
    plt.axis("off")
    plt.subplot(132)
    plt.imshow(test_mask1)
    plt.axis("off")
    plt.subplot(133)
    plt.imshow(test_mask2)
    plt.axis("off")

Output hidden; open in https://colab.research.google.com to view.