In [1]:
!pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
[0m

In [2]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models
import torch.optim as optim
from torchsummary import summary as model_summary

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Utils

In [3]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        mask_rle: Whitespace-separated string of alternating ``start length``
            values. Starts are 1-based offsets into the flattened mask.
        shape: ``(height, width)`` of the mask to build.

    Returns:
        ``np.uint8`` array of the requested shape holding 1 inside every run
        and 0 elsewhere. The flat buffer is reshaped with NumPy's default
        (row-major) order.
    """
    tokens = mask_rle.split()
    # Even positions hold the run starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int)
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_starts -= 1  # convert to 0-based offsets
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask.

    Args:
        mask: NumPy array of 0/1 values; it is flattened before encoding.

    Returns:
        String of space-separated ``start length`` pairs with 1-based starts,
        or an empty string when the mask contains no 1s.
    """
    # Pad with a zero on both sides so runs touching either end still produce
    # a rising and a falling edge.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    edges = np.where(padded[1:] != padded[:-1])[0] + 1
    # Edges alternate run-start / run-end; turn every end into a length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

### Custom Dataset

In [4]:
class SatelliteDataset(Dataset):
    """Dataset of images listed in a CSV file, optionally paired with RLE masks.

    Column 1 of the CSV holds the image path and column 2 the run-length-encoded
    mask. The mask column is read only when ``infer`` is False.

    Args:
        csv_file: Path of the CSV file, read with ``pd.read_csv``.
        image_dir: Prefix that is string-concatenated with each image path
            after the path's leading dots have been stripped.
        transform: Optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)`` that returns a dict with
            ``'image'`` (and ``'mask'``) entries.
        infer: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, csv_file, image_dir, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) RGB image when ``infer`` is True,
            otherwise an ``(image, mask)`` tuple.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Strip the leading '.' characters from the CSV path before appending
        # it to image_dir.
        img_filename = self.data.iloc[idx, 1].lstrip('.')
        img_path = self.image_dir + img_filename
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        # The mask is decoded at the loaded image's height and width.
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

### DataLoader

In [5]:
# Preprocessing applied to every sample: resize to 224x224, normalize with the
# library defaults, then convert to a tensor.
transform = A.Compose([A.Resize(height=224, width=224), A.Normalize(), ToTensorV2()])

# Training set and its shuffled loader.
dataset = SatelliteDataset(
    csv_file='/kaggle/input/dacon-building-data/train.csv',
    image_dir='/kaggle/input/dacon-building-data',
    transform=transform,
)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, num_workers=2)

### Build Model

In [6]:
class FCN8s(nn.Module):
    """Fully convolutional segmentation network with a VGG-style backbone.

    Five conv stages each halve the resolution (ceil-mode pooling). 1x1 score
    layers tap the outputs of stage 3 and stage 4. The stage-5 output goes
    through three 1x1 "fully connected" convolutions, and the resulting coarse
    score map is upsampled 2x, 2x and 8x with transposed convolutions, adding
    the stage-4 and stage-3 scores along the way.

    For inputs whose height and width are multiples of 32 the output has the
    same spatial size as the input and ``num_classes`` channels. The network
    returns raw scores; no activation is applied.

    Args:
        num_classes: Number of output score channels.
    """

    def __init__(self, num_classes=1):
        super().__init__()

        # ----- VGG-style backbone with the two skip-score heads -----
        self.conv1 = self._vgg_stage(3, 64, num_convs=2)
        self.conv2 = self._vgg_stage(64, 128, num_convs=2)
        self.conv3 = self._vgg_stage(128, 256, num_convs=3)
        # Scores predicted from the stage-3 output (1/8 resolution).
        self.score_3b = nn.Sequential(nn.Conv2d(256, num_classes, kernel_size=1))
        self.conv4 = self._vgg_stage(256, 512, num_convs=3)
        # Scores predicted from the stage-4 output (1/16 resolution).
        self.score_4b = nn.Sequential(nn.Conv2d(512, num_classes, kernel_size=1))
        self.conv5 = self._vgg_stage(512, 512, num_convs=3)

        # ----- 1x1 convolutional "fully connected" head -----
        self.fc6 = self._pointwise_fc(512, 4096)
        self.fc7 = self._pointwise_fc(4096, 4096)
        self.fc8 = nn.Conv2d(4096, num_classes, kernel_size=1)

        # ----- Learned upsampling -----
        # kernel 4 / stride 2 / padding 1 doubles the resolution exactly;
        # kernel 16 / stride 8 / padding 4 multiplies it by eight.
        self.upscore = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=4, stride=2, padding=1)
        self.upscore_4b = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=4, stride=2, padding=1)
        self.upscore_3b = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=16, stride=8, padding=4)

    @staticmethod
    def _vgg_stage(in_channels, out_channels, num_convs):
        """Build ``num_convs`` 3x3 conv + ReLU pairs followed by a 2x2 max-pool."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(
                nn.Conv2d(channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    @staticmethod
    def _pointwise_fc(in_channels, out_channels):
        """Build a 1x1 conv + ReLU + Dropout2d block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
        )

    def forward(self, x):
        """Return the score map for input batch ``x`` (N, 3, H, W)."""
        pool3 = self.conv3(self.conv2(self.conv1(x)))
        score_pool3 = self.score_3b(pool3)

        pool4 = self.conv4(pool3)
        score_pool4 = self.score_4b(pool4)

        coarse_score = self.fc8(self.fc7(self.fc6(self.conv5(pool4))))

        # Fuse coarse-to-fine: 1/32 -> 1/16 (+stage 4) -> 1/8 (+stage 3) -> full.
        fused_16 = self.upscore(coarse_score) + score_pool4
        fused_8 = self.upscore_4b(fused_16) + score_pool3
        return self.upscore_3b(fused_8)
    

# Build a CPU instance only to print the per-layer summary. The summary is computed
# for a 3x256x256 input, whereas the training transform resizes images to 224x224.
model = FCN8s()
model_summary(model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
              ReLU-2         [-1, 64, 256, 256]               0
            Conv2d-3         [-1, 64, 256, 256]          36,928
              ReLU-4         [-1, 64, 256, 256]               0
         MaxPool2d-5         [-1, 64, 128, 128]               0
            Conv2d-6        [-1, 128, 128, 128]          73,856
              ReLU-7        [-1, 128, 128, 128]               0
            Conv2d-8        [-1, 128, 128, 128]         147,584
              ReLU-9        [-1, 128, 128, 128]               0
        MaxPool2d-10          [-1, 128, 64, 64]               0
           Conv2d-11          [-1, 256, 64, 64]         295,168
             ReLU-12          [-1, 256, 64, 64]               0
           Conv2d-13          [-1, 256, 64, 64]         590,080
             ReLU-14          [-1, 256,

### Train Model

In [7]:
# Initialize the model
model = FCN8s().to(device)
# if torch.cuda.device_count() == 2:
#     _net = FCN8s().cuda()
#     model = nn.DataParallel(_net).to(device)

# Define the loss function and the optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Early-stopping bookkeeping
best_loss = float('inf')  # lowest mean epoch loss seen so far (starts at infinity)
patience = 3              # non-improving epochs tolerated before stopping
num_bad_epochs = 0        # consecutive epochs without improvement

for epoch in range(10):  # train for at most 10 epochs
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # The mask gets a channel axis so it lines up with the model output.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    print(f'Epoch {epoch+1}, Loss: {avg_loss}')

    # Early stopping, driven by the mean training loss of the epoch.
    if avg_loss < best_loss:
        best_loss, num_bad_epochs = avg_loss, 0
        continue

    num_bad_epochs += 1
    if num_bad_epochs >= patience:
        print('Early stopping triggered. Training stopped.')
        break

100%|██████████| 447/447 [04:09<00:00,  1.79it/s]


Epoch 1, Loss: 0.22316076051468817


100%|██████████| 447/447 [03:46<00:00,  1.97it/s]


Epoch 2, Loss: 0.13991661446473208


100%|██████████| 447/447 [03:54<00:00,  1.91it/s]


Epoch 3, Loss: 0.1268857919196421


100%|██████████| 447/447 [03:53<00:00,  1.91it/s]


Epoch 4, Loss: 0.1205821404154402


100%|██████████| 447/447 [03:57<00:00,  1.88it/s]


Epoch 5, Loss: 0.11473511367769583


100%|██████████| 447/447 [03:58<00:00,  1.88it/s]


Epoch 6, Loss: 0.11156692125853276


100%|██████████| 447/447 [03:55<00:00,  1.90it/s]


Epoch 7, Loss: 0.10806163453749096


100%|██████████| 447/447 [03:56<00:00,  1.89it/s]


Epoch 8, Loss: 0.10592983003864086


100%|██████████| 447/447 [03:53<00:00,  1.91it/s]


Epoch 9, Loss: 0.10364015119554479


100%|██████████| 447/447 [03:53<00:00,  1.92it/s]

Epoch 10, Loss: 0.10205700956868378





### Save & Load Model

In [9]:
# Save only the model's state_dict (not the whole module object) to the working directory.
save_path = '/kaggle/working/model.pth'
torch.save(model.state_dict(), save_path)

In [None]:
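# Load the model (uncomment to reuse saved weights instead of training).
# The model must live on `device`, because the inference cell moves its inputs there.
# model = FCN8s().to(device)
# model.load_state_dict(torch.load('/kaggle/input/trained-model/' + 'model.pth', map_location=device))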

### Inference

In [10]:
# Test set in inference mode (images only). The loader is not shuffled, so
# predictions come out in CSV row order.
test_dataset = SatelliteDataset(
    csv_file='/kaggle/input/dacon-building-data/test.csv',
    image_dir='/kaggle/input/dacon-building-data',
    transform=transform,
    infer=True,
)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=2)

In [11]:
# Predict a mask for every test batch and collect one RLE entry per image.
model.eval()
result = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        # Scores -> probabilities, drop the channel axis, then binarize.
        masks = np.squeeze(torch.sigmoid(outputs).cpu().numpy(), axis=1)
        masks = (masks > 0.35).astype(np.uint8)  # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            # -1 marks an image with no predicted building pixels at all.
            result.append(-1 if mask_rle == '' else mask_rle)

100%|██████████| 3790/3790 [06:25<00:00,  9.83it/s]


### Submission

In [12]:
# Load the sample submission and fill its mask_rle column with the collected predictions.
submit = pd.read_csv('/kaggle/input/dacon-building-data/sample_submission.csv')
submit['mask_rle'] = result

In [13]:
# Write the submission CSV without the DataFrame index column.
submit.to_csv('./submit.csv', index=False)