In [1]:
!pip install transformers



In [2]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.0.6-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting croniter<1.5.0,>=1.3.0 (from lightning)
  Downloading croniter-1.4.1-py2.py3-none-any.whl (19 kB)
Collecting dateutils<2.0 (from lightning)
  Downloading dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Collecting deepdiff<8.0,>=5.7.0 (from lightning)
  Downloading deepdiff-6.3.1-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.7/70.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting inquirer<5.0,>=2.10.0 (from lightning)
  Downloading inquirer-3.1.3-py3-none-any.whl (18 kB)
Collecting lightning-cloud>=0.5.37 (from lightning)
  Downloading lightning_cloud-0.5.37-py3-none-any.whl (596 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m596.7/596.7 kB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
Collecting 

In [3]:
!pip install -U git+https://github.com/qubvel/segmentation_models.pytorch

Collecting git+https://github.com/qubvel/segmentation_models.pytorch
  Cloning https://github.com/qubvel/segmentation_models.pytorch to /tmp/pip-req-build-t3wf2dtx
  Running command git clone --filter=blob:none --quiet https://github.com/qubvel/segmentation_models.pytorch /tmp/pip-req-build-t3wf2dtx
  Resolved https://github.com/qubvel/segmentation_models.pytorch to commit e5d3db20e9c2ddb76f88642409e527239943c983
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch==0.3.3)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch==0.3.3)
  Downloading efficientnet_py

In [4]:
pip install wandb

Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models
from datasets import load_metric
from torchmetrics.functional import dice
import torch.optim as optim
import lightning as pl
import segmentation_models_pytorch as smp

from transformers import SegformerForSemanticSegmentation
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


### Utils

In [6]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary 2-D array.

    The encoding is a whitespace-separated sequence of ``start length`` pairs,
    where ``start`` is a 1-based index into the row-major flattened mask.

    Args:
        mask_rle: RLE string. A missing value (``None`` / NaN), an empty string,
            or a lone token such as the ``-1`` sentinel (given as int or str)
            is treated as "no foreground pixels".
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of the requested shape holding 1 inside every run
        and 0 everywhere else.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Missing values read from a CSV arrive as None/NaN rather than as a string.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        return img.reshape(shape)

    s = str(mask_rle).split()
    # Fewer than two tokens cannot form a (start, length) pair: empty mask.
    if len(s) < 2:
        return img.reshape(shape)

    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]
    starts -= 1  # RLE starts are 1-based; numpy slicing is 0-based
    ends = starts + lengths
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask as a ``"start length start length ..."`` string.

    Starts are 1-based positions in the flattened mask. A mask with no
    foreground pixels encodes to the empty string.
    """
    flat = mask.flatten()
    # Zero-pad both ends so runs touching either border still produce a start and an end.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions at which the value changes: run start, run end, run start, ...
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run end into a run length (end - start).
    boundaries[1::2] = boundaries[1::2] - boundaries[::2]
    return ' '.join(map(str, boundaries))

### Custom Dataset

In [7]:
class SatelliteDataset(Dataset):
    """Dataset that loads images (and, for training, RLE masks) listed in a CSV file.

    Rows are read positionally: column 1 holds the image path and, unless
    ``infer`` is set, column 2 holds the RLE-encoded mask.
    NOTE(review): column 0 is presumably an image id -- confirm against the CSV.

    Args:
        csv_file: Path of the CSV file describing the samples.
        image_dir: Prefix prepended (by plain string concatenation) to each image path.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            (or ``transform(image=...)`` when ``infer`` is set) that returns a dict
            with ``'image'`` (and ``'mask'``) entries.
        infer: When True, samples contain only ``'pixel_values'`` (no labels).
    """

    def __init__(self, csv_file, image_dir, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Number of rows in the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict.

        Returns:
            ``{'pixel_values': image}`` in inference mode, otherwise
            ``{'pixel_values': image, 'labels': mask}`` where ``mask`` is a
            tensor with a leading channel axis of size 1.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Leading dots are stripped so a "./..."-style path can be appended to image_dir.
        img_filename = self.data.iloc[idx, 1].lstrip('.')
        img_path = self.image_dir + img_filename
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return {'pixel_values': image}

        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Without a transform the mask is still a numpy array; as_tensor converts it
        # and leaves an existing tensor untouched.
        mask = torch.unsqueeze(torch.as_tensor(mask), dim=0)

        return {'pixel_values': image, 'labels': mask}

### DataLoader

In [8]:
# Training-time augmentation pipeline: random 224x224 crop, random rotation,
# always-on coarse dropout (p=1), then conversion to a tensor.
transform = A.Compose(
    transforms=[
        A.RandomCrop(height=224, width=224),
        A.Rotate(limit=60),
        A.CoarseDropout(
            min_holes=3,
            max_holes=8,
            min_height=0.125,
            max_height=0.25,
            min_width=0.125,
            max_width=0.25,
            fill_value=0,
            mask_fill_value=0,
            p=1,
        ),
        ToTensorV2(),
    ]
)

In [9]:
# Training dataset: images and RLE masks listed in train.csv, augmented with `transform`.
dataset = SatelliteDataset(
    csv_file='/kaggle/input/dacon-building-data/train.csv',
    image_dir='/kaggle/input/dacon-building-data',
    transform=transform,
)
# Batch size setting.
# shuffle=True: training batches should be drawn in a fresh random order every epoch
# instead of always following the CSV row order.
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=2)

### Build Model

In [10]:
class SegFormerModel(pl.LightningModule):
    """Lightning module that fine-tunes a SegFormer network for binary segmentation.

    Training minimises a binary Dice loss computed on logits that are bilinearly
    upsampled to the mask resolution. No validation or test step is implemented.

    Args:
        train_dataloader: DataLoader returned by :meth:`train_dataloader`.
        val_dataloader: Stored as ``val_dl``; not used by any hook of this class.
        test_dataloader: DataLoader returned by :meth:`test_dataloader`.
        metrics_interval: ``train_loss`` and ``train_mean_iou`` are logged on every
            batch whose index is a multiple of this value.
        model: Network to train. When ``None`` it is built by ``get_initial_model()``.
    """

    def __init__(self, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=10, model=None):
        super().__init__()
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        # Explicit None check: truth-testing a module is not a reliable "was it passed"
        # test (container modules define __len__, so an empty one is falsy).
        self.model = model if model is not None else get_initial_model()
        self.loss_module = smp.losses.DiceLoss(mode="binary", smooth=1.0, from_logits=True)
        self.train_step_ious = []
        self.validation_step_ious = []
        self.validation_step_outputs = []
        self.test_step_outputs = []
        # NOTE(review): called without arguments this records every __init__ argument,
        # including the dataloaders and the model object. Consider passing `ignore=[...]`,
        # but first confirm nothing relies on them being restored from a checkpoint.
        self.save_hyperparameters()

    @staticmethod
    def _as_bchw(labels):
        """Return ``labels`` as a float tensor with an explicit channel axis.

        3-D input (batch, H, W) gets a channel axis inserted; 4-D input is kept
        as is. Unlike a bare ``torch.squeeze`` this never collapses the batch
        axis when the batch holds a single sample.
        """
        masks = labels.float()
        if masks.dim() == 3:
            masks = masks.unsqueeze(1)
        return masks

    def forward(self, images, masks=None):
        """Run the wrapped network on ``images``; ``masks`` is accepted but unused."""
        outputs = self.model(pixel_values=images)
        return outputs

    def training_step(self, batch, batch_idx):
        """Compute the Dice loss for one batch and periodically log loss and mean IoU."""
        # self.device is the device Lightning placed this module on, so the step does
        # not depend on a notebook-level global.
        images = batch['pixel_values'].float().to(self.device)
        masks = self._as_bchw(batch['labels']).to(self.device)

        outputs = self.model(pixel_values=images, return_dict=True)

        # The logits are resized to the mask resolution before the loss is computed.
        upsampled_logits = nn.functional.interpolate(
            outputs.logits,
            size=masks.shape[-2:],
            mode="bilinear",
            align_corners=False,
        ).contiguous()

        loss = self.loss_module(upsampled_logits, masks)

        tp, fp, fn, tn = smp.metrics.get_stats(
            (upsampled_logits.sigmoid() > 0.5).long(), masks.long(), mode='binary'
        )
        iou = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro-imagewise")
        self.train_step_ious.append(iou)

        if batch_idx % self.metrics_interval == 0:
            # Mean over the IoU values collected so far in the current epoch.
            mean_iou = torch.stack(self.train_step_ious).mean()
            self.log('train_loss', loss)
            self.log('train_mean_iou', mean_iou)

        return {'loss': loss}

    def on_train_epoch_end(self):
        """Drop the per-step IoU values so the list does not grow across epochs."""
        self.train_step_ious.clear()

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return the raw model output (logits are NOT upsampled here) for a batch."""
        images = batch['pixel_values'].float().to(self.device)
        return self.model(images, return_dict=True)

    def configure_optimizers(self):
        """AdamW over the trainable parameters with a per-step cosine-annealing schedule."""
        # The learning rate still needs tuning.
        optimizer = torch.optim.AdamW(
            [p for p in self.parameters() if p.requires_grad], lr=5e-04, eps=1e-07
        )
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=500, eta_min=1e-06, last_epoch=-1
        )
        return {
            "optimizer": optimizer,
            # "interval": "step" makes Lightning advance the scheduler after every optimizer step.
            "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
            "monitor": "train_loss",
        }

    def train_dataloader(self):
        """Return the training DataLoader given to the constructor."""
        return self.train_dl

    def test_dataloader(self):
        """Return the test DataLoader given to the constructor."""
        return self.test_dl

In [11]:
def get_initial_model():
    """Build a SegFormer segmentation network from the ``nvidia/mit-b4`` checkpoint.

    The network is configured with a single output label, and size mismatches
    between the checkpoint and this configuration are tolerated when loading.
    """
    load_options = dict(
        return_dict=True,
        num_labels=1,
        ignore_mismatched_sizes=True,
    )
    segformer_net = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b4", **load_options)
    return segformer_net

In [12]:
import wandb

# Weights & Biases is the external service used for loss monitoring.
# SECURITY: never hard-code the API key in the notebook. Get a key from the wandb
# site and expose it through the WANDB_API_KEY environment variable instead. Any key
# that was ever hard-coded in a shared copy of this notebook should be revoked.
wandb_api = os.environ.get('WANDB_API_KEY')
# With key=None, wandb.login falls back to its own credential lookup / prompt.
wandb.login(key=wandb_api)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [13]:
from lightning.pytorch.callbacks import Callback
from lightning.pytorch.callbacks import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import LearningRateMonitor


# Early-stopping configuration -- edit here to change how (or whether) training stops early.
#   min_delta: minimum change in the monitored loss
#   patience:  the monitored value has to decrease within this many checks
early_stop_callback = EarlyStopping(
    monitor="train_loss",
    mode="min",
    min_delta=0.01,
    patience=100,
    verbose=False,
)

# Keep only the single best checkpoint, ranked by the logged train_loss.
checkpoint_callback = ModelCheckpoint(
    monitor="train_loss",
    save_top_k=1,
    dirpath='/kaggle/working/checkpoint',
)

# Weights & Biases logger for this project.
wandb_logger = WandbLogger(log_model='all', project='seg-b4-test')

# class FineTuneBatchSizeFinder(BatchSizeFinder):
#     def __init__(self, milestones, *args, **kwargs):
#         super().__init__(*args, **kwargs)
#         self.milestones = milestones

#     def on_fit_start(self, *args, **kwargs):
#         return

#     def on_train_epoch_start(self, trainer, pl_module):
#         if trainer.current_epoch in self.milestones or trainer.current_epoch == 0:
#             self.scale_batch_size(trainer, pl_module)
            
            
# batch_size_callback = FineTuneBatchSizeFinder(milestones=(5, 10))

# Records the learning rate using a per-step logging interval.
lr_monitor_callback = LearningRateMonitor(logging_interval="step")

[34m[1mwandb[0m: Currently logged in as: [33mihobbang250[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [14]:
# No `model` is passed, so SegFormerModel builds the network itself.
segformer = SegFormerModel(train_dataloader=dataloader, metrics_interval=5)

# Single-GPU trainer, capped at 20 epochs, logging to Weights & Biases.
trainer = pl.Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=20,
    logger=wandb_logger,
    callbacks=[early_stop_callback, checkpoint_callback, lr_monitor_callback],
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/246M [00:00<?, ?B/s]

Some weights of the model checkpoint at nvidia/mit-b4 were not used when initializing SegformerForSemanticSegmentation: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b4 and are newly initialized: ['decode_head.classifier.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.3.proj.bias', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.2.proj.weight', 'decode_head.line

In [15]:
# load_from_checkpoint is a classmethod that returns a NEW model. Call it on the class:
# calling it on an instance does not modify that instance, and newer Lightning
# releases reject instance calls.
checkpoint_model = SegFormerModel.load_from_checkpoint('/kaggle/input/ckckckck/epoch68-step15456.ckpt', map_location=device)

Some weights of the model checkpoint at nvidia/mit-b4 were not used when initializing SegformerForSemanticSegmentation: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b4 and are newly initialized: ['decode_head.classifier.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.3.proj.bias', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.2.proj.weight', 'decode_head.line

In [16]:
# Pass the training DataLoader explicitly so training uses the loader built in this
# notebook rather than whatever loader was restored into the model from the checkpoint.
trainer.fit(checkpoint_model, train_dataloaders=dataloader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type                             | Params
-----------------------------------------------------------------
0 | model       | SegformerForSemanticSegmentation | 64.0 M
1 | loss_module | DiceLoss                         | 0     
-----------------------------------------------------------------
64.0 M    Trainable params
0         Non-trainable params
64.0 M    Total params
255.975   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=20` reached.


### Save & Load Model

In [17]:
#checkpoint_callback.best_model_path

In [18]:
#checkpoint_model = segformer.load_from_checkpoint(checkpoint_callback.best_model_path, map_location=torch.device('cpu'))

### Inference

In [19]:
# load checkpoint
#checkpoint_model.eval()

In [25]:
# Inference must be deterministic. The training `transform` applies random rotation and
# always-on CoarseDropout, which would corrupt the test images, so only the tensor
# conversion is applied here.
# NOTE(review): the random 224x224 crop is dropped as well; this assumes the test images
# already have the size expected by the submission -- TODO confirm.
test_transform = A.Compose([ToTensorV2()])

test_dataset = SatelliteDataset(
    csv_file='/kaggle/input/dacon-building-data/test.csv',
    image_dir='/kaggle/input/dacon-building-data',
    transform=test_transform,
    infer=True,
)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

In [26]:
# Predict with the model that was loaded from the checkpoint and passed to trainer.fit.
# `segformer` itself is never trained in this notebook, so it must not be used here.
outputs = trainer.predict(checkpoint_model, test_dataloader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [22]:
result = []

# The loader is walked again only to recover each batch's spatial size; it is not
# shuffled, so outputs[batch_idx] corresponds to the batch_idx-th batch.
for batch_idx, data in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
    image = data['pixel_values']
    # Resize the logits to the input resolution before thresholding.
    upsampled_logits = nn.functional.interpolate(
        outputs[batch_idx].logits,
        size=image.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).cpu()
    predicted = (upsampled_logits.sigmoid() > 0.5).long().numpy()
    predicted = np.squeeze(predicted, axis=1)  # drop the single-channel axis
    # A distinct loop variable is used so the batch index is not shadowed.
    for sample_mask in predicted:
        mask_rle = rle_encode(sample_mask)
        # An empty prediction is written as -1.
        result.append(mask_rle if mask_rle != '' else -1)

3790it [05:17, 11.93it/s]


### Submission

In [23]:
# Start from the sample submission and fill its mask_rle column with the predictions.
submit = pd.read_csv('/kaggle/input/dacon-building-data/sample_submission.csv').assign(mask_rle=result)

In [24]:
# Write the submission file without the DataFrame index column.
submit.to_csv(path_or_buf='./submit1.csv', index=False)