In [None]:
!pip install torchcontrib

In [None]:
!pip install segmentation-models-pytorch

In [1]:
import os
from time import time

import torch
import torch.nn as nn

import segmentation_models_pytorch as smp
from torchcontrib.optim import SWA

import albumentations as A
from albumentations.pytorch import ToTensorV2

# dataset.py 파일 필요
from dataset import *
from utils import label_accuracy_score, seed_everything, get_current_time

In [2]:
# wandb 사용여부 결정
use_wandb =  # True / False

if use_wandb:
    import wandb
    
    # wandb run
    run = wandb.init(project='p3-img-seg', entity='boostcamp-simple')

[34m[1mwandb[0m: Currently logged in as: [33mjaegyeong[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.28 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


## 하이퍼파라미터 세팅 및 seed 고정

In [3]:
model = "EfficientUnet"
encoder_name = 'timm-efficientnet-b3'
encoder_weight = 'noisy-student'

batch_size = 8
num_epochs = 20
learning_rate = 0.0005
random_seed = 21
weight_decay = 1e-6

In [4]:
if use_wandb:
    # wandb에 사용할 하이퍼파라미터 저장
    config = wandb.config
    config.update({
        "model": model,
        "encoder_name": encoder_name,
        "encoder_weight": encoder_weight,

        "batch_size": batch_size,
        "num_epochs": num_epochs,
        "learning_rate": learning_rate,
        "random_seed": random_seed,
        "weight_decay": weight_decay,
    })

In [5]:
# seed 고정
seed_everything(random_seed)

## 파일명 설정

In [6]:
dd, hh, mm = get_current_time()
# 모델 저장 파일 이름
saved_dir = '/opt/ml/code/saved'
model_file_name = f'Unet_best_model(efficient_net)_swa_{dd}{hh}{mm}.pt'

# 제출 파일 이름
submission_dir = '/opt/ml/code/submission'
submission_file_name = f'Baseline_FCN8s(pretrained)_swa_{dd}{hh}{mm}.csv'

## DataLoader

In [7]:
# train.json / validation.json / test.json 디렉토리 설정
dataset_path = '/opt/ml/input/data'

train_path = os.path.join(dataset_path, 'train.json')
val_path = os.path.join(dataset_path, 'val.json')
test_path = os.path.join(dataset_path, 'test.json')

# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

train_transform = A.Compose([
                            ToTensorV2()
                            ])

val_transform = A.Compose([
                          ToTensorV2()
                          ])

test_transform = A.Compose([
                           ToTensorV2()
                           ])

# train dataset
train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)

# validation dataset
val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=val_transform)

# test dataset
test_dataset = CustomDataLoader(data_dir=test_path, mode='test', transform=test_transform)


# DataLoader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4,
                                           collate_fn=collate_fn)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=4,
                                         collate_fn=collate_fn)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          num_workers=4,
                                          collate_fn=collate_fn)

loading annotations into memory...
Done (t=3.74s)
creating index...
index created!
loading annotations into memory...
Done (t=1.52s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


## 모델 생성
### Using SMP
[git: segmentation-models-pytorch](https://github.com/qubvel/segmentation_models.pytorch)

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = smp.Unet(
    encoder_name=encoder_name,  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights=encoder_weight,      # use 'noisy-student' pre-trained weights for encoder initialization
    in_channels=3,                        # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=12,                           # model output channels (number of classes in your dataset)
)

model.to(device)
print('Done.')

Done.


## Loss function, Optimizer 정의
### Using SWA
[pytorch: Stochastic Weight Averaging in PyTorch](https://pytorch.org/blog/stochastic-weight-averaging-in-pytorch/)

In [9]:
swa_start = 10
swa_freq = 5
swa_lr = 0.005

In [10]:
if use_wandb:
    # wandb에 사용할 loss, optimizer, 하이퍼 파라미터 저장
    config.update({
        "criterion": "nn.CrossEntropyLoss()"
        "base_optimizer": "Adam"
        "optimizer": "SWA"
        
        "swa_start": swa_start,
        "swa_freq": swa_freq,
        "swa_lr": swa_lr,
    })

In [11]:
# criterion = smp.utils.losses.DiceLoss()
criterion = nn.CrossEntropyLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

base_optimizer = torch.optim.Adam([ 
    dict(params=model.parameters(), lr=learning_rate),
])
optimizer = SWA(base_optimizer, swa_start=swa_start, swa_freq=swa_freq, swa_lr=swa_lr)

## 모델 저장 함수 정의

In [12]:
# 모델 저장 함수 정의
val_every = 1 

if not os.path.isdir(saved_dir):                                                           
    os.mkdir(saved_dir)
    
def save_model(model, saved_dir, file_name=model_file_name):
    check_point = {'net': model.state_dict()}
    output_path = os.path.join(saved_dir, file_name)
    torch.save(model.state_dict(), output_path)

## train, validation 함수 정의

In [13]:
def train(num_epochs, model, data_loader, val_loader, criterion, optimizer, saved_dir, val_every, device):
    print('Start training..')
    best_loss = 9999999
    for epoch in range(num_epochs):
        model.train()
        for step, (images, masks, _) in enumerate(data_loader):
            images = torch.stack(images)       # (batch, channel, height, width)
            masks = torch.stack(masks).long()  # (batch, channel, height, width)
            
            # gpu 연산을 위해 device 할당
            images, masks = images.to(device), masks.to(device)
                  
            # inference
            outputs = model(images)
            
            # loss 계산 (cross entropy loss)
            loss = criterion(outputs, masks)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if step > 10 and step % 5 == 0:
                optimizer.update_swa()
            
            # step 주기에 따른 loss 출력
            if (step + 1) % 25 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch+1, num_epochs, step+1, len(train_loader), loss.item()))
                if use_wandb:
                    wandb.log({"train_loss": loss})
                
        # validation 주기에 따른 loss 출력 및 best model 저장
        if (epoch + 1) % val_every == 0:
            avrg_loss = validation(epoch + 1, model, val_loader, criterion, device)
            if avrg_loss < best_loss:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_loss = avrg_loss
                save_model(model, saved_dir)
                
    optimizer.swap_swa_sgd()

In [14]:
def validation(epoch, model, data_loader, criterion, device):
    print('Start validation #{}'.format(epoch))
    model.eval()
    with torch.no_grad():
        total_loss = 0
        cnt = 0
        mIoU_list = []
        for step, (images, masks, _) in enumerate(data_loader):
            
            images = torch.stack(images)       # (batch, channel, height, width)
            masks = torch.stack(masks).long()  # (batch, channel, height, width)

            images, masks = images.to(device), masks.to(device)   

            outputs = model(images)
            loss = criterion(outputs, masks)
            total_loss += loss
            cnt += 1
            
            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()

            mIoU = label_accuracy_score(masks.detach().cpu().numpy(), outputs, n_class=12)[2]
            mIoU_list.append(mIoU)
            
        avrg_loss = total_loss / cnt
        mIoU = np.mean(mIoU_list)
        print('Validation #{}  Average Loss: {:.4f}, mIoU: {:.4f}'.format(epoch, avrg_loss, mIoU))
        if use_wandb:
            wandb.log({"val_loss": avrg_loss, "val_mIoU": mIoU})

    return avrg_loss

In [15]:
# 학습시작 시간 출력
s_dd, s_hh, s_mm = get_current_time()

# 학습 시작
train(num_epochs, model, train_loader, val_loader, criterion, optimizer, saved_dir, val_every, device)

# 학습종료 시간 출력
e_dd, e_hh, e_mm = get_current_time()
print(f'start: {s_dd}일 {s_hh}시 {s_mm}분')
print(f'end: {e_dd}일 {e_hh}시 {e_mm}분')

start: 30일 19시 48분
Start training..
Epoch [1/20], Step [25/328], Loss: 0.8298
Epoch [1/20], Step [50/328], Loss: 0.7281
Epoch [1/20], Step [75/328], Loss: 0.6412
Epoch [1/20], Step [100/328], Loss: 0.5672
Epoch [1/20], Step [125/328], Loss: 0.6438
Epoch [1/20], Step [150/328], Loss: 0.8134
Epoch [1/20], Step [175/328], Loss: 0.6566
Epoch [1/20], Step [200/328], Loss: 1.0190
Epoch [1/20], Step [225/328], Loss: 0.6752
Epoch [1/20], Step [250/328], Loss: 0.8437
Epoch [1/20], Step [275/328], Loss: 0.5398
Epoch [1/20], Step [300/328], Loss: 0.5871
Epoch [1/20], Step [325/328], Loss: 0.9595
Start validation #1
Validation #1  Average Loss: 0.5960, mIoU: 0.2355
Best performance at epoch: 1
Save model in /opt/ml/code/saved
Epoch [2/20], Step [25/328], Loss: 0.4969
Epoch [2/20], Step [50/328], Loss: 0.7750
Epoch [2/20], Step [75/328], Loss: 0.6191
Epoch [2/20], Step [100/328], Loss: 1.0470
Epoch [2/20], Step [125/328], Loss: 0.7540
Epoch [2/20], Step [150/328], Loss: 0.6821
Epoch [2/20], Step [1

In [16]:
if use_wandb:
    # wandb 종료
    run.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_loss,0.22455
_runtime,4959.0
_timestamp,1619784618.0
_step,279.0
val_loss,0.42778
val_mIoU,0.32232


0,1
train_loss,▄█▄▆▃▃▅▄▃▂▁▃█▃▂▃▆▃▁▃▂▃▂▃▄▂▃▂▄▁▃▄▂▄▆▂▁▃▂▁
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
val_loss,▇▆▇▅█▄▃▃▂▂▃▂▃▁▂▁▂▃▂▁
val_mIoU,▁▂▁▂▂▅▃▅▅▅▄▅▆▃▆▇▆▅▆█


## 저장된 model 불러오기 (학습된 이후)

In [17]:
# best model 저장된 경로
model_path = os.path.join(saved_dir, model_file_name)

# best model 불러오기
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint)

# 추론을 실행하기 전에는 반드시 설정 (batch normalization, dropout 를 평가 모드로 설정)
# model.eval() # test 함수에서 model.eval() 실행
print('Done.')

Done.


## submission을 위한 test 함수 정의

In [18]:
def test(model, data_loader, device):
    size = 256
    transform = A.Compose([A.Resize(256, 256)])
    print('Start prediction.')
    model.eval()
    
    file_name_list = []
    preds_array = np.empty((0, size*size), dtype=np.long)
    
    with torch.no_grad():
        for step, (imgs, image_infos) in enumerate(test_loader):

            # inference (512 x 512)
            outs = model(torch.stack(imgs).to(device))
            oms = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()
            
            # resize (256 x 256)
            temp_mask = []
            for img, mask in zip(np.stack(imgs), oms):
                transformed = transform(image=img, mask=mask)
                mask = transformed['mask']
                temp_mask.append(mask)

            oms = np.array(temp_mask)
            
            oms = oms.reshape([oms.shape[0], size*size]).astype(int)
            preds_array = np.vstack((preds_array, oms))
            
            file_name_list.append([i['file_name'] for i in image_infos])
    print("End prediction.")
    file_names = [y for x in file_name_list for y in x]
    
    return file_names, preds_array

## submission.csv 생성

In [19]:
# sample_submisson.csv 열기
submission = pd.read_csv(os.path.join(submission_dir, 'sample_submission.csv'), index_col=None)

# test set에 대한 prediction
file_names, preds = test(model, test_loader, device)

# PredictionString 대입
for file_name, string in zip(file_names, preds):
    submission = submission.append({"image_id" : file_name, "PredictionString" : ' '.join(str(e) for e in string.tolist())}, 
                                   ignore_index=True)

# submission.csv로 저장
submission.to_csv(os.path.join(submission_dir, submission_file_name), index=False)

Start prediction.
End prediction.
