In [3]:
import os
import random
import time
import json
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from pycocotools.coco import COCO
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

import matplotlib.pyplot as plt
from torch.cuda.amp import GradScaler, autocast
from sklearn.model_selection import StratifiedKFold
import segmentation_models_pytorch as smp

from utils import *
from train import *

# Use the first CUDA device when one is available, otherwise fall back to the CPU,
# and report the choice in the cell output.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print (f"This notebook use {device}")

This notebook use cuda:0


In [22]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU generator plus all CUDA devices), and switches cuDNN to its
    deterministic, non-benchmarking mode.

    Args:
        seed: integer seed handed to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    
def killmemory():
    """Free as much memory as possible before starting a new piece of work.

    Runs a full Python garbage-collection pass and then asks PyTorch to
    release the CUDA memory held by its caching allocator.
    """
    # Imported locally: `gc` is not among this notebook's explicit imports, so
    # the function would otherwise only work if a wildcard import exposed it.
    import gc

    gc.collect()
    torch.cuda.empty_cache()

def get_classname(classID, cats):
    """Return the name of the category whose ``'id'`` equals ``classID``.

    Args:
        classID: category id to look up.
        cats: sequence of dicts, each carrying an ``'id'`` and a ``'name'`` key.

    Returns:
        The ``'name'`` of the first matching category, or the string
        ``"None"`` when no category matches.
    """
    matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
    return next(matching_names, "None")
    
# Same device selection as in the import cell (duplicate definition): first CUDA
# device when available, otherwise CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [23]:
class PseudoKFoldDataset(Dataset):
    """COCO-format segmentation dataset mixing annotated and pseudo-labelled images.

    Each entry of ``dataset`` is a dict describing one image. The keys read
    here are ``'file_name'``, ``'pseudo'`` and, depending on ``'pseudo'``,
    either ``'mask_path'`` (a ``.npy`` mask loaded with ``np.load``) or
    ``'id'``, ``'height'`` and ``'width'`` (the mask is rasterised from the
    COCO annotations of that image).

    Args:
        dataset: indexable collection of image-info dicts.
        transform: optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """
    def __init__(self, dataset, transform = None):
        super().__init__()
        self.dataset = dataset
        self.transform = transform
        self.coco = COCO('/opt/ml/input/data/train_all.json')
        self.dataset_path = '/opt/ml/input/data/'
        # A category's position in this list is the pixel value written to the mask.
        # NOTE(review): 'Backgroud' looks like a typo for 'Background'; it is left
        # untouched because the string takes part in name lookups at runtime.
        self.category_names = ['Backgroud', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']
        # {category id: category name}, filled on first use so the COCO category
        # list is not reloaded and scanned for every annotation of every sample.
        self._category_name_by_id = None

    def _category_name(self, category_id):
        """Return the COCO category name for ``category_id``, or ``"None"`` if unknown."""
        lookup = getattr(self, '_category_name_by_id', None)
        if lookup is None:
            lookup = {}
            for cat in self.coco.loadCats(self.coco.getCatIds()):
                # First occurrence wins if an id appears more than once.
                lookup.setdefault(cat['id'], cat['name'])
            self._category_name_by_id = lookup
        return lookup.get(category_id, "None")

    def _load_image(self, image_infos):
        """Read the image from disk as a float32 RGB array scaled by 1/255."""
        image_path = self.dataset_path + image_infos['file_name']
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; fail here
            # with the offending path instead of crashing inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def _annotation_mask(self, image_infos):
        """Rasterise the image's COCO annotations into one class-index mask."""
        ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
        anns = self.coco.loadAnns(ann_ids)

        mask = np.zeros((image_infos["height"], image_infos["width"]))
        for ann in anns:
            # Raises ValueError when the category name is not in category_names.
            pixel_value = self.category_names.index(self._category_name(ann['category_id']))
            # Where annotations overlap, the larger class index is kept.
            mask = np.maximum(self.coco.annToMask(ann) * pixel_value, mask)
        return mask

    def __getitem__(self, index: int):
        """Return ``(image, mask)`` for the sample at ``index``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
            ValueError: if an annotation's category name is not in
                ``category_names``.
        """
        image_infos = self.dataset[index]
        images = self._load_image(image_infos)

        if image_infos['pseudo']:
            # Pseudo-labelled sample: the mask was stored on disk as an array.
            masks = np.load(self.dataset_path + image_infos['mask_path'])
        else:
            # Annotated sample: build the mask from the COCO annotations.
            masks = self._annotation_mask(image_infos)
        masks = masks.astype(np.float32)

        if self.transform is not None:
            transformed = self.transform(image=images, mask=masks)
            images = transformed["image"]
            masks = transformed["mask"]
        return images, masks

    def __len__(self):
        """Return the number of image-info entries."""
        return len(self.dataset)

In [26]:
# Name (without extension) of the JSON file under ./config/ used for this run.
config_name = 'config26'

# Load the JSON settings into `args`, the object later passed to train_kfold.
# NOTE(review): EasyDict is not imported explicitly in this notebook; presumably
# it is exposed by one of the wildcard imports — confirm.
args = EasyDict()
with open(f'./config/{config_name}.json', 'r') as f:
    args.update(json.load(f))

In [27]:
# Load the k-fold sample list and the per-sample array stored alongside it.
# SECURITY: allow_pickle=True unpickles arbitrary objects — only load files from a
# trusted source.
# Note: the train_kfold defined in this notebook loads both files again itself, so
# these globals are for inspection only.
kfold_dataset = np.load('/opt/ml/input/data/pseudo_kfold_all.npy', allow_pickle=True)
anns_cnt = np.load('/opt/ml/input/data/pseudo_kfold_anns.npy')

In [31]:
# Start k-fold training with the settings currently held in `args`.
train_kfold(args)

loading annotations into memory...
Done (t=4.65s)
creating index...
index created!
loading annotations into memory...
Done (t=5.58s)
creating index...
index created!
* Start Training...
--------------------------------------------------------------------------------
* Epoch 1
Epoch [1/20], Step [25/403], Loss: 2.4334, LR:0.000002
Epoch [1/20], Step [50/403], Loss: 2.1346, LR:0.000002
Epoch [1/20], Step [75/403], Loss: 1.9050, LR:0.000002
Epoch [1/20], Step [100/403], Loss: 1.9665, LR:0.000002
Epoch [1/20], Step [125/403], Loss: 1.7513, LR:0.000002
Epoch [1/20], Step [150/403], Loss: 1.4076, LR:0.000002
Epoch [1/20], Step [175/403], Loss: 1.5962, LR:0.000002
Epoch [1/20], Step [200/403], Loss: 1.5084, LR:0.000002
Epoch [1/20], Step [225/403], Loss: 1.7570, LR:0.000002
Epoch [1/20], Step [250/403], Loss: 1.5724, LR:0.000002
Epoch [1/20], Step [275/403], Loss: 1.4122, LR:0.000002
Epoch [1/20], Step [300/403], Loss: 1.4251, LR:0.000002
Epoch [1/20], Step [325/403], Loss: 1.6514, LR:0.00000

In [None]:
# Name (without extension) of the JSON file under ./config/ used for this run.
config_name = 'config28'

# Load the JSON settings into a fresh `args` object.
# NOTE(review): EasyDict is not imported explicitly in this notebook; presumably
# it is exposed by one of the wildcard imports — confirm.
args = EasyDict()
with open(f'./config/{config_name}.json', 'r') as f:
    args.update(json.load(f))

# Start k-fold training with the settings just loaded.
train_kfold(args)

loading annotations into memory...
Done (t=4.56s)
creating index...
index created!
loading annotations into memory...
Done (t=5.42s)
creating index...
index created!
* Start Training...
--------------------------------------------------------------------------------
* Epoch 1
Epoch [1/20], Step [25/403], Loss: 2.3865, LR:0.000002
Epoch [1/20], Step [50/403], Loss: 2.0117, LR:0.000002
Epoch [1/20], Step [75/403], Loss: 1.7653, LR:0.000002
Epoch [1/20], Step [100/403], Loss: 1.7177, LR:0.000002
Epoch [1/20], Step [125/403], Loss: 1.6374, LR:0.000002
Epoch [1/20], Step [150/403], Loss: 1.2940, LR:0.000002
Epoch [1/20], Step [175/403], Loss: 1.4389, LR:0.000002
Epoch [1/20], Step [200/403], Loss: 1.3721, LR:0.000002
Epoch [1/20], Step [225/403], Loss: 1.6448, LR:0.000002
Epoch [1/20], Step [250/403], Loss: 1.4354, LR:0.000002
Epoch [1/20], Step [275/403], Loss: 1.3102, LR:0.000002
Epoch [1/20], Step [300/403], Loss: 1.2596, LR:0.000002
Epoch [1/20], Step [325/403], Loss: 1.4315, LR:0.00000

In [30]:
def killmemory():
    """Free as much memory as possible before starting a new piece of work.

    Runs a full Python garbage-collection pass and then asks PyTorch to
    release the CUDA memory held by its caching allocator.
    """
    # Imported locally: `gc` is not among this notebook's explicit imports, so
    # the function would otherwise only work if a wildcard import exposed it.
    import gc

    gc.collect()
    torch.cuda.empty_cache()

def seed_everything(seed):
    """Make runs repeatable by seeding all random number generators in use.

    Covers Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and every CUDA device), and puts cuDNN into deterministic
    mode with benchmarking disabled.

    Args:
        seed: integer seed shared by all generators.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, then all CUDA devices
    # (the last call matters for multi-GPU setups).
    for torch_seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        torch_seeder(seed)

    # Deterministic cuDNN kernels, no autotuner.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

def create_dir(path):
    """Create directory ``path`` if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is left untouched, so the call is safe to repeat.

    Args:
        path: directory to create.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the check-then-mkdir sequence: it avoids
    # the race between the check and the creation and also handles nested paths.
    os.makedirs(path, exist_ok=True)

def collate_fn(batch):
    """Regroup a batch of per-sample tuples into per-field tuples.

    Args:
        batch: iterable of equally structured samples, e.g. ``(image, mask)`` pairs.

    Returns:
        A tuple with one entry per field, each entry being a tuple of that
        field's values across the batch; ``()`` for an empty batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)

def train_kfold(args):
    """Train one model per fold of a 5-fold stratified split, keeping each fold's best weights.

    For every fold a fresh model, loss, optimizer and scheduler are built from
    the names in ``args``; the model is trained for ``args.epochs`` epochs and
    validated every ``args.val_every`` epochs. Whenever the second mIoU value
    returned by ``validation`` improves within a fold, the weights are saved.

    Args:
        args: attribute-style configuration. Fields read here: ``seed``,
            ``augmentation``, ``augp``, ``resize``, ``batch_size``, ``model``,
            ``loss``, ``optimizer``, ``learning_rate``, ``scheduler``,
            ``epochs``, ``max_learning_rate``, ``val_every``, ``save_file_name``.

    Side effects:
        Creates ``./saved_model`` and writes
        ``[fold{k}]{args.save_file_name}.pt`` files into it; prints progress.
    """
    # Imported locally so the function does not rely on a wildcard import
    # happening to expose `import_module`.
    from importlib import import_module

    seed_everything(args.seed)
    warnings.filterwarnings(action='ignore')
    create_dir('./saved_model')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # SECURITY: allow_pickle=True unpickles arbitrary objects — trusted files only.
    kfold_dataset = np.load('/opt/ml/input/data/pseudo_kfold_all.npy', allow_pickle=True)
    anns_cnt = np.load('/opt/ml/input/data/pseudo_kfold_anns.npy')

    # anns_cnt is used as the stratification label for the split.
    skf = StratifiedKFold(n_splits=5)
    for k, (train_idx, valid_idx) in enumerate(skf.split(kfold_dataset, anns_cnt)):

        # Release memory left over from the previous fold.
        killmemory()

        # Augmentation: configurable pipeline for training, fixed one for validation.
        train_transform_module = getattr(import_module("augmentation"), args.augmentation)
        train_transform = train_transform_module(args.augp, args.resize)
        val_transform_module = getattr(import_module("augmentation"), 'ValAugmentation')
        val_transform = val_transform_module(args.resize)

        # Datasets. The validation split must use val_transform: it was previously
        # built with train_transform, so validation ran under training augmentation.
        train_dataset = PseudoKFoldDataset(dataset=kfold_dataset[train_idx], transform=train_transform)
        val_dataset = PseudoKFoldDataset(dataset=kfold_dataset[valid_idx], transform=val_transform)

        # Data loaders
        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=1,
            collate_fn=collate_fn,
            drop_last=True
        )
        val_loader = DataLoader(
            dataset=val_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=1,
            collate_fn=collate_fn
        )

        # Model
        model_module = getattr(import_module("model"), args.model)
        model = model_module(num_classes=12)
        model.to(device)

        print('* Start Training...')

        # The loss class is looked up in the project's `loss` module (custom losses).
        # To use a built-in loss instead, look it up in "torch.nn":
        # criterion_module = getattr(import_module("torch.nn"), args.loss)
        criterion_module = getattr(import_module("loss"), args.loss)
        criterion = criterion_module()

        optimizer_module = getattr(import_module("torch.optim"), args.optimizer)
        optimizer = optimizer_module(params = model.parameters(), lr=args.learning_rate)

        scheduler_module = getattr(import_module("scheduler"), args.scheduler)
        scheduler = scheduler_module(optimizer, T_0=args.epochs, eta_max=args.max_learning_rate, T_up=2, gamma=0.5)

        best_mIoU = -1
        for epoch in range(args.epochs):
            print('-' * 80)
            print(f'* Epoch {epoch+1}')
            start_time = time.time()

            # validation() switches the model to eval mode, so re-enable training mode.
            model.train()
            for step, (images, masks) in enumerate(train_loader):
                # collate_fn yields tuples of per-sample tensors; stack them into batches.
                images = torch.stack(images)        # presumably (batch, channel, height, width) — TODO confirm
                masks = torch.stack(masks).long()   # presumably (batch, height, width) class indices — TODO confirm

                images, masks = images.to(device), masks.to(device)

                outputs = model(images).to(device)

                loss = criterion(outputs, masks)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (step+1)%25==0:
                    print(f'Epoch [{epoch+1}/{args.epochs}], Step [{step+1}/{len(train_loader)}], Loss: {loss.item():.4f}, LR:{scheduler.get_lr()[0]:6f}')

            # The learning-rate schedule advances once per epoch.
            scheduler.step()

            if (epoch+1) % args.val_every == 0:
                print()
                avg_loss, avg_mIoU1, avg_mIoU2 = validation(model, val_loader, criterion, device)
                train_time = time.time()-start_time
                print(f"\n* epoch {epoch+1} training and validation time : {train_time:.4f} sec \n")

                # Checkpoint selection uses the second mIoU value returned by validation().
                if avg_mIoU2 > best_mIoU:
                    print(f'Best performance at epoch {epoch+1}')
                    print(f'Save model in saved_model/[fold{k+1}]{args.save_file_name}.pt \n')
                    best_mIoU = avg_mIoU2
                    save_model(model, './saved_model', f'[fold{k+1}]{args.save_file_name}.pt')


def validation(model, data_loader, criterion, device):
    """Evaluate ``model`` on ``data_loader`` and report loss and two mIoU figures.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        data_loader: iterable yielding ``(images, masks)`` where each element is
            a sequence of per-sample tensors (as produced by ``collate_fn``).
        criterion: loss callable invoked as ``criterion(outputs, masks)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(average_loss, mIoU1, mIoU2)``: the summed batch losses divided by
        the number of batches, the mean of the per-batch value taken from
        ``label_accuracy_score1(...)[2]``, and ``label_accuracy_score2`` applied
        to the histogram accumulated over all samples.
    """
    print('* Start validation...')
    model.eval()
    with torch.no_grad():
        total_loss = 0
        cnt = 0
        mIoU_list1 = []
        hist = np.zeros((12, 12))
        for images, masks in data_loader:
            images = torch.stack(images)       # presumably (batch, channel, height, width) — TODO confirm
            masks = torch.stack(masks).long()  # presumably (batch, height, width) class indices — TODO confirm

            images, masks = images.to(device), masks.to(device)

            outputs = model(images).to(device)
            total_loss += criterion(outputs, masks)
            cnt += 1

            # Class prediction = argmax over dim 1; move both arrays to host once.
            preds = torch.argmax(outputs, dim=1).detach().cpu().numpy()
            truths = masks.detach().cpu().numpy()

            mIoU_list1.append(label_accuracy_score1(truths, preds, n_class=12)[2])

            # Ground truth first, prediction second, matching the
            # label_accuracy_score1 call above and the lt/lp (true/pred) names.
            # The two were previously swapped.
            # NOTE(review): fast_hist2 is defined outside this file; the
            # (true, pred, n_class) parameter order is assumed — confirm.
            for lt, lp in zip(truths, preds):
                hist += fast_hist2(lt.flatten(), lp.flatten(), 12)

        avrg_loss = total_loss / cnt
        mean_mIoU1 = np.mean(mIoU_list1)
        mIoU2 = label_accuracy_score2(hist)
        print(f'Validation Average Loss: {avrg_loss:.4f}, mIoU1: {mean_mIoU1:.4f}, mIoU2: {mIoU2:.4f}')

    return avrg_loss, mean_mIoU1, mIoU2


def save_model(model, saved_dir, file_name):
    """Write the model's ``state_dict`` to ``saved_dir/file_name``.

    Only the parameters and buffers are stored, not the module object itself.

    Args:
        model: module whose ``state_dict()`` is saved.
        saved_dir: directory that receives the file.
        file_name: name of the checkpoint file inside ``saved_dir``.
    """
    weights = model.state_dict()
    destination = os.path.join(saved_dir, file_name)
    torch.save(weights, destination)