In [2]:
cd /opt/ml/code

/opt/ml/code


In [3]:
import random
import json

import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
from utils_modified import label_accuracy_score, add_hist

import cv2

import numpy as np
import pandas as pd

from pycocotools.coco import COCO
import torchvision
import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm
import timeit

print(f'pytorch version: {torch.__version__}')
print(f'GPU 사용 가능 여부: {torch.cuda.is_available()}')
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

device = 'cuda' if torch.cuda.is_available() else 'cpu'

pytorch version: 1.7.1
GPU 사용 가능 여부: True
Tesla P40
1


In [4]:
def set_seed(seed=21):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #torch.backends.cudnn.deterministic = True
    #torch.backends.cudnn.benchmark = False
    #torch.set_deterministic(True)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

In [5]:
def EDA():
    with open(anns_file_path, 'r') as f:
        dataset = json.loads(f.read())

    categories = dataset['categories']
    anns = dataset['annotations']
    imgs = dataset['images']
    nr_cats = len(categories)
    nr_annotations = len(anns)
    nr_images = len(imgs)

    cat_names = []
    super_cat_names = []
    super_cat_ids = {}
    super_cat_last_name = ''
    nr_super_cats = 0
    for cat_it in categories:
        cat_names.append(cat_it['name'])
        super_cat_name = cat_it['supercategory']
        if super_cat_name != super_cat_last_name:
            super_cat_names.append(super_cat_name)
            super_cat_ids[super_cat_name] = nr_super_cats
            nr_super_cats += 1
    print('Number of super categories:', nr_super_cats)
    print('Number of categories:', nr_cats)
    print('Number of annotations:', nr_annotations)
    print('Number of images:', nr_images)

    cat_histogram = np.zeros(nr_cats, dtype=int)
    for ann_it in anns:
        cat_histogram[ann_it['category_id']] += 1

    f, ax = plt.subplots(figsize=(5, 5))
    df = pd.DataFrame({'Categories': cat_names, 'Number of annotations':cat_histogram})

    plt.title('category distribution of train set')
    sns.barplot(x='Number of annotations', y='Categories', data=df.sort_values('Number of annotations', ascending=False), label='Total', color='b')
    plt.show()

In [6]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, mode, transform):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
    
    def __getitem__(self, index:int):
        image_infos = self.coco.loadImgs(index)[0]

        images = cv2.imread(os.path.join(dataset_path, image_infos['file_name']))
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
        #images /= 255.0

        if self.mode in ('train', 'val'):
            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
            anns = self.coco.loadAnns(ann_ids)

            masks = np.zeros((image_infos['height'], image_infos['width']))
            for ann in anns:
                masks = np.maximum(self.coco.annToMask(ann) * (ann['category_id'] + 1), masks)
            
            images, masks = self.transform(image=images, mask=masks).values()
            return images, masks, image_infos
        
        if self.mode == 'test':
            images = self.transform(image=images)['image']
            return images, image_infos

    def __len__(self) -> int:
        return len(self.coco.getImgIds())

In [7]:
def get_dataloader():
    train_dataset = CustomDataset(data_dir=train_path, mode='train',transform=train_transform)
    val_dataset = CustomDataset(data_dir=val_path, mode='val', transform=val_transform)
    test_dataset = CustomDataset(data_dir=test_path, mode='test', transform=test_transform)

    def collate_fn(batch):
        return tuple(zip(*batch))

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=4,
                                            collate_fn=collate_fn,
                                            drop_last=True,
                                            worker_init_fn=seed_worker)

    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers=4,
                                            collate_fn=collate_fn,
                                            worker_init_fn=seed_worker)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=batch_size,
                                            num_workers=4,
                                            collate_fn=collate_fn,
                                            worker_init_fn=seed_worker)
    return train_loader, val_loader, test_loader

In [8]:
def get_folded_dataloader(kfold=5):
    train_dataset = CustomDataset(data_dir=train_all_path, mode='train',transform=train_transform)

    train_all_size = len(train_dataset)
    size_list = [train_all_size // kfold] * kfold
    size_list = [train_all_size // kfold] * kfold
    for i in range(train_all_size % kfold):
        size_list[i] += 1
    train_dataset_list = torch.utils.data.random_split(train_dataset, size_list)

    def collate_fn(batch):
        return tuple(zip(*batch))

    for k in range(kfold):
        train_loader = torch.utils.data.ConcatDataset(train_dataset_list[:k] + train_dataset_list[k + 1:])
        val_loader = train_dataset_list[k]

        train_loader = torch.utils.data.DataLoader(dataset=train_loader, 
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=4,
                                                    collate_fn=collate_fn,
                                                    drop_last=True,
                                                    worker_init_fn=seed_worker)
        
        val_loader = torch.utils.data.DataLoader(dataset=val_loader, 
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=4,
                                                    collate_fn=collate_fn,
                                                    drop_last=True,
                                                    worker_init_fn=seed_worker)

        yield train_loader, val_loader

In [9]:
def test_dataloader(dataloader):
    data = iter(dataloader).next()
    if len(data) == 3:
        imgs, masks, image_infos = data
        img = imgs[0]
        mask = masks[0]
        image_info = image_infos[0]

        fig, axes = plt.subplots(1, 2, figsize=(12, 12))
        print('image shape:', list(img.shape))
        print('mask shape:', list(mask.shape))
        print('Unique values, category of transformed mask:\n', {int(i):category_names[int(i)] for i in list(np.unique(mask))})

        axes[0].imshow(img.permute([1, 2, 0]))
        axes[0].grid(False)
        axes[0].set_title('imput image:' + str(image_info['file_name']), fontsize=15)

        axes[1].imshow(mask)
        axes[1].grid(False)
        axes[1].set_title('masks :' + str(image_info['file_name']), fontsize=15)

        plt.show()
    elif len(data) == 2:
        imgs, image_infos = iter(dataloader).next()
        img = imgs[0]
        image_info = image_infos[0]

        fig, ax = plt.subplots(figsize=(6, 6))
        print('image shape:', list(img.shape))

        ax.imshow(img.permute([1, 2, 0]))
        ax.grid(False)
        ax.set_title('imput image:' + str(image_info['file_name']), fontsize=15)

        plt.show()

In [10]:
def train(num_epochs, model, data_loader, val_loader, criterion, optimizer, saved_dir, val_every, device, print_log=True):
    if print_log:
        print('Start training')
    best_mIoU = 0
    hist_mIoU = []
    epoch_begin = 0

    name = str(type(model.encoder)).split('.')[-1]
    name += ' ' + str(type(model.decoder)).split('.')[-1]

    for epoch in tqdm(range(0, num_epochs), desc=name):
        model.train()
        for step, (images, masks, _) in tqdm(enumerate(data_loader), desc='Training', leave=False, total=len(data_loader)):
            images = torch.stack(images).to(device)
            masks = torch.stack(masks).long().to(device)
            outputs = model(images)
            
            loss = criterion(outputs, masks)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if print_log:
                if (step + 1) % 25 == 0:
                    print(f'Epoch[{epoch + 1}/{num_epochs}], Step[{step + 1}/{len(data_loader)}], Loss: {loss.item():.4f}')
                    
        if (epoch + 1) % val_every == 0:
            mIoU = validation(epoch + 1, model, val_loader, criterion, device, print_log)
            hist_mIoU.append(mIoU)
            if mIoU > best_mIoU:
                if print_log:
                    print(f'Best performance at epoch: {epoch + 1}')
                    print('Save model in', saved_dir)
                best_mIoU = mIoU
                save_model(model, saved_dir)

    return hist_mIoU

In [11]:
def validation(epoch, model, data_loader, criterion, device, print_log=True):
    if print_log:
        print(f'Start validation #{epoch}')
    model.eval()
    with torch.no_grad():
        hist = np.zeros((N_CLASSES, N_CLASSES))
        for step, (images, masks, _) in tqdm(enumerate(data_loader), desc='Validation', leave=False, total=len(data_loader)):
            images = torch.stack(images).to(device)
            masks = torch.stack(masks).long().to(device)
            outputs = model(images)

            loss = criterion(outputs, masks)

            outputs = torch.argmax(outputs.squeeze(), dim=1)
            outputs = outputs.detach().cpu().numpy()
            masks = masks.detach().cpu().numpy()
            hist = add_hist(hist, masks, outputs, n_class=N_CLASSES)
        
        acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
        if print_log:
            print(f'Validation #{epoch} mIoU: {mIoU:.4f}')
    return mIoU

In [12]:
def save_model(model, saved_dir):
    file_name = f'{model_name}.pt'
    os.makedirs(saved_dir, exist_ok=True)
    check_point = {'net':model.state_dict()}
    output_path = os.path.join(saved_dir, file_name)
    torch.save(model.state_dict(), output_path)

In [13]:
def load_model(model_name, model):
    model_path = saved_path + f'/{model_name}.pt'
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint)
    return model

In [14]:
def test_eval_model(model, dataloader, idx=0):
    for imgs, image_infos in dataloader:
        model.eval()
        outs = model(torch.stack(imgs).to(device))
        outs = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()
        break

    imgs = imgs[idx]
    image_infos = image_infos[idx]
    outs = outs[idx]

    fig, axes = plt.subplots(1, 2, figsize=(16, 16))
    print('Shape of Original Image:', list(imgs.shape))
    print('Shape of Predicted:', list(outs.shape))
    print('Unique values, category of transformed mask\n', {int(i):category_names[int(i)] for i in list(np.unique(outs))})

    axes[0].imshow(imgs.permute([1,2,0]))
    axes[0].grid(False)
    axes[0].set_title('Original image:' + str(image_infos['file_name']), fontsize=15)

    axes[1].imshow(outs)
    axes[1].grid(False)
    axes[1].set_title('Predicted:' + str(image_infos['file_name']), fontsize=15)

    plt.show()

In [15]:
def test(model_list, data_loader, device):
    size = 256
    transform = A.Compose([A.Resize(256, 256)])
    print('Start prediction')
    for model in model_list:
        model.eval()

    file_name_list = []
    preds_array = np.empty((0, size*size), dtype=np.long)
    
    with torch.no_grad():
        for step, (imgs, image_infos) in tqdm(enumerate(data_loader), total=len(data_loader)):
            outs = None
            for model in model_list:
                if outs is None:
                    outs = model(torch.stack(imgs).to(device))
                else:
                    outs += model(torch.stack(imgs).to(device))
            outs = torch.argmax(outs, dim=1).detach().cpu().numpy()

            masks = []
            for img, mask in zip(np.stack(imgs), outs):
                img, mask = transform(image=img, mask=mask).values()
                masks.append(mask)
            outs = np.array(masks)
            outs = outs.reshape([outs.shape[0], size * size]).astype(int)
            preds_array = np.vstack((preds_array, outs))

            file_name_list.append([i['file_name'] for i in image_infos])
    print('End prediction.')
    file_names = [y for x in file_name_list for y in x]
    return file_names, preds_array

In [17]:
def make_submission(model_list, dataloader):
    submission = pd.read_csv('./submission/sample_submission.csv', index_col=None)
    file_names, preds = test(model_list, dataloader, device)

    for file_name, string in zip(file_names, preds):
        submission = submission.append({'image_id':file_name, 'PredictionString':' '.join(str(e) for e in string.tolist())}, ignore_index=True)

    submission.to_csv(submission_path + f'/{model_name}.csv', index=False)

In [18]:
def model_test(encoder_list, decoder_list):
    model_test_result = []
    for encoder in encoder_list:
        for  decoder in decoder_list:
            set_seed(random_seed)
            try:
                if decoder == 'DeepLabV3Plus':
                    model = smp.DeepLabV3Plus(
                        encoder_name=encoder,
                        encoder_weights="imagenet",
                        in_channels=3,
                        classes=N_CLASSES,
                    )
                else:
                    raise 'decoder does not exist'
                criterion = nn.CrossEntropyLoss()
                optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)
                model = model.to(device)
                start_time = timeit.default_timer()
                mIoU = train(num_epochs, model, train_loader, val_loader, criterion, optimizer, saved_path, val_every, device, print_log=False)
                end_time = timeit.default_timer()
                time = end_time - start_time
                stat = {'encoder' : encoder, 'decoder' : decoder, 'mIoU' : mIoU, 'time' : time}
            except Exception as e:
                stat = {'encoder' : encoder, 'decoder' : decoder, 'error' : e}
                pass
            print(stat)
            model_test_result.append(stat)
    return model_test_result

In [19]:
N_CLASSES = 12
batch_size = 16
num_epochs = 10
image_size = 256
learning_rate = 0.0001
model_name = 'model_test'
random_seed = 21
val_every = 1

set_seed(random_seed)

model = smp.DeepLabV3Plus(
    encoder_name="timm-regnetx_064",
    encoder_weights="imagenet",
    in_channels=3,
    classes=N_CLASSES,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)

train_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.RandomRotate90(),
    A.OneOf([
        A.MotionBlur(p=1.0),
        A.OpticalDistortion(p=1.0)
    ], p=2/3),
    ToTensorV2()
])

val_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    ToTensorV2()
])

dataset_path = '../input/data'
anns_file_path = dataset_path + '/train.json'
train_path = dataset_path + '/train.json'
train_all_path = dataset_path + '/train_all.json'
val_path = dataset_path + '/val.json'
test_path = dataset_path + '/test.json'
saved_path = './saved'
submission_path = './submission'
category_names = ['Background','UNKNOWN','General trash','Paper','Paper pack','Metal','Glass','Plastic','Styrofoam','Plastic bag','Battery','Clothing']


In [38]:
kfold = 5
for k, (train_loader, val_loader) in enumerate(get_folded_dataloader(kfold)):
    model_name = f'timm-regnetx_064_{k}-{kfold}fold'
    model = smp.DeepLabV3Plus(
        encoder_name="timm-regnetx_064",
        encoder_weights="imagenet",
        in_channels=3,
        classes=N_CLASSES,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)

    train(num_epochs, model, train_loader, val_loader, criterion, optimizer, saved_path, val_every, device, False)

loading annotations into memory...
Done (t=4.22s)
creating index...
index created!


RegNetEncoder'> DeepLabV3PlusDecoder'>:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/163 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [21]:
_, _, test_loader = get_dataloader()

loading annotations into memory...
Done (t=3.60s)
creating index...
index created!
loading annotations into memory...
Done (t=0.85s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [22]:
ensamble_path = [
    '/opt/ml/code/saved/timm-regnetx_064_0-5fold.pt',
    '/opt/ml/code/saved/timm-regnetx_064_1-5fold.pt',
    '/opt/ml/code/saved/timm-regnetx_064_2-5fold.pt',
    '/opt/ml/code/saved/timm-regnetx_064_3-5fold.pt',
    '/opt/ml/code/saved/timm-regnetx_064_4-5fold.pt'
]
model_list = []
for p in ensamble_path:
    model_path = p
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint)
    model_list.append(model)
make_submission(model_list, test_loader)

Start prediction


  0%|          | 0/53 [00:00<?, ?it/s]

End prediction.


In [23]:
model_path = saved_path + f'/{model_name}.pt'
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint)
#model = load_model(model_name, model)
make_submission(model, test_loader)

Start prediction
DeepLabV3Plus(
  (encoder): RegNetEncoder(
    (stem): ConvBnAct(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNormAct2d(
        32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (act): ReLU(inplace=True)
      )
    )
    (s1): RegStage(
      (b1): Bottleneck(
        (conv1): ConvBnAct(
          (conv): Conv2d(32, 168, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            168, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (act): ReLU(inplace=True)
          )
        )
        (conv2): ConvBnAct(
          (conv): Conv2d(168, 168, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=3, bias=False)
          (bn): BatchNormAct2d(
            168, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (act): ReLU(inplace=True)
          )
        )
        (conv3): ConvBnAct(
          (con

0it [00:00, ?it/s]

End prediction.
