In [1]:
cd /opt/ml/code

/opt/ml/code


In [2]:
import random
import json

import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
from utils_modified import label_accuracy_score, add_hist

import cv2

import numpy as np
import pandas as pd

from pycocotools.coco import COCO
import torchvision
import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm

print(f'pytorch version: {torch.__version__}')
print(f'GPU 사용 가능 여부: {torch.cuda.is_available()}')
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

device = 'cuda' if torch.cuda.is_available() else 'cpu'

pytorch version: 1.4.0
GPU 사용 가능 여부: True
Tesla P40
1


In [4]:
def set_seed(random_seed=21):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    #torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

In [5]:
def EDA():
    with open(anns_file_path, 'r') as f:
        dataset = json.loads(f.read())

    categories = dataset['categories']
    anns = dataset['annotations']
    imgs = dataset['images']
    nr_cats = len(categories)
    nr_annotations = len(anns)
    nr_images = len(imgs)

    cat_names = []
    super_cat_names = []
    super_cat_ids = {}
    super_cat_last_name = ''
    nr_super_cats = 0
    for cat_it in categories:
        cat_names.append(cat_it['name'])
        super_cat_name = cat_it['supercategory']
        if super_cat_name != super_cat_last_name:
            super_cat_names.append(super_cat_name)
            super_cat_ids[super_cat_name] = nr_super_cats
            nr_super_cats += 1
    print('Number of super categories:', nr_super_cats)
    print('Number of categories:', nr_cats)
    print('Number of annotations:', nr_annotations)
    print('Number of images:', nr_images)

    cat_histogram = np.zeros(nr_cats, dtype=int)
    for ann_it in anns:
        cat_histogram[ann_it['category_id']] += 1

    f, ax = plt.subplots(figsize=(5, 5))
    df = pd.DataFrame({'Categories': cat_names, 'Number of annotations':cat_histogram})

    plt.title('category distribution of train set')
    sns.barplot(x='Number of annotations', y='Categories', data=df.sort_values('Number of annotations', ascending=False), label='Total', color='b')
    plt.show()

In [6]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, mode, transform):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
    
    def __getitem__(self, index:int):
        image_infos = self.coco.loadImgs(index)[0]

        images = cv2.imread(os.path.join(dataset_path, image_infos['file_name']))
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
        #images /= 255.0

        if self.mode in ('train', 'val'):
            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
            anns = self.coco.loadAnns(ann_ids)

            masks = np.zeros((image_infos['height'], image_infos['width']))
            for ann in anns:
                masks = np.maximum(self.coco.annToMask(ann) * ann['category_id'], masks)
            
            images, masks = self.transform(image=images, mask=masks).values()
            return images, masks, image_infos
        
        if self.mode == 'test':
            images = self.transform(image=images)['image']
            return images, image_infos

    def __len__(self) -> int:
        return len(self.coco.getImgIds())

In [7]:
def get_dataloader():
    train_dataset = CustomDataset(data_dir=train_path, mode='train',transform=train_transform)
    val_dataset = CustomDataset(data_dir=val_path, mode='val', transform=val_transform)
    test_dataset = CustomDataset(data_dir=test_path, mode='test', transform=test_transform)

    def collate_fn(batch):
        return tuple(zip(*batch))

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=4,
                                            collate_fn=collate_fn,
                                            drop_last=True)

    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers=4,
                                            collate_fn=collate_fn)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=batch_size,
                                            num_workers=4,
                                            collate_fn=collate_fn)
    return train_loader, val_loader, test_loader

In [8]:
def test_dataloader(dataloader):
    data = iter(dataloader).next()
    if len(data) == 3:
        imgs, masks, image_infos = data
        img = imgs[0]
        mask = masks[0]
        image_info = image_infos[0]

        fig, axes = plt.subplots(1, 2, figsize=(12, 12))
        print('image shape:', list(img.shape))
        print('mask shape:', list(mask.shape))
        print('Unique values, category of transformed mask:\n', {int(i):category_names[int(i)] for i in list(np.unique(mask))})

        axes[0].imshow(img.permute([1, 2, 0]))
        axes[0].grid(False)
        axes[0].set_title('imput image:' + str(image_info['file_name']), fontsize=15)

        axes[1].imshow(mask)
        axes[1].grid(False)
        axes[1].set_title('masks :' + str(image_info['file_name']), fontsize=15)

        plt.show()
    elif len(data) == 2:
        imgs, image_infos = iter(dataloader).next()
        img = imgs[0]
        image_info = image_infos[0]

        fig, ax = plt.subplots(figsize=(6, 6))
        print('image shape:', list(img.shape))

        ax.imshow(img.permute([1, 2, 0]))
        ax.grid(False)
        ax.set_title('imput image:' + str(image_info['file_name']), fontsize=15)

        plt.show()

In [9]:
def train(num_epochs, model, data_loader, val_loader, criterion, optimizer, saved_dir, val_every, device, print_log=True):
    if print_log:
        print('Start training')
    best_mIoU = 0
    hist_mIoU = []
    epoch_begin = 0

    for epoch in tqdm(range(0, num_epochs)):
        model.train()
        for step, (images, masks, _) in tqdm(enumerate(data_loader)):
            images = torch.stack(images).to(device)
            masks = torch.stack(masks).long().to(device)
            outputs = model(images)
            
            loss = criterion(outputs, masks)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if print_log:
                if (step + 1) % 25 == 0:
                    print(f'Epoch[{epoch + 1}/{num_epochs}], Step[{step + 1}/{len(data_loader)}], Loss: {loss.item():.4f}')
                    
        if (epoch + 1) % val_every == 0:
            mIoU = validation(epoch + 1, model, val_loader, criterion, device)
            hist_mIoU.append(mIoU)
            if mIoU > best_mIoU:
                if print_log:
                    print(f'Best performance at epoch: {epoch + 1}')
                    print('Save model in', saved_dir)
                best_mIoU = mIoU
                save_model(model, saved_dir)

    return hist_mIoU

In [10]:
def validation(epoch, model, data_loader, criterion, device):
    print(f'Start validation #{epoch}')
    model.eval()
    with torch.no_grad():
        hist = np.zeros((N_CLASSES, N_CLASSES))
        for step, (images, masks, _) in enumerate(data_loader):
            images = torch.stack(images).to(device)
            masks = torch.stack(masks).long().to(device)
            outputs = model(images)

            loss = criterion(outputs, masks)

            outputs = torch.argmax(outputs.squeeze(), dim=1)
            outputs = outputs.detach().cpu().numpy()
            masks = masks.detach().cpu().numpy()
            hist = add_hist(hist, masks, outputs, n_class=N_CLASSES)
        
        acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
        print(f'Validation #{epoch} mIoU: {mIoU:.4f}')
    return mIoU

In [11]:
def save_model(model, saved_dir, file_name=f'{model_name}.pt'):
    os.makedirs(saved_dir, exist_ok=True)
    check_point = {'net':model.state_dict()}
    output_path = os.path.join(saved_dir, file_name)
    torch.save(model.state_dict(), output_path)

In [13]:
def load_model(model_name):
    model_path = saved_path + f'/{model_name}.pt'
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint)

In [14]:
def test_eval_model(model, dataloader):
    for imgs, image_infos in dataloader:
        model.eval()
        outs = model(torch.stack(imgs).to(device))
        outs = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()
        break

    imgs = imgs[0]
    image_infos = image_infos[0]
    outs = outs[0]

    fig, axes = plt.subplots(1, 2, figsize=(16, 16))
    print('Shape of Original Image:', list(imgs.shape))
    print('Shape of Predicted:', list(outs.shape))
    print('Unique values, category of transformed mask\n', {int(i):category_names[int(i)] for i in list(np.unique(outs))})

    axes[0].imshow(imgs.permute([1,2,0]))
    axes[0].grid(False)
    axes[0].set_title('Original image:' + str(image_infos['file_name']), fontsize=15)

    axes[1].imshow(outs)
    axes[1].grid(False)
    axes[1].set_title('Predicted:' + str(image_infos['file_name']), fontsize=15)

    plt.show()

In [15]:
def test(model, data_loader, device):
    size = 256
    transform = A.Compose([A.Resize(256, 256)])
    print('Start prediction')
    model.eval()

    file_name_list = []
    preds_array = np.empty((0, size*size), dtype=np.long)
    
    with torch.no_grad():
        for step, (imgs, image_infos) in tqdm(enumerate(data_loader)):
            outs = model(torch.stack(imgs).to(device))
            outs = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()

            masks = []
            for img, mask in zip(np.stack(imgs), outs):
                img, mask = transform(image=img, mask=mask).values()
                masks.append(mask)
            outs = np.array(masks)
            outs = outs.reshape([outs.shape[0], size * size]).astype(int)
            preds_array = np.vstack((preds_array, outs))

            file_name_list.append([i['file_name'] for i in image_infos])
    print('End prediction.')
    file_names = [y for x in file_name_list for y in x]
    return file_names, preds_array

In [16]:
def make_submission(model, dataloader):
    submission = pd.read_csv('./submission/sample_submission.csv', index_col=None)
    file_names, preds = test(model, dataloader, device)

    for file_name, string in zip(file_names, preds):
        submission = submission.append({'image_id':file_name, 'PredictionString':' '.join(str(e) for e in string.tolist())}, ignore_index=True)

    submission.to_csv(submission_path + f'/{model_name}.csv', index=False)

In [25]:
N_CLASSES = 12
batch_size = 8
num_epochs = 8
image_size = 256
learning_rate = 0.0001
model_name = 'model_test'
random_seed = 21
val_every = 1

model = smp.DeepLabV3Plus(
    encoder_name="resnext50_32x4d",
    encoder_weights="imagenet",
    in_channels=3,
    classes=N_CLASSES,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)

train_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.RandomRotate90(),
    A.OneOf([
        A.MotionBlur(p=1.0),
        A.OpticalDistortion(p=1.0)
    ], p=2/3),
    ToTensorV2()
])

val_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0
    ),
    ToTensorV2()
])

dataset_path = '../input/data'
anns_file_path = dataset_path + '/train.json'
train_path = dataset_path + '/train.json'
val_path = dataset_path + '/val.json'
test_path = dataset_path + '/test.json'
saved_path = './saved'
submission_path = './submission'
category_names = ['Background','UNKNOWN','General trash','Paper','Paper pack','Metal','Glass','Plastic','Styrofoam','Plastic bag','Battery','Clothing']


In [26]:
set_seed()

In [27]:
train_loader, val_loader, test_loader = get_dataloader()

loading annotations into memory...
Done (t=3.33s)
creating index...
index created!
loading annotations into memory...
Done (t=0.85s)
creating index...
index created!
loading annotations into memory...
Done (t=1.08s)
creating index...
index created!


In [28]:
train(num_epochs, model, train_loader, val_loader, criterion, optimizer, saved_path, val_every, device)

Start training


  0%|          | 0/8 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Epoch[1/8], Step[25/327], Loss: 1.7211
Epoch[1/8], Step[50/327], Loss: 1.3508
Epoch[1/8], Step[75/327], Loss: 1.0605
Epoch[1/8], Step[100/327], Loss: 1.1311
Epoch[1/8], Step[125/327], Loss: 1.0520
Epoch[1/8], Step[150/327], Loss: 0.5973
Epoch[1/8], Step[175/327], Loss: 1.0081
Epoch[1/8], Step[200/327], Loss: 0.6834
Epoch[1/8], Step[225/327], Loss: 0.6153
Epoch[1/8], Step[250/327], Loss: 0.5215
Epoch[1/8], Step[275/327], Loss: 0.6252
Epoch[1/8], Step[300/327], Loss: 0.4137
Epoch[1/8], Step[325/327], Loss: 0.5960
Start validation #1
Validation #1 mIoU: 0.3119
Best performance at epoch: 1
Save model in ./saved


0it [00:00, ?it/s]

Epoch[2/8], Step[25/327], Loss: 0.3097
Epoch[2/8], Step[50/327], Loss: 0.4387
Epoch[2/8], Step[75/327], Loss: 0.4734
Epoch[2/8], Step[100/327], Loss: 0.5127
Epoch[2/8], Step[125/327], Loss: 0.5353
Epoch[2/8], Step[150/327], Loss: 0.7251
Epoch[2/8], Step[175/327], Loss: 0.4467
Epoch[2/8], Step[200/327], Loss: 0.5023
Epoch[2/8], Step[225/327], Loss: 0.4381
Epoch[2/8], Step[250/327], Loss: 0.4261
Epoch[2/8], Step[275/327], Loss: 0.4292
Epoch[2/8], Step[300/327], Loss: 0.3893
Epoch[2/8], Step[325/327], Loss: 0.5046
Start validation #2
Validation #2 mIoU: 0.3851
Best performance at epoch: 2
Save model in ./saved


0it [00:00, ?it/s]

Epoch[3/8], Step[25/327], Loss: 0.4275
Epoch[3/8], Step[50/327], Loss: 0.3513
Epoch[3/8], Step[75/327], Loss: 0.2965
Epoch[3/8], Step[100/327], Loss: 0.3348
Epoch[3/8], Step[125/327], Loss: 0.2890
Epoch[3/8], Step[150/327], Loss: 0.2922
Epoch[3/8], Step[175/327], Loss: 0.6304
Epoch[3/8], Step[200/327], Loss: 0.6884
Epoch[3/8], Step[225/327], Loss: 0.2351
Epoch[3/8], Step[250/327], Loss: 0.6968
Epoch[3/8], Step[275/327], Loss: 0.3996
Epoch[3/8], Step[300/327], Loss: 0.3882
Epoch[3/8], Step[325/327], Loss: 0.3809
Start validation #3
Validation #3 mIoU: 0.4065
Best performance at epoch: 3
Save model in ./saved


0it [00:00, ?it/s]

Epoch[4/8], Step[25/327], Loss: 0.2248
Epoch[4/8], Step[50/327], Loss: 0.2478
Epoch[4/8], Step[75/327], Loss: 0.2160
Epoch[4/8], Step[100/327], Loss: 0.3609
Epoch[4/8], Step[125/327], Loss: 0.4634
Epoch[4/8], Step[150/327], Loss: 0.3459
Epoch[4/8], Step[175/327], Loss: 0.4653
Epoch[4/8], Step[200/327], Loss: 0.3155
Epoch[4/8], Step[225/327], Loss: 0.2935
Epoch[4/8], Step[250/327], Loss: 0.3258
Epoch[4/8], Step[275/327], Loss: 0.2770
Epoch[4/8], Step[300/327], Loss: 0.3948
Epoch[4/8], Step[325/327], Loss: 0.2751
Start validation #4
Validation #4 mIoU: 0.4160
Best performance at epoch: 4
Save model in ./saved


0it [00:00, ?it/s]

Epoch[5/8], Step[25/327], Loss: 0.2844
Epoch[5/8], Step[50/327], Loss: 0.3364
Epoch[5/8], Step[75/327], Loss: 0.4119
Epoch[5/8], Step[100/327], Loss: 0.3751
Epoch[5/8], Step[125/327], Loss: 0.2824
Epoch[5/8], Step[150/327], Loss: 0.2707
Epoch[5/8], Step[175/327], Loss: 0.3724
Epoch[5/8], Step[200/327], Loss: 0.2773
Epoch[5/8], Step[225/327], Loss: 0.3381
Epoch[5/8], Step[250/327], Loss: 0.3235
Epoch[5/8], Step[275/327], Loss: 0.2598
Epoch[5/8], Step[300/327], Loss: 0.2661
Epoch[5/8], Step[325/327], Loss: 0.3596
Start validation #5
Validation #5 mIoU: 0.4467
Best performance at epoch: 5
Save model in ./saved


0it [00:00, ?it/s]

Epoch[6/8], Step[25/327], Loss: 0.6224
Epoch[6/8], Step[50/327], Loss: 0.3787
Epoch[6/8], Step[75/327], Loss: 0.1795
Epoch[6/8], Step[100/327], Loss: 0.2821
Epoch[6/8], Step[125/327], Loss: 0.3501
Epoch[6/8], Step[150/327], Loss: 0.4985
Epoch[6/8], Step[175/327], Loss: 0.2149
Epoch[6/8], Step[200/327], Loss: 0.2919
Epoch[6/8], Step[225/327], Loss: 0.1563
Epoch[6/8], Step[250/327], Loss: 0.4361
Epoch[6/8], Step[275/327], Loss: 0.3546
Epoch[6/8], Step[300/327], Loss: 0.4541
Epoch[6/8], Step[325/327], Loss: 0.3356
Start validation #6
Validation #6 mIoU: 0.4532
Best performance at epoch: 6
Save model in ./saved


0it [00:00, ?it/s]

Epoch[7/8], Step[25/327], Loss: 0.6485
Epoch[7/8], Step[50/327], Loss: 0.1317
Epoch[7/8], Step[75/327], Loss: 0.3423
Epoch[7/8], Step[100/327], Loss: 0.2513
Epoch[7/8], Step[125/327], Loss: 0.3854
Epoch[7/8], Step[150/327], Loss: 0.5616
Epoch[7/8], Step[175/327], Loss: 0.4852
Epoch[7/8], Step[200/327], Loss: 0.3009
Epoch[7/8], Step[225/327], Loss: 0.2075
Epoch[7/8], Step[250/327], Loss: 0.5768
Epoch[7/8], Step[275/327], Loss: 0.3902
Epoch[7/8], Step[300/327], Loss: 0.3493
Epoch[7/8], Step[325/327], Loss: 0.1613
Start validation #7
Validation #7 mIoU: 0.4388


0it [00:00, ?it/s]

Epoch[8/8], Step[25/327], Loss: 0.2564
Epoch[8/8], Step[50/327], Loss: 0.4046
Epoch[8/8], Step[75/327], Loss: 0.2359
Epoch[8/8], Step[100/327], Loss: 0.2232
Epoch[8/8], Step[125/327], Loss: 0.1749
Epoch[8/8], Step[150/327], Loss: 0.4186
Epoch[8/8], Step[175/327], Loss: 0.2502
Epoch[8/8], Step[200/327], Loss: 0.1715
Epoch[8/8], Step[225/327], Loss: 0.2221
Epoch[8/8], Step[250/327], Loss: 0.2041
Epoch[8/8], Step[275/327], Loss: 0.3181
Epoch[8/8], Step[300/327], Loss: 0.3642
Epoch[8/8], Step[325/327], Loss: 0.1797
Start validation #8
Validation #8 mIoU: 0.4322


[0.3118551614338785,
 0.385111440425622,
 0.4064766708411446,
 0.41596951649921626,
 0.4467292604135856,
 0.4532302930833944,
 0.438831117628027,
 0.43224035722611304]

In [None]:
model = load_model(model_name)
make_submission(model, test_loader)

In [None]:
def model_test(encoder_list, decoder_list):
    model_test_result = {}
    for encoder in encoder_list:
        for  decoder in decoder_list:
            if decoder == 'DeepLabV3Plus':
                model = smp.DeepLabV3Plus(
                    encoder_name=encoder,
                    encoder_weights="imagenet",
                    in_channels=3,
                    classes=N_CLASSES,
                )
            else:
                raise 'error'
            model = model.to(device)
            result = train(num_epochs, model, train_loader, val_loader, criterion, optimizer, saved_path, val_every, device, print_log=False)
            model_test_result[encoder + ' ' + decoder] = result
    return model_test_result