In [4]:
import torch
import torch.nn as nn
import random
import json

import cv2
from torch.utils.data import DataLoader, Dataset

#preprocessing
from pycocotools.coco import COCO

print('pytorch version: {}'.format(torch.__version__))
print('GPU 사용 가능 여부: {}'.format(torch.cuda.is_available()))

print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

# GPU 사용 가능 여부에 따라 device 정보 저장
device = "cuda" if torch.cuda.is_available() else "cpu"


pytorch version: 1.7.1
GPU 사용 가능 여부: True
Tesla V100-SXM2-32GB
1


In [5]:
import segmentation_models_pytorch as smp
import numpy as np

random_seed = 15
random.seed(random_seed)
np.random.seed(random_seed)

## Dataset & Dataloader

In [6]:
class CustomDataLoader(Dataset):
    """COCO format"""
    def __init__(self, data_dir, mode = 'train', transform = None):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
        
    def __getitem__(self, index: int):
        # dataset이 index되어 list처럼 동작
        image_id = self.coco.getImgIds(imgIds=index)
        image_infos = self.coco.loadImgs(image_id)[0]
        
        # cv2 를 활용하여 image 불러오기
        images = cv2.imread(os.path.join(dataset_path, image_infos['file_name']))
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
        images /= 255.0
        
        if (self.mode in ('train', 'val')):
            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
            anns = self.coco.loadAnns(ann_ids)

            # Load the categories in a variable
            cat_ids = self.coco.getCatIds()
            cats = self.coco.loadCats(cat_ids)

            # masks : size가 (height x width)인 2D
            # 각각의 pixel 값에는 "category id" 할당
            # Background = 0
            masks = np.zeros((image_infos["height"], image_infos["width"]))
            # General trash = 1, ... , Cigarette = 10
            anns = sorted(anns, key=lambda idx : len(idx['segmentation'][0]), reverse=False)
            for i in range(len(anns)):
                className = get_classname(anns[i]['category_id'], cats)
                pixel_value = category_names.index(className)
                masks[self.coco.annToMask(anns[i]) == 1] = pixel_value
            masks = masks.astype(np.int8)
                        
            # transform -> albumentations 라이브러리 활용
            if self.transform is not None:
                transformed = self.transform(image=images, mask=masks)
                images = transformed["image"]
                masks = transformed["mask"]
            return images, masks, image_infos
        
        if self.mode == 'test':
            # transform -> albumentations 라이브러리 활용
            if self.transform is not None:
                transformed = self.transform(image=images)
                images = transformed["image"]
            return images, image_infos
    
    def __len__(self) -> int:
        # 전체 dataset의 size를 return
        return len(self.coco.getImgIds())

In [9]:
import albumentations as A
from albumentations.pytorch import ToTensorV2


def collate_fn(batch):
    return tuple(zip(*batch))

transform = A.Compose([
    ToTensorV2()
])

dataset_path  = '../input/data'
train_path = dataset_path + '/train.json'
val_path = dataset_path + '/val.json'

train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=transform)
valid_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=transform)

smp_data_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                batch_size=8,
                                                shuffle=True,
                                                num_workers=4,
                                                drop_last=True,
                                                collate_fn=collate_fn)

smp_valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                                batch_size=8,
                                                shuffle=True,
                                                num_workers=4,
                                                drop_last=True,
                                                collate_fn=collate_fn)


loading annotations into memory...
Done (t=3.72s)
creating index...
index created!
loading annotations into memory...
Done (t=2.11s)
creating index...
index created!


In [10]:
model = smp.DeepLabV3Plus(
    encoder_name='efficientnet-b7',
    encoder_weights='imagenet',
    in_channels=3,
    classes=11
)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth" to /opt/ml/.cache/torch/hub/checkpoints/efficientnet-b7-dcc49843.pth


HBox(children=(IntProgress(value=0, max=266860719), HTML(value='')))




In [30]:
print(model)

  )
        (_bn2): BatchNorm2d(384, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_swish): MemoryEfficientSwish()
      )
      (43): MBConvBlock(
        (_expand_conv): Conv2dStaticSamePadding(
          384, 2304, kernel_size=(1, 1), stride=(1, 1), dilation=(2, 2), bias=False
          (static_padding): Identity()
        )
        (_bn0): BatchNorm2d(2304, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_depthwise_conv): Conv2dStaticSamePadding(
          2304, 2304, kernel_size=(5, 5), stride=(1, 1), padding=(4, 4), dilation=(2, 2), groups=2304, bias=False
          (static_padding): Identity()
        )
        (_bn1): BatchNorm2d(2304, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          2304, 96, kernel_size=(1, 1), stride=(1, 1), dilation=(2, 2)
          (static_padding): Identity()
        )
        (_se_e

In [12]:
x = torch.rand([2,3,512,512])
print(f'input shape : {x.shape}')
out = model(x)
print(f'output shape : {out.shape}')

input shape : torch.Size([2, 3, 512, 512])
output shape : torch.Size([2, 11, 512, 512])


In [29]:
import torch.nn.functional as F

print(out[0].shape)
print(out[0][:,:1,:1].shape)
test_pixel = out[0][:,1:2,1:2]
print(test_pixel)
print(sum(test_pixel))
print(torch.nn.Softmax(test_pixel))
print(F.softmax(test_pixel))
print(torch.sum(F.softmax(test_pixel)))

torch.Size([11, 512, 512])
torch.Size([11, 1, 1])
tensor([[[ 0.0012]],

        [[ 0.3014]],

        [[-0.3781]],

        [[-0.1941]],

        [[ 0.3033]],

        [[ 0.1312]],

        [[-0.3318]],

        [[-0.2615]],

        [[-0.3353]],

        [[ 0.6226]],

        [[-0.3456]]], grad_fn=<SliceBackward>)
tensor([[-0.4867]], grad_fn=<AddBackward0>)
Softmax(
  dim=tensor([[[ 0.0012]],
  
          [[ 0.3014]],
  
          [[-0.3781]],
  
          [[-0.1941]],
  
          [[ 0.3033]],
  
          [[ 0.1312]],
  
          [[-0.3318]],
  
          [[-0.2615]],
  
          [[-0.3353]],
  
          [[ 0.6226]],
  
          [[-0.3456]]], grad_fn=<SliceBackward>)
)
tensor([[[0.0900]],

        [[0.1214]],

        [[0.0616]],

        [[0.0740]],

        [[0.1217]],

        [[0.1024]],

        [[0.0645]],

        [[0.0692]],

        [[0.0643]],

        [[0.1674]],

        [[0.0636]]], grad_fn=<SoftmaxBackward>)
tensor(1.0000, grad_fn=<SumBackward0>)
