In [1]:
# !apt-get install curl -y
# !curl 'https://bootstrap.pypa.io/get-pip.py' -o get-pip.py
# !python get-pip.py
# !pip install --upgrade pip
# !conda install nb_conda_kernels -y
# !pip install ipykernel

# virtual_environment = !conda info --envs | grep mmseg

# if len(virtual_environment) != 0:
#     print("virtual environment is already exist")
#     !conda remove -n mmseg --all --yes
#     print("removed virtual environment")
    
# !conda create -n mmseg python=3.7 -y
# !conda info --envs

# kernels = !jupyter kernelspec list

# for line in list(kernels):
#     if "mmseg" in line:
#         print(f"'mmseg' kernel is exists.")
#         !jupyter kernelspec remove mmseg -y
#         print("removed kernel")
        
# !python -m ipykernel install --user --name mmseg --display-name "mmseg"
# !jupyter kernelspec list

# print("Done.")

# You must change kernel to mmseg!! [Python 3 (ipykernel) → mmseg]

In [2]:
# import os

# # mmseg_path = '/opt/ml/mmsegmentation'
# mmseg_path = '/opt/ml/mmsegTest'

# if os.path.isdir(mmseg_path):
#     print(f"Directory is already exist.({mmseg_path})")
#     !rm -r {mmseg_path}
#     print("removed directory")

In [3]:
# !git clone https://github.com/open-mmlab/mmsegmentation.git {mmseg_path}

In [4]:
# %cd {mmseg_path}
# !pwd

In [5]:
# !conda install pytorch=1.6.0 torchvision cudatoolkit=10.1 -c pytorch -y
# !pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
# !pip install mmcv
# !pip install mmsegmentation
# !pip install wandb
# !pip install -e .

In [6]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
from pycocotools.coco import COCO
import shutil
from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [8]:
mmseg_path = '/opt/ml/mmsegmentation'
dataset_path  = '/opt/ml/segmentation/input/data'
category_names = ['Backgroud', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']

def get_classname(classID, cats):
    for i in range(len(cats)):
        if cats[i]['id']==classID:
            return cats[i]['name']
    return "None"

class CustomDataLoader(Dataset):
    """COCO format"""
    def __init__(self, data_dir, mode = 'train', transform = None):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
        
    def __getitem__(self, index: int):
        # dataset이 index되어 list처럼 동작
        image_id = self.coco.getImgIds(imgIds=index)
        image_infos = self.coco.loadImgs(image_id)[0]
        
        # cv2 를 활용하여 image 불러오기
        images = cv2.imread(os.path.join(dataset_path, image_infos['file_name']))
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
        images /= 255.0
        
        if (self.mode in ('train', 'val')):
            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
            anns = self.coco.loadAnns(ann_ids)

            # Load the categories in a variable
            cat_ids = self.coco.getCatIds()
            cats = self.coco.loadCats(cat_ids)

            # masks : size가 (height x width)인 2D
            # 각각의 pixel 값에는 "category id" 할당
            # Background = 0
            masks = np.zeros((image_infos["height"], image_infos["width"]))
            # General trash = 1, ... , Cigarette = 10
            anns = sorted(anns, key=lambda idx : len(idx['segmentation'][0]), reverse=False)
            for i in range(len(anns)):
                className = get_classname(anns[i]['category_id'], cats)
                pixel_value = category_names.index(className)
                masks[self.coco.annToMask(anns[i]) == 1] = pixel_value
            masks = masks.astype(np.int8)
                        
            # transform -> albumentations 라이브러리 활용
            if self.transform is not None:
                transformed = self.transform(image=images, mask=masks)
                images = transformed["image"]
                masks = transformed["mask"]
            return images, masks, image_infos, os.path.join(dataset_path, image_infos['file_name'])
        
        if self.mode == 'test':
            # transform -> albumentations 라이브러리 활용
            if self.transform is not None:
                transformed = self.transform(image=images)
                images = transformed["image"]
            return images, image_infos, os.path.join(dataset_path, image_infos['file_name'])
    
    def __len__(self) -> int:
        # 전체 dataset의 size를 return
        return len(self.coco.getImgIds())

In [9]:
# train.json / validation.json / test.json 디렉토리 설정
train_json = 'train_1'
val_json = 'val_1'
test_json = 'test'

train_path = dataset_path + f'/{train_json}.json'
val_path = dataset_path + f'/{val_json}.json'
test_path = dataset_path + f'/{test_json}.json'

# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

train_transform = A.Compose([ToTensorV2()])
val_transform = A.Compose([ToTensorV2()])
test_transform = A.Compose([ToTensorV2()])

train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)
val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=val_transform)
test_dataset = CustomDataLoader(data_dir=test_path, mode='test', transform=test_transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, collate_fn=collate_fn)

loading annotations into memory...
Done (t=4.88s)
creating index...
index created!
loading annotations into memory...
Done (t=2.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


In [10]:
train_image_path = os.path.join(mmseg_path, f'data/images/{train_json}')
val_image_path = os.path.join(mmseg_path, f'data/images/{val_json}')
train_ann_path = os.path.join(mmseg_path, f'data/annotations/{train_json}')
val_ann_path = os.path.join(mmseg_path, f'data/annotations/{val_json}')
test_image_path = os.path.join(mmseg_path, f'data/images/{test_json}')
test_ann_path = os.path.join(mmseg_path, f'data/annotations/{test_json}')

dir_paths = [train_image_path, val_image_path, train_ann_path, val_ann_path, test_image_path, test_ann_path]

def makedirs(path): 
    try: 
        os.makedirs(path) 
    except OSError: 
        print(f"Directory is already exist.({path})")
        if not os.path.isdir(path): 
            raise

for path in dir_paths:
    makedirs(path)

Directory is already exist.(/opt/ml/mmsegmentation/data/images/train_1)
Directory is already exist.(/opt/ml/mmsegmentation/data/images/val_1)
Directory is already exist.(/opt/ml/mmsegmentation/data/annotations/train_1)
Directory is already exist.(/opt/ml/mmsegmentation/data/annotations/val_1)
Directory is already exist.(/opt/ml/mmsegmentation/data/images/test)
Directory is already exist.(/opt/ml/mmsegmentation/data/annotations/test)


In [11]:
def generate_mask_image(mode, data_loader, image_path, ann_path=None):
    print(f'Create {mode} data...')

    if mode == 'train' or mode == 'val':
        if ann_path==None:
            print(f'ann_path is empty')
            return
        for images, masks, image_infos, imagepath in tqdm(data_loader):
            image_file_path = os.path.join(image_path, f"{image_infos[0]['id']:04}.jpg")
            anno_file_path = os.path.join(ann_path, f"{image_infos[0]['id']:04}.png")
            
            masks = masks[0].numpy()
            shutil.copy2(imagepath[0], image_file_path)
            cv2.imwrite(anno_file_path, masks)
    elif mode == 'test':
        for images, image_infos, imagepath in tqdm(data_loader):
            image_file_path = os.path.join(image_path, f"{image_infos[0]['id']:04}.jpg")
            shutil.copy2(imagepath[0], image_file_path)
    else:
        print(f"mode = 'train' or 'val' or 'test'")

In [12]:
generate_mask_image('train', train_loader, train_image_path, train_ann_path)
generate_mask_image('val', val_loader, val_image_path, val_ann_path)
# generate_mask_image('test', test_loader, test_image_path)

print("Done.")

  0%|          | 1/2610 [00:00<04:37,  9.40it/s]

Create train data...


  0%|          | 6/2610 [00:00<03:30, 12.40it/s]

KeyError: 8