In [None]:
# ====================================================
# Directory settings
# ====================================================
import os
# Restrict the process to the GPUs used for local runs.
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1' # specify GPUs locally

# Directory that receives submissions; created on demand.
OUTPUT_DIR = './submission'
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Root of the dataset and the COCO-style training annotation file inside it.
dataset_path = './data/data'
anns_file_path = dataset_path + '/' + 'train.json'

In [None]:
import os
import random
import time
import json
import warnings 
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from utils import label_accuracy_score
import cv2

import numpy as np
import pandas as pd

# 전처리를 위한 라이브러리
from pycocotools.coco import COCO
import torchvision
import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

# 시각화를 위한 라이브러리
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from adamp import AdamP

In [None]:
# Read the COCO-style annotation file.
with open(anns_file_path, 'r') as f:
    dataset = json.load(f)

categories = dataset['categories']
anns = dataset['annotations']
imgs = dataset['images']
nr_cats = len(categories)
nr_annotations = len(anns)
nr_images = len(imgs)

# Collect category names and the distinct super categories.
# A super category is registered whenever its name differs from the previous
# entry, so the numbering follows runs of consecutive categories.
cat_names = []
super_cat_names = []
super_cat_ids = {}
super_cat_last_name = ''
nr_super_cats = 0
for cat_it in categories:
    cat_names.append(cat_it['name'])
    super_cat_name = cat_it['supercategory']
    # Adding new supercat
    if super_cat_name != super_cat_last_name:
        super_cat_names.append(super_cat_name)
        super_cat_ids[super_cat_name] = nr_super_cats
        super_cat_last_name = super_cat_name
        nr_super_cats += 1

# Count annotations per category; category_id is used directly as the
# histogram index.
cat_histogram = np.zeros(nr_cats, dtype=int)
for ann in anns:
    cat_histogram[ann['category_id']] += 1

# Convert to DataFrame, most frequent category first.
# Keyword arguments are required here: pandas 2.x rejects the positional
# form sort_values(by, 0, False).
df = pd.DataFrame({'Categories': cat_names, 'Number of annotations': cat_histogram})
df = df.sort_values('Number of annotations', axis=0, ascending=False)

# Category labelling: restore the original category order.
sorted_temp_df = df.sort_index()

# Prepend the background row (label 0) so every real category ends up at
# "original position + 1".
# NOTE(review): "Backgroud" looks like a typo, but it is a runtime label and is
# left untouched here.
sorted_df = pd.DataFrame(["Backgroud"], columns = ["Categories"])
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
sorted_df = pd.concat([sorted_df, sorted_temp_df], ignore_index=True)

In [None]:
# Class names in label order, taken from the "Categories" column of sorted_df;
# a name's position in this list is used as its mask pixel value.
category_names = list(sorted_df.Categories)

def get_classname(classID, cats):
    """Look up the name of the category whose 'id' equals classID.

    Args:
        classID: category id to search for.
        cats: sequence of category dicts carrying 'id' and 'name' keys.

    Returns:
        The 'name' of the first matching category, or the string "None"
        when no category matches.
    """
    matching_names = (entry['name'] for entry in cats if entry['id'] == classID)
    return next(matching_names, "None")

class CustomDataLoader(Dataset):
    """Segmentation dataset backed by a COCO-format annotation file.

    In 'train' / 'val' mode an item is ``(image, mask)``; in 'test' mode it is
    ``(image, image_info)``. Images are read relative to the module-level
    ``dataset_path``; class names are resolved through the module-level
    ``category_names`` list and ``get_classname`` helper.

    Args:
        data_dir: path of the COCO annotation json.
        mode: one of 'train', 'val' or 'test'.
        transform: optional albumentations transform. It is called with
            ``image=`` (plus ``mask=`` outside test mode).
    """
    def __init__(self, data_dir, mode = 'train', transform = None):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
        # Position -> image id table. Using the position itself as the image id
        # only works when ids are exactly 0..N-1; sorting keeps that case
        # unchanged while also supporting annotation files with id gaps.
        self.img_ids = sorted(self.coco.getImgIds())

    def __getitem__(self, index: int):
        """Load the sample at dataset position ``index``.

        Raises:
            ValueError: if ``self.mode`` is not 'train', 'val' or 'test'.
            FileNotFoundError: if the image file cannot be read.
        """
        if self.mode not in ('train', 'val', 'test'):
            # Previously this fell through and returned None.
            raise ValueError(f"Unsupported mode: {self.mode!r}")

        image_id = self.img_ids[index]
        image_infos = self.coco.loadImgs([image_id])[0]

        # Read the image with cv2 and convert BGR -> RGB float32.
        image_path = os.path.join(dataset_path, image_infos['file_name'])
        images = cv2.imread(image_path)
        if images is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)

        if self.mode == 'test':
            # Test samples carry no annotations; only the image is transformed.
            if self.transform is not None:
                transformed = self.transform(image=images)
                images = transformed["image"]

            return images, image_infos

        ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
        anns = self.coco.loadAnns(ann_ids)

        # Load the categories in a variable
        cat_ids = self.coco.getCatIds()
        cats = self.coco.loadCats(cat_ids)

        # masks: 2D (height x width) array. Each pixel holds the position of
        # its class name in category_names; background stays 0.
        masks = np.zeros((image_infos["height"], image_infos["width"]))
        for ann in anns:
            className = get_classname(ann['category_id'], cats)
            pixel_value = category_names.index(className)
            # Where annotations overlap, the larger pixel value wins.
            masks = np.maximum(self.coco.annToMask(ann)*pixel_value, masks)
        masks = masks.astype(np.float32)

        # Apply the albumentations transform to image and mask together.
        if self.transform is not None:
            transformed = self.transform(image=images, mask=masks)
            images = transformed["image"]
            masks = transformed["mask"]

        return images, masks

    def __len__(self) -> int:
        """Number of images in the annotation file."""
        return len(self.img_ids)

In [None]:
#if CFG.apex:
from torch.cuda.amp import autocast, GradScaler

In [None]:
# ====================================================
# CFG  
# ====================================================
class CFG:
    """Static experiment configuration, read as class attributes (CFG.xxx)."""
    debug=False
    img_size=512
    max_len=275
    print_freq=1000
    num_workers=4
    model_name='timm-efficientnet-b4' #['timm-efficientnet-b4', 'tf_efficientnet_b0_ns']
    size=512 # [512, 1024]
    # Epoch budget per phase; `epochs` below is their sum.
    freeze_epo = 0
    warmup_epo = 1
    cosine_epo = 39 #14 #19
    warmup_factor=10
    scheduler='GradualWarmupSchedulerV2' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts', 'GradualWarmupSchedulerV2', 'get_linear_schedule_with_warmup']
    epochs=freeze_epo + warmup_epo + cosine_epo # not to exceed 9h #[1, 5, 10]
    factor=0.2 # ReduceLROnPlateau
    patience=4 # ReduceLROnPlateau
    eps=1e-6 # ReduceLROnPlateau
    T_max=4 # CosineAnnealingLR
    T_0=4 # CosineAnnealingWarmRestarts
    encoder_lr=3e-5 #[1e-4, 3e-5]
    min_lr=1e-6
    batch_size=32 #[64, 256 + 128, 512, 1024, 512 + 256 + 128, 2048]
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=5
    dropout=0.5
    seed=42
    smoothing=0.05
    n_fold=5
    # Folds to train. The earlier `trn_fold=[0]` was immediately overwritten by
    # this line, so only the effective value is kept.
    trn_fold=[0, 1, 2, 3, 4]
    train=True
    apex=False
    log_day='0504'
    model_type=model_name
    version='v1-1'
    load_state=False
    cutmix=False

In [None]:
# ====================================================
# Library
# ====================================================
import sys
#sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import gc
import re
import math
import time
import random
import shutil
import pickle
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from warmup_scheduler import GradualWarmupScheduler
# from transformers import get_linear_schedule_with_warmup

from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import albumentations as A

import segmentation_models_pytorch as smp

import warnings 
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# ====================================================
# Utils
# ====================================================
def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Create the notebook logger that writes bare messages to stderr and a file.

    Args:
        log_file: path of the log file.
            NOTE(review): the default concatenates OUTPUT_DIR and 'train.log'
            without a path separator, so with OUTPUT_DIR = './submission' it
            resolves to './submissiontrain.log'. Left unchanged because the
            checkpoint names in this notebook follow the same convention.

    Returns:
        The logger registered under this module's name, at INFO level.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this cell
    # used to stack another pair of handlers and duplicate every log line.
    # Drop handlers left over from a previous call first.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: integer seed applied to every generator; also exported as the
            PYTHONHASHSEED environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed every RNG once, using the seed from the experiment configuration.
seed_torch(seed=CFG.seed)

In [None]:
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, Blur, GaussNoise, MotionBlur, MedianBlur, OpticalDistortion, ElasticTransform, 
    GridDistortion, IAAPiecewiseAffine, CLAHE, IAASharpen, IAAEmboss, HueSaturationValue, ToGray, JpegCompression
    )

# Locations of the train / validation / test annotation files under dataset_path.
train_path, val_path, test_path = (
    f'{dataset_path}/{split_name}.json' for split_name in ('train', 'val', 'test')
)

def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    Args:
        batch: iterable of samples, each an iterable of fields.

    Returns:
        A tuple with one entry per field, each holding that field for every
        sample (an empty tuple for an empty batch).
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Values shared by the pipelines below.
_norm_stats = dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
_cutout_side = int(.1 * CFG.img_size)  # each cutout hole spans at most 10% of the image side

# Training pipeline: geometric augmentation, cutout, random resized crop to
# CFG.size, then normalisation and tensor conversion.
train_transform = A.Compose([
    A.Rotate(limit=(-30, 30), p=.25),
    A.OneOf([A.HorizontalFlip(p=.5), A.VerticalFlip(p=.5)], p=1),
    A.Cutout(num_holes=10, max_h_size=_cutout_side, max_w_size=_cutout_side, p=.25),
    A.ShiftScaleRotate(p=.25),
    A.RandomResizedCrop(CFG.size, CFG.size, scale = [0.75, 1], p=1),
    A.Normalize(**_norm_stats),
    ToTensorV2(transpose_mask=False),
])


def _make_eval_transform():
    """Build the normalise-and-tensorise pipeline used for validation and test."""
    return A.Compose([
        A.Normalize(max_pixel_value=255.0, p=1.0, **_norm_stats),
        ToTensorV2(transpose_mask=False),
    ])


# Validation and test use the same steps but get separate pipeline objects.
val_transform = _make_eval_transform()
test_transform = _make_eval_transform()

# One dataset per split, each paired with its transform.
train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)
val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=val_transform)
test_dataset = CustomDataLoader(data_dir=test_path, mode='test', transform=test_transform)

# Settings common to all three loaders.
_loader_kwargs = dict(
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    pin_memory=True,
)

# Training: shuffled, incomplete last batch dropped.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, drop_last=True, **_loader_kwargs)

# Validation (v13): keeps the last partial batch, no shuffling.
valid_loader = DataLoader(dataset=val_dataset, shuffle=False, **_loader_kwargs)

# Test: samples are (image, image_info), so batches go through collate_fn
# instead of the default collation.
test_loader = DataLoader(dataset=test_dataset, shuffle=False, collate_fn=collate_fn, **_loader_kwargs)

In [None]:
class Encoder(nn.Module):
    """Wrapper around a segmentation_models_pytorch FPN with 12 output classes.

    Args:
        model_name: encoder backbone name passed to ``smp.FPN``.
        pretrained: accepted for interface compatibility.
            NOTE(review): this flag is never read; the encoder is always built
            with ``encoder_weights="noisy-student"``.
    """
    def __init__(self, model_name='timm-efficientnet-b4', pretrained=False):
        super().__init__()
        # encoder_weights alternatives noted by the author: imagenet, noisy-student
        self.encoder = smp.FPN(
            encoder_name=model_name,
            encoder_weights="noisy-student",
            classes=12,
        )

    #@autocast()
    def forward(self, x):
        """Return the wrapped network's output for input ``x``."""
        return self.encoder(x)

In [None]:
# Restore the best checkpoint of each of the five folds.
# Note: this list rebinds the name `models`, which the import cell bound to
# torchvision.models. The networks are only constructed and loaded here; they
# are not moved to `device` or switched to eval mode in this cell.
models = []
for fold in range(5):
    model_path = f'./submissiond{CFG.dropout}_s{CFG.seed}_{CFG.model_name}_v1-1_fold{fold}_best.pth'
    checkpoint = torch.load(model_path, map_location=device)
    model = Encoder(CFG.model_name, pretrained=False)
    # The checkpoint stores the network weights under the 'encoder' key.
    model.load_state_dict(checkpoint['encoder'])
    models.append(model)

In [None]:
# Plot every image to inspect which predictions came out well.
best = pd.read_csv("./submission/best.csv") # submission with the best LB score (256 x 256 submission)

# RGB colour per class index (row i colours mask value i).
# Fixes over the previous version:
#   * the paper-pack blue channel was 267, which does not fit in uint8
#     (presumably a typo for 167);
#   * an extra black row used to be stacked in front of this table, shifting
#     every colour by one class and rendering class 1 in black like the
#     background.
COLORS = np.array([
        [0, 0, 0],        # 0  background
        [129, 236, 236],  # 1  UNKNOWN
        [2, 132, 227],    # 2  general trash
        [232, 67, 147],   # 3  paper
        [255, 234, 167],  # 4  paper pack
        [0, 184, 148],    # 5  metal
        [85, 239, 196],   # 6  glass
        [48, 51, 107],    # 7  plastic
        [255, 159, 26],   # 8  styrofoam
        [255, 204, 204],  # 9  plastic bag
        [179, 57, 57],    # 10 battery
        [248, 243, 212],  # 11 clothing
    ], dtype='uint8')

dataset_path = './data/data'
# Show every image next to its predicted 256 x 256 mask from `best`.
# Rows that cannot be displayed are skipped, but the reason is reported
# instead of being silently swallowed by a bare `except`.
for i in range(0, best.shape[0]):
    fig = None
    try:
        image_name = best.loc[i, 'image_id']
        images = cv2.imread(os.path.join(dataset_path, image_name))
        if images is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError(image_name)
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32) / 255

        # PredictionString holds one integer per pixel; raw string avoids the
        # invalid-escape warning of "\d".
        masks = np.array(list(map(int, re.findall(r"\d+", best.loc[i, 'PredictionString'])))).reshape(256, 256)

        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 16))
        # Original image
        ax1.imshow(images)
        ax1.grid(False)
        ax1.set_title("Original image : {}".format(image_name), fontsize = 15)

        # Predicted mask; tuples keep the (value, name) order stable in the
        # title, which sets did not.
        present = [(int(k), category_names[int(k)]) for k in np.unique(masks)]
        ax2.imshow(COLORS[masks])
        ax2.grid(False)
        ax2.set_title("Unique values, category of transformed mask : {} \n".format(present), fontsize = 15)
        plt.show()
    except Exception as err:
        print(f"[skip] row {i}: {type(err).__name__}: {err}")
    finally:
        # Release the figure even when the row failed half-way.
        if fig is not None:
            plt.close(fig)

In [None]:
# Display the class-name list (list position = mask value) for reference.
category_names

In [None]:
dataset_path = './data/data'
# Second pass over `best`: show every image next to its predicted 256 x 256
# mask. Rows that cannot be displayed are skipped with a printed reason
# rather than being hidden by a bare `except`.
for i in range(0, best.shape[0]):
    fig = None
    try:
        image_name = best.loc[i, 'image_id']
        images = cv2.imread(os.path.join(dataset_path, image_name))
        if images is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError(image_name)
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32) / 255

        # One integer per pixel; raw string avoids the invalid-escape warning.
        masks = np.array(list(map(int, re.findall(r"\d+", best.loc[i, 'PredictionString'])))).reshape(256, 256)

        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 16))
        # Original image
        ax1.imshow(images)
        ax1.grid(False)
        ax1.set_title("Original image : {}".format(image_name), fontsize = 15)

        # Predicted mask; (value, name) tuples give a stable title order.
        present = [(int(k), category_names[int(k)]) for k in np.unique(masks)]
        ax2.imshow(COLORS[masks])
        ax2.grid(False)
        ax2.set_title("Unique values, category of transformed mask : {} \n".format(present), fontsize = 15)
        plt.show()
    except Exception as err:
        print(f"[skip] row {i}: {type(err).__name__}: {err}")
    finally:
        # Release the figure even when the row failed half-way.
        if fig is not None:
            plt.close(fig)

In [None]:
from skimage import measure
import numpy as np


def close_contour(contour):
    """Ensure a contour ends on its starting point.

    Args:
        contour: array of points, one point per row.

    Returns:
        The input itself when its first and last rows are already equal,
        otherwise a new array with the first row appended at the end.
    """
    start_point, end_point = contour[0], contour[-1]
    if np.array_equal(start_point, end_point):
        return contour
    return np.vstack((contour, start_point))


def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation
    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.
    Returns:
        A list of polygons, one per contour, each a flat list of
        non-negative integer coordinates [x0, y0, x1, y1, ...]. Contours that
        reduce to fewer than 3 points are dropped.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)
    for contour in contours:
        # Undo the 1-pixel padding offset contour by contour. Contours differ
        # in length, so shifting the whole list with one np.subtract call
        # fails on NumPy versions that reject ragged sequences.
        contour = contour - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # (row, col) -> (x, y)
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [int(0) if i < 0 else int(i) for i in segmentation]
        polygons.append(segmentation)

    return polygons

# Example usage of binary_mask_to_polygon:
# temp_ann = binary_mask_to_polygon(augmented['mask'])

In [None]:
# 3271, 26401
fold_path = f'{dataset_path}/train_data0.json'

# Load the fold-0 annotation file and split it into its three sections.
with open(fold_path, 'r') as f:
    dataset = json.load(f)

images, annotations, categories = (
    dataset[section] for section in ('images', 'annotations', 'categories')
)

In [None]:
# Table whose 'image_id' column lists the images to pseudo-label.
pesudo = pd.read_csv("./submission/Pesudo.csv")
best_submission = pd.read_csv("./submission/Best.csv") # best-LB submission so far, kept at 512 x 512 rather than downscaled to 256 x 256

In [None]:
from tqdm import notebook, tqdm_notebook

In [None]:
from tqdm import notebook, tqdm_notebook
def _decode_prediction(prediction_string, height=512, width=512):
    """Parse the integers of a prediction string into a (height, width) mask."""
    values = np.array(list(map(int, re.findall(r"\d+", prediction_string))))
    return values.reshape(height, width)


def _pseudo_segments(mask, class_values=range(1, 11)):
    """Return [(class_value, polygons), ...] for every class value present in mask."""
    segments = []
    for class_value in class_values:
        # Float 0/1 mask of this class, built directly instead of via a Python
        # list of pixel coordinates.
        class_mask = (mask == class_value).astype(float)
        if not class_mask.any():
            continue
        polygons = binary_mask_to_polygon(class_mask)
        if not polygons:
            # No usable contour: an annotation without polygons carries no mask.
            continue
        segments.append((class_value, polygons))
    return segments


# The pseudo labels do not depend on the fold, so decode every prediction and
# trace its polygons once instead of once per class and per fold.
# drop_duplicates keeps the first row per image, matching the former
# `.values[0]` lookup.
_prediction_by_image = (
    best_submission.drop_duplicates('image_id').set_index('image_id')['PredictionString']
)
_pseudo_labels = []
for _file_name in tqdm(pesudo['image_id'], desc='pseudo masks'):
    _mask = _decode_prediction(_prediction_by_image[_file_name])
    _pseudo_labels.append((_file_name, _pseudo_segments(_mask)))

# NOTE(review): the dataset class in this notebook documents mask pixel values
# as "category id + 1", yet here the pixel value itself is written as
# `category_id`, and pixel value 11 is never visited. Confirm the id convention
# of train_data{fold}.json / Best.csv before training on these files.
for fold in tqdm(range(5)):
    fold_path = dataset_path + '/' + f'train_data{fold}.json'

    # Read annotations
    with open(fold_path, 'r') as f:
        dataset = json.load(f)

    images = dataset['images']
    annotations = dataset['annotations']
    categories = dataset['categories']

    # Continue numbering after the largest existing ids (no longer assumes the
    # last entry holds the maximum). Both counters are incremented before use;
    # the extra +1 on the annotation counter reproduces the original numbering.
    image_dict_id = max(img['id'] for img in images)
    annotation_dict_id = max(ann['id'] for ann in annotations) + 1

    for file_name, segments in _pseudo_labels:
        image_dict_id += 1
        images.append({
            'license': 0,
            'url': None,
            'file_name': file_name,
            'height': 512,
            'width': 512,
            'date_captured': None,
            'id': image_dict_id,
        })
        for class_value, polygons in segments:
            annotation_dict_id += 1
            annotations.append({
                'id': annotation_dict_id,
                'image_id': image_dict_id,
                'category_id': class_value,
                'segmentation': polygons,
                # area / bbox are placeholders, as before.
                'area': 0,
                'bbox': [0, 0, 0, 0],
                'iscrowd': 0,
            })

    train_ann = {}
    train_ann['images'] =  images
    train_ann['annotations'] = annotations
    train_ann['categories'] = categories

    with open(f'train_data_pesudo{fold}.json', 'w') as f:
        json.dump(train_ann, f, indent=4)