# 캐글 연동

In [None]:
!pip install kaggle
from google.colab import files
files.upload()      #쿠키 미허용시 에러발생

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"hasangwook","key":"<REDACTED>"}'}

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle/ (created if it does not exist yet).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the file to owner read/write so that no permission warning is raised.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the clouds-segmentation competition archive with the Kaggle CLI.
!kaggle competitions download -c clouds-segmentation

Downloading clouds-segmentation.zip to /content
100% 2.66G/2.67G [01:07<00:00, 34.1MB/s]
100% 2.67G/2.67G [01:07<00:00, 42.6MB/s]


In [None]:
!unzip clouds-segmentation.zip 

Archive:  clouds-segmentation.zip
  inflating: sample_submission.csv   
  inflating: test/ngr/00829.png      
  inflating: test/ngr/00830.png      
  inflating: test/ngr/00831.png      
  inflating: test/ngr/00832.png      
  inflating: test/ngr/00833.png      
  inflating: test/ngr/00834.png      
  inflating: test/ngr/00835.png      
  inflating: test/ngr/00836.png      
  inflating: test/ngr/00837.png      
  inflating: test/ngr/00838.png      
  inflating: test/ngr/00839.png      
  inflating: test/ngr/00840.png      
  inflating: test/ngr/00841.png      
  inflating: test/ngr/00842.png      
  inflating: test/ngr/00843.png      
  inflating: test/ngr/00844.png      
  inflating: test/ngr/00845.png      
  inflating: test/ngr/00846.png      
  inflating: test/ngr/00847.png      
  inflating: test/ngr/00848.png      
  inflating: test/ngr/00849.png      
  inflating: test/ngr/00850.png      
  inflating: test/ngr/00851.png      
  inflating: test/ngr/00852.png      
  inflating: tes

# 구글 드라이브 연동

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive; the workspace path configured below lives under this mount point.
drive.mount('/gdrive')

Mounted at /gdrive


# 폴더 경로 설정

In [None]:
# Root directory of the project on the mounted drive; checkpoints, test images and
# results are all resolved relative to it.
workspace_path = '/gdrive/Shareddrives/CV/clouds-segmentation'  # set this to the directory the files were uploaded to

In [None]:
import sys
# Make modules stored in the workspace importable. The membership check keeps the
# cell idempotent: re-running it no longer appends the same entry again.
if workspace_path not in sys.path:
    sys.path.append(workspace_path)

# Display the resulting search path for a quick visual check.
sys.path

['',
 '/content',
 '/env/python',
 '/usr/lib/python37.zip',
 '/usr/lib/python3.7',
 '/usr/lib/python3.7/lib-dynload',
 '/usr/local/lib/python3.7/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.7/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/gdrive/Shareddrives/CV/clouds-segmentation']

# 필요한 패키지 로드

In [None]:
import os
import torch
import cv2
import numpy as np
from tqdm import tqdm

# 파라미터 세팅

In [None]:
# Loader / schedule settings.
batch_size, epochs = 8, 8
num_workers = 0

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Patch edge length and sliding-window stride, in pixels.
patch_size, patch_stride = 400, 100

# Number of output classes of the segmentation head.
# NOTE(review): class_names lists three names while num_classes is 4 — presumably
# index 0 is the unlabelled background; confirm.
num_classes = 4
class_names = [
    'thick cloud',
    'thin cloud',
    'cloud shadow',
]

# NOTE(review): presumably the fraction of data used for training — it is not
# referenced by the cells visible here; confirm.
train_data_rate = 0.7

# Selects the architecture to build and the checkpoint file name to load.
model_name = 'deeplabv3'

loss_func = 'dice'

# 데이터 증대

In [None]:
# Install the pinned albumentations release (0.4.6) that is imported right below.
!pip install albumentations==0.4.6

import albumentations as A
from albumentations.pytorch import ToTensorV2

class DefaultAug:
    """Baseline preprocessing with no random augmentation: normalize, then convert to tensors."""

    def __init__(self):
        # A.Normalize() is used with its default arguments; ToTensorV2 converts the
        # image/mask pair to torch tensors.
        pipeline = [
            A.Normalize(),
            ToTensorV2(),
        ]
        self.aug = A.Compose(pipeline)

    def __call__(self, img, label):
        """Run the pipeline on an image/mask pair and return ``(image, mask)``."""
        result = self.aug(image=img, mask=label)
        image, mask = result['image'], result['mask']
        return image, mask

In [None]:
# Transform applied at validation/test time: the default normalize + to-tensor pipeline.
val_transforms = DefaultAug()

# 모델 정의

In [None]:
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
import torchvision

# Build the segmentation network selected by `model_name`.
if model_name == 'deeplabv3':
    # The head size comes from the configured `num_classes` instead of a second
    # hard-coded literal, so the two can no longer drift apart.
    model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False, progress=True, num_classes=num_classes)
# Alternative architectures, kept for reference. Their builders (update_config,
# get_seg_model, MyDilatedConvUNet) are not defined in this notebook:
#elif model_name == 'hrnet_w18':
#    hrnet_cfg = update_config(os.path.join(workspace_path, 'models/hrnet_w18_config.yaml'))
#    model = get_seg_model(hrnet_cfg)
#elif model_name == 'hrnet_w48':
#    hrnet_cfg = update_config(os.path.join(workspace_path, 'models/hrnet_w48_config.yaml'))
#    model = get_seg_model(hrnet_cfg)
#elif model_name == 'dilated_unet':
#    model = MyDilatedConvUNet()
else:
    # Fail with a clear message instead of a NameError on `model` further down.
    raise ValueError('unsupported model_name: {!r}'.format(model_name))

model.to(device)

print('number of parameters: ', count_parameters(model))

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

number of parameters:  58626628


# 최고 성능 모델 로드

In [None]:
# Checkpoints are stored under <workspace>/ckpt; the one loaded here is named
# '<model_name>_best.pt'.
save_path = os.path.join(workspace_path, 'ckpt')

checkpoint_path = os.path.join(save_path, '{}_best.pt'.format(model_name))
# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved from a GPU also loads when `device` has fallen back to 'cpu'.
checkpoint = torch.load(checkpoint_path, map_location=device)

# The checkpoint is a dict; its 'model' entry holds the state_dict.
model.load_state_dict(checkpoint['model'])
model.to(device)

print('model load success')

model load success


# 데이터셋 클래스 정의

In [None]:
class CloudDataset(torch.utils.data.Dataset):
    """Cloud-segmentation dataset backed by image files on disk.

    With ``is_train=True`` every source image and every label image is cut into
    square patches with a sliding window. The patches are stored as PNG files in
    ``cache_dir`` (files that already exist are reused) and the dataset indexes
    those patch files. With ``is_train=False`` the given paths are used as-is,
    one sample per file.

    Args:
        image_path: list of image file paths.
        label_path: list of label image file paths. Samples are paired with the
            images by index, so both lists are expected in the same order.
        patch_size: edge length of the square patches, in pixels.
        patch_stride: step of the sliding window, in pixels.
        is_train: patch-and-label mode (True) or whole-image inference mode (False).
        cache_dir: directory that receives the cached patches; created if missing.
        transforms: optional callable ``(img, target) -> (img, target)``.

    Raises:
        OSError: when an image file cannot be read.
    """

    # Label colour (channel order as returned by cv2.imread) -> class index.
    # Pixels matching none of these colours keep class 0.
    _LABEL_COLORS = (
        ([0, 0, 255], 1),    # thick cloud
        ([0, 255, 0], 2),    # thin cloud
        ([0, 255, 255], 3),  # cloud shadow
    )

    def __init__(self, image_path, label_path, patch_size = 400, patch_stride = 100, is_train = True, cache_dir = './cache', transforms = None):
        self.image_path = image_path
        self.label_path = label_path
        self.patch_size = patch_size
        self.patch_stride = patch_stride
        self.is_train = is_train
        self.transforms = transforms

        os.makedirs(cache_dir, exist_ok=True)
        if is_train:
            self.patch_images = self._cache_patches(self.image_path, 'rgb', cache_dir)
            self.patch_labels = self._cache_patches(self.label_path, 'label', cache_dir)
        else:
            self.patch_images = self.image_path
            self.patch_labels = self.label_path

    @staticmethod
    def _read_image(path):
        """Read an image with OpenCV; raise OSError instead of passing on a None result."""
        img = cv2.imread(path)
        if img is None:
            raise OSError(f'could not read image: {path}')
        return img

    def _cache_patches(self, source_paths, prefix, cache_dir):
        """Cut each source image into patches, cache them as PNGs and return the patch paths."""
        patch_paths = []
        for src_path in source_paths:
            img = self._read_image(src_path)
            stem = os.path.splitext(os.path.basename(src_path))[0]
            img_count = 0
            # Only windows that fit entirely inside the image are used.
            for x in range(0, img.shape[0] - self.patch_size + 1, self.patch_stride):
                for y in range(0, img.shape[1] - self.patch_size + 1, self.patch_stride):
                    patch_path = os.path.join(cache_dir, f'{prefix}_{stem}_{img_count}.png')
                    # Reuse patches written by an earlier run; the copy is made only
                    # when the file actually has to be written.
                    if not os.path.isfile(patch_path):
                        patch = img[x:x + self.patch_size, y:y + self.patch_size, :].copy()
                        cv2.imwrite(patch_path, patch)
                    patch_paths.append(patch_path)
                    img_count += 1
        return patch_paths

    def __len__(self):
        return len(self.patch_images)

    def __getitem__(self, idx):
        """Return ``(img, target)`` in training mode, ``(img, image_path)`` otherwise."""
        img = self._read_image(self.patch_images[idx])

        if self.is_train:
            label = self._read_image(self.patch_labels[idx])
            # Convert the colour-coded label image into an (h, w) class-index map.
            target = np.zeros(label.shape[:2], dtype=np.uint8)
            for color, class_idx in self._LABEL_COLORS:
                target[np.all(label == color, axis=-1)] = class_idx
        else:
            # No ground truth in inference mode: the image itself is handed over as
            # the second argument because the transform callable takes two.
            target = img

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        if self.is_train:
            return img, target
        return img, self.patch_images[idx]

# 테스트 데이터셋 정의

In [None]:
# Full path of every entry found in <workspace>/test/rgb (in directory-listing order).
test_rgb_path = os.path.join(workspace_path, 'test/rgb')
test_rgb_images = [
    os.path.join(test_rgb_path, file_name)
    for file_name in os.listdir(test_rgb_path)
]

In [None]:
# The label directory may not exist for the test split (original note: "empty value");
# in that case the label list is simply left empty.
test_label_path = os.path.join(workspace_path, 'test/label')
try:
    test_label_images = os.listdir(test_label_path)
except OSError:
    # Missing or unreadable directory -> no labels. Narrowed from a bare `except:` so
    # that KeyboardInterrupt and genuine programming errors are no longer swallowed.
    test_label_images = []
test_label_images = [os.path.join(test_label_path, x) for x in test_label_images]

In [None]:
# Inference dataset: is_train=False makes it serve the given files directly,
# returning each image together with its path.
test_dataset = CloudDataset(
    test_rgb_images,
    test_label_images,
    is_train=False,
    transforms=val_transforms,
)

# One image per batch, in dataset order.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=True,
)

# 테스트 결과 저장

In [None]:
# Run the model over the test set and save one colour-coded prediction image per input.
model.eval()

result_path = os.path.join(workspace_path, 'results')
os.makedirs(result_path, exist_ok=True)

# Class index -> colour written to the result image, in the same encoding the dataset
# class decodes for labels: 0 unlabelled, 1 thick cloud, 2 thin cloud, 3 cloud shadow.
palette = np.array([[0, 0, 0], [0, 0, 255], [0, 255, 0], [0, 255, 255]], dtype=np.uint8)

with torch.no_grad():
    pbar = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
    for i, (imgs, img_path) in pbar:
        imgs = imgs.to(device)
        preds = model(imgs)
        if model_name == 'deeplabv3':
            # The deeplabv3 model returns a mapping; the segmentation output is under 'out'.
            preds = preds['out']

        # The loader uses batch_size=1, so squeeze(0) drops the batch dimension and the
        # max over dim 0 picks the highest-scoring class index for every pixel.
        _, idx = preds.squeeze(0).max(0)
        # A single palette lookup replaces the per-class boolean masks.
        pred_img = palette[idx.cpu().numpy()]

        # Save under the same file name as the input image.
        cv2.imwrite(os.path.join(result_path, os.path.basename(img_path[0])), pred_img)

100%|██████████| 208/208 [06:42<00:00,  1.94s/it]


# 결과 시각화

In [None]:
import matplotlib.pyplot as plt

# Run-Length Encoding

In [None]:
import pandas as pd

In [None]:
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of alternating
    1-based start positions and run lengths.
    '''
    # Transpose before flattening, then pad a zero at both ends so that runs
    # touching either end still produce a value change.
    flat = img.T.flatten()
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions at which the value changes: run starts alternate with run ends.
    changes = np.where(padded[1:] != padded[:-1])[0] + 1
    # Turn every end position into a run length (end - start), in place.
    changes[1::2] -= changes[::2]
    return ' '.join(map(str, changes))

In [None]:
# File names of the saved prediction images, and their full paths in the same order.
test_label_file_list = os.listdir(result_path)
test_label_path_list = list(
    map(lambda file_name: os.path.join(result_path, file_name), test_label_file_list)
)

In [None]:
# Encode every saved prediction image as a run-length string, in listing order.
rle_list = []
for file_path in test_label_path_list:
    # NOTE(review): cv2.imread is called without flags, so img is presumably a
    # 3-channel colour array, whereas mask2rle documents a 0/1 mask as input —
    # confirm whether a binary (per-class) mask should be derived before encoding.
    img = cv2.imread(file_path)
    rle = mask2rle(img)
    rle_list.append(rle)

In [None]:
# Submission columns: the result file names and their run-length strings, paired by position.
my_dict = {'Image_Label':test_label_file_list, 'EncodedPixels':rle_list}

In [None]:
# Build the submission table from the two columns.
my_df = pd.DataFrame(my_dict)

In [None]:
# Write the table to <workspace>/submission.csv, leaving out the DataFrame index column.
my_df.to_csv(os.path.join(workspace_path, 'submission.csv'), index=False)

In [None]:
pip list

Package                       Version
----------------------------- ------------------------------
absl-py                       1.0.0
alabaster                     0.7.12
albumentations                0.4.6
altair                        4.2.0
appdirs                       1.4.4
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
arviz                         0.12.1
astor                         0.8.1
astropy                       4.3.1
astunparse                    1.6.3
atari-py                      0.2.9
atomicwrites                  1.4.0
attrs                         21.4.0
audioread                     2.1.9
autograd                      1.4
Babel                         2.10.1
backcall                      0.2.0
beautifulsoup4                4.6.3
bleach                        5.0.0
blis                          0.4.1
bokeh                         2.3.3
Bottleneck                    1.3.4
branca                        0.5.0
bs4                           0.0