<a href="https://colab.research.google.com/github/heokwon/Data-Handling-for-segmentation/blob/main/HAP_Dataset0823.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset Handling (rle2mask)

rle : 3000x3000 픽셀 정보를 압축하여 메모리를 줄이는데 사용

ex) id:10044 / rle : 1459676 77 1462675 82

1459676 > 처음 1(mask) 값이 시작하는 지점(1부터 세는 위치), 77 > 그 지점부터 연속되는 1의 개수(길이이며, 끝 지점은 시작 + 77 - 1)

1462675 > 다음 1값이 시작하는 지점(1459676 에서 +77 후, 1462675 위치까지 0)

## Load Dataset

In [None]:
!gdown  # 3000x3000 원본데이터셋

Downloading...
From: https://drive.google.com/uc?id=1VnXyPKFiRL5Cvmw3UXLIK6Id93vjgKW5
To: /content/hubmap-organ-segmentation.zip
100% 6.20G/6.20G [01:13<00:00, 83.8MB/s]


In [None]:
!unzip /content/hubmap-organ-segmentation.zip

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

## Read Csv

In [None]:
df=pd.read_csv('/content/train.csv')
df

Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness,rle,age,sex
0,10044,prostate,HPA,3000,3000,0.4,4,1459676 77 1462675 82 1465674 87 1468673 92 14...,37.0,Male
1,10274,prostate,HPA,3000,3000,0.4,4,715707 2 718705 8 721703 11 724701 18 727692 3...,76.0,Male
2,10392,spleen,HPA,3000,3000,0.4,4,1228631 20 1231629 24 1234624 40 1237623 47 12...,82.0,Male
3,10488,lung,HPA,3000,3000,0.4,4,3446519 15 3449517 17 3452514 20 3455510 24 34...,78.0,Male
4,10610,spleen,HPA,3000,3000,0.4,4,478925 68 481909 87 484893 105 487863 154 4908...,21.0,Female
...,...,...,...,...,...,...,...,...,...,...
346,9517,kidney,HPA,3000,3000,0.4,4,1611763 11 1614753 29 1617750 35 1620746 43 16...,61.0,Male
347,9769,kidney,HPA,3070,3070,0.4,4,4030400 28 4033466 34 4036526 48 4039594 54 40...,28.0,Male
348,9777,largeintestine,HPA,3000,3000,0.4,4,538473 13 541468 22 544463 30 547461 35 550459...,84.0,Male
349,9791,kidney,HPA,3000,3000,0.4,4,334733 33 337729 43 340729 43 343725 51 346723...,28.0,Male


## Mask2rle

In [None]:
# def rle_encode_less_memory(img):
#     pixels = img.T.flatten()
#     pixels[0] = 0
#     pixels[-1] = 0
#     runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
#     runs[1::2] -= runs[::2]
#     return ' '.join(str(x) for x in runs)

## rleToMask

In [None]:
def rleToMask(rleString,height,width,class_num):
  """Decode a run-length-encoded mask string into a 2-D label array.

  The RLE text is a whitespace-separated list of ``start length`` pairs.
  ``start`` is a 1-based position in the image flattened column by column
  (the flat buffer is reshaped to ``(width, height)`` and then transposed).

  Args:
    rleString: RLE text such as ``"1459676 77 1462675 82"``. An empty or
      whitespace-only string yields an all-zero mask.
    height: Number of rows of the output mask.
    width: Number of columns of the output mask.
    class_num: Value written into every masked pixel (stored as uint8).

  Returns:
    ``numpy.ndarray`` of shape ``(height, width)`` and dtype ``uint8``.

  Raises:
    ValueError: If a token is not an integer or the token count is odd.
  """
  # split() with no argument ignores leading/trailing/repeated whitespace and
  # returns [] for an empty string; split(' ') would return [''] and int('')
  # would raise.
  rleNumbers = [int(token) for token in rleString.split()]
  rlePairs = np.array(rleNumbers,dtype=np.int64).reshape(-1,2)
  flat = np.zeros(height*width,dtype=np.uint8)
  for start,length in rlePairs:
    begin = start - 1  # RLE positions are 1-based
    flat[begin:begin+length] = class_num
  # Runs are stored column by column, so fill (width, height) and transpose.
  return flat.reshape(width,height).T

### Binary Dataset

In [None]:
!mkdir train_labels

In [None]:
# Decode each row's RLE into a binary mask (foreground = 1) and save it as
# /content/train_labels/<id>.png.
for img_id, img_h, img_w, img_rle in zip(df['id'], df['img_height'], df['img_width'], df['rle']):
    binary_mask = Image.fromarray(rleToMask(img_rle,img_h,img_w,1))
    binary_mask.save(f'/content/train_labels/{img_id}.png')

### Multi Dataset

In [None]:
!mkdir mask_img

In [None]:
# Class id written into the mask for each organ (0 stays background).
ORGAN_CLASS_IDS = {
    'kidney': 1,
    'prostate': 2,
    'largeintestine': 3,
    'spleen': 4,
    'lung': 5,
}

# Decode each row's RLE into a mask whose foreground value is the organ's
# class id and save it as /content/mask_img/<id>.png.
for idx in range(len(df)):
    row = df.loc[idx]
    img_id = row['id']
    img_h = row['img_height']
    img_w = row['img_width']
    img_rle = row['rle']
    class_name = row['organ']

    # Fail loudly on an unexpected organ. With an if/elif chain and no else
    # branch, an unknown organ would reuse the mask left over from the
    # previous row (or raise NameError on the very first row).
    if class_name not in ORGAN_CLASS_IDS:
        raise ValueError(f'Unknown organ {class_name!r} for image id {img_id}')

    mask_img = rleToMask(img_rle,img_h,img_w,ORGAN_CLASS_IDS[class_name])
    mask_img = Image.fromarray(mask_img)
    mask_img.save(f'/content/mask_img/{img_id}.png')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Inference

In [None]:
import os
import natsort

img_path='/content/512x512_Binary/512x512_images/'
mask_path='/content/MultiDataset/512x512_labels/'
# Natural-sort both listings so the n-th image is shown beside the n-th mask.
img_lst=natsort.natsorted(os.listdir(img_path))
mask_lst=natsort.natsorted(os.listdir(mask_path))

for img,mask in zip(img_lst, mask_lst):
    img_name, mask_name = img.split('.')[0], mask.split('.')[0]
    image=Image.open(os.path.join(img_path,img))
    mask_img=Image.open(os.path.join(mask_path,mask))

    # Image on the left, mask on the right.
    fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(20, 20))
    ax_image.imshow(image)
    ax_mask.imshow(mask_img)
    print(img_name,mask_name )
    plt.show()

In [None]:
!mv /content/512x512_Multi.zip /content/drive/MyDrive/data/MMSeg_zip

### Test

In [None]:
test_img = Image.open('/content/train_labels/10044.png')
plt.imshow(test_img)

## Resize

In [None]:
!mkdir Binary_512x512_Crop
!mkdir /content/Binary_512x512_Crop/images
!mkdir /content/Binary_512x512_Crop/labels

In [None]:
import os
from PIL import Image
from tqdm.notebook import tqdm

def resizing_jpg(src,lst,size,des):
    """Resize every listed image to ``size`` x ``size`` and save it as ``.jpg``.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        size: Edge length in pixels of the square output.
        des: Output path prefix; it is concatenated directly with the file
            stem, so it should end with a path separator.
    """
    for file_name in lst:
        stem = file_name.split('.')[0]  # text before the first dot
        resized = Image.open(os.path.join(src,file_name)).resize((size, size))
        resized.save(des+stem+'.jpg')

In [None]:
def resizing_png(src,lst,size,des,resample=None):
    """Resize label masks to ``size`` x ``size`` and save them as ``.png``.

    Masks store class ids, so they are resized with nearest-neighbour
    sampling by default: an interpolating filter blends neighbouring ids into
    values that belong to no class (or to the wrong one) along region borders.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        size: Edge length in pixels of the square output.
        des: Output path prefix; it is concatenated directly with the file
            stem, so it should end with a path separator.
        resample: Pillow resampling filter. ``None`` (default) selects
            ``Image.NEAREST``.
    """
    if resample is None:
        resample = Image.NEAREST
    for img in tqdm(lst):
        name = img.split('.')[0]
        mask = Image.open(os.path.join(src,img))
        mask.resize((size, size), resample=resample).save(des+name+'.png')

In [None]:
src='/content/crop_dataset/images/'
lst=os.listdir(src)
size=512
des='/content/Binary_512x512_Crop/images/'
resizing_jpg(src,lst,size,des)

In [None]:
label_src='/content/crop_dataset/labels/'
label_lst=os.listdir(label_src)
size=512
label_des='/content/Binary_512x512_Crop/labels/'

resizing_png(label_src,label_lst,size,label_des)

  0%|          | 0/351 [00:00<?, ?it/s]

In [None]:
!zip Binary_512x512_Crop.zip -r ./Binary_512x512_Crop

In [None]:
!mv /content/Binary_512x512_Crop.zip /content/drive/MyDrive/MMSeg_data

## Zip, Move

In [None]:
#@title
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#@title
!mkdir 512x512_Binary

!cp /content/sample_submission.csv /content/512x512_Binary
!cp /content/train.csv /content/512x512_Binary
!cp /content/test.csv /content/512x512_Binary

In [None]:
import shutil
# src1='/content/512x512_images'
src2='/content/512x512_labels'
des='/content/512x512_Binary'
# shutil.move(src1,des)
shutil.move(src2,des)

In [None]:
#@title
!zip 512x512_Binary.zip -r ./512x512_Binary
!mv /content/512x512_Binary.zip /content/drive/MyDrive/data/MMSeg_zip

## Crop

In [None]:
#@title
!mkdir crop_dataset
!mkdir /content/crop_dataset/images
!mkdir /content/crop_dataset/labels

In [None]:
img_path = '/content/train_images/'
mask_path = '/content/train_labels/'

img_name_list = os.listdir(img_path)

# Cut the central 2048x2048 window out of every image and its mask.
for img in tqdm(img_name_list):
    name = img.split('.')[0]

    origin_img = Image.open(img_path + img)
    mask_img = Image.open(mask_path + name + '.png')

    # PIL's Image.size is (width, height); the crop box is
    # (left, upper, right, lower).
    width, height = origin_img.size
    left = (width - 2048) / 2
    upper = (height - 2048) / 2
    box = (left, upper, 2048 + left, 2048 + upper)

    cropped_img = origin_img.crop(box)
    cropped_mask = mask_img.crop(box)

    cropped_img.save(f'/content/crop_dataset/images/{name}.jpg')
    cropped_mask.save(f'/content/crop_dataset/labels/{name}.png')

  0%|          | 0/351 [00:00<?, ?it/s]

In [None]:
#@title
!zip 2048x2048_Binary_Crop.zip -r ./crop_dataset

In [None]:
#@title
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#@title
!mv /content/2048x2048_Binary_Crop.zip /content/drive/MyDrive/MMSeg_data

## 2048x2048 -> 512x512 16개 (Crop)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!gdown 

Downloading...
From: https://drive.google.com/uc?id=1nqxW7dxlyoCZcwz8ht4Gx2Gttu7qQUqq
To: /content/Crop_2048x2048_Multi.zip
100% 320M/320M [00:01<00:00, 169MB/s]


In [None]:
!unzip /content/Crop_2048x2048_Multi.zip

In [None]:
!rm -rf Crop_512x512_Multi

In [None]:
!mkdir Crop_512x512_Multi
!mkdir /content/Crop_512x512_Multi/images
!mkdir /content/Crop_512x512_Multi/labels

In [None]:
!rm -rf /content/Crop_512x512_Multi/labels
!mkdir /content/Crop_512x512_Multi/labels

### 1

In [None]:
from PIL import Image
from itertools import product

points = product(range(0,4),range(0,4))
for x ,y in points: 
    # print(x,y)
    min_x = x*512
    max_x = (x+1)*512
    min_y = y*512
    max_y = (y+1)*512
    area=(min_x,min_y,max_x,max_y)
    print(area)

(0, 0, 512, 512)
(0, 512, 512, 1024)
(0, 1024, 512, 1536)
(0, 1536, 512, 2048)
(512, 0, 1024, 512)
(512, 512, 1024, 1024)
(512, 1024, 1024, 1536)
(512, 1536, 1024, 2048)
(1024, 0, 1536, 512)
(1024, 512, 1536, 1024)
(1024, 1024, 1536, 1536)
(1024, 1536, 1536, 2048)
(1536, 0, 2048, 512)
(1536, 512, 2048, 1024)
(1536, 1024, 2048, 1536)
(1536, 1536, 2048, 2048)


In [None]:
import os
import natsort
from PIL import Image
from itertools import product
from tqdm.notebook import tqdm

img_path = '/content/crop_dataset/images/'
img_lst = os.listdir(img_path)
img_des = '/content/Crop_512x512_Multi/images/'
img_lst = natsort.natsorted(img_lst)

def crop512x512(src,lst,des,size):
    """Cut every listed image into non-overlapping ``size`` x ``size`` tiles.

    The tile grid is derived from each image's dimensions, so a 2048x2048
    input with ``size=512`` yields 4 x 4 = 16 tiles. Pixels that do not fill
    a whole tile at the right/bottom edge are dropped.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        des: Output path prefix; it is concatenated directly with the tile
            name, so it should end with a path separator.
        size: Edge length in pixels of each square tile.

    Tiles are written as ``<des><stem>_<x>_<y>.png`` where ``x`` and ``y``
    are the tile's column and row index (not pixel offsets).
    """
    for img in lst:
        name = img.split('.')[0]
        img_crop=Image.open(os.path.join(src,img))
        width,height = img_crop.size  # PIL reports (width, height)

        # Number of whole tiles per axis, instead of a fixed 4 x 4 grid.
        for x ,y in product(range(width//size),range(height//size)):
            area=(x*size,y*size,(x+1)*size,(y+1)*size)
            img_crop.crop(area).save(des+f'{name}_{x}_{y}.png')

In [None]:
crop512x512(img_path,img_lst,img_des,512)

In [None]:
len(os.listdir('/content/Crop_512x512_Multi/images'))

5616

In [None]:
mask_src='/content/crop_dataset/masks/'
mask_lst=os.listdir(mask_src)
mask_lst=natsort.natsorted(mask_lst)
mask_des='/content/Crop_512x512_Multi/labels/'
size=512

crop512x512(mask_src,mask_lst,mask_des,size)

In [None]:
len(os.listdir('/content/Crop_512x512_Multi/labels'))

5616

In [None]:
!zip Crop_512x512_Multi.zip -r ./Crop_512x512_Multi
!mv /content/Crop_512x512_Multi.zip /content/drive/MyDrive/MMSeg_data

### 2

In [None]:
def crop_img(src,lst,des,d):
    """Tile every listed image into ``d`` x ``d`` PNG crops.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        des: Output directory.
        d: Edge length in pixels of each square tile.

    Tiles are written as ``<stem>_<top>_<left>.png`` where ``top``/``left``
    are the pixel offsets of the tile's upper-left corner.
    """
    for file_name in lst:
        stem = file_name.split('.')[0]
        source = Image.open(os.path.join(src,file_name))
        width,height = source.size

        # Offsets stop before any partial tile at the bottom/right edge.
        for top in range(0, height-height%d, d):
            for left in range(0, width-width%d, d):
                tile = source.crop((left, top, left+d, top+d))
                tile.save(os.path.join(des, f'{stem}_{top}_{left}.png'))

In [None]:
# crop_img(src, lst, des, d) requires the destination directory; the image
# tiles go next to the label tiles under Crop_512x512_Multi.
crop_img('/content/crop_dataset/images',os.listdir('/content/crop_dataset/images/'),'/content/Crop_512x512_Multi/images/',512)

In [None]:
crop_img('/content/crop_dataset/masks/',os.listdir('/content/crop_dataset/masks'),'/content/Crop_512x512_Multi/labels/',512)

In [None]:
len(os.listdir('/content/Crop_512x512_Multi/images/')),len(os.listdir('/content/Crop_512x512_Multi/labels/'))

(5616, 5616)

In [None]:
import matplotlib.pyplot as plt
for test_i in os.listdir('/content/Crop_512x512_Multi/labels/'):
    dir=os.path.join('/content/Crop_512x512_Multi/labels/',test_i)
    test_img=Image.open(dir)
    plt.imshow(test_img)
    plt.show()

In [None]:
!zip Crop_512x512_Multi.zip -r ./Crop_512x512_Multi
!mv /content/Crop_512x512_Multi.zip /content/drive/MyDrive/MMSeg_data

In [None]:
import os
import natsort
import matplotlib.pyplot as plt

img_path='/content/Crop_512x512_Multi/images'
mask_path='/content/Crop_512x512_Multi/labels'
# Natural-sort both listings so the n-th tile is shown beside the n-th mask.
img_lst=natsort.natsorted(os.listdir(img_path))
mask_lst=natsort.natsorted(os.listdir(mask_path))

for img,mask in zip(img_lst, mask_lst):
    img_name, mask_name = img.split('.')[0], mask.split('.')[0]
    image=Image.open(os.path.join(img_path,img))
    mask_img=Image.open(os.path.join(mask_path,mask))

    # Image tile on the left, mask tile on the right.
    fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(20, 20))
    ax_image.imshow(image)
    ax_mask.imshow(mask_img)
    print(img_name,mask_name )
    plt.show()

## 마스크 없는 데이터 제거하기

In [None]:
!gdown  ## Crop_512x512_Multi_5616

Downloading...
From: https://drive.google.com/uc?id=1c3Kzax50bDqxE7eArM4FPaUWcWAlFGqj
To: /content/Crop_512x512_Multi_5616.zip
100% 326M/326M [00:02<00:00, 149MB/s]


In [None]:
#@title
!rm -rf /content/Crop_512x512_Multi_3000

In [None]:
#@title
!mkdir Crop_512x512_Multi_3000
!mkdir Crop_512x512_Multi_3000/labels
!mkdir Crop_512x512_Multi_3000/images

In [None]:
import numpy as np
import shutil
image_src='/content/Crop_512x512_Multi/images/'
image_des='/content/Crop_512x512_Multi_3000/images/'
masks_src='/content/Crop_512x512_Multi/labels/'
masks_des='/content/Crop_512x512_Multi_3000/labels/'

# Keep only the mask tiles that contain at least one non-zero pixel.
for mask in os.listdir(masks_src):
    mask_pixels=np.array(Image.open(os.path.join(masks_src,mask)))
    if np.max(mask_pixels) == 0:
        continue  # background-only tile
    shutil.copy(masks_src+mask,masks_des+mask)

In [None]:
# Copy the image tile that belongs to each kept mask. Only the mask listing
# drives the loop: zipping it with os.listdir(image_src) added nothing and
# would silently cut the loop short if that listing were shorter.
for mask in os.listdir(masks_des):
    img = mask.split('.')[0] + '.jpg'
    shutil.copy(image_src+img,image_des+img)

In [None]:
len(os.listdir(image_des)),len(os.listdir(masks_des))

(3113, 3112)

In [None]:
#@title
!mv /content/Crop_512x512_Multi_3000 /content/Crop_512x512_Multi_3112

In [None]:
#@title
!zip Crop_512x512_Multi_3112.zip -r ./Crop_512x512_Multi_3112

In [None]:
#@title
!mv /content/Crop_512x512_Multi_3112.zip /content/drive/MyDrive/MMSeg_data

## Crop 256,256 -> Resize 512,512

In [None]:
!gdown 
!unzip /content/Crop_2048x2048_Multi.zip

Downloading...
From: https://drive.google.com/uc?id=1nqxW7dxlyoCZcwz8ht4Gx2Gttu7qQUqq
To: /content/Crop_2048x2048_Multi.zip
100% 320M/320M [00:02<00:00, 151MB/s]


In [None]:
import os
from PIL import Image
from itertools import product

def crop_img(src,lst,des,d,ext=None):
    """Tile every listed image into ``d`` x ``d`` crops.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        des: Output directory.
        d: Edge length in pixels of each square tile.
        ext: Output extension including the dot. ``None`` (default) keeps
            each source file's own extension, so label masks stored as PNG
            are not re-encoded as lossy JPEG (which would alter class ids).
            Pass ``'.jpg'`` to force JPEG output for every file.

    Tiles are written as ``<stem>_<top>_<left><ext>`` where ``top``/``left``
    are the pixel offsets of the tile's upper-left corner.
    """
    for img in lst:
        name = img.split('.')[0]
        # Fall back to '.jpg' when the source name has no extension.
        out_ext = ext if ext is not None else (os.path.splitext(img)[1] or '.jpg')
        img_crop=Image.open(os.path.join(src,img))
        w,h = img_crop.size

        # Offsets stop before any partial tile at the bottom/right edge.
        points = product(range(0, h-h%d, d), range(0, w-w%d, d))
        for top, left in points:
            box = (left, top, left+d, top+d)
            out = os.path.join(des, f'{name}_{top}_{left}{out_ext}')
            img_crop.crop(box).save(out)

In [None]:
#@title
!mkdir Crop_256x256_Multi
!mkdir /content/Crop_256x256_Multi/images
!mkdir /content/Crop_256x256_Multi/labels

In [None]:
#@title
!rm -rf /content/Crop_256x256_Multi/images
!mkdir /content/Crop_256x256_Multi/images

In [None]:
img_src='/content/crop_dataset/images/'
img_lst=os.listdir(img_src)
des='/content/Crop_256x256_Multi/images/'

crop_img(img_src,img_lst,des,256)

In [None]:
mask_src='/content/crop_dataset/masks/'
mask_lst=os.listdir(mask_src)
maks_des='/content/Crop_256x256_Multi/labels/'

crop_img(mask_src,mask_lst,maks_des,256)

In [None]:
!mkdir Resize_256to512_Multi
!mkdir /content/Resize_256to512_Multi/images
!mkdir /content/Resize_256to512_Multi/labels

In [None]:
len(os.listdir(des)),len(os.listdir(maks_des))

(22464, 22464)

In [None]:
## 마스크 없는 파일 제거
import numpy as np
import shutil

masks_src='/content/Crop_256x256_Multi/labels/'
masks_des='/content/Resize_256to512_Multi/labels/'

# Keep only the mask tiles that contain at least one non-zero pixel.
for mask in os.listdir(masks_src):
    mask_pixels=np.array(Image.open(os.path.join(masks_src,mask)))
    if np.max(mask_pixels) == 0:
        continue  # background-only tile
    shutil.copy(masks_src+mask,masks_des+mask)

In [None]:
image_src='/content/Crop_256x256_Multi/images/'
image_des='/content/Resize_256to512_Multi/images/'
# Copy the image tile that belongs to each kept mask. Only the mask listing
# drives the loop: zipping it with os.listdir(image_src) added nothing and
# would silently cut the loop short if that listing were shorter.
for mask in os.listdir(masks_des):
    img_ = mask.split('.')[0] + '.jpg'
    shutil.copy(image_src+img_,image_des+img_)

In [None]:
len(os.listdir(image_des)),len(os.listdir(masks_des))

(8763, 8763)

In [None]:
!mv /content/Crop_256x256_Multi /content/Crop_256x256_Multi_22464
!mv /content/Resize_256to512_Multi /content/Crop_256x256_Multi
!mv /content/Crop_256x256_Multi /content/Crop_256x256_Multi_8763

In [None]:
!zip Crop_256x256_Multi_8763.zip -r ./Crop_256x256_Multi_8763

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!mv /content/Crop_256x256_Multi_8763.zip /content/drive/MyDrive/data/MMSeg_zip

In [None]:
def resizing_jpg(src,lst,size,des):
    """Resize every listed image to ``size`` x ``size`` and save it as ``.jpg``.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        size: Edge length in pixels of the square output.
        des: Output path prefix; it is concatenated directly with the file
            stem, so it should end with a path separator.
    """
    for file_name in lst:
        stem = file_name.split('.')[0]  # text before the first dot
        resized = Image.open(os.path.join(src,file_name)).resize((size, size))
        resized.save(des+stem+'.jpg')

In [None]:
def resizing_png(src,lst,size,des,resample=None):
    """Resize label masks to ``size`` x ``size`` and save them as ``.png``.

    Masks store class ids, so they are resized with nearest-neighbour
    sampling by default: an interpolating filter blends neighbouring ids into
    values that belong to no class (or to the wrong one) along region borders.

    Args:
        src: Directory that contains the source files.
        lst: File names (relative to ``src``) to process.
        size: Edge length in pixels of the square output.
        des: Output path prefix; it is concatenated directly with the file
            stem, so it should end with a path separator.
        resample: Pillow resampling filter. ``None`` (default) selects
            ``Image.NEAREST``.
    """
    if resample is None:
        resample = Image.NEAREST
    for img in lst:
        name = img.split('.')[0]
        mask = Image.open(os.path.join(src,img))
        mask.resize((size, size), resample=resample).save(des+name+'.png')

In [None]:
!mkdir ResizeCrop_256to512_Multi
!mkdir /content/ResizeCrop_256to512_Multi/images
!mkdir /content/ResizeCrop_256to512_Multi/labels

In [None]:
im_src='/content/Crop_256x256_Multi_8763/images/'
im_lst=os.listdir(im_src)
size=512
im_des='/content/ResizeCrop_256to512_Multi/images/'
resizing_jpg(im_src,im_lst,size,im_des)

In [None]:
la_src='/content/Crop_256x256_Multi_8763/labels/'
la_lst=os.listdir(la_src)
size=512
la_des='/content/ResizeCrop_256to512_Multi/labels/'

resizing_png(la_src,la_lst,size,la_des)

In [None]:
len(os.listdir(im_des)),len(os.listdir(la_des))

(8763, 8763)

In [None]:
## 리사이징 됬는지 테스트
for i in os.listdir(la_des):
    im_dir=os.path.join(la_des,i)
    im_shape=Image.open(im_dir)
    print(im_shape.size)

In [None]:
!zip ResizeCrop_256to512_Multi_8763.zip -r ./ResizeCrop_256to512_Multi

In [None]:
!mv /content/ResizeCrop_256to512_Multi_8763.zip /content/drive/MyDrive/data/MMSeg_zip

## 라벨별 편향 줄이기

In [None]:
!gdown  ## 256to512 데이터셋
!gdown  ## 512x512 crop 3112 데이터셋
!gdown  ## 512x512 convert 데이터셋

Downloading...
From: https://drive.google.com/uc?id=1-538FH0Du7gXM9ko-9WCUe4PU7GKoidM
To: /content/ResizeCrop_256to512_Multi_8763.zip
100% 314M/314M [00:01<00:00, 191MB/s]


In [None]:
#@title
!unzip /content/ResizeCrop_256to512_Multi_8763.zip
!unzip /content/Crop_512x512_Multi_3112.zip
!unzip /content/Multi_512x512_MMSeg.zip

In [None]:
#@title
!mkdir Convert_512x512_Multi_351
!mv /content/train /content/images
!mv /content/masks /content/labels
!mv /content/images /content/Convert_512x512_Multi_351
!mv /content/labels /content/Convert_512x512_Multi_351

In [None]:
#@title
!rm -rf /content/ResizeCrop_256to512_Multi
!unzip /content/ResizeCrop_256to512_Multi_8763.zip

In [None]:
import os
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm
import shutil

In [None]:
## png2jpg
# Convert every PNG in png/ into a JPEG with the same stem in images/.
# NOTE(review): png/ and the empty images/ directory are created by the cell
# that follows this one in the notebook; run that cell first.
png_path='/content/Convert_512x512_Multi_351/png/'
# The destination used to be the undefined name `path1`. images/ is presumably
# the intended target, since later cells read '<stem>.jpg' from it - confirm.
jpg_path='/content/Convert_512x512_Multi_351/images/'
for png in os.listdir(png_path):
    png_dir=os.path.join(png_path,png)
    png2jpg=Image.open(png_dir)
    # JPEG cannot store alpha or palette images; convert those to RGB first.
    if png2jpg.mode not in ('L', 'RGB', 'CMYK'):
        png2jpg=png2jpg.convert('RGB')
    png2jpg.save(jpg_path+png[:-4]+'.jpg')

In [None]:
#@title
!mv /content/Convert_512x512_Multi_351/images /content/Convert_512x512_Multi_351/png
!mkdir /content/Convert_512x512_Multi_351/images

In [None]:
#@title
!rm -rf /content/test_images
!rm -rf /content/test_labels
!mkdir test_images
!mkdir test_labels

In [None]:
src_path='/content/ResizeCrop_256to512_Multi/images/'
test_path='/content/test_images/'
# Copy every image under a new name prefixed with 'a'. A byte-for-byte copy
# avoids decoding and re-saving the file, which would recompress JPEGs.
for name in os.listdir(src_path):
    shutil.copy(os.path.join(src_path,name),test_path+'a'+name)

In [None]:
msrc_path='/content/ResizeCrop_256to512_Multi/labels/'
mtest_path='/content/test_labels/'
# Copy every label under a new name prefixed with 'a'. A byte-for-byte copy
# guarantees the stored class ids are untouched and skips a decode/encode.
for m_name in os.listdir(msrc_path):
    shutil.copy(os.path.join(msrc_path,m_name),mtest_path+'a'+m_name)

In [None]:
len(os.listdir(test_path)), len(os.listdir(mtest_path))

(8763, 8763)

In [None]:
def sum_label(src,des,class_num):
    """Copy every label image whose largest pixel value equals ``class_num``.

    Args:
        src: Source path prefix; concatenated directly with each file name,
            so it should end with a path separator.
        des: Destination path prefix, used the same way as ``src``.
        class_num: Class id that the image's maximum pixel value must match.
    """
    for file_name in tqdm(os.listdir(src)):
        label = np.array(Image.open(os.path.join(src,file_name)))
        if label.max() != class_num:
            continue
        shutil.copy(src+file_name,des+file_name)

In [None]:
#@title
!rm -rf /content/test

In [None]:
#@title
!rm -rf /content/sum_data
!mkdir sum_data
!mkdir /content/sum_data/kidney
!mkdir /content/sum_data/prostate
!mkdir /content/sum_data/largeintestine
!mkdir /content/sum_data/spleen
!mkdir /content/sum_data/lung

In [None]:
#@title
!rm -rf /content/Convert_512x512_Multi_351/image_src
!rm -rf /content/Crop_512x512_Multi_3112/image_src
!rm -rf /content/ResizeCrop_256to512_Multi/image_src
!rm -rf /content/Convert_512x512_Multi_351/label_src
!rm -rf /content/Crop_512x512_Multi_3112/label_src
!rm -rf /content/ResizeCrop_256to512_Multi/label_src

In [None]:
#@title
!mkdir /content/Convert_512x512_Multi_351/image_src
!mkdir /content/Crop_512x512_Multi_3112/image_src
!mkdir /content/ResizeCrop_256to512_Multi/image_src
!mkdir /content/Convert_512x512_Multi_351/label_src
!mkdir /content/Crop_512x512_Multi_3112/label_src
!mkdir /content/ResizeCrop_256to512_Multi/label_src

In [None]:
#@title
!rm -rf /content/Sum_labels/images
!mkdir /content/Sum_labels/images

In [None]:
src1='/content/Convert_512x512_Multi_351/labels/' # 1,3,4,5
src2='/content/Crop_512x512_Multi_3112/labels/' # 1,2,3,4,5
src3='/content/test_labels/' # 5
des1='/content/Convert_512x512_Multi_351/label_src/'
des2='/content/Crop_512x512_Multi_3112/label_src/'
des3='/content/ResizeCrop_256to512_Multi/label_src/'

# (source, destination, class ids to collect), run in the listed order.
for labels_from, labels_to, class_ids in (
    (src1, des1, (1, 3, 4, 5)),
    (src2, des2, (1, 2, 3, 4, 5)),
    (src3, des3, (5,)),
):
    for class_id in class_ids:
        sum_label(labels_from,labels_to,class_id)

In [None]:
len(os.listdir(des1)),len(os.listdir(des2)),len(os.listdir(des3))

(1305, 3112, 549)

In [None]:
def moveimgfile(image_src,masks_src,image_des):
    """Copy the ``.jpg`` image that matches every mask file in ``masks_src``.

    The image name is the mask's stem (text before the first dot) plus
    ``.jpg``. Only the mask listing drives the loop; zipping it with the
    image listing added nothing and could silently truncate the loop.

    Args:
        image_src: Source path prefix for images; concatenated directly with
            the file name, so it should end with a path separator.
        masks_src: Directory whose entries select which images to copy.
        image_des: Destination path prefix, used the same way as
            ``image_src``.

    Raises:
        FileNotFoundError: If the image for a mask does not exist.
    """
    for mask in os.listdir(masks_src):
        image_name = mask.split('.')[0] + '.jpg'
        shutil.copy(image_src+image_name,image_des+image_name)

In [None]:
img_src1='/content/Convert_512x512_Multi_351/images/'
img_src2='/content/Crop_512x512_Multi_3112/images/'
img_src3='/content/test_images/'
mask_src1='/content/Convert_512x512_Multi_351/label_src/'
mask_src2='/content/Crop_512x512_Multi_3112/label_src/'
mask_src3='/content/ResizeCrop_256to512_Multi/label_src/'
img_des='/content/Sum_labels/images/'
moveimgfile(img_src1,mask_src1,img_des)
moveimgfile(img_src2,mask_src2,img_des)
moveimgfile(img_src3,mask_src3,img_des)

In [None]:
len(os.listdir(img_des)),len(os.listdir('/content/Sum_labels/labels'))

(4966, 4966)

In [None]:
!rm -rf /content/sum_data/spleen
!mkdir /content/sum_data/spleen

In [None]:
len(os.listdir('/content/Convert_512x512_Multi_351/images/'))

2895

In [None]:
src1='/content/Convert_512x512_Multi_351/labels/'
des1='/content/sum_data/kidney/'
sum_label(src1,des1,1)
src2='/content/Crop_512x512_Multi_3112/labels/'
des2='/content/sum_data/kidney/'
sum_label(src2,des2,1)
src3='/content/test_labels/'
des3='/content/sum_data/spleen/'
sum_label(src3,des3,4)
src4='/content/Crop_512x512_Multi_3112/labels/'
des4='/content/sum_data/spleen/'
sum_label(src4,des4,4)
src5='/content/Crop_512x512_Multi_3112/labels/'
des5='/content/sum_data/lung/'
sum_label(src5,des5,5)

  0%|          | 0/2895 [00:00<?, ?it/s]

  0%|          | 0/3112 [00:00<?, ?it/s]

In [None]:
len(os.listdir('/content/sum_data/kidney/')),len(os.listdir('/content/sum_data/prostate/')),len(os.listdir('/content/sum_data/largeintestine/')),len(os.listdir('/content/sum_data/spleen/')),len(os.listdir('/content/sum_data/lung/'))

(966, 1053, 1219, 732, 996)

In [None]:
!mkdir Sum_labels
!mkdir /content/Sum_labels/images
!mkdir /content/Sum_labels/labels

In [None]:
des1='/content/sum_data/kidney/'
des2='/content/sum_data/prostate/'
des3='/content/sum_data/largeintestine/'
des4='/content/sum_data/spleen/'
des5='/content/sum_data/lung/'

In [None]:
des_path_='/content/Sum_labels/labels/'
def movefile(des,des_):
    """Copy every entry of directory ``des`` into ``des_``.

    Despite the name, files are copied, not moved. ``des`` is concatenated
    directly with each entry name, so it should end with a path separator.
    """
    for source_path in [des+entry for entry in os.listdir(des)]:
        shutil.copy(source_path,des_)

In [None]:
movefile(des1,des_path_)
movefile(des2,des_path_)
movefile(des3,des_path_)
movefile(des4,des_path_)
movefile(des5,des_path_)

In [None]:
len(os.listdir(des_path_))

4966

In [None]:
def moveimgfile(image_src,masks_src,image_des):
    """Copy the ``.jpg`` image that matches every mask file in ``masks_src``.

    The image name is the mask's stem (text before the first dot) plus
    ``.jpg``. Only the mask listing drives the loop; zipping it with the
    image listing added nothing and could silently truncate the loop.

    Args:
        image_src: Source path prefix for images; concatenated directly with
            the file name, so it should end with a path separator.
        masks_src: Directory whose entries select which images to copy.
        image_des: Destination path prefix, used the same way as
            ``image_src``.

    Raises:
        FileNotFoundError: If the image for a mask does not exist.
    """
    for mask in os.listdir(masks_src):
        image_name = mask.split('.')[0] + '.jpg'
        shutil.copy(image_src+image_name,image_des+image_name)

In [None]:
!mv /content/Sum_labels /content/Multi_512x512_SumLabels_4966

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!zip Multi_512x512_SumLabels_4966.zip -r ./Multi_512x512_SumLabels_4966

In [None]:
!mv /content/Multi_512x512_SumLabels_4966.zip /content/drive/MyDrive/data/MMSeg_zip

In [None]:
!mkdir /content/Convert_512x512_Multi_351/background

In [None]:
src='/content/Convert_512x512_Multi_351/labels/'
des='/content/Convert_512x512_Multi_351/background/'
sum_label(src,des,0)

  0%|          | 0/2895 [00:00<?, ?it/s]

In [None]:
len(os.listdir(des))

1010

In [None]:
image_src='/content/Convert_512x512_Multi_351/images/'
masks_src='/content/Convert_512x512_Multi_351/background/'
image_des='/content/Multi_512x512_SumLabels_4966/images/'
moveimgfile(image_src,masks_src,image_des)

In [None]:
movefile(masks_src,'/content/Multi_512x512_SumLabels_4966/labels')

In [None]:
len(os.listdir(image_des)),len(os.listdir('/content/Multi_512x512_SumLabels_4966/labels'))

(5976, 5976)

In [None]:
!mv /content/Multi_512x512_SumLabels_4966 /content/Multi_512x512_SumLabels_containBG

In [None]:
!mv /content/Multi_512x512_SumLabels_containBG /content/Multi_512x512_SumLabels_containBG_5976

In [None]:
!zip Multi_512x512_SumLabels_containBG_5976.zip -r ./Multi_512x512_SumLabels_containBG_5976

In [None]:
!mv /content/Multi_512x512_SumLabels_containBG_5976.zip /content/drive/MyDrive/data/MMSeg_zip

## 바이너리 데이터셋 2048x2048 -> 256x256 crop

In [None]:
!gdown 
!unzip /content/Crop_2048x2048_Binary.zip

In [None]:
!mkdir Crop_256x256_Binary
!mkdir /content/Crop_256x256_Binary/images
!mkdir /content/Crop_256x256_Binary/labels

In [None]:
!mv /content/crop_dataset/images /content/crop_dataset/images_jpg
!mkdir /content/crop_dataset/images

In [None]:
jpg_src='/content/crop_dataset/images_jpg/'
png_des='/content/crop_dataset/images/'

In [None]:
!rm -rf /content/crop_dataset/images
!mkdir /content/crop_dataset/images

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
from PIL import Image
from itertools import product

In [None]:
# Re-save every file from jpg_src as a PNG in png_des; the output name is the
# source name minus its last four characters, plus '.png'.
for jpg in os.listdir(jpg_src):
    png_name = jpg[:-4]+'.png'
    Image.open(os.path.join(jpg_src,jpg)).save(png_des+png_name)

In [None]:
bi_src='/content/crop_dataset/images/'
bi_des='/content/Crop_256x256_Binary/images/'

In [None]:
bi_mask_src='/content/crop_dataset/labels/'
bi_mask_des='/content/Crop_256x256_Binary/labels/'

In [None]:
def crop_img(src,des,d):
    """Tile every file in directory ``src`` into ``d`` x ``d`` PNG crops.

    Args:
        src: Directory whose entries are opened as images.
        des: Output directory.
        d: Edge length in pixels of each square tile.

    Tiles are written as ``<stem>_<top>_<left>.png`` where ``top``/``left``
    are the pixel offsets of the tile's upper-left corner.
    """
    for file_name in os.listdir(src):
        stem = file_name.split('.')[0]
        source = Image.open(os.path.join(src,file_name))
        width,height = source.size

        # Offsets stop before any partial tile at the bottom/right edge.
        for top in range(0, height-height%d, d):
            for left in range(0, width-width%d, d):
                tile = source.crop((left, top, left+d, top+d))
                tile.save(os.path.join(des, f'{stem}_{top}_{left}.png'))

In [None]:
crop_img(bi_src,bi_des,256)

In [None]:
crop_img(bi_mask_src,bi_mask_des,256)

In [None]:
len(os.listdir(bi_des)),len(os.listdir(bi_mask_des))

(22464, 22464)

In [None]:
!zip Crop_256x256_Binary0824.zip -r ./Crop_256x256_Binary

In [None]:
import shutil
filename='Crop_256x256_Binary0824.zip'
src='/content/'
des='/content/drive/MyDrive/Colab Notebooks/Team_1. CODEnter [ Save Form : FileName_Date ]/3. 공모전/참고/Dataset/Binary/'
shutil.move(src+filename,des+filename)

'/content/drive/MyDrive/Colab Notebooks/Team_1. CODEnter [ Save Form : FileName_Date ]/3. 공모전/참고/Dataset/Binary/Crop_256x256_Binary0824.zip'