In [7]:
import os, glob
import rasterio
import numpy as np
# from tqdm import tqdm
from rasterio.windows import Window
from tqdm.notebook import tqdm_notebook

In [8]:
def check_label_percent(mask, crop_size, percent):
    """Tell whether labelled pixels cover at least `percent` of a square crop.

    Args:
        mask: numpy array of label values; every non-zero value counts as
            one labelled pixel, whatever its magnitude (1, 255, ...).
        crop_size: side length of the crop in pixels. Coverage is measured
            against ``crop_size ** 2``, independent of ``mask``'s own size.
        percent: minimum labelled fraction required, e.g. ``0.15``.

    Returns:
        bool: ``False`` when the labelled fraction is below `percent`,
        ``True`` otherwise.
    """
    # Count non-zero pixels rather than summing raw values, so a mask encoded
    # as 0/255 is not over-counted. Counting also leaves `mask` unmodified,
    # which matters because callers write the same array to disk afterwards.
    label_number = np.count_nonzero(mask)
    label_per = label_number / (crop_size ** 2)
    return bool(label_per >= percent)

In [9]:
def crop_img_stride(image_path, outdir_crop, crop_size, stride_size):
    """Cut one raster into square tiles on a regular stride and save each tile.

    Tile origins are visited row by row, every `stride_size` pixels in both
    directions. Each tile is written to `outdir_crop` as ``<name>_<k>.tif``,
    where ``k`` is a running counter starting at 0.

    NOTE: a later cell of this notebook defines another ``crop_img_stride``;
    once that cell has run, it replaces this function.

    Args:
        image_path: path of the raster to tile.
        outdir_crop: existing directory that receives the tiles.
        crop_size: side length of every output tile, in pixels.
        stride_size: distance in pixels between consecutive tile origins.
    """
    source_name = os.path.basename(image_path)
    with rasterio.open(image_path) as src:
        profile = src.meta
        row_offsets = range(0, src.height, stride_size)
        col_offsets = range(0, src.width, stride_size)
        origins = [(row, col) for row in row_offsets for col in col_offsets]

        for tile_idx, (row_off, col_off) in enumerate(origins):
            tile_win = Window(col_off, row_off, crop_size, crop_size)
            tile = src.read(window=tile_win)
            tile_transform = src.window_transform(tile_win)
            # The output file always has the full crop size; the data written
            # below only covers the extent of the array that was read.
            profile.update({
                'height': crop_size,
                'width': crop_size,
                'transform': tile_transform,
                'nodata': 0,
            })
            out_name = source_name.replace(".tif", f"_{tile_idx}.tif")
            out_path = os.path.join(outdir_crop, out_name)
            with rasterio.open(out_path, 'w', **profile) as dst:
                data_extent = Window(0, 0, tile.shape[2], tile.shape[1])
                dst.write(tile, window=data_extent)



In [10]:
def write_win(img_window_crop, src_img, win, crop_size, fp_out):
    """Save one cropped window as a standalone raster at `fp_out`.

    The output reuses the metadata of `src_img`, overriding height and width
    with `crop_size`, the transform with the one `src_img` reports for `win`,
    and nodata with 0.

    Args:
        img_window_crop: array to write; ``shape[2]`` is used as the written
            width and ``shape[1]`` as the written height.
        src_img: open rasterio dataset the window was read from.
        win: the ``Window`` that was read from `src_img`.
        crop_size: side length in pixels of the output file.
        fp_out: destination file path.
    """
    out_profile = src_img.meta
    out_profile.update({
        'height': crop_size,
        'width': crop_size,
        'transform': src_img.window_transform(win),
        'nodata': 0,
    })
    with rasterio.open(fp_out, 'w', **out_profile) as dst:
        # Write only over the extent the array actually covers, anchored at
        # the top-left corner of the output file.
        data_extent = Window(0, 0, img_window_crop.shape[2], img_window_crop.shape[1])
        dst.write(img_window_crop, window=data_extent)

In [11]:
def crop_img_and_mask_stride(fp_img, fp_mask, dir_out_img, dir_out_mask, crop_size, stride_size, percent=None):
    """Tile an image/mask pair into aligned crops and save the selected tiles.

    Windows are taken every `stride_size` pixels over the mask's extent and
    the same window is read from the image. Kept tiles are written under the
    same file name (``<mask name>_<k>.tif``, ``k`` counting kept tiles from 0)
    into `dir_out_mask` and `dir_out_img`.

    Selection rules:
      * If the whole mask is zero, every window is kept and the stride is
        forced to `crop_size` (non-overlapping tiles).
      * Otherwise windows whose mask is entirely zero are skipped, and when
        `percent` is given, windows for which ``check_label_percent``
        returns False are skipped as well.

    Args:
        fp_img: path of the image raster.
        fp_mask: path of the mask raster; its size drives the tiling grid.
        dir_out_img: existing directory for image tiles.
        dir_out_mask: existing directory for mask tiles.
        crop_size: side length of each tile in pixels.
        stride_size: distance in pixels between consecutive tile origins.
        percent: optional minimum labelled fraction (e.g. ``0.15``). ``None``
            or ``0`` disables the coverage filter.
    """
    name_base = os.path.basename(fp_mask)
    i = 0
    with rasterio.open(fp_mask) as src_mask:
        h, w = src_mask.height, src_mask.width
        has_labels = not np.all(src_mask.read() == 0)
        if not has_labels:
            # Nothing is labelled anywhere: keep every tile, without overlap.
            stride_size = crop_size
        with rasterio.open(fp_img) as src_img:
            list_weight = list(range(0, w, stride_size))
            list_hight = list(range(0, h, stride_size))

            pbar = tqdm_notebook(total=len(list_hight)*len(list_weight), desc=f'{name_base}')
            try:
                for start_h_org in list_hight:
                    for start_w_org in list_weight:
                        win = Window(start_w_org, start_h_org, crop_size, crop_size)
                        mask_window_crop = src_mask.read(window=win)

                        if has_labels:
                            # Skip empty windows; apply the coverage filter only
                            # when one was requested, so percent=None keeps
                            # every labelled window instead of dropping all.
                            keep = (not np.all(mask_window_crop == 0)) and (
                                not percent
                                or check_label_percent(mask_window_crop, crop_size, percent)
                            )
                        else:
                            keep = True

                        if keep:
                            name_file = name_base.replace(".tif", f"_{i}.tif")
                            fp_img_crop = os.path.join(dir_out_img, name_file)
                            fp_mask_crop = os.path.join(dir_out_mask, name_file)
                            write_win(mask_window_crop, src_mask, win, crop_size, fp_mask_crop)
                            img_window_crop = src_img.read(window=win)
                            write_win(img_window_crop, src_img, win, crop_size, fp_img_crop)
                            i += 1

                        # Exactly one tick per window, whether kept or skipped.
                        pbar.update(1)
            finally:
                # Close the bar even if a read or write fails part-way.
                pbar.close()
                                                                   

In [12]:
def crop_img_and_mask(dir_img, dir_mask, dir_out, crop_size, stride_size, percent = None):
    """Tile every ``*.tif`` in `dir_img` together with its same-named mask.

    Output goes to two sub-folders of `dir_out`: ``img_crop`` and
    ``mask_crop``, each suffixed with ``_<P>per`` when `percent` is given
    (``P`` being the percentage as an integer). The folders are created if
    missing.

    Args:
        dir_img: directory containing the image rasters.
        dir_mask: directory containing masks named exactly like the images.
        dir_out: parent directory for the two output folders.
        crop_size: side length of each tile in pixels.
        stride_size: distance in pixels between consecutive tile origins.
        percent: optional minimum labelled fraction forwarded to
            ``crop_img_and_mask_stride``.

    Returns:
        tuple: ``(dir_out_img, dir_out_mask)``, the two output folder paths.
    """
    # round() instead of int(): percent * 100 can land just below the intended
    # integer in floating point (0.29 * 100 == 28.999999999999996), and
    # truncating would then name the folder "28per".
    suffix = f"_{int(round(percent * 100))}per" if percent else ""
    dir_out_img = os.path.join(dir_out, f"img_crop{suffix}")
    dir_out_mask = os.path.join(dir_out, f"mask_crop{suffix}")
    os.makedirs(dir_out_img, exist_ok=True)
    os.makedirs(dir_out_mask, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem's
    # listing order.
    list_img = sorted(glob.glob(os.path.join(dir_img, '*.tif')))
    for fp_img in tqdm_notebook(list_img, desc = 'Number image'):
        fp_mask = os.path.join(dir_mask, os.path.basename(fp_img))
        crop_img_and_mask_stride(fp_img, fp_mask, dir_out_img, dir_out_mask, crop_size, stride_size, percent)

    print('DONE ...')
    return dir_out_img, dir_out_mask


In [13]:
from datetime import datetime

# Timestamp that is embedded in `dir_out` below.
now = f"{datetime.today():%Y-%m-%d_%H-%M-%S}"

# --- Earlier run configurations, left commented out ---
# crop_size = 256
# stride_size = 64
# percent = 0.15
# dir_img = r"/home/skm/SKM16/Work/SonalPanel_ThaiLand/2Ver3_nghiemchinh/Data_Train_and_Model/images_per95_cut_img"
# dir_mask = r"/home/skm/SKM16/Work/SonalPanel_ThaiLand/2Ver3_nghiemchinh/Data_Train_and_Model/images_per95_cut_img_mask"
# dir_out = r"/home/skm/SKM16/Work/SonalPanel_ThaiLand/2Ver3_nghiemchinh/Data_Train_and_Model/crop256_stride64_giamanhden"
# dir_out_img, dir_out_mask = crop_img_and_mask(dir_img, dir_mask, dir_out, crop_size, stride_size, percent)


# crop_size = 512
# stride_size = 256
# percent = 0.15
# dir_img = r"/home/skm/SKM16/Work/OpenLand/1_Data_train/img_train__khoang_255/img"
# dir_mask = r"/home/skm/SKM16/Work/OpenLand/1_Data_train/img_train__khoang_255/mask"
# dir_out = f"/home/skm/SKM16/Work/OpenLand/1_Data_train/img_train__khoang_255/DS_Train/crop{crop_size}_stride{stride_size}_{now}_chia_histogram"
# dir_out_img, dir_out_mask = crop_img_and_mask(dir_img, dir_mask, dir_out, crop_size, stride_size, percent)

# --- Active run ---
crop_size, stride_size = 512, 256
percent = 0.15
dir_img = r"/home/skm/SKM16/Work/OpenLand/2_Data_train_with_dich_histogram/Data_Train_and_Model/img_dich_cut_img"
dir_mask = r"/home/skm/SKM16/Work/OpenLand/2_Data_train_with_dich_histogram/Data_Train_and_Model/img_dich_cut_img_mask"
dir_out = f"/home/skm/SKM16/Work/OpenLand/2_Data_train_with_dich_histogram/Data_Train_and_Model/DS_Train/crop{crop_size}_stride{stride_size}_{now}_chia_histogram"
dir_out_img, dir_out_mask = crop_img_and_mask(
    dir_img=dir_img,
    dir_mask=dir_mask,
    dir_out=dir_out,
    crop_size=crop_size,
    stride_size=stride_size,
    percent=percent,
)

Number image:   0%|          | 0/40 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_0.tif:   0%|          | 0/143 [00:00<?, ?it/s]

20220404_132910_ssc17_u0001_visual_clip_0.tif:   0%|          | 0/1800 [00:00<?, ?it/s]

20220404_132910_ssc17_u0001_visual_clip_1.tif:   0%|          | 0/88 [00:00<?, ?it/s]

20220404_132910_ssc17_u0001_visual_clip_2.tif:   0%|          | 0/88 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_0.tif:   0%|          | 0/15 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_1.tif:   0%|          | 0/15 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_2.tif:   0%|          | 0/4 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_3.tif:   0%|          | 0/6 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_4.tif:   0%|          | 0/12 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_5.tif:   0%|          | 0/56 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_6.tif:   0%|          | 0/30 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_7.tif:   0%|          | 0/80 [00:00<?, ?it/s]

20220813_070232_ssc12_u0001_visual_8.tif:   0%|          | 0/24 [00:00<?, ?it/s]

20220814_065546_ssc2_u0001_visual_0.tif:   0%|          | 0/204 [00:00<?, ?it/s]

20220814_065546_ssc2_u0001_visual_1.tif:   0%|          | 0/88 [00:00<?, ?it/s]

20220814_065546_ssc2_u0001_visual_2.tif:   0%|          | 0/54 [00:00<?, ?it/s]

20220814_065546_ssc2_u0001_visual_3.tif:   0%|          | 0/77 [00:00<?, ?it/s]

20220814_065546_ssc2_u0001_visual_4.tif:   0%|          | 0/25 [00:00<?, ?it/s]

20220815_103905_ssc7_u0001_visual_0.tif:   0%|          | 0/255 [00:00<?, ?it/s]

20220815_103905_ssc7_u0001_visual_1.tif:   0%|          | 0/120 [00:00<?, ?it/s]

20220815_103905_ssc7_u0001_visual_2.tif:   0%|          | 0/420 [00:00<?, ?it/s]

20220815_103905_ssc7_u0001_visual_3.tif:   0%|          | 0/143 [00:00<?, ?it/s]

20220815_103905_ssc7_u0001_visual_4.tif:   0%|          | 0/210 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_1.tif:   0%|          | 0/108 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_2.tif:   0%|          | 0/24 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_3.tif:   0%|          | 0/120 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_4.tif:   0%|          | 0/10 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_5.tif:   0%|          | 0/20 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_6.tif:   0%|          | 0/20 [00:00<?, ?it/s]

20220821_064400_ssc13_u0001_visual_7.tif:   0%|          | 0/132 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_0.tif:   0%|          | 0/80 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_1.tif:   0%|          | 0/24 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_2.tif:   0%|          | 0/80 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_3.tif:   0%|          | 0/30 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_4.tif:   0%|          | 0/72 [00:00<?, ?it/s]

20220821_064432_ssc13_u0002_visual_5.tif:   0%|          | 0/56 [00:00<?, ?it/s]

20220821_103012_ssc11_u0001_visual_0.tif:   0%|          | 0/88 [00:00<?, ?it/s]

20220821_103012_ssc11_u0001_visual_1.tif:   0%|          | 0/120 [00:00<?, ?it/s]

20220821_103012_ssc11_u0001_visual_2.tif:   0%|          | 0/24 [00:00<?, ?it/s]

20220821_103012_ssc11_u0001_visual_3.tif:   0%|          | 0/9 [00:00<?, ?it/s]

DONE ...


In [14]:
import glob
# Number of .tif tiles written: image folder on the first line, mask folder on the second.
print(*(len(glob.glob(os.path.join(tile_dir, "*.tif"))) for tile_dir in (dir_out_img, dir_out_mask)), sep="\n")

4330
4330


## Crop images only (no masks)

In [11]:
import os, glob
import rasterio
import numpy as np
from rasterio.windows import Window
from tqdm.notebook import tqdm_notebook

def crop_img_stride(fp_img, dir_out_img, crop_size, stride_size):
    """Cut one raster into square tiles on a regular stride and save them all.

    Every window origin on the ``stride_size`` grid is read and written with
    ``write_win``; no tile is filtered out. This definition reuses the name of
    an earlier ``crop_img_stride`` in this notebook and replaces it once run.

    Args:
        fp_img: path of the raster to tile.
        dir_out_img: existing directory that receives the tiles.
        crop_size: side length of each tile in pixels.
        stride_size: distance in pixels between consecutive tile origins.

    Returns:
        str: the input file name with every ``".tif"`` removed.
    """
    name_base = os.path.basename(fp_img)
    with rasterio.open(fp_img) as src_img:
        list_weight = list(range(0, src_img.width, stride_size))
        list_hight = list(range(0, src_img.height, stride_size))

        pbar = tqdm_notebook(total=len(list_hight)*len(list_weight), desc=f'{name_base}')
        try:
            for start_h_org in list_hight:
                for start_w_org in list_weight:
                    # NOTE(review): the labels are swapped - "_w" carries the row
                    # (height) offset and "_h" the column (width) offset. Kept
                    # as-is because the file names may be parsed downstream;
                    # confirm before correcting.
                    name_file = name_base.replace(".tif", f"_w{start_h_org}_h{start_w_org}.tif")
                    fp_img_crop = os.path.join(dir_out_img, name_file)
                    win = Window(start_w_org, start_h_org, crop_size, crop_size)

                    img_window_crop = src_img.read(window=win)
                    write_win(img_window_crop, src_img, win, crop_size, fp_img_crop)
                    pbar.update(1)
        finally:
            # Close the bar even if a read or write fails part-way.
            pbar.close()
    return name_base.replace(".tif","")


def crop_dir_img(dir_img, dir_out, crop_size, stride_size):
    """Tile every ``*.tif`` found in `dir_img` (images only).

    Tiles are written to ``<dir_out>/img_crop<crop_size>_str<stride_size>``,
    which is created if it does not exist.

    Args:
        dir_img: directory containing the rasters to tile.
        dir_out: parent directory for the output folder.
        crop_size: side length of each tile in pixels.
        stride_size: distance in pixels between consecutive tile origins.
    """
    name_img_folder = f"img_crop{crop_size}_str{stride_size}"
    dir_out_img = os.path.join(dir_out, name_img_folder)
    os.makedirs(dir_out_img, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem's
    # listing order.
    list_img = sorted(glob.glob(os.path.join(dir_img, '*.tif')))
    for fp_img in tqdm_notebook(list_img, desc = 'Number image'):
        crop_img_stride(fp_img, dir_out_img, crop_size, stride_size)
    print('DONE ...')
                        

In [12]:
# Tiling parameters and folders for the image-only crop.
crop_size, stride_size = 512, 256
dir_img = r"/home/skm/SKM16/Work/SonalPanel_ThaiLand/1Ver2_lable2/V2/image_8bit_perimage_p98"
dir_out = r"/home/skm/SKM16/Work/SonalPanel_ThaiLand/1Ver2_lable2/V2/image_8bit_perimage_p98/tmp_forpredict_big"
# crop_img_and_mask(dir_img, dir_mask, dir_out, crop_size, stride_size, percent)
crop_dir_img(
    dir_img=dir_img,
    dir_out=dir_out,
    crop_size=crop_size,
    stride_size=stride_size,
)

Number image:   0%|          | 0/6 [00:00<?, ?it/s]

01_July_Mosaic_P_2.tif:   0%|          | 0/3224 [00:00<?, ?it/s]

01_July_Mosaic_P_3.tif:   0%|          | 0/2726 [00:00<?, ?it/s]

01_July_Mosaic_P_4.tif:   0%|          | 0/2773 [00:00<?, ?it/s]

01_July_Mosaic_P_5.tif:   0%|          | 0/2940 [00:00<?, ?it/s]

01_July_Mosaic_P_6.tif:   0%|          | 0/3192 [00:00<?, ?it/s]

02_May_Mosaic_P_2.tif:   0%|          | 0/5103 [00:00<?, ?it/s]

DONE ...
