# LOAD DATA

In [1]:
import os

import torchmetrics.functional

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


In [34]:
DATA_DIR = r"E:\DATN_local\1_IN_USED_DATASET"

x_train_dir = os.path.join(DATA_DIR, 'TRAIN_DEV')
y_train_dir = os.path.join(DATA_DIR, 'TRAIN_DEV_MASK')

x_valid_dir = os.path.join(DATA_DIR, 'VAL')
y_valid_dir = os.path.join(DATA_DIR, 'VAL_MASK')

x_test_dir = os.path.join(DATA_DIR, 'TEST')
y_test_dir = os.path.join(DATA_DIR, 'TEST_MASK')

file_label = r"E:\DATN_local\1_IN_USED_DATASET\level_label.json"

In [35]:
def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

# DATA LOADER

In [36]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

In [37]:
import json


class Dataset(BaseDataset):
    """CamVid Dataset. Read images, apply augmentation and preprocessing transformations.

    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder
        class_values (list): values of classes to extract from segmentation mask
        augmentation (albumentations.Compose): data transfromation pipeline
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing
            (e.g. noralization, shape manipulation, etc.)

    """


    def __init__(
            self,
            images_dir,
            masks_dir,
            file_label,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        with open(file_label) as json_file:
            self.label_dict = json.load(json_file)
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id)[:-4]+"_mask.jpg" for image_id in self.ids]

        # convert str names to class values on masks
        # self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]


        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):

        # read data
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks_fps[i], 0)

        # extract certain classes from mask (e.g. cars)

        # masks = [(mask == v) for v in self.class_values]
        # mask = np.stack(masks, axis=-1).astype('float')
        mask[mask != 0] = 1
        mask = np.array(mask==1).astype('float')

        classification = self.label_dict[self.ids[i][:-4]] - 1

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']



        return image, mask, classification

    def __len__(self):
        return len(self.ids)

In [38]:
dataset = Dataset(x_train_dir, y_train_dir, file_label, classes=['flood'])

sample = dataset[150] # get some sample
print(sample)
# visualize(
#     image=sample['image'].transpose(1,2,0),
#     flood_mask=sample['mask'].squeeze(),
# )

(array([[[ 53,  63,  39],
        [ 51,  61,  37],
        [ 49,  58,  37],
        ...,
        [241, 242, 246],
        [241, 242, 244],
        [241, 242, 244]],

       [[ 56,  65,  44],
        [ 52,  61,  40],
        [ 49,  58,  39],
        ...,
        [241, 242, 246],
        [241, 242, 244],
        [241, 242, 244]],

       [[ 60,  69,  50],
        [ 55,  64,  47],
        [ 51,  59,  44],
        ...,
        [241, 242, 246],
        [242, 242, 244],
        [242, 242, 244]],

       ...,

       [[ 41,  44,  37],
        [ 41,  44,  37],
        [ 41,  44,  37],
        ...,
        [ 78,  79,  81],
        [ 81,  82,  84],
        [ 83,  85,  84]],

       [[ 44,  47,  38],
        [ 45,  48,  39],
        [ 47,  50,  41],
        ...,
        [ 78,  80,  79],
        [ 81,  81,  81],
        [ 82,  82,  80]],

       [[ 42,  45,  36],
        [ 45,  48,  39],
        [ 47,  50,  39],
        ...,
        [ 74,  76,  75],
        [ 76,  76,  74],
        [ 76,  76,  74]

# Augmentation

In [39]:
import albumentations as albu
from albumentations.pytorch import ToTensorV2


def get_training_augmentation():
    train_transform = [

        # albu.HorizontalFlip(p=0.5),
        #
        # albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=5, shift_limit=0.1, p=1, border_mode=0),
        #
        albu.PadIfNeeded(min_height=512, min_width=512, always_apply=True, border_mode=0),
        albu.RandomCrop(height=512, width=512, always_apply=True),
        #
        # albu.GaussNoise(p=0.2),
        # albu.Perspective(p=0.5),
        #
        # albu.OneOf(
        #     [
        #         albu.CLAHE(p=1),
        #         albu.RandomBrightness(p=1),
        #         albu.RandomGamma(p=1),
        #     ],
        #     p=0.9,
        # ),
        #
        # albu.OneOf(
        #     [
        #         albu.Sharpen(p=1),
        #         albu.Blur(blur_limit=3, p=1),
        #         albu.MotionBlur(blur_limit=3, p=1),
        #     ],
        #     p=0.9,
        # ),
        #
        # albu.OneOf(
        #     [
        #         albu.RandomContrast(p=1),
        #         albu.HueSaturationValue(p=1),
        #     ],
        #     p=0.9,
        # ),
        albu.Normalize(),
        ToTensorV2(),
    ]
    return albu.Compose(train_transform)

def get_validation_augmentation():
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
        albu.PadIfNeeded(512, 512),
        albu.RandomCrop(height=512, width=512, always_apply=True),
        albu.Normalize(),
        ToTensorV2(),

    ]
    return albu.Compose(test_transform)

# def to_tensor(x, **kwargs):
#     return x.transpose(2, 0, 1).astype('float32')


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform

    Args:
        preprocessing_fn (callbale): data normalization function
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose

    """

    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=ToTensorV2, mask=ToTensorV2),
    ]
    return albu.Compose(_transform)

# Creat model and train

In [40]:
import torch
import numpy as np
import segmentation_models_pytorch as smp

In [41]:
ENCODER = 'mobilenet_v2'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['flood']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multiclass segmentation
DEVICE = 'cuda'


preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

train_dataset = Dataset(
    x_train_dir,
    y_train_dir,
    file_label=file_label,
    augmentation=get_training_augmentation(),
    # preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

valid_dataset = Dataset(
    x_valid_dir,
    y_valid_dir,
    file_label=file_label,
    augmentation=get_validation_augmentation(),
    # preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

test_dataset = Dataset(
    x_test_dir,
    y_test_dir,
    file_label=file_label,
    augmentation=get_validation_augmentation(),
    # preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=0)



In [10]:
from segmentation_models_pytorch.encoders import get_preprocessing_fn

aux_params=dict(
    pooling='avg',             # one of 'avg', 'max'
    activation='sigmoid',      # activation function, default is None
    classes=4,                 # define number of output labels
)

model = smp.Unet(encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=1, aux_params=aux_params)
model.to(DEVICE)


functools.partial(<function preprocess_input at 0x000002785BB8C438>, input_space='RGB', input_range=[0, 1], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


In [11]:
from torch import optim
from torch import nn

mask_loss_fn = smp.losses.DiceLoss(mode="binary")
cls_loss_fn = nn.CrossEntropyLoss()

LEARNING_RATE=0.0001
optimizer = optim.Adam(
    [
        {"params": model.encoder.parameters(), "lr": 1e-5},
        {"params": model.decoder.parameters(), "lr": 1e-4},
        {"params": model.classification_head.parameters()}
    ],
    lr=LEARNING_RATE)

In [12]:
from torchmetrics.functional import stat_scores, accuracy, precision, recall, precision_recall_curve

def share_step(image, targets_m, targets_c, mask_loss_fn, cls_loss_fn, model, device='cuda'):
    preds_m, pred_c = model(image.float().to(device))
    mask_loss = mask_loss_fn(preds_m, targets_m)
    cls_loss = cls_loss_fn(pred_c, targets_c)

    # class_stat_score: [tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``)
    # shape: [batch_size, 5]
    cls_stat_score = stat_scores(pred_c, targets_c, reduce='macro', num_classes=4)

    # mask_stat_score: true_positive, false_positive, false_negative, true_negative tensors (N, C) shape each.
    # shape: list of [[batch_size, 1], [batch_size, 1], [batch_size, 1], [batch_size, 1]]
    mask_stat_score = smp.metrics.get_stats(preds_m.long(), targets_m.long(), mode='binary')

    # cls_metrics -> accuracy, confusion matrix -> stat, precision, recall
    output = torch.argmax(pred_c, dim=1)

    # float
    cls_acc = torch.true_divide((targets_c == output).sum(), output.size(0)).item()
    return mask_loss, cls_loss, mask_stat_score, cls_stat_score, cls_acc

def epoch_end(dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc):
    mask_tp = dataset_mask_stat_score[0]
    mask_fp = dataset_mask_stat_score[1]
    mask_fn = dataset_mask_stat_score[2]
    mask_tn = dataset_mask_stat_score[3]
    dataset_mask_iou  = smp.metrics.iou_score(mask_tp, mask_fp, mask_fn, mask_tn, reduction="micro")
    dataset_mask_f1 = smp.metrics.f1_score(mask_tp, mask_fp, mask_fn, mask_tn, reduction="micro")

    dataset_cls_acc = torch.mean(torch.FloatTensor(dataset_cls_acc))

    cls_tp = dataset_cls_stat_score[0]
    cls_fp = dataset_cls_stat_score[1]
    cls_tn = dataset_cls_stat_score[2]
    cls_fn = dataset_cls_stat_score[3]
    dataset_cls_precision = cls_tp.sum() / (cls_fp.sum() + cls_tp.sum())
    dataset_cls_recall = cls_tp.sum() / (cls_tp.sum() + cls_fn.sum())

    # [tp, fp, tn, fn]
    dataset_confusion_matrix = [cls_tp.sum().item(), cls_fp.sum().item(), cls_tn.sum().item(), cls_fn.sum().item()]

    return dataset_mask_iou.item(), dataset_mask_f1.item(), dataset_cls_acc.item(), dataset_cls_precision.item(), dataset_cls_recall.item(), dataset_confusion_matrix


dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc = [], [], []
alpha = 0.7
j=0
for image, targets_m, targets_c in train_loader:
    targets_m = targets_m.float().unsqueeze(1).to(device=DEVICE, non_blocking=True)
    targets_c = targets_c.type(torch.LongTensor).to(device=DEVICE, non_blocking=True)

    with torch.cuda.amp.autocast():

        mask_loss, cls_loss, mask_stat_score, cls_stat_score, cls_acc = share_step(image, targets_m, targets_c, mask_loss_fn, cls_loss_fn, model)
        loss = (mask_loss*alpha + (1-alpha)*cls_loss)

        dataset_cls_acc.append(cls_acc)
        if len(dataset_mask_stat_score) <= 0:
            dataset_mask_stat_score = list(mask_stat_score)
            for i in range(5):
                dataset_cls_stat_score.append(cls_stat_score[:,i])
        else:
            for i in range(len(mask_stat_score)):
                dataset_mask_stat_score[i] = torch.cat((dataset_mask_stat_score[i], mask_stat_score[i]), dim=0)

            for i in range(5):
                dataset_cls_stat_score[i] = torch.cat([dataset_cls_stat_score[i], cls_stat_score[:,0]], dim=0)
    if j == 10:
        break
    j+=1
for i in range(len(dataset_mask_stat_score)):
        dataset_mask_stat_score[i] = dataset_mask_stat_score[i].squeeze()

for i in range(len(dataset_cls_stat_score)):
    dataset_cls_stat_score[i] = dataset_cls_stat_score[i].squeeze()

dataset_mask_iou, dataset_mask_f1, dataset_cls_acc, dataset_cls_precision, dataset_cls_recall, dataset_confusion_matrix = epoch_end(dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc)

print("dataset_mask_iou:", dataset_mask_iou)
print("dataset_mask_f1:", dataset_mask_f1)
print("dataset_cls_acc:", dataset_cls_acc)
print("dataset_cls_precision:", dataset_cls_precision)
print("dataset_cls_recall:", dataset_cls_recall)
print("dataset_confusion_matrix:", dataset_confusion_matrix)

In [24]:
def check_performance(loader, model,type, mask_loss_fn, cls_loss_fn, device=DEVICE, alpha=0.7):
    print("EVAL:", type)
    dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc, dataset_mutual_losses, mask_losses, cls_losses = [], [], [], [], [], []
    model.eval()
    with torch.no_grad():
        for batch_idx, (image, targets_m, targets_c) in enumerate(loader):
            targets_m = targets_m.float().unsqueeze(1).to(device=device, non_blocking=True)
            targets_c = targets_c.type(torch.LongTensor).to(device=device, non_blocking=True)
            mask_loss, cls_loss, mask_stat_score, cls_stat_score, cls_acc = share_step(image, targets_m, targets_c, mask_loss_fn, cls_loss_fn, model)
            loss = (mask_loss*alpha + (1-alpha)*cls_loss)

            dataset_mutual_losses.append(loss)
            mask_losses.append(mask_loss)
            cls_losses.append(cls_loss)

            dataset_cls_acc.append(cls_acc)
            if len(dataset_mask_stat_score) <= 0:
                dataset_mask_stat_score = list(mask_stat_score)
                for i in range(5):
                    dataset_cls_stat_score.append(cls_stat_score[:,i])
            else:
                for i in range(len(mask_stat_score)):
                    dataset_mask_stat_score[i] = torch.cat((dataset_mask_stat_score[i], mask_stat_score[i]), dim=0)

                for i in range(5):
                    dataset_cls_stat_score[i] = torch.cat([dataset_cls_stat_score[i], cls_stat_score[:,0]], dim=0)
        for i in range(len(dataset_mask_stat_score)):
            dataset_mask_stat_score[i] = dataset_mask_stat_score[i].squeeze()

        for i in range(len(dataset_cls_stat_score)):
            dataset_cls_stat_score[i] = dataset_cls_stat_score[i].squeeze()

        dataset_mask_iou, dataset_mask_f1, dataset_cls_acc, dataset_cls_precision, dataset_cls_recall, dataset_confusion_matrix = epoch_end(dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc)

        dataset_mutual_losses = torch.mean(torch.FloatTensor(dataset_mutual_losses)).item()
        mask_losses           = torch.mean(torch.FloatTensor(mask_losses)).item()
        cls_losses            = torch.mean(torch.FloatTensor(cls_losses)).item()

        print(type+'_dataset_mutual_losses:', dataset_mutual_losses)
        print(type+'_mask_losses:', mask_losses)
        print(type+f"_cls_losses:", cls_losses)
        print(type+f"_dataset_mask_iou:", dataset_mask_iou)
        print(type+f"_dataset_mask_f1:", dataset_mask_f1)
        print(type+f"_dataset_cls_acc:", dataset_cls_acc)
        print(type+f"_dataset_cls_precision:", dataset_cls_precision)
        print(type+f"_dataset_cls_recall:", dataset_cls_recall)
        print(type+f"_dataset_confusion_matrix:", dataset_confusion_matrix)
    model.train()
    return dataset_mutual_losses


In [25]:
from tqdm import tqdm


def train_fn(loader, model, optimizer, mask_loss_fn, cls_loss_fn, scaler, alpha=0.7, device=DEVICE):
    loop = tqdm(loader)
    model.train()

    dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc, dataset_mutual_losses, mask_losses, cls_losses = [], [], [], [], [], []

    for batch_idx, (image, targets_m, targets_c) in enumerate(loop):

        targets_m = targets_m.float().unsqueeze(1).to(device=device, non_blocking=True)
        targets_c = targets_c.type(torch.LongTensor).to(device=device, non_blocking=True)

        with torch.cuda.amp.autocast():

            mask_loss, cls_loss, mask_stat_score, cls_stat_score, cls_acc = share_step(image, targets_m, targets_c, mask_loss_fn, cls_loss_fn, model)
            loss = (mask_loss*alpha + (1-alpha)*cls_loss)

            dataset_mutual_losses.append(loss)
            mask_losses.append(mask_loss)
            cls_losses.append(cls_loss)

            dataset_cls_acc.append(cls_acc)
            if len(dataset_mask_stat_score) <= 0:
                dataset_mask_stat_score = list(mask_stat_score)
                for i in range(5):
                    dataset_cls_stat_score.append(cls_stat_score[:,i])
            else:
                for i in range(len(mask_stat_score)):
                    dataset_mask_stat_score[i] = torch.cat((dataset_mask_stat_score[i], mask_stat_score[i]), dim=0)

                for i in range(5):
                    dataset_cls_stat_score[i] = torch.cat([dataset_cls_stat_score[i], cls_stat_score[:,0]], dim=0)

        # squeeze  dataset_mask_stat_score[i]

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loop.set_postfix(loss=loss.item(), loss_mask=mask_loss.item(), loss_cls=cls_loss.item())

    for i in range(len(dataset_mask_stat_score)):
        dataset_mask_stat_score[i] = dataset_mask_stat_score[i].squeeze()

    for i in range(len(dataset_cls_stat_score)):
        dataset_cls_stat_score[i] = dataset_cls_stat_score[i].squeeze()

    dataset_mask_iou, dataset_mask_f1, dataset_cls_acc, dataset_cls_precision, dataset_cls_recall, dataset_confusion_matrix = epoch_end(dataset_mask_stat_score, dataset_cls_stat_score, dataset_cls_acc)

    dataset_mutual_losses = torch.mean(torch.FloatTensor(dataset_mutual_losses)).item()
    mask_losses           = torch.mean(torch.FloatTensor(mask_losses)).item()
    cls_losses            = torch.mean(torch.FloatTensor(cls_losses)).item()

    print('dataset_mutual_losses:', dataset_mutual_losses)
    print('mask_losses:', mask_losses)
    print("cls_losses:", cls_losses)
    print("dataset_mask_iou:", dataset_mask_iou)
    print("dataset_mask_f1:", dataset_mask_f1)
    print("dataset_cls_acc:", dataset_cls_acc)
    print("dataset_cls_precision:", dataset_cls_precision)
    print("dataset_cls_recall:", dataset_cls_recall)
    print("dataset_confusion_matrix:", dataset_confusion_matrix)

scaler = torch.cuda.amp.GradScaler()
alpha = 0.7
NUM_EPOCHS = 3
best_perform = 1000
for epoch in range(NUM_EPOCHS):
    train_fn(train_loader, model, optimizer,mask_loss_fn, cls_loss_fn, scaler, alpha=alpha)
    val_mutual_loss = check_performance(valid_loader, model, "val", mask_loss_fn, cls_loss_fn, device=DEVICE, alpha=alpha)
    test_mutual_loss = check_performance(test_loader, model, "test", mask_loss_fn, cls_loss_fn, device=DEVICE, alpha=alpha)

    if best_perform > test_mutual_loss:
        best_perform = test_mutual_loss
        checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }


100%|██████████| 450/450 [04:38<00:00,  1.62it/s, loss=0.452, loss_cls=1.51, loss_mask=0]     


dataset_mutual_losses: 0.6483268141746521
mask_losses: 0.4238913953304291
cls_losses: 1.1720095872879028
dataset_mask_iou: -0.09994778782129288
dataset_mask_f1: -0.22209329903125763
dataset_cls_acc: 0.5655555725097656
dataset_cls_precision: 0.499019593000412
dataset_cls_recall: 0.499019593000412
dataset_confusion_matrix: [509, 511, 513, 511]
EVAL: val
val_dataset_mutual_losses: 0.57001793384552
val_mask_losses: 0.25864744186401367
val_cls_losses: 1.2965490818023682
val_dataset_mask_iou: -0.10853599011898041
val_dataset_mask_f1: -0.24350053071975708
val_dataset_cls_acc: 0.25555557012557983
val_dataset_cls_precision: 0.5
val_dataset_cls_recall: 0.5
val_dataset_confusion_matrix: [23, 23, 27, 23]
EVAL: test


TypeError: 'NoneType' object does not support item assignment

In [42]:
test_mutual_loss = check_performance(test_loader, model, "test", mask_loss_fn, cls_loss_fn, device=DEVICE, alpha=alpha)

EVAL: test
test_dataset_mutual_losses: 0.5274957418441772
test_mask_losses: 0.25589948892593384
test_cls_losses: 1.1612204313278198
test_dataset_mask_iou: 0.02242392487823963
test_dataset_mask_f1: 0.04386424273252487
test_dataset_cls_acc: 0.39772728085517883
test_dataset_cls_precision: 0.504807710647583
test_dataset_cls_recall: 0.504807710647583
test_dataset_confusion_matrix: [105, 103, 109, 103]


None


'E:\\DATN_local\\1_IN_USED_DATASET\\TEST_MAKS\\0_DANANG_STREET_FLOOD_Media1.mp4_102_mask.jpg'

tensorA = [1,2,3,4,5]
tensorA = torch.tensor([tensorA,tensorA,tensorA,tensorA])
tensorB = [1,2,3,4,5]
tensorB = torch.tensor([tensorB,tensorB,tensorB,tensorB])

# tensorA
tensor = torch.cat([tensorA[:,0], tensorB[:,0]], dim=0)
tensor.shape

tensorA = torch.ones([4,1])
tensorB = torch.ones([4,1])

tensor = torch.cat((tensorA, tensorB), dim=0).squeeze()
tensor.shape

In [44]:
x_train_dir_files = os.listdir(x_train_dir)
cnt=0
for file in os.listdir(x_valid_dir):
    if file in x_train_dir_files:
        print(file)
        cnt+=1
print(cnt)

BENHVIEN_C_CAM_0.jpg
BENHVIEN_C_CAM_101.jpg
BENHVIEN_C_CAM_108.jpg
BENHVIEN_C_CAM_14.jpg
BENHVIEN_C_CAM_165.jpg
BENHVIEN_C_CAM_168.jpg
BENHVIEN_C_CAM_17.jpg
BENHVIEN_C_CAM_170.jpg
BENHVIEN_C_CAM_19.jpg
BENHVIEN_C_CAM_2.jpg
BENHVIEN_C_CAM_29.jpg
BENHVIEN_C_CAM_34.jpg
BENHVIEN_C_CAM_37.jpg
BENHVIEN_C_CAM_39.jpg
BENHVIEN_C_CAM_4.jpg
BENHVIEN_C_CAM_41.jpg
BENHVIEN_C_CAM_55.jpg
BENHVIEN_C_CAM_66.jpg
BENHVIEN_C_CAM_70.jpg
BENHVIEN_C_CAM_73.jpg
BENHVIEN_C_CAM_75.jpg
BENHVIEN_C_CAM_77.jpg
BENHVIEN_C_CAM_79.jpg
BENHVIEN_C_CAM_81.jpg
BENHVIEN_C_CAM_83.jpg
BENHVIEN_C_CAM_88.jpg
BENHVIEN_C_CAM_92.jpg
BENHVIEN_C_CAM_95.jpg
BENHVIEN_C_CAM_97.jpg
EUROPEANFLOOD2013_25441112.jpg
EUROPEANFLOOD2013_25441113.jpg
EUROPEANFLOOD2013_25441114.jpg
EUROPEANFLOOD2013_25441115.jpg
EUROPEANFLOOD2013_25441162.jpg
EUROPEANFLOOD2013_25505067.jpg
EUROPEANFLOOD2013_26438015.jpg
EUROPEANFLOOD2013_26438108.jpg
EUROPEANFLOOD2013_26451508.jpg
EUROPEANFLOOD2013_26451509.jpg
EUROPEANFLOOD2013_26451513.jpg
EUROPEANFLOOD2013_2

In [46]:
len(x_train_dir_files)

900

In [47]:
random_choices = np.random.choice(x_train_dir_files, size=int(len(x_train_dir_files)*0.2))

In [53]:
for file in random_choices:
    print(f'move "{x_train_dir}/{file}" "{x_valid_dir}"')
    print(f'move "{y_train_dir}/{file[:-4]}_mask.jpg" {y_valid_dir}')

    os.system(f'move "{x_train_dir}\\{file}" "{x_valid_dir}"')
    os.system(f'move "{y_train_dir}\\{file[:-4]}_mask.jpg" "{y_valid_dir}"')

move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV/image_316.jpg" "E:\DATN_local\1_IN_USED_DATASET\VAL"
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV_MASK/image_316_mask.jpg" E:\DATN_local\1_IN_USED_DATASET\VAL_MASK
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV/image_326.jpg" "E:\DATN_local\1_IN_USED_DATASET\VAL"
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV_MASK/image_326_mask.jpg" E:\DATN_local\1_IN_USED_DATASET\VAL_MASK
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV/NORAIN_NGUYENHUE_SCHOOLGATE_CAM_295.jpg" "E:\DATN_local\1_IN_USED_DATASET\VAL"
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV_MASK/NORAIN_NGUYENHUE_SCHOOLGATE_CAM_295_mask.jpg" E:\DATN_local\1_IN_USED_DATASET\VAL_MASK
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV/NORAIN_PHUONGTRAN_CAM_372.jpg" "E:\DATN_local\1_IN_USED_DATASET\VAL"
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV_MASK/NORAIN_PHUONGTRAN_CAM_372_mask.jpg" E:\DATN_local\1_IN_USED_DATASET\VAL_MASK
move "E:\DATN_local\1_IN_USED_DATASET\TRAIN_DEV/image_32