In [1]:
# !pip install gdown
# import gdown

# !gdown --id 1v4n1sLlXPVKKReY9OmjQ9l8ijNshJn18
# !unzip -q CVC_clinicDB.zip -d data/
# !rm CVC_clinicDB.zip

In [2]:
import os
import random
import numpy as np
import cv2

In [11]:
def create_folds(proceeded_folder='./proceeded_data', num_folds=5, seed=None):
    """Split the samples of a processed dataset into ``num_folds`` fold lists.

    The sample names are taken from ``<proceeded_folder>/images`` and must be
    exactly mirrored (same names without extension, same count) in
    ``<proceeded_folder>/labels``. The names are shuffled, split into
    ``num_folds`` nearly equal parts with ``np.array_split`` and written to
    ``fold1.txt`` ... ``fold<num_folds>.txt`` inside ``proceeded_folder``,
    one ``<name>.npy`` entry per line.

    Args:
        proceeded_folder: Dataset folder containing ``images`` and ``labels``
            sub-folders; the fold files are written directly into it.
        num_folds: Number of fold files to create.
        seed: Optional seed for the shuffle. ``None`` (the default) shuffles
            with the module-level ``random`` state, as before; any other value
            makes the resulting folds reproducible.

    Raises:
        AssertionError: If the image and label folders do not hold the same
            set of sample names.
    """
    folds_dir = proceeded_folder
    os.makedirs(folds_dir, exist_ok=True)

    # Sorting makes the comparison order-independent and gives a seeded
    # shuffle a stable starting order (os.listdir order is arbitrary).
    image_stems = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(os.path.join(proceeded_folder, 'images'))
    )
    label_stems = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(os.path.join(proceeded_folder, 'labels'))
    )

    # Compare the complete lists: pairing them with zip() would stop at the
    # shorter one and silently accept a missing image or label.
    assert image_stems == label_stems, "Image and label names do not match."

    file_names = list(image_stems)

    # A dedicated generator keeps a seeded call from touching global state.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(file_names)

    folds = np.array_split(file_names, num_folds)

    for fold_number, fold in enumerate(folds, start=1):
        with open(os.path.join(folds_dir, f'fold{fold_number}.txt'), 'w') as f:
            for name in fold:
                f.write(f"{name}.npy\n")
                
def save_images_and_labels_as_npy(images_dir, labels_dir, output_npy_dir,
                                  dataset_name='CVC_clinicDB', image_ext='.png'):
    """Convert the image/mask pairs of a dataset into ``.npy`` arrays.

    Every file in ``images_dir`` whose name ends with ``image_ext`` is read
    with ``cv2.imread(..., cv2.IMREAD_COLOR)``; the mask with the same file
    name in ``labels_dir`` is read with ``cv2.IMREAD_GRAYSCALE``. Both arrays
    are stored as ``<name without extension>.npy`` under
    ``<output_npy_dir>/<dataset_name>/images`` and ``.../labels``.

    A sample is skipped with a warning, before anything is written for it,
    when the image cannot be read, the mask file does not exist, or the mask
    cannot be read. This keeps images without a mask out of the output, which
    would otherwise make the two output folders disagree.

    Args:
        images_dir: Folder containing the source images.
        labels_dir: Folder containing the masks, named like the images.
        output_npy_dir: Root folder for the converted data.
        dataset_name: Sub-folder of ``output_npy_dir`` to write into.
        image_ext: File-name suffix selecting the source images (anything
            accepted by ``str.endswith``).

    Returns:
        None. Progress, warnings and a final summary are printed.
    """
    dataset_image_dir = os.path.join(output_npy_dir, dataset_name, 'images')
    dataset_label_dir = os.path.join(output_npy_dir, dataset_name, 'labels')

    # makedirs creates the intermediate folders (including output_npy_dir).
    os.makedirs(dataset_image_dir, exist_ok=True)
    os.makedirs(dataset_label_dir, exist_ok=True)

    # Sorted so that the processing order does not depend on os.listdir.
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(image_ext))
    # Report a count rather than dumping the whole file list into the output.
    print(f"Found {len(image_files)} image file(s) in {images_dir}")

    saved = 0
    for file in image_files:
        stem = os.path.splitext(file)[0]
        image_path = os.path.join(images_dir, file)
        label_path = os.path.join(labels_dir, file)
        try:
            # cv2.imread signals an unreadable file by returning None.
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                print(f"Warning: Could not read image at {image_path}")
                continue

            if not os.path.exists(label_path):
                print(f"Warning: Label not found for {file}")
                continue

            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if label is None:
                print(f"Warning: Could not read label at {label_path}")
                continue

            # Both halves of the pair are valid: only now write them out.
            image_npy_path = os.path.join(dataset_image_dir, f"{stem}.npy")
            label_npy_path = os.path.join(dataset_label_dir, f"{stem}.npy")
            np.save(image_npy_path, image)
            print(f"Saved image to {image_npy_path}")
            np.save(label_npy_path, label)
            print(f"Saved label to {label_npy_path}")
            saved += 1

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error processing {file}: {str(e)}")
            continue

    print(f"Saved {saved} of {len(image_files)} image/label pair(s)")

In [None]:
# Raw CVC-ClinicDB inputs and the root folder that receives the converted arrays.
images_dirs = '/home/m12gbs1/hieup/Project/SSSS/data/CVC_clinicDB/images'
labels_dirs = '/home/m12gbs1/hieup/Project/SSSS/data/CVC_clinicDB/labels'
output_npy_dir = '/home/m12gbs1/hieup/Project/SSSS/data_processed'

# Show what the two source folders contain before converting them.
for heading, folder in (
    ("Images directory content:", images_dirs),
    ("Labels directory content:", labels_dirs),
):
    print(heading, os.listdir(folder))

save_images_and_labels_as_npy(images_dirs, labels_dirs, output_npy_dir)


In [14]:
# Write the fold lists (fold1.txt ... fold5.txt) for the converted CVC-ClinicDB data.
create_folds(
    proceeded_folder='/home/m12gbs1/hieup/Project/SSSS/data_processed/CVC_clinicDB',
    num_folds=5,
)

In [None]:
import gdown

# Download the PolypGen2021 archive from Google Drive. The file is addressed by
# its full URL because gdown's `--id` flag is deprecated since gdown 4.3.1
# (it emits a FutureWarning and is slated for removal).
!gdown "https://drive.google.com/uc?id=1KZfXrFmYhjW8G9K8mldt5hh3FDCiaXxk"

!mkdir -p data/PolypGen2021
# Delete the archive only if extraction succeeded, so that a failed unzip does
# not throw away the download.
!unzip -q PolypGen2021_MultiCenterData_v3.zip -d data/PolypGen2021 && rm PolypGen2021_MultiCenterData_v3.zip

In [8]:
# Collect the images and masks of the six per-center folders (data_C1 ... data_C6)
# into one shared images/ folder and one shared labels/ folder.
!mkdir -p data/PolypGen2021/images
!mkdir -p data/PolypGen2021/labels

# IPython substitutes {center} into each shell command before running it.
for center in range(1, 7):
    !mv data/PolypGen2021/PolypGen2021_MultiCenterData_v3/data_C{center}/images_C{center}/* data/PolypGen2021/images
    !mv data/PolypGen2021/PolypGen2021_MultiCenterData_v3/data_C{center}/masks_C{center}/* data/PolypGen2021/labels



In [None]:

# Remove what is left of the extracted PolypGen2021 tree and the __MACOSX folder.
!rm -r data/PolypGen2021/PolypGen2021_MultiCenterData_v3
!rm -r data/PolypGen2021/__MACOSX
# Download the next archive (kvasir-seg.zip, extracted in the following cell).
# The full URL replaces gdown's `--id` flag, which is deprecated since gdown 4.3.1.
!gdown "https://drive.google.com/uc?id=1z_kx8tK6BrEX6DFLzuaoD8GDn5nujwSx"


In [None]:
# Extract quietly (like the other unzip calls in this notebook) and delete the
# archive only if extraction succeeded, so a failed unzip keeps the download.
!unzip -q kvasir-seg.zip -d data/ && rm kvasir-seg.zip


In [None]:
import os
import cv2
import numpy as np
import random

def save_images_and_labels_as_npy(images_dir, labels_dir, output_npy_dir,
                                  dataset_name='kvasir-seg', image_ext='.jpg'):
    """Convert the image/mask pairs of a dataset into ``.npy`` arrays.

    Every file in ``images_dir`` whose name ends with ``image_ext`` is read
    with ``cv2.imread(..., cv2.IMREAD_COLOR)``; the mask with the same file
    name in ``labels_dir`` is read with ``cv2.IMREAD_GRAYSCALE``. Both arrays
    are stored as ``<name without extension>.npy`` under
    ``<output_npy_dir>/<dataset_name>/images`` and ``.../labels``.

    A sample is skipped with a warning, before anything is written for it,
    when the image cannot be read, the mask file does not exist, or the mask
    cannot be read. This keeps images without a mask out of the output, which
    would otherwise make the two output folders disagree.

    Args:
        images_dir: Folder containing the source images.
        labels_dir: Folder containing the masks, named like the images.
        output_npy_dir: Root folder for the converted data.
        dataset_name: Sub-folder of ``output_npy_dir`` to write into.
        image_ext: File-name suffix selecting the source images (anything
            accepted by ``str.endswith``).

    Returns:
        None. Progress, warnings and a final summary are printed.
    """
    dataset_image_dir = os.path.join(output_npy_dir, dataset_name, 'images')
    dataset_label_dir = os.path.join(output_npy_dir, dataset_name, 'labels')

    # makedirs creates the intermediate folders (including output_npy_dir).
    os.makedirs(dataset_image_dir, exist_ok=True)
    os.makedirs(dataset_label_dir, exist_ok=True)

    # Sorted so that the processing order does not depend on os.listdir.
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(image_ext))
    # Report a count rather than dumping the whole file list into the output.
    print(f"Found {len(image_files)} image file(s) in {images_dir}")

    saved = 0
    for file in image_files:
        stem = os.path.splitext(file)[0]
        image_path = os.path.join(images_dir, file)
        label_path = os.path.join(labels_dir, file)
        try:
            # cv2.imread signals an unreadable file by returning None.
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                print(f"Warning: Could not read image at {image_path}")
                continue

            if not os.path.exists(label_path):
                print(f"Warning: Label not found for {file}")
                continue

            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if label is None:
                print(f"Warning: Could not read label at {label_path}")
                continue

            # Both halves of the pair are valid: only now write them out.
            image_npy_path = os.path.join(dataset_image_dir, f"{stem}.npy")
            label_npy_path = os.path.join(dataset_label_dir, f"{stem}.npy")
            np.save(image_npy_path, image)
            print(f"Saved image to {image_npy_path}")
            np.save(label_npy_path, label)
            print(f"Saved label to {label_npy_path}")
            saved += 1

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error processing {file}: {str(e)}")
            continue

    print(f"Saved {saved} of {len(image_files)} image/label pair(s)")

def create_folds(proceeded_folder='./proceeded_data', num_folds=5, seed=None):
    """Split the samples of a processed dataset into ``num_folds`` fold lists.

    The sample names are taken from ``<proceeded_folder>/images`` and must be
    exactly mirrored (same names without extension, same count) in
    ``<proceeded_folder>/labels``. The names are shuffled, split into
    ``num_folds`` nearly equal parts with ``np.array_split`` and written to
    ``fold1.txt`` ... ``fold<num_folds>.txt`` inside ``proceeded_folder``,
    one ``<name>.npy`` entry per line.

    Args:
        proceeded_folder: Dataset folder containing ``images`` and ``labels``
            sub-folders; the fold files are written directly into it.
        num_folds: Number of fold files to create.
        seed: Optional seed for the shuffle. ``None`` (the default) shuffles
            with the module-level ``random`` state, as before; any other value
            makes the resulting folds reproducible.

    Raises:
        AssertionError: If the image and label folders do not hold the same
            set of sample names.
    """
    folds_dir = proceeded_folder
    os.makedirs(folds_dir, exist_ok=True)

    # Sorting makes the comparison order-independent and gives a seeded
    # shuffle a stable starting order (os.listdir order is arbitrary).
    image_stems = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(os.path.join(proceeded_folder, 'images'))
    )
    label_stems = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(os.path.join(proceeded_folder, 'labels'))
    )

    # Compare the complete lists: pairing them with zip() would stop at the
    # shorter one and silently accept a missing image or label.
    assert image_stems == label_stems, "Image and label names do not match."

    file_names = list(image_stems)

    # A dedicated generator keeps a seeded call from touching global state.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(file_names)

    folds = np.array_split(file_names, num_folds)

    for fold_number, fold in enumerate(folds, start=1):
        with open(os.path.join(folds_dir, f'fold{fold_number}.txt'), 'w') as f:
            for name in fold:
                f.write(f"{name}.npy\n")

# Raw Kvasir-SEG inputs and the root folder that receives the converted arrays.
images_dirs = '/home/m12gbs1/hieup/Project/SSSS/data/Kvasir-SEG/images'
labels_dirs = '/home/m12gbs1/hieup/Project/SSSS/data/Kvasir-SEG/masks'
output_npy_dir = '/home/m12gbs1/hieup/Project/SSSS/data_processed'

# Show what the two source folders contain before converting them.
for heading, folder in (
    ("Images directory content:", images_dirs),
    ("Labels directory content:", labels_dirs),
):
    print(heading, os.listdir(folder))

save_images_and_labels_as_npy(images_dirs, labels_dirs, output_npy_dir)

# Write the fold lists (fold1.txt ... fold5.txt) for the converted data.
create_folds(
    proceeded_folder='/home/m12gbs1/hieup/Project/SSSS/data_processed/kvasir-seg',
    num_folds=5,
)