In [5]:
import os
# Switch into the project folder; the cells below use paths relative to it.
PROJECT_ROOT = '/root/DWDP'
os.chdir(PROJECT_ROOT)
print("Current working directory:", os.getcwd())


Current working directory: /root/DWDP


In [6]:
import gdown
import zipfile

def download_data(file_id='1iTM57U43L0ANn1u_bRv4dEEERQemuOXc',
                  destination='Denoising_Dataset_train_val.zip',
                  extract_dir='data'):
    """Download the dataset archive from Google Drive and unpack it.

    The download is skipped when ``destination`` already holds a readable
    zip archive, so re-running the cell does not fetch the multi-gigabyte
    file again. A missing or truncated file fails the zip check and is
    downloaded afresh.

    Args:
        file_id (str): Google Drive file id of the archive.
        destination (str): Path the archive is saved to.
        extract_dir (str): Folder the archive contents are extracted into.

    Raises:
        FileNotFoundError: If gdown reports that nothing was downloaded.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    if zipfile.is_zipfile(destination):
        # is_zipfile() is False for missing or truncated files, so only a
        # complete archive from an earlier run short-circuits the download.
        print(f"Found existing archive at {destination}; skipping download.")
    else:
        url = f'https://drive.google.com/uc?id={file_id}'
        # Some gdown versions signal failure by returning None rather than raising.
        if gdown.download(url, destination, quiet=False) is None:
            raise FileNotFoundError(
                f"Download from {url} failed; {destination!r} was not created."
            )

    # Unzip the archive into the target folder
    with zipfile.ZipFile(destination, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)


In [9]:
# Fetch the dataset archive from Google Drive and unpack it into the 'data' folder.
download_data()

Downloading...
From (original): https://drive.google.com/uc?id=1iTM57U43L0ANn1u_bRv4dEEERQemuOXc
From (redirected): https://drive.google.com/uc?id=1iTM57U43L0ANn1u_bRv4dEEERQemuOXc&confirm=t&uuid=536167fd-d11d-4b4d-a9db-bd7eeaab36e4
To: /root/DWDP/Denoising_Dataset_train_val.zip
100%|██████████| 3.78G/3.78G [19:25<00:00, 3.25MB/s]


In [17]:
# Unpack the downloaded archive directly into the current working directory.
zip_file_path = 'Denoising_Dataset_train_val.zip'
extract_to_path = './'

with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_to_path)

print("File extracted to the directory.")


File extracted to the directory.


In [26]:
import os
import shutil

def _copy_numbered_images(src_folder, dest_folder, name_prefix, verbose=True):
    """Copy the regular files of ``src_folder`` to ``dest_folder`` as ``<name_prefix>_NNN.png``.

    Entries are visited in sorted name order. Only regular files advance the
    counter, so a stray directory cannot shift the numbering of the files
    that follow it.

    Returns:
        int: Number of files copied.
    """
    copied = 0
    for filename in sorted(os.listdir(src_folder)):
        src_image_path = os.path.join(src_folder, filename)
        if not os.path.isfile(src_image_path):
            print(f"Skipped non-file item: {src_image_path}")
            continue

        copied += 1
        dest_image_path = os.path.join(dest_folder, f"{name_prefix}_{copied:03d}.png")
        shutil.copy2(src_image_path, dest_image_path)
        if verbose:
            print(f"Copied {src_image_path} to {dest_image_path}")
    return copied


def rearrange_and_rename_files(src_dir, dest_dir, verbose=True):
    """
    Flattens the per-class dataset layout into one folder per split and category.

    Source layout:
    - src_dir/<class>/<Train|Val>/<Defect_mask|Degraded_image|GT_clean_image>/<defect_type>/<images>

    Destination layout:
    - dest_dir/<Train|Val>/<defect_mask|degraded|ground_truth>/<class>_<defect_type>_<NNN>.png

    Within each defect-type folder the files are taken in sorted name order and
    numbered from 001. Matching images in the three categories therefore get the
    same destination name only if each category holds the same number of files in
    the same order. The '.png' extension is always used for the destination name,
    whatever the source file's extension is.

    Args:
        src_dir (str): The source directory of the original structure.
        dest_dir (str): The destination directory for the new structure.
        verbose (bool): When True (default), print one line per copied file.
    """
    # (source category folder, destination folder name)
    category_folders = (
        ('Defect_mask', 'defect_mask'),
        ('Degraded_image', 'degraded'),
        ('GT_clean_image', 'ground_truth'),
    )

    for class_name in os.listdir(src_dir):
        class_path = os.path.join(src_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        for split in ('Train', 'Val'):
            split_path = os.path.join(class_path, split)
            if not os.path.exists(split_path):
                continue
            print(f"Processing {split} folder for class: {class_name}")

            for category, folder_name in category_folders:
                category_path = os.path.join(split_path, category)
                if not os.path.exists(category_path):
                    print(f"Category {category} does not exist in {split} for class {class_name}")
                    continue

                # Each sub-folder of a category is one defect type
                for defect_type in os.listdir(category_path):
                    defect_type_path = os.path.join(category_path, defect_type)
                    if not os.path.isdir(defect_type_path):
                        continue
                    print(f"  Processing defect type: {defect_type} in category: {category}")

                    dest_category_path = os.path.join(dest_dir, split, folder_name)
                    os.makedirs(dest_category_path, exist_ok=True)

                    _copy_numbered_images(
                        defect_type_path,
                        dest_category_path,
                        f"{class_name}_{defect_type}",
                        verbose=verbose,
                    )

# Restructure the extracted dataset into 'structured_data'
src_dir = 'Denoising_Dataset_train_val'
dest_dir = 'structured_data'
rearrange_and_rename_files(src_dir=src_dir, dest_dir=dest_dir)


Processing Train folder for class: cable
  Processing defect type: missing_wire in category: Defect_mask
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/000_mask.png to structured_data/Train/defect_mask/cable_missing_wire_001.png
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/001_mask.png to structured_data/Train/defect_mask/cable_missing_wire_002.png
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/002_mask.png to structured_data/Train/defect_mask/cable_missing_wire_003.png
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/003_mask.png to structured_data/Train/defect_mask/cable_missing_wire_004.png
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/004_mask.png to structured_data/Train/defect_mask/cable_missing_wire_005.png
Copied Denoising_Dataset_train_val/cable/Train/Defect_mask/missing_wire/005_mask.png to structured_data/Train/defect_mask/cable_missing_wire_006.png
C

In [28]:
# Run the project's clean-up script. Its source is not shown here; judging by its
# printed output it lists and deletes unmatched files for the 'train' and 'val' categories.
%run /root/DWDP/Data/clean_dataset.py


Unmatched files to be deleted in 'train' category:

Unmatched files to be deleted in 'val' category:

Unmatched file deletion process completed successfully.


In [29]:
# Run the project's inspection script. Its source is not shown here; judging by its
# printed output it counts image shapes (H x W) per folder for 'train' and 'val'.
%run /root/DWDP/Data/inspect_dataset.py


Counting shapes in folder: degraded (train)

Counting shapes in folder: defect_mask (train)

Counting shapes in folder: ground_truth (train)

Image shape counts in 'train' category:
Shape (H x W): 1000 x 1000 - 255 images
Shape (H x W): 1024 x 1024 - 1746 images
Shape (H x W): 700 x 700 - 159 images
Shape (H x W): 800 x 800 - 309 images
Shape (H x W): 840 x 840 - 192 images
Shape (H x W): 900 x 900 - 135 images

Counting shapes in folder: degraded (val)

Counting shapes in folder: defect_mask (val)

Counting shapes in folder: ground_truth (val)

Image shape counts in 'val' category:
Shape (H x W): 1024 x 1024 - 165 images
Shape (H x W): 800 x 800 - 33 images
Shape (H x W): 1000 x 1000 - 15 images
Shape (H x W): 840 x 840 - 15 images
Shape (H x W): 700 x 700 - 18 images
Shape (H x W): 900 x 900 - 18 images

Image shape counting completed.
