In [15]:
# Image Compression Without Losing Quality (if > 1000 KB)
import os
from PIL import Image

def get_file_size_kb(path):
    return round(os.path.getsize(path) / 1024, 2)

def compress_image(input_path, output_path, quality=85, use_webp=False):
    try:
        with Image.open(input_path) as img:
            img = img.convert("RGB")  # Ensure compatible format

            if use_webp:
                output_path = output_path.rsplit('.', 1)[0] + ".webp"
                img.save(output_path, format='WEBP', quality=quality, method=6)
            else:
                img.save(output_path, format='JPEG', quality=quality, optimize=True)

            return get_file_size_kb(input_path), get_file_size_kb(output_path), output_path
    except Exception as e:
        print(f"Error compressing {input_path}: {e}")
        return None, None, None

def batch_compress_images(input_folder, output_folder, quality=85, use_webp=False, size_threshold_kb=1000):
    os.makedirs(output_folder, exist_ok=True)
    results = []

    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            input_path = os.path.join(input_folder, filename)
            output_path = os.path.join(output_folder, filename)

            original_size = get_file_size_kb(input_path)
            if original_size > size_threshold_kb:
                before_size, after_size, saved_path = compress_image(input_path, output_path, quality, use_webp)
                if before_size and after_size:
                    results.append((filename, before_size, after_size, saved_path))
            else:
                # Simply copy smaller images without compression
                output_path = os.path.join(output_folder, filename)
                with open(input_path, 'rb') as src, open(output_path, 'wb') as dst:
                    dst.write(src.read())

    # Print summary
    print(f"\n{'Filename':30} {'Original (KB)':>15} {'Compressed (KB)':>18}")
    print("-" * 65)
    for fname, before, after, _ in results:
        print(f"{fname:30} {before:>15} {after:>18}")

# Example usage:
if __name__ == "__main__":
    input_dir = r"D:\ShailyDL\DeepL\345098_DS2\images\train"
    output_dir = r"D:\ShailyDL\DeepL\345098_DS2\images\train_compressed"

    quality = 85
    use_webp = False
    size_threshold_kb = 1000

    batch_compress_images(input_dir, output_dir, quality, use_webp, size_threshold_kb)

   #result  #Filename                         Original (KB)    Compressed (KB)
#-----------------------------------------------------------------
#1042487_dataset 2025-05-15 08-51-58_C1012.jpg         5841.71            5758.66


KeyboardInterrupt: 

In [21]:
import os
import shutil
import re

# Source folders
image_input_dir = r"D:\ShailyDL\DeepL\ds2_864\images"
label_input_dir = r"D:\ShailyDL\DeepL\ds2_864\labels"

# Destination folders
image_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\images"
label_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\labels"

# Create destination folders if they don't exist
os.makedirs(image_output_dir, exist_ok=True)
os.makedirs(label_output_dir, exist_ok=True)

# Regex: Match Cxxx, Dxxx, or Nodefect_xxx with optional _aug or _aug1
pattern = re.compile(r'(C\d+(?:_aug\d*)?|D\d+(?:_aug\d*)?|Nodefect_\d+)', re.IGNORECASE)

def rename_and_copy_files(input_dir, output_dir, is_label=False):
    count = 0
    for file in os.listdir(input_dir):
        match = pattern.search(file)
        if match:
            matched_part = match.group(1)
            ext = ".txt" if is_label else os.path.splitext(file)[1]
            new_name = matched_part + ext
            src = os.path.join(input_dir, file)
            dst = os.path.join(output_dir, new_name)

            # If file already exists, add a unique suffix
            suffix = 1
            while os.path.exists(dst):
                new_name = f"{matched_part}_{suffix}{ext}"
                dst = os.path.join(output_dir, new_name)
                suffix += 1

            shutil.copy2(src, dst)
            count += 1
        else:
            print(f"⚠️ No match found in filename: {file}")
    return count

# Process
images_copied = rename_and_copy_files(image_input_dir, image_output_dir, is_label=False)
labels_copied = rename_and_copy_files(label_input_dir, label_output_dir, is_label=True)

print("✅ Done renaming and copying.")
print(f"Total images processed: {images_copied}")
print(f"Total labels processed: {labels_copied}")

#result for all : Done renaming and copying.
#Total images processed: 326
#Total labels processed: 326


⚠️ No match found in filename: annotations_test.json
⚠️ No match found in filename: annotations_train.json
⚠️ No match found in filename: annotations_val.json
✅ Done renaming and copying.
Total images processed: 864
Total labels processed: 864


In [16]:
import os
import shutil
import re

# Input folders
image_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\images\train"
label_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\labels\train"

# Output folders for matched files (not used now)
# image_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\images"
# label_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\labels"

# Output folders for unmatched files
image_filtered_dir = r"D:\ShailyDL\DeepL\FilteredOut\images"
label_filtered_dir = r"D:\ShailyDL\DeepL\FilteredOut\labels"

# Create output dirs
os.makedirs(image_filtered_dir, exist_ok=True)
os.makedirs(label_filtered_dir, exist_ok=True)

# Matching pattern
pattern = re.compile(r'(C\d+|D\d+|Nodefect_\d+)', re.IGNORECASE)

def filter_and_copy_unmatched(input_dir, output_dir, is_label=False):
    for file in os.listdir(input_dir):
        match = pattern.search(file)
        if not match:
            src = os.path.join(input_dir, file)
            dst = os.path.join(output_dir, file)
            shutil.copy2(src, dst)

# Process unmatched images and labels
filter_and_copy_unmatched(image_input_dir, image_filtered_dir, is_label=False)
filter_and_copy_unmatched(label_input_dir, label_filtered_dir, is_label=True)

print("✅ Done filtering and moving unmatched images and labels.")

#result: ✅ Done filtering and moving unmatched images and labels.code for those which are not renamed

✅ Done filtering and moving unmatched images and labels.


In [12]:
import os
import re

# Directory to check
image_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\images\train"

# Regex patterns
pattern_only_delami = re.compile(r'OnlyDelami', re.IGNORECASE)
pattern_nodefect = re.compile(r'Nodefect', re.IGNORECASE)

# Counters
count_only_delami = 0
count_nodefect = 0
count_others = 0

# Check if path exists
if os.path.exists(image_dir):
    for file in os.listdir(image_dir):
        if pattern_only_delami.search(file):
            count_only_delami += 1
        elif pattern_nodefect.search(file):
            count_nodefect += 1
        else:
            count_others += 1

    print(f"OnlyDelami count: {count_only_delami}")
    print(f"NoDefect count: {count_nodefect}")
    print(f"Others count: {count_others}")
else:
    print("❌ The specified directory does not exist.")

  #result  #OnlyDelami count: 326
#NoDefect count: 1362
#Others count: 480


OnlyDelami count: 326
NoDefect count: 1362
Others count: 480


In [None]:
import os
import shutil
import re

# Source folders
image_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\images\train"
label_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\labels\train"

# Destination folders
image_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\images"
label_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned\labels"

# Create destination folders if they don't exist
os.makedirs(image_output_dir, exist_ok=True)
os.makedirs(label_output_dir, exist_ok=True)

# Regex: Match Cxxx, Dxxx, or Nodefect_xxx with optional _aug or _aug1
pattern = re.compile(r'(Nodefect_\d*)', re.IGNORECASE)

def rename_and_copy_files(input_dir, output_dir, is_label=False):
    count = 0
    for file in os.listdir(input_dir):
        match = pattern.search(file)
        if match:
            matched_part = match.group(1)
            ext = ".txt" if is_label else os.path.splitext(file)[1]
            new_name = matched_part + ext
            src = os.path.join(input_dir, file)
            dst = os.path.join(output_dir, new_name)

            # If file already exists, add a unique suffix
            suffix = 1
            while os.path.exists(dst):
                new_name = f"{matched_part}_{suffix}{ext}"
                dst = os.path.join(output_dir, new_name)
                suffix += 1

            shutil.copy2(src, dst)
            count += 1
        else:
            print(f"⚠️ No match found in filename: {file}")
    return count

# Process
images_copied = rename_and_copy_files(image_input_dir, image_output_dir, is_label=False)
labels_copied = rename_and_copy_files(label_input_dir, label_output_dir, is_label=True)

print("✅ Done renaming and copying.")
print(f"Total images processed: {images_copied}")
print(f"Total labels processed: {labels_copied}")


In [19]:
import os
import shutil
import re

# Input folders
image_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\images\train"
label_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\labels\train"

# Output folders for unmatched (filtered) files
image_filtered_dir = r"D:\ShailyDL\DeepL\FilteredOut\images"
label_filtered_dir = r"D:\ShailyDL\DeepL\FilteredOut\labels"

# Create output dirs
os.makedirs(image_filtered_dir, exist_ok=True)
os.makedirs(label_filtered_dir, exist_ok=True)

# Correct pattern: match Cxxx, Dxxx, or Nodefect_xxx with optional _aug or _aug1
pattern = re.compile(r'(C\d+(?:_aug\d*)?|D\d+(?:_aug\d*)?|Nodefect_\d*)', re.IGNORECASE)

def filter_and_copy_unmatched(input_dir, output_dir):
    count = 0
    for file in os.listdir(input_dir):
        if not pattern.search(file):
            src = os.path.join(input_dir, file)
            dst = os.path.join(output_dir, file)
            shutil.copy2(src, dst)
            count += 1
    return count

# Process unmatched files
unmatched_images = filter_and_copy_unmatched(image_input_dir, image_filtered_dir)
unmatched_labels = filter_and_copy_unmatched(label_input_dir, label_filtered_dir)

print("✅ Done filtering and moving unmatched images and labels.")
print(f"Unmatched images copied: {unmatched_images}")
print(f"Unmatched labels copied: {unmatched_labels}")


✅ Done filtering and moving unmatched images and labels.
Unmatched images copied: 480
Unmatched labels copied: 480


In [None]:
import os
import shutil
import re

# Source folders
image_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\images\train"
label_input_dir = r"D:\ShailyDL\DeepL\345676_DS3 (2)\labels\train"

# Destination folders
image_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned1\images"
label_output_dir = r"D:\ShailyDL\DeepL\NEW_DS_Cleaned1\labels"

# Create destination folders if they don't exist
os.makedirs(image_output_dir, exist_ok=True)
os.makedirs(label_output_dir, exist_ok=True)

# Regex: Match only Dxxx or Dxxx_aug variants
pattern = re.compile(r'(D\d+(?:_aug\d*)?)', re.IGNORECASE)

def rename_and_copy_files(input_dir, output_dir, is_label=False):
    count = 0
    for file in os.listdir(input_dir):
        match = pattern.search(file)
        if match:
            matched_part = match.group(1)
            ext = ".txt" if is_label else os.path.splitext(file)[1]
            new_name = matched_part + ext
            src = os.path.join(input_dir, file)
            dst = os.path.join(output_dir, new_name)

            # Avoid overwrite by appending suffix if needed
            suffix = 1
            while os.path.exists(dst):
                new_name = f"{matched_part}_{suffix}{ext}"
                dst = os.path.join(output_dir, new_name)
                suffix += 1

            shutil.copy2(src, dst)
            count += 1
        else:
            print(f"⚠️ No match found in filename: {file}")
    return count

# Process files
images_copied = rename_and_copy_files(image_input_dir, image_output_dir, is_label=False)
labels_copied = rename_and_copy_files(label_input_dir, label_output_dir, is_label=True)

print("✅ Done renaming and copying only Dxxx files.")
print(f"Total images processed: {images_copied}")
print(f"Total labels processed: {labels_copied}")


In [25]:
import os
import shutil

# Input folders
image_dir = r"D:\ShailyDL\DeepL\FilteredOut\images"
label_dir = r"D:\ShailyDL\DeepL\FilteredOut\labels"

# Output folders
output_image_dir = r"D:\ShailyDL\DeepL\Cleaned_Categorized\images"
output_label_dir = r"D:\ShailyDL\DeepL\Cleaned_Categorized\labels"

os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_label_dir, exist_ok=True)

# Starting counters
crack_counter = 1031
delam_counter = 660
both_counter = 1

# Class-wise counts
crack_count = 0
delam_count = 0
both_count = 0

# Supported image formats
valid_exts = (".jpg", ".jpeg", ".png")

for img_file in os.listdir(image_dir):
    if not img_file.lower().endswith(valid_exts):
        continue

    base_name = os.path.splitext(img_file)[0]
    label_file = base_name + ".txt"
    label_path = os.path.join(label_dir, label_file)

    if not os.path.exists(label_path):
        print(f"❌ Missing label file for {img_file}")
        continue

    with open(label_path, 'r') as f:
        labels = [line.strip().split()[0] for line in f if line.strip()]
        unique_classes = set(labels)

    # Determine class and assign new name
    if unique_classes == {'1'}:
        new_base = f"C{crack_counter}"
        crack_counter += 1
        crack_count += 1
    elif unique_classes == {'0'}:
        new_base = f"D{delam_counter}"
        delam_counter += 1
        delam_count += 1
    else:
        new_base = f"both{both_counter}"
        both_counter += 1
        both_count += 1

    # File extensions
    ext = os.path.splitext(img_file)[1]

    # Paths
    src_img_path = os.path.join(image_dir, img_file)
    src_lbl_path = os.path.join(label_dir, label_file)
    dst_img_path = os.path.join(output_image_dir, new_base + ext)
    dst_lbl_path = os.path.join(output_label_dir, new_base + ".txt")

    # Copy
    shutil.copy2(src_img_path, dst_img_path)
    shutil.copy2(src_lbl_path, dst_lbl_path)

# Final summary
print("\n✅ Done renaming and copying files.")
print(f"🧱 Crack-only images      : {crack_count}")
print(f"🪨 Delamination-only images : {delam_count}")
print(f"🔀 Mixed (both) images     : {both_count}")



✅ Done renaming and copying files.
🧱 Crack-only images      : 100
🪨 Delamination-only images : 377
🔀 Mixed (both) images     : 3


In [26]:
import os

# Paths
image_dir = r"D:\ShailyDL\DeepL\FINAL_DS\images"
label_dir = r"D:\ShailyDL\DeepL\FINAL_DS\labels"

# Counters
crack_count = 0
delam_count = 0
nodefect_count = 0
missing_label_files = 0

# Image extensions
valid_exts = ('.jpg', '.jpeg', '.png')

# Check each image
for img_file in os.listdir(image_dir):
    if not img_file.lower().endswith(valid_exts):
        continue

    base_name = os.path.splitext(img_file)[0]
    label_file = base_name + '.txt'
    label_path = os.path.join(label_dir, label_file)

    if not os.path.exists(label_path):
        missing_label_files += 1
        continue

    with open(label_path, 'r') as f:
        lines = f.readlines()
        if not lines:
            nodefect_count += 1
        else:
            classes = set(line.strip().split()[0] for line in lines if line.strip())
            if '0' in classes:
                crack_count += 1
            if '1' in classes:
                delam_count += 1

print(f"🟡 Crack count: {crack_count}")
print(f"🔵 Delamination count: {delam_count}")
print(f"⚪ No defect (empty labels): {nodefect_count}")
print(f"❌ Missing label files: {missing_label_files}")


🟡 Crack count: 1011
🔵 Delamination count: 1477
⚪ No defect (empty labels): 1362
❌ Missing label files: 0


In [27]:
import os
import shutil
from PIL import Image

# Source image and label folders
source_image_folder = r"D:\ShailyDL\DeepL\FINAL_DS\images"
source_label_folder = r"D:\ShailyDL\DeepL\FINAL_DS\labels"

# Destination folders for filtered-out images and labels
filtered_out_image_folder = r"D:\ShailyDL\DeepL\FilteredOut_Not_2048x2048\images"
filtered_out_label_folder = r"D:\ShailyDL\DeepL\FilteredOut_Not_2048x2048\labels"

os.makedirs(filtered_out_image_folder, exist_ok=True)
os.makedirs(filtered_out_label_folder, exist_ok=True)

mismatch_count = 0

for filename in os.listdir(source_image_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')):
        image_path = os.path.join(source_image_folder, filename)
        try:
            with Image.open(image_path) as img:
                width, height = img.size
                if width != 2048 or height != 2048:
                    # Move image to filtered-out images folder
                    shutil.copy2(image_path, os.path.join(filtered_out_image_folder, filename))
                    
                    # Corresponding label file
                    label_filename = os.path.splitext(filename)[0] + '.txt'
                    label_path = os.path.join(source_label_folder, label_filename)

                    if os.path.exists(label_path):
                        shutil.copy2(label_path, os.path.join(filtered_out_label_folder, label_filename))
                    else:
                        print(f"⚠️ Label file missing for image: {filename}")

                    mismatch_count += 1
        except Exception as e:
            print(f"⚠️ Error reading {filename}: {e}")

print(f"✅ Done! Total images filtered out (not 2048x2048): {mismatch_count}")


✅ Done! Total images filtered out (not 2048x2048): 2984


In [None]:
import os
import cv2

filtered_out_image_dir = r"D:\ShailyDL\DeepL\FilteredOut_Not_2048x2048\images"
input_label_dir = r"D:\ShailyDL\DeepL\FilteredOut_Not_2048x2048\labels"

output_image_dir = r"D:\ShailyDL\DeepL\FilteredOut_Processed\images"
output_label_dir = r"D:\ShailyDL\DeepL\FilteredOut_Processed\labels"

os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_label_dir, exist_ok=True)

TARGET_SIZE = 2048
PADDING_COLOR = (0, 0, 0)

def resize_and_pad_image(image):
    h, w = image.shape[:2]
    scale = min(TARGET_SIZE / w, TARGET_SIZE / h)
    resized = cv2.resize(image, (int(w * scale), int(h * scale)))
    new_h, new_w = resized.shape[:2]
    pad_top = (TARGET_SIZE - new_h) // 2
    pad_bottom = TARGET_SIZE - new_h - pad_top
    pad_left = (TARGET_SIZE - new_w) // 2
    pad_right = TARGET_SIZE - new_w - pad_left
    padded = cv2.copyMakeBorder(resized, pad_top, pad_bottom, pad_left, pad_right,
                                borderType=cv2.BORDER_CONSTANT, value=PADDING_COLOR)
    return padded, scale, pad_left, pad_top, w, h

def adjust_yolo_labels(label_path, output_path, scale, pad_left, pad_top, orig_w, orig_h):
    if not os.path.exists(label_path):
        print(f"⚠️ Label file not found for {os.path.basename(label_path)}. Creating empty label file.")
        open(output_path, 'w').close()
        return
    with open(label_path, 'r') as f:
        lines = f.readlines()
    updated_lines = []
    for line in lines:
        parts = line.strip().split()
        if len(parts) != 5:
            continue
        cls, x_center, y_center, box_w, box_h = map(float, parts)
        abs_x = x_center * orig_w
        abs_y = y_center * orig_h
        abs_w = box_w * orig_w
        abs_h = box_h * orig_h
        abs_x = abs_x * scale + pad_left
        abs_y = abs_y * scale + pad_top
        abs_w *= scale
        abs_h *= scale
        rel_x = abs_x / TARGET_SIZE
        rel_y = abs_y / TARGET_SIZE
        rel_w = abs_w / TARGET_SIZE
        rel_h = abs_h / TARGET_SIZE
        updated_line = f"{int(cls)} {rel_x:.6f} {rel_y:.6f} {rel_w:.6f} {rel_h:.6f}"
        updated_lines.append(updated_line)
    with open(output_path, 'w') as f:
        f.write('\n'.join(updated_lines))

image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff']

processed_count = 0

for filename in os.listdir(filtered_out_image_dir):
    if not any(filename.lower().endswith(ext) for ext in image_extensions):
        continue
    image_path = os.path.join(filtered_out_image_dir, filename)
    label_path = os.path.join(input_label_dir, os.path.splitext(filename)[0] + '.txt')
    print(f"Processing image: {filename}")

    img = cv2.imread(image_path)
    if img is None:
        print(f"⚠️ Could not read image {filename}, skipping.")
        continue

    padded_img, scale, pad_left, pad_top, orig_w, orig_h = resize_and_pad_image(img)

    out_img_path = os.path.join(output_image_dir, filename)
    out_label_path = os.path.join(output_label_dir, os.path.splitext(filename)[0] + '.txt')

    cv2.imwrite(out_img_path, padded_img)
    adjust_yolo_labels(label_path, out_label_path, scale, pad_left, pad_top, orig_w, orig_h)

    print(f"✅ Saved processed image and label for {filename}")
    processed_count += 1

print(f"✅ Done! Processed {processed_count} images.")


In [36]:
import os
import cv2

# Input paths
image_dir = r"D:\ShailyDL\DeepL\FINAL_DS\images"
label_dir = r"D:\ShailyDL\DeepL\FINAL_DS\labels"

# Output path
output_vis_dir = r"D:\ShailyDL\DeepL\FINAL_DS\Visualized"
os.makedirs(output_vis_dir, exist_ok=True)

# Class mapping and color (OpenCV uses BGR)
class_names = {0: "Delamination", 1: "Crack"}
class_colors = {
    0: (98, 0, 255),     # Delamination: Purple
    1: (0, 238, 196),    # Crack: Aqua
}

# Loop through each image
for filename in os.listdir(image_dir):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_dir, filename)
    label_path = os.path.join(label_dir, os.path.splitext(filename)[0] + '.txt')

    img = cv2.imread(image_path)
    if img is None:
        print(f"⚠️ Could not load {filename}")
        continue

    h, w = img.shape[:2]
    present_classes = set()

    # Draw bounding boxes if label exists
    if os.path.exists(label_path):
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                cls_id, x_center, y_center, bw, bh = map(float, parts)

                cls_id = int(cls_id)
                present_classes.add(cls_id)

                box_color = class_colors.get(cls_id, (0, 255, 0))

                # Convert YOLO -> absolute coords
                cx, cy = int(x_center * w), int(y_center * h)
                box_w, box_h = int(bw * w), int(bh * h)
                x1 = int(cx - box_w / 2)
                y1 = int(cy - box_h / 2)
                x2 = int(cx + box_w / 2)
                y2 = int(cy + box_h / 2)

                # Draw bbox
                cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 2)

    # Show only the relevant class names in the center
    if present_classes:
        font_scale = 2
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_thickness = 3
        text_color = (255, 255, 255)

        # Compose label string
        text = " | ".join([class_names[c] for c in sorted(present_classes)])
        text_size = cv2.getTextSize(text, font, font_scale, font_thickness)[0]
        text_x = (w - text_size[0]) // 2
        text_y = (h + text_size[1]) // 2

        # Draw the text in the center
        cv2.putText(img, text, (text_x, text_y), font, font_scale, text_color, font_thickness, cv2.LINE_AA)

    # Save visualized image
    out_path = os.path.join(output_vis_dir, filename)
    cv2.imwrite(out_path, img)

print("✅ Done! Visualized images saved with centered class names.")


⚠️ Could not load D50.png
✅ Done! Visualized images saved with centered class names.


In [34]:
import os
import shutil

# Original folders
input_img_dir = r"D:\ShailyDL\DeepL\FINAL_DS\images"
input_lbl_dir = r"D:\ShailyDL\DeepL\FINAL_DS\labels"

# Output folders
output_img_dir = r"D:\ShailyDL\DeepL\Swapped_Renamed\images"
output_lbl_dir = r"D:\ShailyDL\DeepL\Swapped_Renamed\labels"

os.makedirs(output_img_dir, exist_ok=True)
os.makedirs(output_lbl_dir, exist_ok=True)

# --- Phase 1: C1031–C1130 → rename to D660–D759
c_start, c_end = 1031, 1130
d_start_new = 660

for i, c_id in enumerate(range(c_start, c_end + 1)):
    old_prefix = f"C{c_id}"
    new_prefix = f"D{d_start_new + i}"

    for ext in ['.jpg', '.png', '.jpeg']:
        old_img_path = os.path.join(input_img_dir, old_prefix + ext)
        if os.path.exists(old_img_path):
            new_img_path = os.path.join(output_img_dir, new_prefix + ext)
            shutil.copy2(old_img_path, new_img_path)
            break  # use only first matching extension

    old_lbl_path = os.path.join(input_lbl_dir, old_prefix + ".txt")
    new_lbl_path = os.path.join(output_lbl_dir, new_prefix + ".txt")
    if os.path.exists(old_lbl_path):
        shutil.copy2(old_lbl_path, new_lbl_path)

# --- Phase 2: D660–D1036 → rename to C1031–C1407 (maintaining 1:1)
d_start, d_end = 660, 1036
c_start_new = 1031

for i, d_id in enumerate(range(d_start, d_end + 1)):
    old_prefix = f"D{d_id}"
    new_prefix = f"C{c_start_new + i}"

    for ext in ['.jpg', '.png', '.jpeg']:
        old_img_path = os.path.join(input_img_dir, old_prefix + ext)
        if os.path.exists(old_img_path):
            new_img_path = os.path.join(output_img_dir, new_prefix + ext)
            shutil.copy2(old_img_path, new_img_path)
            break

    old_lbl_path = os.path.join(input_lbl_dir, old_prefix + ".txt")
    new_lbl_path = os.path.join(output_lbl_dir, new_prefix + ".txt")
    if os.path.exists(old_lbl_path):
        shutil.copy2(old_lbl_path, new_lbl_path)

print("✅ Renaming and swapping completed. Files saved in 'Swapped_Renamed' folder.")


✅ Renaming and swapping completed. Files saved in 'Swapped_Renamed' folder.


In [40]:
import os
import random

random.seed(42)

image_dir = r'D:\ShailyDL\DeepL\FINAL_DS\images'
label_dir = r'D:\ShailyDL\DeepL\FINAL_DS\labels'  # unused here but kept for clarity

# Split ratios
test_ratio = 0.05
val_ratio = 0.12
train_ratio = 1 - test_ratio - val_ratio  # 0.83

# Categories with prefixes (lowercase for easy matching)
categories = {
    "Crack": "c",
    "Delamination": "d",
    "Both": "both",  # assuming prefix "both" if any, else handle separately
    "NoDefect": "n"
}

# Helper to detect augmented images
def is_augmented(filename):
    return "_aug" in filename.lower()

# Detect category based on prefix ignoring case
def get_category(filename):
    fname = filename.lower()
    for cat, prefix in categories.items():
        if prefix == "both":
            # Handle Both category logic if needed here
            if fname.startswith("both"):
                return "Both"
            continue
        if fname.startswith(prefix):
            return cat
    # No prefix matched = NoDefect
    return "NoDefect"

# Read all images
all_images = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

# Prepare dict for categorized images
category_files = {cat: {"original": [], "augmented": []} for cat in categories}

# Categorize images
for img in all_images:
    cat = get_category(img)
    if is_augmented(img):
        category_files[cat]["augmented"].append(img)
    else:
        category_files[cat]["original"].append(img)

# Print counts before splitting
print("Processing images...\n")

for cat in categories:
    orig = len(category_files[cat]["original"])
    aug = len(category_files[cat]["augmented"])
    total = orig + aug
    print(f"{cat}: Original={orig}, Augmented={aug}, Total={total}")

# Function to split list
def split_list(items):
    random.shuffle(items)
    n = len(items)
    n_test = int(n * test_ratio)
    n_val = int(n * val_ratio)
    n_train = n - n_val - n_test
    return items[:n_train], items[n_train:n_train+n_val], items[n_train+n_val:]

print("\nSplitting datasets...\n")

summary = []

for cat in categories:
    orig = category_files[cat]["original"]
    aug = category_files[cat]["augmented"]

    orig_train, orig_val, orig_test = split_list(orig)
    aug_train, aug_val, aug_test = split_list(aug)

    summary.append((cat, "Train", len(orig_train), len(aug_train), len(orig_train)+len(aug_train)))
    summary.append((cat, "Val", len(orig_val), len(aug_val), len(orig_val)+len(aug_val)))
    summary.append((cat, "Test", len(orig_test), len(aug_test), len(orig_test)+len(aug_test)))

print("Dataset split summary:")
print(f"{'Split':<7} {'Class':<14} {'Orig':<5} {'Aug':<5} {'Total':<5}")
for cat, split, orig_c, aug_c, total_c in summary:
    print(f"{split:<7} {cat:<14} {orig_c:<5} {aug_c:<5} {total_c:<5}")


Processing images...

Crack: Original=1383, Augmented=92, Total=1475
Delamination: Original=725, Augmented=284, Total=1009
Both: Original=2, Augmented=0, Total=2
NoDefect: Original=1362, Augmented=0, Total=1362

Splitting datasets...

Dataset split summary:
Split   Class          Orig  Aug   Total
Train   Crack          1149  77    1226 
Val     Crack          165   11    176  
Test    Crack          69    4     73   
Train   Delamination   602   236   838  
Val     Delamination   87    34    121  
Test    Delamination   36    14    50   
Train   Both           2     0     2    
Val     Both           0     0     0    
Test    Both           0     0     0    
Train   NoDefect       1131  0     1131 
Val     NoDefect       163   0     163  
Test    NoDefect       68    0     68   


In [1]:
import os
import shutil
import random

random.seed(42)

image_dir = r'D:\ShailyDL\DeepL\FINAL_DSw488\images'
label_dir = r'D:\ShailyDL\DeepL\FINAL_DSw488\labels'
output_base_dir = r'D:\ShailyDL\DeepL\FINAL_DS_Splitw488'

# Split ratios
test_ratio = 0.05
val_ratio = 0.12
train_ratio = 1 - test_ratio - val_ratio  # 0.83

# Categories with prefixes (lowercase for easy matching)
categories = {
    "Crack": "c",
    "Delamination": "d",
    "Both": "both",
    "NoDefect": "n"
}

# Helper to detect augmented images
def is_augmented(filename):
    return "_aug" in filename.lower()

# Detect category based on prefix ignoring case
def get_category(filename):
    fname = filename.lower()
    for cat, prefix in categories.items():
        if prefix == "both":
            if fname.startswith("both"):
                return "Both"
            continue
        if fname.startswith(prefix):
            return cat
    return "NoDefect"

# Read all images
all_images = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

# Prepare dict for categorized images
category_files = {cat: {"original": [], "augmented": []} for cat in categories}

# Categorize images
for img in all_images:
    cat = get_category(img)
    if is_augmented(img):
        category_files[cat]["augmented"].append(img)
    else:
        category_files[cat]["original"].append(img)

# Print counts before splitting
print("Processing images...\n")
for cat in categories:
    orig = len(category_files[cat]["original"])
    aug = len(category_files[cat]["augmented"])
    total = orig + aug
    print(f"{cat}: Original={orig}, Augmented={aug}, Total={total}")

# Function to split list
def split_list(items):
    random.shuffle(items)
    n = len(items)
    n_test = int(n * test_ratio)
    n_val = int(n * val_ratio)
    n_train = n - n_val - n_test
    return items[:n_train], items[n_train:n_train+n_val], items[n_train+n_val:]

print("\nSplitting datasets and copying files...\n")

summary = []
splits = {'train': [], 'val': [], 'test': []}

for cat in categories:
    orig = category_files[cat]["original"]
    aug = category_files[cat]["augmented"]

    orig_train, orig_val, orig_test = split_list(orig)
    aug_train, aug_val, aug_test = split_list(aug)

    splits['train'].extend(orig_train + aug_train)
    splits['val'].extend(orig_val + aug_val)
    splits['test'].extend(orig_test + aug_test)

    summary.append((cat, "Train", len(orig_train), len(aug_train), len(orig_train)+len(aug_train)))
    summary.append((cat, "Val", len(orig_val), len(aug_val), len(orig_val)+len(aug_val)))
    summary.append((cat, "Test", len(orig_test), len(aug_test), len(orig_test)+len(aug_test)))

# Create folders and copy files
for split in ['train', 'val', 'test']:
    img_output = os.path.join(output_base_dir, 'images', split)
    lbl_output = os.path.join(output_base_dir, 'labels', split)
    os.makedirs(img_output, exist_ok=True)
    os.makedirs(lbl_output, exist_ok=True)

    for fname in splits[split]:
        src_img = os.path.join(image_dir, fname)
        dst_img = os.path.join(img_output, fname)
        shutil.copyfile(src_img, dst_img)

        lbl_name = os.path.splitext(fname)[0] + ".txt"
        src_lbl = os.path.join(label_dir, lbl_name)
        dst_lbl = os.path.join(lbl_output, lbl_name)
        if os.path.exists(src_lbl):
            shutil.copyfile(src_lbl, dst_lbl)
        else:
            print(f"⚠️ Warning: Label not found for image {fname}")

# Final summary print
print("\n✅ Dataset split summary:")
print(f"{'Split':<7} {'Class':<14} {'Orig':<5} {'Aug':<5} {'Total':<5}")
for cat, split, orig_c, aug_c, total_c in summary:
    print(f"{split:<7} {cat:<14} {orig_c:<5} {aug_c:<5} {total_c:<5}")


Processing images...

Crack: Original=1383, Augmented=0, Total=1383
Delamination: Original=725, Augmented=0, Total=725
Both: Original=2, Augmented=0, Total=2
NoDefect: Original=1362, Augmented=0, Total=1362

Splitting datasets and copying files...


✅ Dataset split summary:
Split   Class          Orig  Aug   Total
Train   Crack          1149  0     1149 
Val     Crack          165   0     165  
Test    Crack          69    0     69   
Train   Delamination   602   0     602  
Val     Delamination   87    0     87   
Test    Delamination   36    0     36   
Train   Both           2     0     2    
Val     Both           0     0     0    
Test    Both           0     0     0    
Train   NoDefect       1131  0     1131 
Val     NoDefect       163   0     163  
Test    NoDefect       68    0     68   
