In [1]:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [2]:
from ultralytics import YOLO

# Load the YOLOv8 model (use 'yolov8n', 'yolov8s', etc., for different sizes)

model = YOLO('yolov8n.yaml')  
model = YOLO('yolov8n.pt') # Load a pretrained YOLOv8 model

In [1]:
import os
import random
import shutil

# Define the root dataset directory
root_dir = r'C:\Users\HP\AR\Bone fracture.v11i.yolov8'

# Paths for train, valid, and test directories
train_dir = os.path.join(root_dir, 'train')
valid_dir = os.path.join(root_dir, 'valid')
test_dir = os.path.join(root_dir, 'test')

# Subdirectories for images and labels
subdirs = ['images', 'labels']

# Create directories for train, valid, and test
for dir_name in [train_dir, valid_dir, test_dir]:
    for subdir in subdirs:
        os.makedirs(os.path.join(dir_name, subdir), exist_ok=True)

# Paths to the original data
original_images_dir = os.path.join(train_dir, 'images')
original_labels_dir = os.path.join(train_dir, 'labels')

# Ensure the original directories exist
if not os.path.exists(original_images_dir) or not os.path.exists(original_labels_dir):
    raise FileNotFoundError("Original 'images' or 'labels' directory not found.")

# Get all image and label files
image_files = sorted([f for f in os.listdir(original_images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
label_files = sorted([f for f in os.listdir(original_labels_dir) if f.endswith('.txt')])

# Ensure every image has a corresponding label
paired_files = [(img, lbl) for img, lbl in zip(image_files, label_files) if os.path.splitext(img)[0] == os.path.splitext(lbl)[0]]

# Shuffle the paired files for randomness
random.shuffle(paired_files)

# Calculate split sizes
total_files = len(paired_files)
train_split = int(total_files * 0.7)
valid_split = int(total_files * 0.2) + train_split

# Split the data
train_files = paired_files[:train_split]
valid_files = paired_files[train_split:valid_split]
test_files = paired_files[valid_split:]

# Function to move files to their respective folders
def move_files(file_list, dest_dir):
    for img_file, lbl_file in file_list:
        # Move images
        shutil.move(os.path.join(original_images_dir, img_file), os.path.join(dest_dir, 'images', img_file))
        # Move labels
        shutil.move(os.path.join(original_labels_dir, lbl_file), os.path.join(dest_dir, 'labels', lbl_file))

# Move files to train, valid, and test directories
move_files(train_files, train_dir)
move_files(valid_files, valid_dir)
move_files(test_files, test_dir)

# Print summary
print(f"Dataset split complete:")
print(f"Train: {len(train_files)} files")
print(f"Valid: {len(valid_files)} files")
print(f"Test: {len(test_files)} files")


Dataset split complete:
Train: 1945 files
Valid: 555 files
Test: 279 files


In [5]:
import os
import shutil

# Define dataset paths
dataset_dir = r'C:\Users\HP\AR\Bone fracture.v11i.yolov8'
output_dir = r'C:\Users\HP\AR\Bone fracture_filtered'

# Classes to extract and their new IDs
class_mapping = {
    1: 0,
    2: 1,
    4: 2,
    6: 3,
    10: 4
}

# Directories for train, valid, and test
subdirs = ['train', 'valid', 'test']

# Function to filter and rename classes in label files
def process_labels(input_label_dir, output_label_dir, class_map):
    os.makedirs(output_label_dir, exist_ok=True)
    for label_file in os.listdir(input_label_dir):
        if label_file.endswith('.txt'):
            input_path = os.path.join(input_label_dir, label_file)
            output_path = os.path.join(output_label_dir, label_file)

            with open(input_path, 'r') as f:
                lines = f.readlines()

            filtered_lines = []
            for line in lines:
                parts = line.split()
                if parts:
                    class_id = int(parts[0])
                    if class_id in class_map:
                        new_class_id = class_map[class_id]
                        parts[0] = str(new_class_id)
                        filtered_lines.append(" ".join(parts) + '\n')

            if filtered_lines:
                with open(output_path, 'w') as f:
                    f.writelines(filtered_lines)

# Function to copy corresponding images
def copy_images(input_image_dir, output_image_dir, label_dir):
    os.makedirs(output_image_dir, exist_ok=True)
    for label_file in os.listdir(label_dir):
        image_file = label_file.replace('.txt', '.jpg')
        input_image_path = os.path.join(input_image_dir, image_file)
        output_image_path = os.path.join(output_image_dir, image_file)
        if os.path.exists(input_image_path):
            shutil.copy(input_image_path, output_image_path)

# Process each subdirectory
for subdir in subdirs:
    input_label_dir = os.path.join(dataset_dir, subdir, 'labels')
    input_image_dir = os.path.join(dataset_dir, subdir, 'images')

    if not os.path.exists(input_label_dir) or not os.path.exists(input_image_dir):
        print(f"Skipping {subdir}, as the label or image directory is missing.")
        continue

    output_label_dir = os.path.join(output_dir, subdir, 'labels')
    output_image_dir = os.path.join(output_dir, subdir, 'images')

    process_labels(input_label_dir, output_label_dir, class_mapping)
    copy_images(input_image_dir, output_image_dir, output_label_dir)

# Create new data.yaml file
new_data_yaml = f"""
train: {os.path.join(output_dir, 'train', 'images')}
val: {os.path.join(output_dir, 'valid', 'images')}
test: {os.path.join(output_dir, 'test', 'images')}

nc: {len(class_mapping)}
names: {list(class_mapping.values())}
"""

with open(os.path.join(output_dir, 'data.yaml'), 'w') as yaml_file:
    yaml_file.write(new_data_yaml)

print(f"Filtered dataset created in: {output_dir}")


Filtered dataset created in: C:\Users\HP\AR\Bone fracture_filtered


In [13]:
results = model.train(data="data_5.yaml", epochs=2, imgsz=640)


New https://pypi.org/project/ultralytics/8.3.65 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.39  Python-3.12.4 torch-2.3.1+cpu CPU (12th Gen Intel Core(TM) i5-1235U)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data_5.yaml, epochs=2, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train16, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_fr

[34m[1mtrain: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\train\labels... 1143 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1143/1143 [00:04<00:00, 235.63it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\HP\AR\Bone fracture_filtered\train\labels.cache


[34m[1mval: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\valid\labels... 788 images, 0 backgrounds, 0 corrupt: 100%|██████████| 788/788 [00:02<00:00, 341.70it/s]


[34m[1mval: [0mNew cache created: C:\Users\HP\AR\Bone fracture_filtered\valid\labels.cache
Plotting labels to C:\Users\HP\runs\detect\train16\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mC:\Users\HP\runs\detect\train16[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2         0G      1.146      3.422      1.622         50        640:   7%|▋         | 5/72 [01:52<25:00, 22.40s/it]


KeyboardInterrupt: 

In [None]:
metrics = model.val(data="data.yaml")
print(metrics)


In [None]:
results = model.predict(source="path_to_image_or_folder", save=True, conf=0.25)


In [59]:
metrics = model.val(data="data_5.yaml")
print(metrics)  # Outputs mAP, precision, recall, etc.


Ultralytics 8.3.39  Python-3.12.4 torch-2.3.1+cpu CPU (12th Gen Intel Core(TM) i5-1235U)


[34m[1mval: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\valid\labels.cache... 627 images, 0 backgrounds, 0 corrupt: 100%|██████████| 627/627 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 40/40 [01:08<00:00,  1.71s/it]


                   all        627        642    0.00276      0.791     0.0753      0.011
     avulsion-fracture         96         96    0.00248     0.0521    0.00135    0.00035
closed-simple-fracture        195        195     0.0038      0.954     0.0714     0.0123
compression-crush-fracture        131        133    0.00273      0.985     0.0573    0.00908
   greenstick-fracture        124        129    0.00267          1      0.208     0.0262
 longitudinal-fracture         81         89    0.00211      0.966     0.0383    0.00723
Speed: 1.7ms preprocess, 95.7ms inference, 0.0ms loss, 4.6ms postprocess per image
Results saved to [1mC:\Users\HP\runs\detect\val2[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000022FBD53C1D0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-C

In [None]:
# Evaluate the model
metrics = model.val(data="data_5.yaml")

# Extract metrics
precision = metrics['precision']  # Average precision across all classes
recall = metrics['recall']        # Average recall across all classes
map_50 = metrics['map50']         # mAP@0.5

# Calculate accuracy as a percentage
accuracy = map_50 * 100
print(f"Accuracy: {accuracy:.2f}%")


In [61]:
from ultralytics import YOLO

# Load the trained YOLO model
model = YOLO('C:/Users/HP/runs/detect/train1243/weights/best.pt')  # Replace 'best.pt' with the path to your trained model weights

# Define the path to the testing data in a new data_test.yaml file
# Example data_test.yaml:
# test: C:/Users/HP/AR/Bone fracture_filtered/test/images
# nc: 5
# names: ['avulsion-fracture', 'closed-simple-fracture', 'compression-crush-fracture', 'greenstick-fracture', 'longitudinal-fracture']

# Perform validation/testing on the test dataset
metrics = model.val(data="data_test.yaml", split="test")

# Display metrics
print(metrics)


Ultralytics 8.3.39  Python-3.12.4 torch-2.3.1+cpu CPU (12th Gen Intel Core(TM) i5-1235U)
YOLOv8n summary (fused): 168 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


SyntaxError: data_test.yaml 'train:' key missing .
'train' and 'val' are required in all data YAMLs. (<string>)

In [14]:
!pip install albumentations opencv-python


Collecting albumentations
  Downloading albumentations-2.0.0-py3-none-any.whl.metadata (38 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Downloading pydantic-2.10.5-py3-none-any.whl.metadata (30 kB)
Collecting albucore==0.0.23 (from albumentations)
  Downloading albucore-0.0.23-py3-none-any.whl.metadata (5.3 kB)
Collecting stringzilla>=3.10.4 (from albucore==0.0.23->albumentations)
  Downloading stringzilla-3.11.3-cp312-cp312-win_amd64.whl.metadata (81 kB)
     ---------------------------------------- 0.0/81.7 kB ? eta -:--:--
     ---------------------------------------- 81.7/81.7 kB 2.3 MB/s eta 0:00:00
Collecting simsimd>=5.9.2 (from albucore==0.0.23->albumentations)
  Downloading simsimd-6.2.1-cp312-cp312-win_amd64.whl.metadata (67 kB)
     ---------------------------------------- 0.0/67.4 kB ? eta -:--:--
     ---------------------------------------- 67.4/67.4 kB 3.8 MB/s eta 0:00:00
Collecting pydantic-core==2.27.2 (from pydantic>=2.9.2->albumentations)
  Downloading pyd

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastapi 0.78.0 requires pydantic!=1.7,!=1.7.1,!=1.7.2,!=1.7.3,!=1.8,!=1.8.1,<2.0.0,>=1.6.2, but you have pydantic 2.10.5 which is incompatible.


In [6]:
pip install --upgrade albumentations


Note: you may need to restart the kernel to use updated packages.


In [None]:
!pip install albumentations


In [4]:
pip install albumentations opencv-python


Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
import cv2
import json
from albumentations import (
    HorizontalFlip, RandomBrightnessContrast, Rotate, Compose
)
from albumentations.pytorch import ToTensorV2

In [9]:
augmentations = Compose(
    [
        HorizontalFlip(p=0.5),
        RandomBrightnessContrast(p=0.3),
        Rotate(limit=15, p=0.5)
    ],
    bbox_params={
        'format': 'pascal_voc',  # 'pascal_voc' format: [x_min, y_min, x_max, y_max]
        'min_area': 500,         # Minimum area of bounding box
        'min_visibility': 0.1,   # Minimum visibility of bounding box after augmentation
        'label_fields': ['category_ids']  # Specify that bounding boxes are linked with category IDs
    }
)

In [30]:
import cv2
import os
from collections import Counter
import random
from albumentations import Compose, RandomCrop, HorizontalFlip, ShiftScaleRotate, BboxParams, Resize

# Define augmentation transformations
def augmentations(image, bboxes, category_ids):
    transform = Compose([
        HorizontalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        Resize(width=500, height=500),  # Resize image if smaller than crop size
        RandomCrop(width=300, height=300, p=1)
    ], bbox_params=BboxParams(format='yolo', label_fields=['category_ids']))  # Define the format of the bounding boxes

    augmented = transform(image=image, bboxes=bboxes, category_ids=category_ids)
    return augmented

# Augment images and their annotations (updated for .txt)
def augment_detection_data(image_path, annotation_path, output_img_dir, output_ann_dir, num_augments=5):
    # Read image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error reading {image_path}")
        return

    # Read annotations from .txt file (YOLO format)
    bboxes = []
    category_ids = []
    with open(annotation_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            category_ids.append(int(parts[0]))  # class label
            bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])  # [x_min, y_min, x_max, y_max]

    augmented_images = []
    augmented_bboxes = []
    augmented_category_ids = []

    for i in range(num_augments):
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
        augmented_images.append(augmented['image'])
        augmented_bboxes.append(augmented['bboxes'])
        augmented_category_ids.append(augmented['category_ids'])

    return augmented_images, augmented_bboxes, augmented_category_ids

# Process the dataset (updated for .txt annotations and class balancing)
def process_dataset(image_folder, annotation_folder, output_image_folder, output_annotation_folder, num_augments=5):
    if not os.path.exists(output_image_folder):
        os.makedirs(output_image_folder)
    if not os.path.exists(output_annotation_folder):
        os.makedirs(output_annotation_folder)

    # Count the number of samples per class
    class_counts = Counter()
    image_paths = []
    annotation_paths = []

    # First pass: Count samples per class
    for img_file in os.listdir(image_folder):
        if not img_file.endswith(('.jpg', '.png')):
            continue

        img_path = os.path.join(image_folder, img_file)
        ann_path = os.path.join(annotation_folder, os.path.splitext(img_file)[0] + '.txt')  # Ensure matching .txt annotation

        if not os.path.exists(ann_path):
            print(f"Annotation for {img_file} not found, skipping.")
            continue

        # Count class labels
        with open(ann_path, 'r') as f:
            for line in f:
                category_id = int(line.strip().split()[0])  # Get class label
                class_counts[category_id] += 1

        # Store paths for later use
        image_paths.append(img_path)
        annotation_paths.append(ann_path)

    # Find the class with the maximum number of samples
    max_class_count = max(class_counts.values())

    # Second pass: Augment the imbalanced classes
    for img_path, ann_path in zip(image_paths, annotation_paths):
        # Read annotations from .txt file (YOLO format)
        bboxes = []
        category_ids = []
        with open(ann_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                category_ids.append(int(parts[0]))  # class label
                bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])  # [x_min, y_min, x_max, y_max]

        # Count the class occurrences in this image
        img_class_counts = Counter(category_ids)

        # Check if any class in this image is imbalanced (i.e., less than the max class count)
        for class_id, count in img_class_counts.items():
            if count < max_class_count:
                # Calculate how many augmentations are needed for this class
                num_required = max_class_count - count
                augmented_images, augmented_bboxes, augmented_category_ids = augment_detection_data(
                    img_path, ann_path, output_image_folder, output_annotation_folder, num_augments=num_required
                )

                # Save augmented images and annotations
                for i, aug_image in enumerate(augmented_images):
                    augmented_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug_{i}.jpg"
                    augmented_img_path = os.path.join(output_image_folder, augmented_img_name)
                    cv2.imwrite(augmented_img_path, aug_image)

                    # Save corresponding annotations
                    augmented_ann_name = f"{os.path.splitext(os.path.basename(ann_path))[0]}_aug_{i}.txt"
                    augmented_ann_path = os.path.join(output_annotation_folder, augmented_ann_name)
                    with open(augmented_ann_path, 'w') as f:
                        for bbox, category_id in zip(augmented_bboxes[i], augmented_category_ids[i]):
                            # Write bounding boxes in YOLO format: class_id x_center y_center width height
                            x_min, y_min, x_max, y_max = bbox
                            width = x_max - x_min
                            height = y_max - y_min
                            x_center = (x_min + x_max) / 2
                            y_center = (y_min + y_max) / 2
                            f.write(f"{category_id} {x_center} {y_center} {width} {height}\n")

# Paths
train_images = "C:/Users/HP/AR/Bone fracture_filtered/train/images"  # Path to images
train_labels = "C:/Users/HP/AR/Bone fracture_filtered/train/labels"  # Path to annotations
augmented_images = "C:/Users/HP/AR/Bone fracture_filtered/train_aug_images"  # Output path for augmented images
augmented_annotations = "C:/Users/HP/AR/Bone fracture_filtered/train_aug_labels"  # Output path for augmented annotations

# Perform augmentation
process_dataset(train_images, train_labels, augmented_images, augmented_annotations, num_augments=5)


KeyboardInterrupt: 

In [22]:
# Check if annotation files are present in the directory
annotation_files = [f for f in os.listdir("C:/Users/HP/AR/Bone fracture_filtered/train/labels") if f.endswith(('.json', '.xml','.txt'))]
print(f"Found {len(annotation_files)} annotation files")


Found 1143 annotation files


In [None]:
import pandas as pd

# Load annotations
annotations = pd.read_csv("path/to/annotations.csv")

# Check for anomalies
small_boxes = annotations[(annotations['width'] < 10) & (annotations['height'] < 10)]
print("Small bounding boxes:", small_boxes)

# Filter out anomalies
filtered_annotations = annotations[(annotations['width'] >= 10) & (annotations['height'] >= 10)]
filtered_annotations.to_csv("path/to/filtered_annotations.csv", index=False)


In [19]:
import os

def count_files_per_class(folder):
    counts = {}
    for class_folder in os.listdir(folder):
        class_path = os.path.join(folder, class_folder)
        # Check if the entry is a directory
        if os.path.isdir(class_path):
            counts[class_folder] = len([
                f for f in os.listdir(class_path) 
                if os.path.isfile(os.path.join(class_path, f))
            ])
    return counts

# Example
folder = "C:/Users/HP/AR/Bone fracture_filtered/train"
print(count_files_per_class(folder))
folder = "C:/Users/HP/AR/Bone fracture_filtered/valid"
print(count_files_per_class(folder))

folder = "C:/Users/HP/AR/Bone fracture_filtered/test"
print(count_files_per_class(folder))



{'images': 1143, 'labels': 1143}
{'images': 788, 'labels': 788}
{'images': 448, 'labels': 448}


In [None]:
Faster R-CNN: Good for high accuracy but slower inference.
EfficientDet: Balances speed and accuracy.
SSD (Single Shot Detector): Faster but slightly less accurate.
RetinaNet: Good for datasets with class imbalance due to its Focal Loss.

In [17]:
from ultralytics import YOLO

# Load the model
model = YOLO("C:/Users/HP/runs/detect/train1243/weights/best.pt")

# Validate the model
metrics = model.val(data="data_test.yaml", imgsz=640, iou=0.5, task="test")
print(metrics)


Ultralytics 8.3.39  Python-3.12.4 torch-2.3.1+cpu CPU (12th Gen Intel Core(TM) i5-1235U)
YOLOv8n summary (fused): 168 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\valid\labels.cache... 788 images, 0 backgrounds, 0 corrupt: 100%|██████████| 788/788 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/50 [00:02<?, ?it/s]

KeyboardInterrupt



In [1]:
import os
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from collections import Counter

# Define augmentations
def augmentations(image, bboxes, category_ids):
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.5),
        A.GaussNoise(p=0.2),
        A.Blur(p=0.2)
    ], bbox_params=A.BboxParams(format='yolo', label_fields=['category_ids']))

    return transform(image=image, bboxes=bboxes, category_ids=category_ids)

# Augment detection data
def augment_detection_data(image_path, annotation_path, num_augments=5):
    print(f"🔄 Augmenting: {image_path}")

    # Read image
    image = cv2.imread(image_path)
    if image is None:
        print(f"⚠ Error: Unable to read image {image_path}")
        return [], [], []

    # Read annotations
    bboxes = []
    category_ids = []
    try:
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                print(f"⚠ Warning: Empty annotation file {annotation_path}, skipping.")
                return [], [], []
            for line in lines:
                parts = line.strip().split()
                category_ids.append(int(parts[0]))  # class label
                bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])
    except Exception as e:
        print(f"⚠ Error reading annotation file {annotation_path}: {e}")
        return [], [], []

    augmented_images = []
    augmented_bboxes = []
    augmented_category_ids = []

    for i in range(num_augments):
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
        augmented_images.append(augmented['image'])
        augmented_bboxes.append(augmented['bboxes'])
        augmented_category_ids.append(augmented['category_ids'])

    return augmented_images, augmented_bboxes, augmented_category_ids

# Process the dataset
def process_dataset(image_folder, annotation_folder, output_image_folder, output_annotation_folder):
    print(f"📂 Checking folders...")
    print(f"✅ Image folder exists: {os.path.exists(image_folder)}")
    print(f"✅ Annotation folder exists: {os.path.exists(annotation_folder)}")

    if not os.path.exists(output_image_folder):
        os.makedirs(output_image_folder)
    if not os.path.exists(output_annotation_folder):
        os.makedirs(output_annotation_folder)

    # Count the number of samples per class
    class_counts = Counter()
    image_annotations = []

    print(f"🔍 Scanning dataset...")

    # First pass: Count samples per class
    for img_file in os.listdir(image_folder):
        if not img_file.endswith(('.jpg', '.png')):
            continue

        img_path = os.path.join(image_folder, img_file)
        ann_path = os.path.join(annotation_folder, os.path.splitext(img_file)[0] + '.txt')

        if not os.path.exists(ann_path):
            print(f"⚠ Warning: Annotation for {img_file} not found, skipping.")
            continue

        with open(ann_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                print(f"⚠ Warning: Empty annotation file {ann_path}, skipping.")
                continue
            
            categories_in_image = []
            for line in lines:
                category_id = int(line.strip().split()[0])
                categories_in_image.append(category_id)
                class_counts[category_id] += 1
            
            # Store for later augmentation
            image_annotations.append((img_path, ann_path, categories_in_image))

    if not class_counts:
        print("⚠ Error: No valid class labels found. Check annotation files.")
        return

    max_class_count = max(class_counts.values())

    # Determine augmentation needed
    augmentation_needed = {class_id: max_class_count - count for class_id, count in class_counts.items()}
    print(f"📊 Class distribution before augmentation: {class_counts}")
    print(f"📌 Augmentation required per class: {augmentation_needed}")

    # Second pass: Augment underrepresented classes
    for img_path, ann_path, categories_in_image in image_annotations:
        augment_needed_for_this_image = any(augmentation_needed[class_id] > 0 for class_id in categories_in_image)

        if augment_needed_for_this_image:
            min_class_id = min(categories_in_image, key=lambda c: augmentation_needed[c])
            num_required = min(augmentation_needed[min_class_id], 5)

            augmented_images, augmented_bboxes, augmented_category_ids = augment_detection_data(
                img_path, ann_path, num_augments=num_required
            )

            for i, aug_image in enumerate(augmented_images):
                if augmentation_needed[min_class_id] <= 0:
                    break  # Stop augmentation when balanced

                augmented_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{i}.jpg"
                augmented_img_path = os.path.join(output_image_folder, augmented_img_name)
                cv2.imwrite(augmented_img_path, aug_image)

                augmented_ann_name = f"{os.path.splitext(os.path.basename(ann_path))[0]}_aug{i}.txt"
                augmented_ann_path = os.path.join(output_annotation_folder, augmented_ann_name)
                with open(augmented_ann_path, 'w') as f:
                    for bbox, category_id in zip(augmented_bboxes[i], augmented_category_ids[i]):
                        f.write(f"{category_id} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")

                augmentation_needed[min_class_id] -= 1

    print("✅ Augmentation completed successfully!")

# Paths
train_images = "C:/Users/HP/AR/Bone fracture_filtered/valid/images"
train_labels = "C:/Users/HP/AR/Bone fracture_filtered/valid/labels"
augmented_images = "C:/Users/HP/AR/Bone fracture_filtered/aug_valid/valid_aug_images"
augmented_annotations = "C:/Users/HP/AR/Bone fracture_filtered/aug_valid/valid_aug_labels"

# Perform augmentation
process_dataset(train_images, train_labels, augmented_images, augmented_annotations)

  check_for_updates()


📂 Checking folders...
✅ Image folder exists: True
✅ Annotation folder exists: True
🔍 Scanning dataset...
📊 Class distribution before augmentation: Counter({1: 278, 2: 193, 3: 177, 4: 140, 0: 137})
📌 Augmentation required per class: {0: 141, 3: 101, 2: 85, 4: 138, 1: 0}
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpeg.rf.6748a0f2734782dd137090894073caf0.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpeg.rf.67e84a05727eb6f5fd881bffe1bbfcaa.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpeg.rf.e44f895f23706f99d1a309697402d468.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpeg.rf.f7a5134c2a50bf536d984ffd7f80460c.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpeg.rf.ffe89b5faae37cc71f97d948f62cf2db.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fracture_filtered/valid/images\1-1-_jpg.rf.19e35980c31a08b4ae54f10f4bd894e9.jpg
🔄 Augmenting: C:/Users/HP/AR/Bone fractur

In [7]:
import os
import cv2
import shutil
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from collections import Counter

def augmentations(image, bboxes, category_ids):
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.5),
        A.GaussNoise(p=0.2),
        A.Blur(p=0.2)
    ], bbox_params=A.BboxParams(format='yolo', label_fields=['category_ids']))
    return transform(image=image, bboxes=bboxes, category_ids=category_ids)

def augment_detection_data(image_path, annotation_path, num_augments=5):
    image = cv2.imread(image_path)
    if image is None:
        print(f"⚠ Error: Unable to read image {image_path}")
        return [], [], []
    
    bboxes, category_ids = [], []
    try:
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                return [], [], []
            for line in lines:
                parts = line.strip().split()
                category_ids.append(int(parts[0]))
                bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])
    except Exception as e:
        print(f"⚠ Error reading annotation file {annotation_path}: {e}")
        return [], [], []

    augmented_images, augmented_bboxes, augmented_category_ids = [], [], []
    for i in range(num_augments):
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
        augmented_images.append(augmented['image'])
        augmented_bboxes.append(augmented['bboxes'])
        augmented_category_ids.append(augmented['category_ids'])
    
    return augmented_images, augmented_bboxes, augmented_category_ids

def process_dataset(image_folder, annotation_folder, output_image_folder, output_annotation_folder):
    if not os.path.exists(output_image_folder):
        os.makedirs(output_image_folder)
    if not os.path.exists(output_annotation_folder):
        os.makedirs(output_annotation_folder)
    
    class_counts = Counter()
    image_annotations = []
    
    for img_file in os.listdir(image_folder):
        if not img_file.endswith(('.jpg', '.png')):
            continue
        
        img_path = os.path.join(image_folder, img_file)
        ann_path = os.path.join(annotation_folder, os.path.splitext(img_file)[0] + '.txt')
        
        if not os.path.exists(ann_path):
            continue
        
        with open(ann_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                continue
            categories_in_image = [int(line.strip().split()[0]) for line in lines]
            for cat in categories_in_image:
                class_counts[cat] += 1
            image_annotations.append((img_path, ann_path, categories_in_image))
    
    if not class_counts:
        print("⚠ Error: No valid class labels found.")
        return

    max_class_count = max(class_counts.values())
    augmentation_needed = {class_id: max_class_count - count for class_id, count in class_counts.items()}
    
    for img_path, ann_path, categories_in_image in image_annotations:
        if any(augmentation_needed[class_id] > 0 for class_id in categories_in_image):
            min_class_id = min(categories_in_image, key=lambda c: augmentation_needed[c])
            num_required = min(augmentation_needed[min_class_id], 5)
            augmented_images, augmented_bboxes, augmented_category_ids = augment_detection_data(img_path, ann_path, num_augments=num_required)
            
            for i, aug_image in enumerate(augmented_images):
                if augmentation_needed[min_class_id] <= 0:
                    break
                augmented_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{i}.jpg"
                cv2.imwrite(os.path.join(output_image_folder, augmented_img_name), aug_image)
                augmented_ann_name = f"{os.path.splitext(os.path.basename(ann_path))[0]}_aug{i}.txt"
                with open(os.path.join(output_annotation_folder, augmented_ann_name), 'w') as f:
                    for bbox, category_id in zip(augmented_bboxes[i], augmented_category_ids[i]):
                        f.write(f"{category_id} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")
                augmentation_needed[min_class_id] -= 1
    
    print("✅ Augmentation completed! Appending original images & annotations...")
    for img_file in os.listdir(image_folder):
        if img_file.endswith(('.jpg', '.png')):
            shutil.copy(os.path.join(image_folder, img_file), output_image_folder)
    for ann_file in os.listdir(annotation_folder):
        if ann_file.endswith('.txt'):
            shutil.copy(os.path.join(annotation_folder, ann_file), output_annotation_folder)
    print("✅ Original files appended successfully!")

# Paths
train_images = "C:/Users/HP/AR/Bone fracture_filtered/train/images"
train_labels = "C:/Users/HP/AR/Bone fracture_filtered/train/labels"
augmented_images = "C:/Users/HP/AR/Bone fracture_filtered/aug_train/train_aug_images"
augmented_annotations = "C:/Users/HP/AR/Bone fracture_filtered/aug_train/train_aug_labels"

# Perform augmentation and append original data
process_dataset(train_images, train_labels, augmented_images, augmented_annotations)


✅ Augmentation completed! Appending original images & annotations...
✅ Original files appended successfully!


In [None]:
import os
import cv2
import shutil
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from collections import Counter

def augmentations(image, bboxes, category_ids):
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.5),
        A.GaussNoise(p=0.2),
        A.Blur(p=0.2)
    ], bbox_params=A.BboxParams(format='yolo', label_fields=['category_ids']))
    return transform(image=image, bboxes=bboxes, category_ids=category_ids)

def augment_detection_data(image_path, annotation_path, num_augments=5):
    image = cv2.imread(image_path)
    if image is None:
        print(f"⚠ Error: Unable to read image {image_path}")
        return [], [], []
    
    bboxes, category_ids = [], []
    try:
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                return [], [], []
            for line in lines:
                parts = line.strip().split()
                category_ids.append(int(parts[0]))
                bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])
    except Exception as e:
        print(f"⚠ Error reading annotation file {annotation_path}: {e}")
        return [], [], []

    augmented_images, augmented_bboxes, augmented_category_ids = [], [], []
    for i in range(num_augments):
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
        augmented_images.append(augmented['image'])
        augmented_bboxes.append(augmented['bboxes'])
        augmented_category_ids.append(augmented['category_ids'])
    
    return augmented_images, augmented_bboxes, augmented_category_ids

def process_dataset(image_folder, annotation_folder, output_image_folder, output_annotation_folder):
    if not os.path.exists(output_image_folder):
        os.makedirs(output_image_folder)
    if not os.path.exists(output_annotation_folder):
        os.makedirs(output_annotation_folder)
    
    class_counts = Counter()
    image_annotations = []
    
    for img_file in os.listdir(image_folder):
        if not img_file.endswith(('.jpg', '.png')):
            continue
        
        img_path = os.path.join(image_folder, img_file)
        ann_path = os.path.join(annotation_folder, os.path.splitext(img_file)[0] + '.txt')
        
        if not os.path.exists(ann_path):
            continue
        
        with open(ann_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                continue
            categories_in_image = [int(line.strip().split()[0]) for line in lines]
            for cat in categories_in_image:
                class_counts[cat] += 1
            image_annotations.append((img_path, ann_path, categories_in_image))
    
    if not class_counts:
        print("⚠ Error: No valid class labels found.")
        return

    max_class_count = max(class_counts.values())
    augmentation_needed = {class_id: max_class_count - count for class_id, count in class_counts.items()}
    
    for img_path, ann_path, categories_in_image in image_annotations:
        if any(augmentation_needed[class_id] > 0 for class_id in categories_in_image):
            min_class_id = min(categories_in_image, key=lambda c: augmentation_needed[c])
            num_required = min(augmentation_needed[min_class_id], 5)
            augmented_images, augmented_bboxes, augmented_category_ids = augment_detection_data(img_path, ann_path, num_augments=num_required)
            
            for i, aug_image in enumerate(augmented_images):
                if augmentation_needed[min_class_id] <= 0:
                    break
                augmented_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{i}.jpg"
                cv2.imwrite(os.path.join(output_image_folder, augmented_img_name), aug_image)
                augmented_ann_name = f"{os.path.splitext(os.path.basename(ann_path))[0]}_aug{i}.txt"
                with open(os.path.join(output_annotation_folder, augmented_ann_name), 'w') as f:
                    for bbox, category_id in zip(augmented_bboxes[i], augmented_category_ids[i]):
                        f.write(f"{category_id} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")
                augmentation_needed[min_class_id] -= 1
    
    print("✅ Augmentation completed! Appending original images & annotations...")
    for img_file in os.listdir(image_folder):
        if img_file.endswith(('.jpg', '.png')):
            shutil.copy(os.path.join(image_folder, img_file), output_image_folder)
    for ann_file in os.listdir(annotation_folder):
        if ann_file.endswith('.txt'):
            shutil.copy(os.path.join(annotation_folder, ann_file), output_annotation_folder)
    print("✅ Original files appended successfully!")

# Paths
train_images = "C:/Users/HP/AR/Bone fracture_filtered/train/images"
train_labels = "C:/Users/HP/AR/Bone fracture_filtered/train/labels"
augmented_images = "C:/Users/HP/AR/Bone fracture_filtered/aug_train/train_aug_images"
augmented_annotations = "C:/Users/HP/AR/Bone fracture_filtered/aug_train/train_aug_labels"

# Perform augmentation and append original data
process_dataset(train_images, train_labels, augmented_images, augmented_annotations)


In [3]:
results = model.train(data="data_5.yaml", epochs=10, imgsz=640)

New https://pypi.org/project/ultralytics/8.3.78 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.39  Python-3.12.4 torch-2.3.1+cpu CPU (12th Gen Intel Core(TM) i5-1235U)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data_5.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train18, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_f

[34m[1mtrain: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\aug_train\train_aug_images... 0 images, 1643 backgrounds, 0 corrupt: 100%|██████████| 1643/1643 [00:05<00:00, 294.80it/s]






[34m[1mtrain: [0mNew cache created: C:\Users\HP\AR\Bone fracture_filtered\aug_train\train_aug_images.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
  A.ImageCompression(quality_lower=75, p=0.0),
[34m[1mval: [0mScanning C:\Users\HP\AR\Bone fracture_filtered\aug_valid\valid_aug_images... 0 images, 1356 backgrounds, 0 corrupt: 100%|██████████| 1356/1356 [00:03<00:00, 400.21it/s]






[34m[1mval: [0mNew cache created: C:\Users\HP\AR\Bone fracture_filtered\aug_valid\valid_aug_images.cache
Plotting labels to C:\Users\HP\runs\detect\train18\labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mC:\Users\HP\runs\detect\train18[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid

  A.ImageCompression(quality_lower=75, p=0.0),
       1/10         0G          0        107          0          0        640: 100%|██████████| 103/103 [33:23<00:00, 19.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 43/43 [05:06<00:00,  7.14s/it]

                   all       1356          0          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G          0      79.32          0          0        640: 100%|██████████| 103/103 [38:35<00:00, 22.48s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 43/43 [07:03<00:00,  9.85s/it]

                   all       1356          0          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G          0      57.73          0          0        640: 100%|██████████| 103/103 [16:33<00:00,  9.65s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 43/43 [03:44<00:00,  5.22s/it]

                   all       1356          0          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G          0       47.2          0          0        640:   1%|          | 1/103 [00:33<57:37, 33.90s/it]


KeyboardInterrupt: 

In [1]:
from ultralytics import YOLO


In [2]:
# Load a pre-trained YOLOv8 model (change 'yolov8n' to your model)
model = YOLO("yolov8n.pt")  # 'n' stands for nano, can be 's', 'm', 'l', 'x' based on size


In [15]:
import os
import cv2
import shutil
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from collections import Counter

def augmentations(image, bboxes, category_ids):
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.5),
        A.GaussNoise(p=0.2),
        A.Blur(p=0.2)
    ], bbox_params=A.BboxParams(format='yolo', label_fields=['category_ids']))
    return transform(image=image, bboxes=bboxes, category_ids=category_ids)

def augment_detection_data(image_path, annotation_path, num_augments=5):
    image = cv2.imread(image_path)
    if image is None:
        print(f"⚠ Error: Unable to read image {image_path}")
        return [], [], []
    
    bboxes, category_ids = [], []
    try:
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                return [], [], []
            for line in lines:
                parts = line.strip().split()
                category_ids.append(int(parts[0]))
                bboxes.append([float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])])
    except Exception as e:
        print(f"⚠ Error reading annotation file {annotation_path}: {e}")
        return [], [], []

    augmented_images, augmented_bboxes, augmented_category_ids = [], [], []
    for i in range(num_augments):
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
        augmented_images.append(augmented['image'])
        augmented_bboxes.append(augmented['bboxes'])
        augmented_category_ids.append(augmented['category_ids'])
    
    return augmented_images, augmented_bboxes, augmented_category_ids

def process_dataset(image_folder, annotation_folder, output_image_folder, output_annotation_folder):
    if not os.path.exists(output_image_folder):
        os.makedirs(output_image_folder)
    if not os.path.exists(output_annotation_folder):
        os.makedirs(output_annotation_folder)
    
    class_counts = Counter()
    image_annotations = []
    
    for img_file in os.listdir(image_folder):
        if not img_file.endswith(('.jpg', '.png')):
            continue
        
        img_path = os.path.join(image_folder, img_file)
        ann_path = os.path.join(annotation_folder, os.path.splitext(img_file)[0] + '.txt')
        
        if not os.path.exists(ann_path):
            continue
        
        with open(ann_path, 'r') as f:
            lines = f.readlines()
            if not lines:
                continue
            categories_in_image = [int(line.strip().split()[0]) for line in lines]
            for cat in categories_in_image:
                class_counts[cat] += 1
            image_annotations.append((img_path, ann_path, categories_in_image))
    
    if not class_counts:
        print("⚠ Error: No valid class labels found.")
        return

    max_class_count = max(class_counts.values())
    augmentation_needed = {class_id: max_class_count - count for class_id, count in class_counts.items()}
    
    for img_path, ann_path, categories_in_image in image_annotations:
        if any(augmentation_needed[class_id] > 0 for class_id in categories_in_image):
            min_class_id = min(categories_in_image, key=lambda c: augmentation_needed[c])
            num_required = min(augmentation_needed[min_class_id], 5)
            augmented_images, augmented_bboxes, augmented_category_ids = augment_detection_data(img_path, ann_path, num_augments=num_required)
            
            for i, aug_image in enumerate(augmented_images):
                if augmentation_needed[min_class_id] <= 0:
                    break
                augmented_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{i}.jpg"
                cv2.imwrite(os.path.join(output_image_folder, augmented_img_name), aug_image)
                augmented_ann_name = f"{os.path.splitext(os.path.basename(ann_path))[0]}_aug{i}.txt"
                with open(os.path.join(output_annotation_folder, augmented_ann_name), 'w') as f:
                    for bbox, category_id in zip(augmented_bboxes[i], augmented_category_ids[i]):
                        f.write(f"{category_id} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")
                augmentation_needed[min_class_id] -= 1
    
    print("✅ Augmentation completed! Appending original images & annotations...")
    for img_file in os.listdir(image_folder):
        if img_file.endswith(('.jpg', '.png')):
            shutil.copy(os.path.join(image_folder, img_file), output_image_folder)
    for ann_file in os.listdir(annotation_folder):
        if ann_file.endswith('.txt'):
            shutil.copy(os.path.join(annotation_folder, ann_file), output_annotation_folder)
    print("✅ Original files appended successfully!")

# Paths for Validation Set
val_images = "C:/Users/HP/AR/Bone fracture_filtered/valid/images"
val_labels = "C:/Users/HP/AR/Bone fracture_filtered/valid/labels"
augmented_val_images = "C:/Users/HP/AR/Bone fracture_filtered/aug_valid/valid_aug_images"
augmented_val_labels = "C:/Users/HP/AR/Bone fracture_filtered/aug_valid/valid_aug_labels"

# Perform augmentation and append original validation data
process_dataset(val_images, val_labels, augmented_val_images, augmented_val_labels)


✅ Augmentation completed! Appending original images & annotations...
✅ Original files appended successfully!
