In [2]:
# Importing necessary libraries
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2

# Define paths for images and labels
train_image_dir = "BoneFractureYolo8\train\images"  # Replace with the actual path to the train image folder
train_label_dir = "BoneFractureYolo8\train\labels"  # Replace with the actual path to the train label folder

# Check the number of images and labels
image_files = sorted([f for f in os.listdir(train_image_dir) if f.endswith('.jpg') or f.endswith('.png')])
label_files = sorted([f for f in os.listdir(train_label_dir) if f.endswith('.txt')])

print(f"Number of images: {len(image_files)}")
print(f"Number of labels: {len(label_files)}")

# Ensure that each image has a corresponding label file
assert len(image_files) == len(label_files), "Mismatch between number of images and labels!"
for img, lbl in zip(image_files, label_files):
    assert os.path.splitext(img)[0] == os.path.splitext(lbl)[0], f"Image {img} does not match label {lbl}!"

# Function to parse a single label file
def parse_label_file(label_path):
    """
    Parse a YOLO-style label file.
    Returns a list of dictionaries containing class and bounding box information.
    """
    with open(label_path, 'r') as file:
        lines = file.readlines()
        labels = []
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            bbox = list(map(float, parts[1:]))
            labels.append({'class': class_id, 'bbox': bbox})
        return labels

# Analyze class distribution in the training dataset
class_counts = {}
for label_file in label_files:
    label_path = os.path.join(train_label_dir, label_file)
    labels = parse_label_file(label_path)
    for label in labels:
        class_id = label['class']
        class_counts[class_id] = class_counts.get(class_id, 0) + 1


  train_image_dir = "BoneFractureYolo8\train\images"  # Replace with the actual path to the train image folder
  train_label_dir = "BoneFractureYolo8\train\labels"  # Replace with the actual path to the train label folder


OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'BoneFractureYolo8\train\\images'

In [None]:

# Plot class distribution
plt.figure(figsize=(8, 6))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), palette="viridis")
plt.title("Class Distribution in Training Dataset")
plt.xlabel("Class ID")
plt.ylabel("Count")
plt.show()

# Analyze image dimensions and aspect ratios
image_shapes = []
for img_file in image_files:
    img_path = os.path.join(train_image_dir, img_file)
    with Image.open(img_path) as img:
        image_shapes.append(img.size)  # (width, height)

# Convert image dimensions to DataFrame for analysis
image_data = pd.DataFrame(image_shapes, columns=['Width', 'Height'])
image_data['AspectRatio'] = image_data['Width'] / image_data['Height']

# Plot distribution of image dimensions
plt.figure(figsize=(10, 6))
sns.scatterplot(data=image_data, x='Width', y='Height', alpha=0.5)
plt.title("Image Dimensions Distribution")
plt.xlabel("Width")
plt.ylabel("Height")
plt.show()

# Plot aspect ratio distribution
plt.figure(figsize=(10, 6))
sns.histplot(image_data['AspectRatio'], bins=30, kde=True, color='blue')
plt.title("Image Aspect Ratio Distribution")
plt.xlabel("Aspect Ratio (Width/Height)")
plt.ylabel("Frequency")
plt.show()

# Visualize sample images with bounding boxes
def plot_image_with_bboxes(image_path, label_path):
    """
    Display an image with its bounding boxes.
    """
    # Load image
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # Parse labels
    labels = parse_label_file(label_path)
    
    # Draw bounding boxes on the image
    h, w, _ = img.shape
    for label in labels:
        class_id = label['class']
        bbox = label['bbox']
        x_center, y_center, bbox_width, bbox_height = bbox
        
        # Convert YOLO format to pixel coordinates
        x_min = int((x_center - bbox_width / 2) * w)
        y_min = int((y_center - bbox_height / 2) * h)
        x_max = int((x_center + bbox_width / 2) * w)
        y_max = int((y_center + bbox_height / 2) * h)
        
        # Draw rectangle and add class id text
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        cv2.putText(img, str(class_id), (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.5, color=(255, 0, 0), thickness=1)

    # Display the image
    plt.figure(figsize=(8, 8))
    plt.imshow(img)
    plt.axis('off')
    plt.show()

# Display a few sample images with bounding boxes
for i in range(3):  # Adjust number of samples as needed
    img_path = os.path.join(train_image_dir, image_files[i])
    lbl_path = os.path.join(train_label_dir, label_files[i])
    plot_image_with_bboxes(img_path, lbl_path)
