# In [1]:
import ast
import os
import shutil

import cv2
import pandas as pd

# === USER INPUT ===
# Annotation CSV; the script reads its 'fname', 'source', 'target' and 'bbox' columns.
csv_path = r"C:\Users\Devansh\Desktop\ProjectYolo\Datasets\TBx11-New\new_data.csv"
# Folder containing the source images referenced by the CSV's 'fname' column.
image_dir = r"C:\Users\Devansh\Desktop\ProjectYolo\Datasets\TBx11-New\New-Images"
# Root of the generated dataset: images/<split>, labels/<split> and data.yaml are written here.
output_dir = r"C:\Users\Devansh\Desktop\ProjectYolo\yolo-Datasets\new-yolo-tbx11k"

# === YOLOv8 STRUCTURE ===
# Create <output_dir>/images/<split> and <output_dir>/labels/<split> for every
# split; folders that already exist are left untouched.
splits = ['train', 'val']
for split in splits:
    for kind in ('images', 'labels'):
        os.makedirs(os.path.join(output_dir, kind, split), exist_ok=True)

# === LOAD CSV ===
df = pd.read_csv(csv_path)

# === PROCESS ONLY TB IMAGES, with TRAIN and VAL SPLIT ===
# Keep a row only when it is labelled 'tb' AND belongs to the train or val split.
is_tb = df['target'] == 'tb'
in_train_or_val = df['source'].isin(['train', 'val'])
tb_df = df[is_tb & in_train_or_val]

# === UTILITY: Normalize bbox for YOLO format ===
def convert_bbox_yolo(xmin, ymin, width, height, img_w, img_h):
    """Convert a top-left/size box into centre/size values scaled by the image size.

    Args:
        xmin: Left edge of the box.
        ymin: Top edge of the box.
        width: Box width.
        height: Box height.
        img_w: Image width used to scale the horizontal values.
        img_h: Image height used to scale the vertical values.

    Returns:
        A tuple ``(x_center, y_center, w, h)`` where the horizontal values are
        divided by ``img_w`` and the vertical values by ``img_h``.
    """
    # Box centre in the same units as the inputs, before scaling.
    centre_x = xmin + width / 2
    centre_y = ymin + height / 2
    return (
        centre_x / img_w,
        centre_y / img_h,
        width / img_w,
        height / img_h,
    )

# === PROCESS EACH IMAGE ===
def _parse_bbox(raw):
    """Return the bbox dict stored in a CSV 'bbox' cell, or None if it holds no box.

    Args:
        raw: Cell value. Expected to be the string 'none' or the text of a
            Python dict literal with 'xmin', 'ymin', 'width' and 'height' keys.
            Non-string values (e.g. NaN from an empty CSV field) count as
            "no box".

    Returns:
        The parsed dict, or None when the cell holds no box.

    Raises:
        ValueError, SyntaxError: If the cell text is not a valid Python literal.
    """
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    if text == 'none':
        return None
    # literal_eval only accepts Python literals, so a crafted CSV cell cannot
    # run arbitrary code the way it could with eval().
    return ast.literal_eval(text)


# Rows are grouped per (split, image) so that an image annotated on several
# CSV rows is copied once and gets ALL of its boxes in a single label file.
# Re-opening the label file in 'w' mode for every row would keep only the
# last box. sort=False keeps the CSV's order of first appearance.
for (split, fname), rows in tb_df.groupby(['source', 'fname'], sort=False):
    src_img_path = os.path.join(image_dir, fname)
    dst_img_path = os.path.join(output_dir, 'images', split, fname)

    if not os.path.exists(src_img_path):
        print(f"Image missing: {src_img_path}")
        continue

    # Empty or missing boxes are dropped, matching the original truthiness check.
    bboxes = [bbox for bbox in map(_parse_bbox, rows['bbox']) if bbox]

    if not bboxes:
        # Nothing to label: the image is still copied, but no label file is written.
        shutil.copy(src_img_path, dst_img_path)
        continue

    # The pixel size is needed to normalise the boxes. Readability is checked
    # BEFORE copying so an undecodable image never ends up in the dataset
    # without its label file.
    img = cv2.imread(src_img_path)
    if img is None:
        print(f"Failed to read: {src_img_path}")
        continue
    h, w = img.shape[:2]

    # Copy image
    shutil.copy(src_img_path, dst_img_path)

    # Prepare label file: one line per box, class 0 for TB
    label_path = os.path.join(output_dir, 'labels', split, os.path.splitext(fname)[0] + '.txt')
    with open(label_path, 'w') as f:
        for bbox in bboxes:
            x_center, y_center, bw, bh = convert_bbox_yolo(
                bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height'], w, h
            )
            f.write(f"0 {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}\n")

print("\n✅ All images and labels processed.")

# === CREATE data.yaml ===
# Dataset descriptor: dataset root (with forward slashes), the train/val image
# folders relative to that root, the class count and the class names.
yaml_path = os.path.join(output_dir, 'data.yaml')
yaml_lines = [
    f"path: {output_dir.replace(os.sep, '/')}\n",
    "train: images/train\n",
    "val: images/val\n",
    "\n",
    "nc: 1\n",
    "names: ['tb']\n",
]
# Write as UTF-8 explicitly: with the platform default encoding (e.g. cp1252
# on Windows) a dataset path containing non-ASCII characters would be stored
# in a form that a UTF-8 YAML reader cannot decode correctly.
with open(yaml_path, 'w', encoding='utf-8') as f:
    f.writelines(yaml_lines)

print(f"\n✅ YOLOv8 dataset structure created at:\n{output_dir}")
print(f"✅ data.yaml created at: {yaml_path}")



# Output:
# ✅ All images and labels processed.
#
# ✅ YOLOv8 dataset structure created at:
# C:\Users\Devansh\Desktop\ProjectYolo\yolo-Datasets\new-yolo-tbx11k
# ✅ data.yaml created at: C:\Users\Devansh\Desktop\ProjectYolo\yolo-Datasets\new-yolo-tbx11k\data.yaml
