In [None]:
import os
import cv2
import yaml
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
import pickle

# ✅ Define dataset paths
# Raw strings keep the Windows backslashes literal. In a normal string,
# sequences such as "\P" or "\d" are invalid escapes: they currently survive
# unchanged but emit a SyntaxWarning on recent Python versions.
DATASET_ROOT = r"E:\PFE\Flower code\data created\client_cbis_ddsm_1"
TRAIN_PATH = r"E:\PFE\Flower code\data original\DATA\Mass\Train"
TEST_PATH = r"E:\PFE\Flower code\data original\DATA\Mass\Test"

# ✅ Create YOLO dataset structure (an images/ and a labels/ folder per split)
train_img_dir, train_lbl_dir, valid_img_dir, valid_lbl_dir = (
    os.path.join(DATASET_ROOT, sub_dir)
    for sub_dir in ("train/images", "train/labels", "valid/images", "valid/labels")
)
# Create every output folder up front; already-existing folders are left alone.
for _split_dir in (train_img_dir, train_lbl_dir, valid_img_dir, valid_lbl_dir):
    os.makedirs(_split_dir, exist_ok=True)

# ✅ Function to Extract Bounding Boxes from Masks
def find_bounding_boxes(mask_path, min_size=10):
    """Extract bounding boxes of the foreground regions in a mask image.

    The mask is read as grayscale, binarized at a threshold of 127, cleaned
    with a 5x5 morphological opening, and the bounding rectangle of every
    external contour is collected if it is large enough.

    Args:
        mask_path: Path of the mask image file.
        min_size: A box is kept only when both its width and its height are
            strictly greater than this many pixels (default 10).

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` pixel boxes, where
        ``x_max = x + w`` and ``y_max = y + h`` of the bounding rectangle.
        The list is empty when no region passes the size filter.

    Raises:
        OSError: If the mask file cannot be read or decoded.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside OpenCV.
        raise OSError(f"Could not read mask image: {mask_path}")

    # IMREAD_GRAYSCALE yields an 8-bit image, so the thresholded mask is
    # already uint8 and needs no extra cast.
    _, binary_mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    # ✅ Apply Morphological Operations to Remove Noise
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Drop tiny regions that survived the opening.
        if w > min_size and h > min_size:
            boxes.append([x, y, x + w, y + h])
    return boxes

# ✅ Collect Image Data (Unified Mass Detection)
def collect_data(root_path):
    """Pair every image with its mask and compute the mask's bounding boxes.

    Each of the ``BENIGN`` and ``MALIGNANT`` sub-folders of ``root_path`` is
    listed in sorted order. An image is a file whose name does not contain
    ``"MASK"``; it is paired with the file that directly follows it when that
    file's name contains ``"MASK"``. Files that do not fit this pattern
    (an image without a mask, a stray or extra mask) are skipped one at a
    time, so a single orphan file no longer shifts and breaks every pair
    that comes after it.

    Args:
        root_path: Folder containing the ``BENIGN`` and ``MALIGNANT``
            sub-folders.

    Returns:
        A ``pandas.DataFrame`` with columns ``image`` (image path), ``boxes``
        (list returned by ``find_bounding_boxes`` for the mask) and ``label``
        (always 0, because both classes are merged into one "mass" class).
    """
    data = []
    for class_label in ("BENIGN", "MALIGNANT"):
        class_path = os.path.join(root_path, class_label)
        all_files = sorted(os.listdir(class_path))
        i = 0
        while i < len(all_files):
            img_file = all_files[i]
            mask_file = all_files[i + 1] if i + 1 < len(all_files) else None
            is_pair = (
                "MASK" not in img_file
                and mask_file is not None
                and "MASK" in mask_file
            )
            if not is_pair:
                # Orphan file: step by one only, to resynchronise the pairing.
                i += 1
                continue
            i += 2
            img_path = os.path.join(class_path, img_file)
            mask_path = os.path.join(class_path, mask_file)
            if os.path.exists(mask_path):
                boxes = find_bounding_boxes(mask_path)
                data.append([img_path, boxes, 0])  # ✅ Single class for all masses
    return pd.DataFrame(data, columns=["image", "boxes", "label"])

# ✅ Load and Merge Data
train_df = collect_data(TRAIN_PATH)
test_df = collect_data(TEST_PATH)
# Pool both source folders, then shuffle reproducibly (fixed seed 42).
merged_df = pd.concat([train_df, test_df])
merged_df = merged_df.sample(frac=1, random_state=42)
merged_df = merged_df.reset_index(drop=True)

# ✅ Manual 80-20 Split: the first 80% of shuffled rows train, the rest validate
split_idx = int(0.8 * len(merged_df))
train_df, valid_df = (
    part.reset_index(drop=True)
    for part in (merged_df.iloc[:split_idx], merged_df.iloc[split_idx:])
)

# ✅ Function to Process and Save Images/Labels
def process_and_save(df, img_dir, lbl_dir):
    """Write each image and its YOLO-format label file to the dataset folders.

    For every row the image is read, re-saved under its base name in
    ``img_dir``, and a ``.txt`` file with the same stem is written to
    ``lbl_dir``. Each label line is
    ``<label> <x_center> <y_center> <width> <height>`` with all four box
    values divided by the image width/height. A row without boxes produces
    an empty label file.

    Args:
        df: DataFrame with columns ``image`` (path), ``boxes`` (list of
            ``[x1, y1, x2, y2]`` pixel boxes) and ``label`` (class id).
        img_dir: Output folder for the images.
        lbl_dir: Output folder for the label files.

    Raises:
        OSError: If an image cannot be read or cannot be written.
    """
    for _, row in tqdm(df.iterrows(), total=len(df)):
        img_path, boxes, label = row["image"], row["boxes"], row["label"]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None on failure; report the path explicitly.
            raise OSError(f"Could not read image: {img_path}")
        h, w = image.shape[:2]

        img_filename = os.path.basename(img_path)
        out_img_path = os.path.join(img_dir, img_filename)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(out_img_path, image):
            raise OSError(f"Could not write image: {out_img_path}")

        # Swap only the real extension, so non-.png images and names that
        # contain ".png" in the middle still get a "<stem>.txt" label.
        stem = os.path.splitext(img_filename)[0]
        lbl_path = os.path.join(lbl_dir, stem + ".txt")

        yolo_boxes = [
            f"{label} {(x1 + x2) / 2 / w:.6f} {(y1 + y2) / 2 / h:.6f} "
            f"{(x2 - x1) / w:.6f} {(y2 - y1) / h:.6f}"
            for x1, y1, x2, y2 in boxes
        ]
        # With no boxes the join is "", which leaves an empty label file.
        with open(lbl_path, "w") as f:
            f.write("\n".join(yolo_boxes))

# ✅ Process and Save Data (training split first, then validation split)
for _split_df, _img_out, _lbl_out in (
    (train_df, train_img_dir, train_lbl_dir),
    (valid_df, valid_img_dir, valid_lbl_dir),
):
    process_and_save(_split_df, _img_out, _lbl_out)

# ✅ Create YOLO Dataset YAML File
# Image folders are given relative to "path"; there is one class, "mass".
yolo_config = dict(
    path=DATASET_ROOT,
    train="train/images",
    val="valid/images",
    names={0: "mass"},  # ✅ Single class for all masses
)

_yaml_path = os.path.join(DATASET_ROOT, "cbis_ddsm.yaml")
with open(_yaml_path, "w") as yaml_file:
    yaml.dump(yolo_config, stream=yaml_file)

print("✅ YOLO dataset for CBIS-DDSM (Mass Detection) created successfully!")

# ✅ Train YOLO Model
from ultralytics import YOLO


def _print_box_metrics(title, metrics):
    """Print ``title`` followed by mAP50, mAP50-95, precision and recall."""
    print(title)
    print(f"mAP50: {metrics.box.map50:.4f}")
    print(f"mAP50-95: {metrics.box.map:.4f}")
    print(f"Precision: {metrics.box.mp:.4f}")
    print(f"Recall: {metrics.box.mr:.4f}")


data_yaml = os.path.join(DATASET_ROOT, "cbis_ddsm.yaml")
# Raw string: "\P", "\F" and "\y" are invalid escapes in a normal literal and
# emit a SyntaxWarning on recent Python versions.
model = YOLO(r"E:\PFE\Flower code\yolo models\yolo11m.pt")
# NOTE(review): the recorded run log shows 7.96G GPU_mem on a 6144MiB card at
# roughly 187 s/it, so batch=8 may be exceeding VRAM - consider a smaller
# batch; confirm on the target machine.
model.train(
    augment=True,
    data=data_yaml,
    epochs=100,
    imgsz=640,
    batch=8,
    optimizer="Adam",  # Use Adam optimizer
    lr0=0.0001,        # Learning rate
    weight_decay=0.0005,
    device="cuda",
    workers=4,
    verbose=True
)

# Evaluate the trained model on the validation split...
metrics = model.val(data=data_yaml, split="val")
_print_box_metrics("Validation Results:", metrics)

# ...and on the training split, to compare the two sets of scores.
metrics = model.val(data=data_yaml, split="train")
_print_box_metrics("training Results:", metrics)

100%|██████████| 1273/1273 [15:38<00:00,  1.36it/s]
100%|██████████| 319/319 [04:19<00:00,  1.23it/s]


✅ YOLO dataset for CBIS-DDSM (Mass Detection) created successfully!
Ultralytics 8.3.107  Python-3.11.9 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce GTX 1660 SUPER, 6144MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=E:\PFE\Flower code\yolo models\yolo11m.pt, data=E:\PFE\Flower code\data created\client_cbis_ddsm_1\cbis_ddsm.yaml, epochs=100, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=4, project=None, name=train11, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retin

[34m[1mtrain: [0mScanning E:\PFE\Flower code\data created\client_cbis_ddsm_1\train\labels... 1273 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1273/1273 [08:25<00:00,  2.52it/s]


[34m[1mtrain: [0mNew cache created: E:\PFE\Flower code\data created\client_cbis_ddsm_1\train\labels.cache


[34m[1mval: [0mScanning E:\PFE\Flower code\data created\client_cbis_ddsm_1\valid\labels... 319 images, 0 backgrounds, 0 corrupt: 100%|██████████| 319/319 [02:12<00:00,  2.41it/s]

[34m[1mval: [0mNew cache created: E:\PFE\Flower code\data created\client_cbis_ddsm_1\valid\labels.cache





Plotting labels to runs\detect\train11\labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.0001, momentum=0.937) with parameter groups 106 weight(decay=0.0), 113 weight(decay=0.0005), 112 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns\detect\train11[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      7.96G      2.646      3.643        1.9         18        640:  11%|█         | 17/160 [13:02<7:26:00, 187.14s/it]