In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.87-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.87-py3-none-any.whl (923 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m923.8/923.8 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.87 ultralytics-thop-2.0.14


In [2]:
import os
import xml.etree.ElementTree as ET
import random
import shutil
import numpy as np
import cv2
import torch
from ultralytics import YOLO
from sklearn.metrics import f1_score
from pathlib import Path

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# Define class names
classes = ["nodule"]

In [4]:
# ============== DATA PROCESSING FUNCTIONS ==============

# Convert XML annotations to YOLO format
def convert_annotation(xml_file, output_dir):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    image_width = int(root.find("size/width").text)
    image_height = int(root.find("size/height").text)
    
    label_file = os.path.join(output_dir, os.path.basename(xml_file).replace(".xml", ".txt"))
    with open(label_file, "w") as f:
        for obj in root.findall("object"):
            class_name = obj.find("name").text
            if class_name not in classes:
                continue
            class_id = classes.index(class_name)
            
            bbox = obj.find("bndbox")
            xmin = round(float(bbox.find("xmin").text))
            ymin = round(float(bbox.find("ymin").text))
            xmax = round(float(bbox.find("xmax").text))
            ymax = round(float(bbox.find("ymax").text))
            
            # Convert to YOLO format
            x_center = (xmin + xmax) / 2 / image_width
            y_center = (ymin + ymax) / 2 / image_height
            bbox_width = (xmax - xmin) / image_width
            bbox_height = (ymax - ymin) / image_height
            
            f.write(f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}\n")


In [5]:
# Split dataset into train, validation, and test sets
def split_dataset(images_path, labels_path, train_ratio=0.7, val_ratio=0.15):
    images = [f for f in os.listdir(images_path) if f.endswith(('.jpg', '.jpeg', '.png'))]
    random.seed(42)  # For reproducibility
    random.shuffle(images)
    
    train_size = int(len(images) * train_ratio)
    val_size = int(len(images) * val_ratio)

    train_files = images[:train_size]
    val_files = images[train_size:train_size + val_size]
    test_files = images[train_size + val_size:]

    for folder in ["train", "val", "test"]:
        os.makedirs(f"dataset/images/{folder}", exist_ok=True)
        os.makedirs(f"dataset/labels/{folder}", exist_ok=True)
    
    # Copy images and corresponding labels to respective folders
    for file_list, subset in zip([train_files, val_files, test_files], ["train", "val", "test"]):
        for img_file in file_list:
            # Copy image
            src_img = os.path.join(images_path, img_file)
            dst_img = os.path.join(f"dataset/images/{subset}", img_file)
            shutil.copy(src_img, dst_img)
            
            # Copy label if exists
            label_file = os.path.splitext(img_file)[0] + ".txt"
            src_label = os.path.join(labels_path, label_file)
            if os.path.exists(src_label):
                dst_label = os.path.join(f"dataset/labels/{subset}", label_file)
                shutil.copy(src_label, dst_label)
    
    print(f"Dataset split complete: {len(train_files)} training, {len(val_files)} validation, {len(test_files)} test images")
    return len(train_files), len(val_files), len(test_files)


In [6]:
# ============== DATA AUGMENTATION FUNCTIONS ==============

def apply_augmentations(image_dir, label_dir, output_image_dir, output_label_dir, num_augmentations=3):
    """
    Apply augmentations to images and corresponding labels
    """
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    
    # Create output directories if they don't exist
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)
    
    # Define augmentation pipeline
    transform = A.Compose([
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.RandomGamma(gamma_limit=(80, 120), p=0.5),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.5),
        ], p=0.5),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
        ], p=0.5),
        A.OneOf([
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
            A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.5),
        ], p=0.5),
        A.Flip(p=0.5),
        A.RandomRotate90(p=0.5),
    ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
    
    # Get list of images
    image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
    
    augmented_count = 0
    for img_file in image_files:
        # Load image
        img_path = os.path.join(image_dir, img_file)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Load corresponding label if exists
        label_path = os.path.join(label_dir, os.path.splitext(img_file)[0] + '.txt')
        if not os.path.exists(label_path):
            continue
            
        # Parse YOLO format labels
        bboxes = []
        class_labels = []
        
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                class_id = int(parts[0])
                x_center, y_center, width, height = map(float, parts[1:5])
                bboxes.append([x_center, y_center, width, height])
                class_labels.append(class_id)
        
        # Skip if no bounding boxes
        if not bboxes:
            continue
            
        # Copy original image and label
        shutil.copy(img_path, os.path.join(output_image_dir, img_file))
        shutil.copy(label_path, os.path.join(output_label_dir, os.path.splitext(img_file)[0] + '.txt'))
        
        # Apply augmentations
        for i in range(num_augmentations):
            augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
            aug_image = augmented['image']
            aug_bboxes = augmented['bboxes']
            aug_class_labels = augmented['class_labels']
            
            # Skip if no bounding boxes after augmentation
            if not aug_bboxes:
                continue
                
            # Save augmented image
            aug_img_file = f"{os.path.splitext(img_file)[0]}_aug{i+1}{os.path.splitext(img_file)[1]}"
            aug_img_path = os.path.join(output_image_dir, aug_img_file)
            cv2.imwrite(aug_img_path, cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR))
            
            # Save augmented labels
            aug_label_file = f"{os.path.splitext(img_file)[0]}_aug{i+1}.txt"
            aug_label_path = os.path.join(output_label_dir, aug_label_file)
            
            with open(aug_label_path, 'w') as f:
                for bbox, class_id in zip(aug_bboxes, aug_class_labels):
                    f.write(f"{class_id} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")
            
            augmented_count += 1
    
    print(f"Created {augmented_count} augmented images")
    return augmented_count


In [7]:
# ============== DATASET PREPARATION ==============

def prepare_dataset(annotations_path, images_path, apply_augmentation=True):
    # Create base directories
    os.makedirs("dataset/labels", exist_ok=True)
    
    # Convert all annotations to YOLO format
    print("Converting annotations to YOLO format...")
    for xml_file in os.listdir(annotations_path):
        if xml_file.endswith(".xml"):
            convert_annotation(os.path.join(annotations_path, xml_file), "dataset/labels")
    
    # Split dataset
    print("Splitting dataset...")
    train_count, val_count, test_count = split_dataset(images_path, "dataset/labels")
    
    # Apply augmentations to training set
    if apply_augmentation:
        print("Applying augmentations to training set...")
        aug_count = apply_augmentations(
            "dataset/images/train", 
            "dataset/labels/train",
            "dataset/images/train_aug", 
            "dataset/labels/train_aug",
            num_augmentations=3
        )
        
        # Merge original and augmented data
        for file_type in ["images", "labels"]:
            aug_dir = f"dataset/{file_type}/train_aug"
            train_dir = f"dataset/{file_type}/train"
            
            if os.path.exists(aug_dir):
                for file in os.listdir(aug_dir):
                    shutil.copy(os.path.join(aug_dir, file), os.path.join(train_dir, file))
                
                # Remove temporary augmentation directory
                shutil.rmtree(aug_dir)
        
        print(f"Final training set: {train_count + aug_count} images (including {aug_count} augmented)")
    
    # Create dataset.yaml for YOLOv8
    yaml_content = f"""
    path: ./dataset
    train: images/train
    val: images/val
    test: images/test
    
    nc: {len(classes)}
    names: {classes}
    """
    
    with open("dataset.yaml", "w") as f:
        f.write(yaml_content)
    
    print("Dataset preparation complete!")

In [15]:
# ============== MODEL TRAINING ==============

def train_model(epochs=50, img_size=640, batch_size=16):
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader
    from ultralytics import YOLO
    from ultralytics.data.dataset import YOLODataset
    from ultralytics.utils.metrics import box_iou
    from tqdm import tqdm
    import numpy as np
    import time
    import yaml

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Load YOLOv8m model
    model = YOLO('yolov8m.pt')

    # Multi-GPU support
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Get the model's task-specific modules
    # model = model.model.to(device)

    # Create datasets and dataloaders
    train_dataset = YOLODataset(
        img_path="dataset/images/train",
        data={"names": classes},
        imgsz=img_size,
        augment=True,
        # hyp={"flipud": 0.0, "fliplr": 0.5, "mixup": 0.0},
        prefix="train"
    )
    
    val_dataset = YOLODataset(
        img_path="dataset/images/val",
        data={"names": classes},
        imgsz=img_size,
        augment=False,
        # hyp=None,
        prefix="val"
    )
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size * torch.cuda.device_count(),  # Scale batch size
        shuffle=True,
        num_workers=4,
        collate_fn=train_dataset.collate_fn,
        pin_memory=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size * torch.cuda.device_count(),
        shuffle=False,
        num_workers=4,
        collate_fn=val_dataset.collate_fn,
        pin_memory=True
    )
    
    # Optimizer, scheduler, and loss function
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.937, weight_decay=0.0005)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    # compute_loss = model.module.loss if hasattr(model, 'module') else model.loss


    # Training loop
    best_map = 0
    start_time = time.time()

    model.train(data='dataset/data.yaml', epochs=epochs, imgsz=img_size, batch=batch_size)

    # Training complete
    total_time = time.time() - start_time
    print(f"Training complete in {total_time/60:.2f} minutes")
    print(f"Best mAP: {best_map:.4f}")
    
    # Load best model
    model.load_state_dict(torch.load("best_model.pt"))

    # Convert back to YOLO format for saving
    yolo_model = YOLO('yolov8m.pt')
    yolo_model.model = model

    return yolo_model, {"best_map": best_map}, {"training_time": total_time}

In [9]:
# Install required packages
import subprocess
subprocess.run(["pip", "install", "ultralytics", "albumentations"])
    
# Paths for dataset directories
annotations_path = "/kaggle/input/pulmonary-nodule-detection/train/anno"
images_path = "/kaggle/input/pulmonary-nodule-detection/train/jpg"
    
# Check if directories exist
if not os.path.isdir(annotations_path) or not os.path.isdir(images_path):
    print(f"Warning: One or more directories don't exist: {annotations_path}, {images_path}")
    print("Please update the paths to match your dataset location.")
    
# Prepare dataset
prepare_dataset(annotations_path, images_path, apply_augmentation=True)


Converting annotations to YOLO format...
Splitting dataset...
Dataset split complete: 1050 training, 225 validation, 225 test images
Applying augmentations to training set...


  check_for_updates()
  A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.5),
  A.Flip(p=0.5),


Created 3150 augmented images
Final training set: 4200 images (including 3150 augmented)
Dataset preparation complete!


In [16]:
# Train model
model, results, metrics = train_model(epochs=50)
    
# Save model
model.save("yolov8m_pulmonary.pt")
    
print("Training complete! Model saved as 'yolov8m_pulmonary.pt'.")

Using device: cuda


trainScanning dataset/labels/train.cache... 4200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4200/4200 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



valScanning dataset/labels/val.cache... 225 images, 0 backgrounds, 0 corrupt: 100%|██████████| 225/225 [00:00<?, ?it/s]


TypeError: Module.train() got an unexpected keyword argument 'data'