In [1]:
import os

import torch
import wandb
from ultralytics import YOLO

# 1. Login to Weights & Biases
# Requires an account at wandb.ai. Authenticating here, before any training
# cell runs, surfaces credential problems up front rather than mid-run.
wandb.login()

# 2. GPU Setup
def select_ideal_gpus():
    """
    Choose the `device` value to hand to Ultralytics, where -1 requests the
    most idle GPU.

    Returns:
        'cpu' when CUDA is not available.
        -1 when exactly one GPU is detected.
        A list of -1 entries otherwise, one per GPU to use:
        with 2-3 GPUs every GPU is used; with 4 or more, two are held back
        (e.g., 4 -> [-1, -1], 8 -> [-1] * 6).

    Side effects:
        Prints the number of GPUs detected when CUDA is available.
    """
    if not torch.cuda.is_available():
        return 'cpu'

    gpu_total = torch.cuda.device_count()
    print(f"Total GPUs detected: {gpu_total}")

    # A lone GPU is reported as a bare -1 rather than a one-element list.
    if gpu_total == 1:
        return -1

    # From four GPUs upward two are reserved; below that every GPU is used.
    held_back = 2 if gpu_total >= 4 else 0
    return [-1] * (gpu_total - held_back)

# Resolve the device once; the training cell below passes this same value on.
device = select_ideal_gpus()
print(f"Using device: {device}")

# 3. Configuration
# Path to the dataset YAML. Defaults to the original local path; set the
# BDD100K_DATA_YAML environment variable to run on another machine without
# editing the notebook (an unset or empty variable falls back to the default).
DATA_YAML = os.environ.get('BDD100K_DATA_YAML') or r'd:\bdd100k\yolov11_dataset\data.yaml'
# Passed as `project=` to model.train(), so all runs are grouped under this name.
PROJECT_NAME = 'BDD100K_YOLOv11'

[34m[1mwandb[0m: Currently logged in as: [33makash012[0m ([33makash012-iit-mandi[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Total GPUs detected: 1
Using device: -1


#### 1. Sanity-check training on 10% of the dataset
 


In [None]:
# # Initialize a Nano model for the sanity check
# model_sanity = YOLO('yolo11n.pt')

# print("Starting Sanity Check Training (10% Data)...")

# model_sanity.train(
#     data=DATA_YAML,
#     project=PROJECT_NAME,
#     name='sanity_check_fractional',
    
#     # Hyperparameters
#     epochs=5,               # Short run
#     imgsz=640,              # Standard size
#     batch=-1,               # AutoBatch (finds max batch size for GPU)
#     fraction=0.1,           # <--- TRAIN ON 10% OF DATA
#     device=device,          # Use detected GPU(s)
#     patience=2,            # Early stopping
    
#     # Run management / console output
#     exist_ok=True,          # Overwrite existing experiment if name matches
#     verbose=True
# )

# print("Sanity check complete. Check W&B dashboard.")

#### 2. Full training on 100% of the dataset

In [2]:
# Image sizes to train at; each entry launches one complete training run.
image_sizes = [640]

for size in image_sizes:
    rule = '=' * 20
    print(f"\n{rule}\nStarting Full Training with Image Size: {size}\n{rule}")

    # Load a fresh 'yolo11s.pt' (YOLOv11 Small) model for every run so runs do
    # not share state. Swap the 's' for 'n', 'm', 'l', or 'x' to change scale.
    model = YOLO('yolo11s.pt')

    train_settings = dict(
        data=DATA_YAML,
        project=PROJECT_NAME,
        name=f'train_full_img{size}',   # One run directory per image size

        # Hyperparameters
        epochs=50,                      # Standard starting point
        imgsz=size,                     # Taken from image_sizes
        batch=-1,                       # AutoBatch
        fraction=1.0,                   # Use all of the training data
        device=device,
        patience=5,                     # Early stopping after 5 epochs of no improvement

        # Optimization
        optimizer='auto',               # Auto-select (SGD/AdamW)

        # Outputs
        save=True,                      # Save checkpoints
        plots=True,                     # Save plots
    )
    model.train(**train_settings)


Starting Full Training with Image Size: 640
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt to 'yolo11s.pt': 100% ━━━━━━━━━━━━ 18.4MB 976.6KB/s 19.3s
Searching for 1 idle GPUs with free memory >= 20.0% and free utilization >= 0.0%...
Selected idle CUDA devices [0]
Ultralytics 8.3.237  Python-3.10.19 torch-2.9.1+cu126 CUDA:0 (NVIDIA GeForce RTX 3080 Laptop GPU, 8192MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=d:\bdd100k\yolov11_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, h

Overriding model.yaml nc=80 with nc=7

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  3                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  4                  -1  1    103360  ultralytics.nn.modules.block.C3k2            [128, 256, 1, False, 0.25]    
  5                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
  6                  -1  1    346112  ultralytics.nn.modules.block.C3k2            [256, 256, 1, True]           
  7                  -1  1   1180672  ultralytics

KeyboardInterrupt: 

#### 3. Validation and Testing


In [None]:
# ==========================================
# UNCOMMENT WHEN TRAINING IS COMPLETE
# ==========================================
# Refer to 'd:\bdd100k\model_reasoning.md' for detailed explanation of these parameters.

# # Load the best model from the training run (example path, adjust 'train_full_img640' as needed)
# best_model_path = f'{PROJECT_NAME}/train_full_img640/weights/best.pt'
# model = YOLO(best_model_path)

# # ---------------------------------------------------------
# # 1. VALIDATION (on 'val' split)
# # ---------------------------------------------------------
# metrics = model.val(
#     data=DATA_YAML,
#     split='val',        # Explicitly use validation set
#     imgsz=640,          # Match training size
#     batch=16,           # Fixed batch size for stability during val
#     device=device,
    
#     # METRIC SETTINGS (See model_reasoning.md)
#     conf=0.001,         # Low confidence to capture full Precision-Recall curve for mAP
#     iou=0.6,            # Balanced NMS to handle crowded scenes
#     rect=True,          # Rectangular inference for speed
#     save_json=True      # Save raw results for analysis
# )

# print(f"mAP50: {metrics.box.map50}")
# print(f"mAP50-95: {metrics.box.map}")


# # ---------------------------------------------------------
# # 2. TESTING (on 'test' split)
# # ---------------------------------------------------------
# # Evaluate on the unseen test set
# test_metrics = model.val(
#     data=DATA_YAML,
#     split='test',       # <--- CRITICAL: Use unseen TEST set
#     imgsz=640,
#     device=device,
#     conf=0.001,         # Keep low for accurate metrics
#     iou=0.6,
#     rect=True,
#     name='test_evaluation'
# )

# # ---------------------------------------------------------
# # 3. VISUAL PREDICTION (Sanity Check on Test Images)
# # ---------------------------------------------------------
# # Generate images with drawn bounding boxes
# results = model.predict(
#     source=r'd:\bdd100k\yolov11_dataset\test\images', # Path to test images
#     imgsz=640,
#     device=device,
    
#     # VISUAL SETTINGS (See model_reasoning.md)
#     conf=0.25,          # Far higher threshold than the 0.001 used for metrics, so only likely detections are drawn
#     iou=0.45,           # Stricter NMS for cleaner visuals
#     save=True,          # Save annotated images
#     save_txt=True,      # Save labels
#     line_width=2        # Thinner boxes
# )