In [1]:
import os
import shutil
import yaml
from sklearn.model_selection import train_test_split
import glob 
from ultralytics import YOLO
import torch
import torch_directml


In [2]:
# Pre-flight check: report the PyTorch build and which accelerators are usable
# before starting the long training loop.
print(f"Torch version: {torch.__version__}")

# CUDA/ROCm path. The training cell runs on a DirectML device instead, so a
# False here does not by itself mean training will fall back to the CPU.
cuda_available = torch.cuda.is_available()
print(f"Is GPU detected? {cuda_available}")

if cuda_available:
    print(f"Device Name: {torch.cuda.get_device_name(0)}")
else:
    print("❌ GPU still not detected. Check 'video' group permissions.")

# DirectML path: this is the backend the training cell actually requests.
if torch_directml.is_available():
    print(f"DirectML device: {torch_directml.device_name(torch_directml.device().index)}")
else:
    print("❌ No DirectML device detected; the training cell will not be able to use the GPU.")

Torhc version: 2.4.1+cu121
Is GPU detected? False
‚ùå GPU still not detected. Check 'video' group permissions.


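Shell note (ROCm only): run this in the terminal *before* launching Jupyter so the kernel inherits the variable.

export HSA_OVERRIDE_GFX_VERSION=10.3.0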

In [3]:
# Folder scanned (recursively) for annotated images and their .txt labels.
SOURCE_DIR = os.path.join(os.getcwd(), "YoloFormatLabels")
# Folder that receives the generated train/val dataset and its data.yaml.
DEST_DIR = os.path.join(os.getcwd(), "fdm_manual_dataset")

def organize_dataset(source_dir=None, dest_dir=None, val_fraction=0.1, seed=42):
    """Build a train/val YOLO dataset folder from annotated images.

    Recursively collects every ``.jpg``/``.png``/``.jpeg`` under ``source_dir``
    that has a ``.txt`` label file next to it, splits the pairs into train and
    val, copies them into ``<dest_dir>/images/<split>`` and
    ``<dest_dir>/labels/<split>``, and writes ``<dest_dir>/data.yaml``.

    Args:
        source_dir: Folder holding the annotated images and ``classes.txt``.
            Defaults to the module-level ``SOURCE_DIR``.
        dest_dir: Output folder; an existing one is removed first.
            Defaults to the module-level ``DEST_DIR``.
        val_fraction: Passed to ``train_test_split`` as ``test_size``.
        seed: Passed to ``train_test_split`` as ``random_state``.

    Returns:
        Path of the generated ``data.yaml``.

    Raises:
        ValueError: No image under ``source_dir`` has a matching ``.txt`` file.
        FileNotFoundError: ``classes.txt`` is missing from ``source_dir``.
    """
    source_dir = SOURCE_DIR if source_dir is None else source_dir
    dest_dir = DEST_DIR if dest_dir is None else dest_dir

    print(f">>> Scanning {source_dir} recursively for images...")

    images = []
    for ext in ('*.jpg', '*.png', '*.jpeg'):
        # '**' together with recursive=True descends into every subfolder.
        images.extend(glob.glob(os.path.join(source_dir, '**', ext), recursive=True))
    # glob returns files in filesystem order; sort so the fixed seed below
    # really yields the same split on every machine and every run.
    images.sort()

    print(f">>> Scanning {source_dir} for annotated images...")

    annotated_pairs = []
    for img_path in images:
        txt_path = os.path.splitext(img_path)[0] + ".txt"
        # Only images with a label file of the same stem are kept.
        if os.path.exists(txt_path):
            annotated_pairs.append((img_path, txt_path))
        else:
            print(f"Skipping un-annotated image: {os.path.basename(img_path)}")

    if not annotated_pairs:
        raise ValueError(
            f"No annotated (.txt) files found! Did you save them in YOLO format inside '{source_dir}'?"
        )

    classes_file = os.path.join(source_dir, 'classes.txt')
    if not os.path.exists(classes_file):
        raise FileNotFoundError("classes.txt not found. LabelImg creates this automatically when you save. Did you define classes?")

    with open(classes_file, 'r') as f:
        class_names = [line.strip() for line in f if line.strip()]

    print(f"Found {len(annotated_pairs)} annotated images.")
    print(f"Classes found: {class_names}")

    train_pairs, val_pairs = train_test_split(
        annotated_pairs, test_size=val_fraction, random_state=seed
    )

    # Start from a clean output folder. If removal fails, keep going (as before)
    # but say so: leftovers from an earlier split can leak between train and val.
    if os.path.exists(dest_dir):
        try:
            shutil.rmtree(dest_dir)
        except OSError as exc:
            print(f"Warning: could not fully remove {dest_dir} ({exc}); stale files may remain.")

    for split, pairs in (('train', train_pairs), ('val', val_pairs)):
        img_dest = os.path.join(dest_dir, 'images', split)
        lbl_dest = os.path.join(dest_dir, 'labels', split)
        os.makedirs(img_dest, exist_ok=True)
        os.makedirs(lbl_dest, exist_ok=True)

        # The recursive scan is flattened into one folder per split, so two
        # sources can share a stem (same name in different subfolders, or
        # a.jpg + a.png). Suffix later ones instead of silently overwriting.
        used_stems = set()
        for img_src, txt_src in pairs:
            stem, img_ext = os.path.splitext(os.path.basename(img_src))
            unique_stem, suffix = stem, 1
            while unique_stem in used_stems:
                unique_stem = f"{stem}_{suffix}"
                suffix += 1
            used_stems.add(unique_stem)

            shutil.copy(img_src, os.path.join(img_dest, unique_stem + img_ext))
            shutil.copy(txt_src, os.path.join(lbl_dest, unique_stem + '.txt'))

    # Dataset description: root path, split folders relative to it, class table.
    yaml_content = {
        'path': dest_dir,
        'train': 'images/train',
        'val': 'images/val',
        'nc': len(class_names),
        'names': {i: name for i, name in enumerate(class_names)}
    }

    yaml_path = os.path.join(dest_dir, 'data.yaml')
    with open(yaml_path, 'w') as f:
        yaml.dump(yaml_content, f)

    print(f"\n>>> SUCCESS! Dataset ready at: {dest_dir}")
    return yaml_path


# Build the dataset now; as the cell's last expression, the returned data.yaml path is displayed.
organize_dataset()

>>> Scanning /home/aipc/Desly/DefectClassification/YoloFormatLabels recursively for images...
>>> Scanning /home/aipc/Desly/DefectClassification/YoloFormatLabels for annotated images...
Found 250 annotated images.
Classes found: ['Cracking', 'Warping', 'Stringing', 'Layer_shifting', 'Off_Platform']

>>> SUCCESS! Dataset ready at: /home/aipc/Desly/DefectClassification/fdm_manual_dataset


'/home/aipc/Desly/DefectClassification/fdm_manual_dataset/data.yaml'

In [None]:
# Dataset description file produced by organize_dataset().
DATASET_YAML = os.path.join(os.getcwd(), 'fdm_manual_dataset', 'data.yaml')

# Checkpoints to fine-tune, one training run per entry. Resolved against the
# working directory, like DATASET_YAML, instead of a machine-specific absolute path.
# Other checkpoints previously listed here: yolov5m6u.pt, yolov8x.pt, yolov9e.pt.
MODELS = [os.path.join(os.getcwd(), 'yolov8m.pt')]

# DirectML device handle passed to model.train() as `device`.
dml_device = torch_directml.device()
print(f"Training on: {torch_directml.device_name(dml_device.index)}")

# Training settings unpacked into model.train() as keyword arguments.
HYPERPARAMS = dict(
    epochs=100,
    batch=2,              # Keep small for APU stability
    imgsz=640,
    optimizer='Adam',
    lr0=0.0001,
    lrf=0.2,
    momentum=0.9,
    weight_decay=0.0005,
    augment=True,
)

def train_and_validate(model_toTrain, project_name):
    """Fine-tune one YOLO checkpoint on the DirectML device, then validate on CPU.

    Training runs with ``val=False``; afterwards the saved weights are reloaded
    into a fresh ``YOLO`` object and validated with ``device='cpu'``. Reads the
    module-level ``DATASET_YAML``, ``dml_device`` and ``HYPERPARAMS``.

    Args:
        model_toTrain: Path of the ``.pt`` checkpoint to start from.
        project_name: Value passed to Ultralytics as ``project``; the run name
            is the checkpoint's file name without its extension.

    Returns:
        Path of the weights file that was validated, or ``None`` when no
        weights file could be found after training.
    """
    # Run name = file name without extension. splitext only strips the trailing
    # suffix, whereas str.replace(".pt", "") would also remove an inner ".pt".
    model_name = os.path.splitext(os.path.basename(model_toTrain))[0]

    model = YOLO(model_toTrain)

    print(f">>> Starting training using: {DATASET_YAML}")

    model.train(
        data=DATASET_YAML,
        device=dml_device,   # Train on GPU (DirectML)
        amp=False,
        workers=0,           # Mandatory for DirectML
        project=project_name,
        name=model_name,
        exist_ok=True,
        verbose=True,
        val=False,           # Built-in validation is disabled; it is done on CPU below
        save=True,
        **HYPERPARAMS
    )

    # Locate the saved weights. Prefer the run directory reported by the trainer
    # and fall back to the conventional <cwd>/<project>/<name> layout.
    trainer = getattr(model, 'trainer', None)
    run_dir = getattr(trainer, 'save_dir', None) or os.path.join(os.getcwd(), project_name, model_name)
    weights_dir = os.path.join(str(run_dir), 'weights')
    # best.pt is preferred; last.pt is accepted so a run that produced no
    # best.pt can still be validated.
    candidates = [os.path.join(weights_dir, fname) for fname in ('best.pt', 'last.pt')]
    best_weight_path = next((path for path in candidates if os.path.exists(path)), None)

    print("\n" + "="*30)
    print(">>> Switch to CPU for Safe Validation...")
    print("="*30)

    if best_weight_path is None:
        print(f"❌ Error: Could not find {candidates[0]}")
        return None

    # Reload the saved weights in a fresh model object just for validation.
    val_model = YOLO(best_weight_path)

    # Force device='cpu' to avoid the "version_counter" crash seen on DirectML.
    metrics = val_model.val(data=DATASET_YAML, device='cpu')

    print("\n" + "="*30)
    print("      FINAL RESULTS (CPU Verified)")
    print("="*30)

    print(f"Precision: {metrics.box.mp:.4f}")
    print(f"Recall:    {metrics.box.mr:.4f}")
    print(f"mAP@0.5:   {metrics.box.map50:.4f}")
    # box.map is the mean over IoU thresholds 0.5:0.95, not mAP at IoU 0.95.
    print(f"mAP@0.5:0.95: {metrics.box.map:.4f}")

    print("\n" + "="*30)
    print(f"MODEL SAVED AT: {best_weight_path}")
    print("="*30)

    return best_weight_path

# --- Main Loop ---
# One training + CPU-validation run per checkpoint in MODELS.
project_name = "fdm_manual_training"   # same Ultralytics project folder for every run
trained_weights = {}                   # checkpoint path -> validated weights path (None on failure)
for model in MODELS:
    print(f"Training model : {model}")
    saved_path = train_and_validate(model, project_name)
    trained_weights[model] = saved_path

Training on: AMD Radeon(TM) 8060S Graphics 
Training model : /home/aipc/Desly/DefectClassification/yolov8m.pt


Dropped Escape call with ulEscapeCode : 0x03007703


>>> Starting training using: /home/aipc/Desly/DefectClassification/fdm_manual_dataset/data.yaml
New https://pypi.org/project/ultralytics/8.4.7 available üòÉ Update with 'pip install -U ultralytics'
[34m[1mengine/trainer: [0magnostic_nms=False, amp=False, angle=1.0, augment=True, auto_augment=randaugment, batch=2, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/aipc/Desly/DefectClassification/fdm_manual_dataset/data.yaml, degrees=0.0, deterministic=True, device=privateuseone:0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0001, lrf=0.2, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/home/aipc/De

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      1/100         0G       2.16      7.154      1.736          2        640: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 113/113 2.7it/s 41.8s0.3s

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      2/100         0G      1.854      6.256      1.512          1        640: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 113/113 3.5it/s 32.0s0.3s

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      3/100         0G      1.775      6.075      1.422          5        640: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 113/113 3.7it/s 30.7s0.2s

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      4/100         0G      1.807      5.968      1.439          3        640: 50% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ï∏‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 56/113 12.2it/s 15.6s<4.7s


KeyboardInterrupt: 