In [8]:
import os
import shutil
import yaml
import glob 
from ultralytics import YOLO
import torch
import sys

In [9]:
# Sanity-check the runtime before committing to the long training loop.
print(f"Torch version: {torch.__version__}")
print(f"Is GPU detected? {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    print("‚ùå GPU still not detected. Check 'video' group permissions.")
    # Execution deliberately continues here: the exit() call below is disabled,
    # so without a GPU the later training cell may crash or run slowly on CPU.
    #exit()
else:
    print(f"Device Name: {torch.cuda.get_device_name(0)}")



# Full environment report (interpreter, PyTorch build, CUDA runtime, device count).
for _label, _value in (
    ("Python version", sys.version),
    ("PyTorch version", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
    ("CUDA version", torch.version.cuda),
    ("Device count", torch.cuda.device_count()),
):
    print(f"{_label}: {_value}")

Torch version: 2.5.1+cu121
Is GPU detected? True
Device Name: NVIDIA GeForce RTX 3050 Laptop GPU
Python version: 3.10.2 (tags/v3.10.2:a58ebcc, Jan 17 2022, 14:12:15) [MSC v.1929 64 bit (AMD64)]
PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
Device count: 1


In [10]:
# SOURCE_DIR = os.path.join(os.getcwd(), 'YoloFormatLabels')
# DEST_DIR = os.path.join(os.getcwd(), 'fdm_manual_dataset')

# def organize_dataset():
#     images = []
    
#     extensions = ['*.jpg', '*.png', '*.jpeg']
    
#     print(f">>> Scanning {SOURCE_DIR} recursively for images...")
    
#     for ext in extensions:
#         # The ** pattern combined with recursive=True searches all subdirectories
#         search_pattern = os.path.join(SOURCE_DIR, '**', ext)
#         images.extend(glob.glob(search_pattern, recursive=True))
    
#     annotated_pairs = []
    
#     print(f">>> Scanning {SOURCE_DIR} for annotated images...")
    
#     for img_path in images:
#         base_name = os.path.splitext(img_path)[0]
#         txt_path = base_name + ".txt"
        
#         # We only keep images that have a matching .txt label file
#         if os.path.exists(txt_path):
#             annotated_pairs.append((img_path, txt_path))
#         else:

#             print(f"Skipping un-annotated image: {os.path.basename(img_path)}")

#     if not annotated_pairs:
#         raise ValueError("No annotated (.txt) files found! Did you save them in YOLO format inside the 'manual_annotation_workspace' folder?")

    
#     classes_file = os.path.join(SOURCE_DIR, 'classes.txt')
#     if not os.path.exists(classes_file):
#         raise FileNotFoundError("classes.txt not found. LabelImg creates this automatically when you save. Did you define classes?")
    
#     with open(classes_file, 'r') as f:
#         class_names = [line.strip() for line in f.readlines() if line.strip()]

#     print(f"Found {len(annotated_pairs)} annotated images.")
#     print(f"Classes found: {class_names}")

    
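#     # NOTE: train_test_split is not imported in the notebook's import cell; add
#     # `from sklearn.model_selection import train_test_split` before re-enabling this cell.
#     train_pairs, val_pairs = train_test_split(annotated_pairs, test_size=0.1, random_state=42)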

    
#     if os.path.exists(DEST_DIR):
#         try:
#             shutil.rmtree(DEST_DIR)
#         except:
#             pass 
            
#     for split, pairs in [('train', train_pairs), ('val', val_pairs)]:
#         img_dest = os.path.join(DEST_DIR, 'images', split)
#         lbl_dest = os.path.join(DEST_DIR, 'labels', split)
#         os.makedirs(img_dest, exist_ok=True)
#         os.makedirs(lbl_dest, exist_ok=True)
        
#         for img_src, txt_src in pairs:
#             shutil.copy(img_src, os.path.join(img_dest, os.path.basename(img_src)))
#             shutil.copy(txt_src, os.path.join(lbl_dest, os.path.basename(txt_src)))

   
#     yaml_content = {
#         'path': DEST_DIR,
#         'train': 'images/train',
#         'val': 'images/val',
#         'nc': len(class_names),
#         'names': {i: name for i, name in enumerate(class_names)}
#     }
    
#     yaml_path = os.path.join(DEST_DIR, 'data.yaml')
#     with open(yaml_path, 'w') as f:
#         yaml.dump(yaml_content, f)

#     print(f"\n>>> SUCCESS! Dataset ready at: {DEST_DIR}")
#     return yaml_path


# organize_dataset()

In [None]:
#MODELS = ['yolov5m6u.pt', 'yolov8m.pt', 'yolov8x.pt', 'yolov9e.pt' ]

#DATASET_YAML = os.path.join(os.getcwd(), 'merged_3d_printing_dataset', 'data.yaml')
# MODELS = [
#     r"/home/aipc/Desly/DefectClassification/models/yolov8m.pt",
#     r"/home/aipc/Desly/DefectClassification/models/yolov8x.pt", 
#     r"/home/aipc/Desly/DefectClassification/models/yolov5m6u.pt"
# ]


# Dataset description consumed by Ultralytics; resolved against the current
# working directory, so launch the notebook from the project root.
DATASET_YAML = os.path.join(
    os.getcwd(),
    'fdm_manual_dataset',
    'data.yaml',
)

# Checkpoints to fine-tune, one training run per entry.
MODELS = ['yolov8m.pt']

# Keyword arguments forwarded verbatim to model.train().
HYPERPARAMS = dict(
    epochs=2,             # short smoke-test run; raise for real training
    patience=15,          # larger than `epochs`, so early stopping cannot trigger at this setting
    batch=16,
    imgsz=512,
    optimizer='Adam',
    lr0=0.0001,           # initial learning rate
    lrf=0.2,              # final LR factor (see Ultralytics train settings)
    momentum=0.9,         # NOTE(review): presumably used as Adam beta1 by Ultralytics - confirm
    weight_decay=0.0005,
    augment=True,         # NOTE(review): confirm this key affects training and not only val/predict
)

def force_float32(trainer):
    """
    Training callback that casts the trainer's model to float32.

    Intended to be registered on a training-start event so the cast happens
    right before the first step. It was written as a workaround for the DirectML
    'element 0 of tensors does not require grad' error.

    Args:
        trainer: Object exposing a ``model`` attribute with a ``float()`` method.
    """
    print("‚ö° DIRECTML CALLBACK: Forcing model to Float32 for stability...")
    net = trainer.model
    net.float()

# ==========================================
# 4. TRAINING FUNCTION (With Callback)
# ==========================================
def train_and_validate(model_path, project_name):
    """
    Train one YOLO checkpoint on DATASET_YAML and locate its best weights.

    Args:
        model_path: Checkpoint name or path handed to YOLO() (e.g. 'yolov8m.pt').
        project_name: Value passed as the Ultralytics ``project`` argument; the
            run itself is named after the checkpoint (file name without ".pt").

    Returns:
        The path (str) to ``best.pt`` when training finished and the file exists,
        otherwise None. Training exceptions are caught and reported rather than
        raised, so a caller iterating over several models can carry on.
    """
    # Strip only a trailing ".pt"; str.replace would also remove ".pt" occurring
    # in the middle of the file name.
    model_name = os.path.basename(model_path)
    if model_name.endswith(".pt"):
        model_name = model_name[:-len(".pt")]

    # Load Model
    model = YOLO(model_path)

    # Cast the model to float32 right before training starts.
    model.add_callback("on_train_start", force_float32)

    print(f"\n>>> [PHASE 1] TRAINING: {model_name}")

    try:
        model.train(
            data=DATASET_YAML,
            device=0,            # first CUDA device
            amp=True,            # automatic mixed precision ENABLED; set False for pure float32
            workers=4,           # four dataloader workers; use 0 for single-process loading
            project=project_name,
            name=model_name,
            exist_ok=True,       # reuse the run directory instead of creating name2, name3, ...
            val=True,            # Validation enabled
            **HYPERPARAMS
        )
    except Exception as e:
        # Deliberate best-effort: report and let the caller move on to the next model.
        print(f"‚ùå Training failed for {model_name}: {e}")
        return None

    # Prefer the checkpoint path recorded by the trainer: Ultralytics decides the
    # final save directory itself, so rebuilding it by hand can point to the wrong
    # place. The hand-built path is kept as a fallback.
    best_weight_path = os.path.join(os.getcwd(), project_name, model_name, 'weights', 'best.pt')
    trainer = getattr(model, "trainer", None)
    recorded_best = getattr(trainer, "best", None)
    if recorded_best is not None and os.path.exists(str(recorded_best)):
        best_weight_path = str(recorded_best)

    if os.path.exists(best_weight_path):
        print(f"\n‚úÖ Training Complete. Best weights: {best_weight_path}")
        return best_weight_path
    else:
        print(f"‚ùå Error: Could not find weights at {best_weight_path}")
        return None



In [None]:

# Train every configured checkpoint in turn. A falsy result means the run failed
# or produced no best.pt; train_and_validate has already printed the reason.
for m in MODELS:
    project = "defect_detection_manual_training"
    path = train_and_validate(m, project)
    if not path:
        continue
    print(f"‚úÖ Training cycle complete for {m}")


# Printed once the loop has visited every entry, whether or not each run succeeded.
print("\nüéâ ALL MODELS PROCESSED.")


>>> [PHASE 1] TRAINING: yolov8m
Ultralytics 8.4.7  Python-3.10.2 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=True, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=d:\Backup\WORK\MACH-3D\DefectClassification\fdm_manual_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=512, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0001, lrf=0.2, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=DefectClassification/models/yolov8m.pt, momentum=0.9, mosaic=1.0, multi_scale=0.0, na