In [2]:
import os

# Root folder of the YOLO-formatted dataset
base_dir = '/kaggle/working/datasets'

# Build the layout YOLO reads from:
# datasets/
# |-- images/ (train, val)
# `-- labels/ (train, val)
# The (split, kind) pairs are enumerated split-major, so the folders are
# created in the order train/images, train/labels, val/images, val/labels.
for split, dtype in [(s, d) for s in ('train', 'val') for d in ('images', 'labels')]:
    dir_path = os.path.join(base_dir, dtype, split)
    # exist_ok makes the cell safe to re-run
    os.makedirs(dir_path, exist_ok=True)

print("Directory structure created successfully!")

Directory structure created successfully!


In [3]:
import yaml
import shutil
import os
import cv2  # Needed for image verification
from tqdm import tqdm

# --- CONFIGURATION ---
# Check your specific input path on Kaggle (it might be 'linemod-preprocessed' or 'line-mode')
source_root = '/kaggle/input/line-mode/Linemod_preprocessed/data'
dest_root = '/kaggle/working/datasets'
# Object folders to convert (zero-padded ids; 03 and 07 are intentionally not listed here)
obj_folders = ['01', '02', '04', '05', '06', '08', '09', '10', '11', '12', '13', '14', '15']

# --- SANITY CHECK ---
# Validate the input BEFORE the destructive cleanup below. With a wrong
# source_root the conversion loop would skip every object (no gt.yml found),
# so an existing dataset would be wiped and replaced by empty folders.
if not os.path.isdir(source_root):
    raise FileNotFoundError(
        f"source_root does not exist: {source_root} "
        "(check the dataset folder name under /kaggle/input)"
    )

# --- CLEANUP (START FRESH) ---
# We remove the destination folder if it exists to avoid mixing old/corrupt files
if os.path.exists(dest_root):
    print(f"Cleaning up old {dest_root}...")
    shutil.rmtree(dest_root)

# Create directory structure: <dest_root>/{images,labels}/{train,val}
for split in ['train', 'val']:
    for dtype in ['images', 'labels']:
        os.makedirs(os.path.join(dest_root, dtype, split), exist_ok=True)

# --- HELPER FUNCTIONS ---

def convert_box(size, box):
    """
    Map a pixel-space box onto YOLO's normalized representation.

    Args:
        size: (image_width, image_height) in pixels.
        box:  [x, y, w, h] in pixels, where (x, y) is the box's minimum
              corner (the center is derived as x + w/2, y + h/2).

    Returns:
        Tuple (x_center, y_center, w_norm, h_norm), each divided by the
        matching image dimension. No clamping is applied.
    """
    # Reciprocals of the image dimensions (multiplied in below)
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]

    left, top, box_w, box_h = box[0], box[1], box[2], box[3]

    # Center in pixels first, then scaled by the reciprocal
    cx = (left + box_w / 2.0) * inv_w
    cy = (top + box_h / 2.0) * inv_h

    return (cx, cy, box_w * inv_w, box_h * inv_h)

def safe_copy_and_verify(src_path, dst_path):
    """
    Copy an image and immediately validate the copy.

    The copy is rejected (and deleted) when it is empty or when OpenCV
    cannot decode it. Any exception raised along the way is reported and
    treated as a failure as well.

    Returns:
        True when the copy exists and is readable, False otherwise.
    """
    try:
        shutil.copy(src_path, dst_path)

        # Pick the first failed check, if any: size first, then decodability
        if os.path.getsize(dst_path) == 0:
            problem = f"‚ùå Copy Failed (0 bytes): {dst_path}"
        elif cv2.imread(dst_path) is None:
            # imread returns None for files it cannot decode
            problem = f"‚ùå Copy Failed (Unreadable): {dst_path}"
        else:
            return True

        # Report the rejected copy and remove it
        print(problem)
        os.remove(dst_path)
        return False

    except Exception as err:
        print(f"‚ùå Error copying {src_path}: {err}")
        # Remove a partial copy, if one was left behind
        if os.path.exists(dst_path):
            os.remove(dst_path)
        return False

# --- MAIN LOOP ---
# For every object folder: read gt.yml, split its frames into val/train,
# copy each RGB frame and write one YOLO label line for that folder's object.

print("Starting robust data conversion...")
total_count = 0    # frames whose source image exists
success_count = 0  # frames copied AND labelled

# Image size used for normalization (hard-coded; TODO confirm all frames are 640x480)
img_h, img_w = 480, 640

for obj_id in obj_folders:
    print(f"Processing Object ID: {obj_id}...")

    # 1. Load Ground Truth file
    gt_path = os.path.join(source_root, obj_id, 'gt.yml')
    if not os.path.exists(gt_path):
        print(f"Warning: GT file not found for {obj_id}, skipping.")
        continue

    with open(gt_path, 'r') as f:
        # An empty gt.yml parses to None; treat it as "no frames"
        gt_data = yaml.safe_load(f) or {}

    # Folder name doubles as the object id; YOLO class ids are zero-based
    target_obj_id = int(obj_id)
    class_id = target_obj_id - 1

    # The first 15% of frames (in gt.yml order, no shuffling) go to 'val'
    img_ids = list(gt_data.keys())
    split_idx = int(len(img_ids) * 0.15)

    for i, img_key in enumerate(tqdm(img_ids)):
        subset = 'val' if i < split_idx else 'train'

        filename = f"{img_key:04d}.png"
        src_img_path = os.path.join(source_root, obj_id, 'rgb', filename)

        # Frames without an RGB image are skipped and not counted
        if not os.path.exists(src_img_path):
            continue

        total_count += 1

        # 2. Pick the annotation that belongs to THIS folder's object.
        # gt_data[img_key] is a list of per-object dicts. Blindly taking
        # element 0 labels the frame with another object's box whenever a
        # frame lists several objects.
        # NOTE(review): folder 02 of the preprocessed LineMod release is
        # reported to be annotated that way - confirm.
        # Entries without an 'obj_id' field are accepted as-is (old behavior).
        annotations = gt_data[img_key] or []
        entry = next(
            (a for a in annotations if a.get('obj_id', target_obj_id) == target_obj_id),
            None,
        )
        if entry is None or 'obj_bb' not in entry:
            # Counted as failed; nothing is copied so no unlabeled image is left behind
            print(f"Warning: no bounding box for object {obj_id} in frame {img_key}, skipping.")
            continue

        # Destination filename with prefix (keeps frames of different objects apart)
        dst_filename = f"{obj_id}_{filename}"

        dst_img_path = os.path.join(dest_root, 'images', subset, dst_filename)
        dst_txt_path = os.path.join(dest_root, 'labels', subset, dst_filename.replace('.png', '.txt'))

        # 3. EXECUTE SAFE COPY
        # Only proceed to create the label if the image copy was successful
        if not safe_copy_and_verify(src_img_path, dst_img_path):
            continue

        # 4. Convert the pixel box [x, y, w, h] to normalized YOLO format
        yolo_box = convert_box((img_w, img_h), entry['obj_bb'])

        # 5. Write the .txt file: "<class> <xc> <yc> <w> <h>"
        with open(dst_txt_path, 'w') as f:
            line = f"{class_id} {yolo_box[0]:.6f} {yolo_box[1]:.6f} {yolo_box[2]:.6f} {yolo_box[3]:.6f}\n"
            f.write(line)

        success_count += 1

print("\n------------------------------------------------")
print(f"‚úÖ Conversion Finished.")
print(f"Total Images Processed: {total_count}")
print(f"Successfully Copied: {success_count}")
print(f"Corrupted/Failed: {total_count - success_count}")
print("Dataset is ready for training.")

Cleaning up old /kaggle/working/datasets...
Starting robust data conversion...
Processing Object ID: 01...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1236/1236 [00:28<00:00, 43.43it/s]


Processing Object ID: 02...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1214/1214 [00:28<00:00, 43.25it/s]


Processing Object ID: 04...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1201/1201 [00:27<00:00, 43.53it/s]


Processing Object ID: 05...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1196/1196 [00:27<00:00, 43.40it/s]


Processing Object ID: 06...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1179/1179 [00:28<00:00, 41.17it/s]


Processing Object ID: 08...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1188/1188 [00:29<00:00, 40.61it/s]


Processing Object ID: 09...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1254/1254 [00:29<00:00, 42.73it/s]


Processing Object ID: 10...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1253/1253 [00:29<00:00, 42.24it/s]


Processing Object ID: 11...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1220/1220 [00:28<00:00, 43.28it/s]


Processing Object ID: 12...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1237/1237 [00:29<00:00, 41.96it/s]


Processing Object ID: 13...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1152/1152 [00:27<00:00, 41.94it/s]


Processing Object ID: 14...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1227/1227 [00:29<00:00, 41.79it/s]


Processing Object ID: 15...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1243/1243 [00:29<00:00, 42.74it/s]


------------------------------------------------
‚úÖ Conversion Finished.
Total Images Processed: 15800
Successfully Copied: 15800
Corrupted/Failed: 0
Dataset is ready for training.





In [4]:
# Dataset descriptor for YOLO, kept as a literal so it is written out verbatim
yaml_content = """
# Dataset root directory
path: /kaggle/working/datasets

# Train and Validation subdirectories
train: images/train
val: images/val

# Number of classes (LineMod has 15 objects typically)
nc: 15

# Class names (Order must match the ID - 1)
# 01->0 (Ape), 02->1 (Benchvise), etc.
names: [
  'ape', 'benchvise', 'bowl', 'camera', 'can',
  'cat', 'cup', 'driller', 'duck', 'eggbox',
  'glue', 'holepuncher', 'iron', 'lamp', 'phone'
]
"""

# Persist the descriptor; the training cells point `data=` at this path
with open('/kaggle/working/linemod.yaml', 'w') as config_file:
    config_file.write(yaml_content)

print("Configuration file 'linemod.yaml' created successfully!")

Configuration file 'linemod.yaml' created successfully!


In [None]:
!pip install ultralytics
!pip install --upgrade --force-reinstall numpy==1.26.4 matplotlib==3.7.2

# FOR MULTIGPU
import os
from ultralytics import YOLO

# --- CONFIGURATION ---
dataset_root = '/kaggle/working/datasets'

print("\nüöÄ Starting Multi-GPU Training ...")

# Load YOLO model
model = YOLO('yolo11m.pt')  # pretrained YOLO11M

# --- TRAINING ---
results = model.train(
    data='/kaggle/working/linemod.yaml',  # path to dataset YAML
    epochs=100,                           # total epochs
    imgsz=640,                            # image size
    batch=32,                             # total batch, split across GPUs
    device=[0,1],                         # use GPU 0 and 1
    patience=10,                          # early stopping patience
    amp=True,                              # mixed precision for memory efficiency
    augment=True,                         # enable Albumentations augmentations
    #auto_augment=None,                     # disable RandAugment for safety
    erasing=0,                             # disable CutOut (might erase object)
    #mosaic=0,                              # disable mosaic/cutmix (bounding boxes may break)
    workers=4,                             # dataloader CPU workers
    project='LineMod_YOLO_multiGPU',                # output project folder
    name='yolo11m_final_multiGPU',         # experiment name
    verbose=True,                           # print detailed logs
    exist_ok=True,                          # overwrite if exists
    save=True                               # save weights & results
)


Collecting numpy>=1.23.0 (from ultralytics)
  Using cached numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Using cached numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
mkl-umath 0.1.1 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
mkl-random 1.2.4 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
mkl-fft 1.3.8 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
numba 0.

In [21]:
!pip install ultralytics

import os
import glob
import cv2
import numpy as np
from ultralytics import YOLO

# --- CONFIGURATION ---
dataset_root = '/kaggle/working/datasets'

print("\nüöÄ Starting Training ...")

model = YOLO('yolo11m.pt')

results = model.train(
    data='/kaggle/working/linemod.yaml',
    epochs=100,
    imgsz=640,
    batch=32,
    device=0,
    patience=10,         #for early stop
    # --- FIX CRITICO ---
    #augment=False,        # Disable pipeline Albumentations
    #auto_augment=None,    # No RandAugment (rotation too much, color shift exagerate )
    erasing=0,            # No CutOut (use albumentations) becase can happen that delete the object
    #mosaic=0,             # No mosaic/cutmix (use albumentations) bb false

    # --- STABILIT√Ä KAGGLE ---
    workers=4,
    #amp=False,
    #cache=False,

    project='LineMod_YOLO',
    name='yolo11m_final_ok',
    verbose=True,
    exist_ok=True,
    save=True
)



üöÄ Starting Training ...
Ultralytics 8.3.233 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/linemod.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo11m_final_ok, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, over

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 5.12 MiB is free. Process 4675 has 15.87 GiB memory in use. Of the allocated memory 15.45 GiB is allocated by PyTorch, and 72.21 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)