In [8]:
# CELL 1: SETUP AND DATA DOWNLOAD
import os

# 1. Define Paths
# We ensure the data goes exactly into '6D-Pose-Estimation-Project/data'
project_root = '/content'
data_dir = os.path.join(project_root, 'data')
os.makedirs(data_dir, exist_ok=True)

# 2. Move to data directory
%cd {data_dir}

# 3. Download the entire folder (Exactly as your Professor did)
!pip install gdown -q
print("Downloading folder from Drive...")
# This downloads the folder structure provided in the link
!gdown --folder "https://drive.google.com/drive/folders/19ivHpaKm9dOrr12fzC8IDFczWRPFxho7" --remaining-ok

# 4. Find and Unzip
# The download creates a folder (usually named 'DenseFusion').
# We look for 'Linemod_preprocessed.zip' inside it and unzip it.
import glob
zip_files = glob.glob("**/Linemod_preprocessed.zip", recursive=True)

if zip_files:
    zip_path = zip_files[0]
    print(f"Unzipping {zip_path}...")
    !unzip -q -o "{zip_path}"
    print("Extraction complete!")
else:
    print("Error: Linemod_preprocessed.zip not found. Check the download.")

# 5. Return to Project Root
%cd {project_root}

/content/6D-Pose-Estimation-Project/data
Downloading folder from Drive...
Retrieving folder contents
Processing file 1YFUra533pxS_IHsb9tB87lLoxbcHYXt8 Linemod_preprocessed.zip
Processing file 1gfOnOojzVdEwPzSaPmS3t3aJaQptbys6 trained_checkpoints.zip
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1YFUra533pxS_IHsb9tB87lLoxbcHYXt8
From (redirected): https://drive.google.com/uc?id=1YFUra533pxS_IHsb9tB87lLoxbcHYXt8&confirm=t&uuid=ed1658b9-b487-4b9b-bee0-80c6f5d16853
To: /content/6D-Pose-Estimation-Project/data/DenseFusion/Linemod_preprocessed.zip
100% 8.97G/8.97G [01:54<00:00, 78.5MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1gfOnOojzVdEwPzSaPmS3t3aJaQptbys6
From (redirected): https://drive.google.com/uc?id=1gfOnOojzVdEwPzSaPmS3t3aJaQptbys6&confirm=t&uuid=9b3a016c-f33a-4cae-9b6e-00a63fefa018
To: /content/6D-Pose-Estimation-Project/data/DenseFusion

In [2]:
import os

# Root folder of the YOLO-formatted dataset
base_dir = '/content/datasets'

# Layout expected by YOLO:
# datasets/
# |-- images/ (train, val)
# `-- labels/ (train, val)
yolo_subdirs = [
    os.path.join(base_dir, kind, split)
    for split in ('train', 'val')
    for kind in ('images', 'labels')
]

# exist_ok=True makes the cell safe to re-run on an existing tree
for subdir in yolo_subdirs:
    os.makedirs(subdir, exist_ok=True)

print("Directory structure created successfully!")

Directory structure created successfully!


In [9]:
import yaml
import shutil
import os
from tqdm import tqdm

# --- CONFIGURATION ---
# Where the LineMod archive was extracted (one sub-folder per object ID)
source_root = '/content/6D-Pose-Estimation-Project/data/Linemod_preprocessed/data'
# Where the YOLO-ready dataset (images/ and labels/) is written
dest_root = '/content/datasets'
# Object folders to process; IDs 03 and 07 are not in the list.
# Names follow the class list used in linemod.yaml (class index = ID - 1).
obj_folders = [
    '01',  # ape
    '02',  # benchvise
    '04',  # camera
    '05',  # can
    '06',  # cat
    '08',  # driller
    '09',  # duck
    '10',  # eggbox
    '11',  # glue
    '12',  # holepuncher
    '13',  # iron
    '14',  # lamp
    '15',  # phone
]

def convert_box(size, box):
    """
    Converts bounding box from pixel coordinates [x, y, w, h]
    to YOLO normalized format [x_center, y_center, w_norm, h_norm].

    The box is first clipped to the image rectangle, so every returned value
    is guaranteed to lie in [0, 1] even when the annotated box extends past
    the image border. Boxes that are fully inside the image are unaffected.

    Args:
        size (tuple): Image size (width, height)
        box (list): [x, y, w, h] in pixels, (x, y) being the top-left corner

    Returns:
        tuple: (x_c, y_c, w, h) normalized between 0 and 1

    Raises:
        ZeroDivisionError: if the image width or height is 0
    """
    img_w, img_h = size[0], size[1]
    dw = 1. / img_w
    dh = 1. / img_h

    x, y, w, h = box[0], box[1], box[2], box[3]

    # Clip both corners to the image so the normalized values stay in [0, 1]
    x_min = min(max(x, 0), img_w)
    y_min = min(max(y, 0), img_h)
    x_max = min(max(x + w, 0), img_w)
    y_max = min(max(y + h, 0), img_h)

    # Center and extent of the (clipped) box, then normalize by image size
    x_center = (x_min + x_max) / 2.0 * dw
    y_center = (y_min + y_max) / 2.0 * dh
    w_norm = (x_max - x_min) * dw
    h_norm = (y_max - y_min) * dh

    return (x_center, y_center, w_norm, h_norm)

print("Starting data conversion...")

# Assuming standard LineMod resolution for every frame.
# If images vary, load each image with cv2 to get its shape: h, w, _ = img.shape
img_h, img_w = 480, 640

for obj_id in obj_folders:
    print(f"Processing Object ID: {obj_id}...")

    # 1. Load Ground Truth file
    gt_path = os.path.join(source_root, obj_id, 'gt.yml')
    if not os.path.exists(gt_path):
        print(f"Warning: GT file not found for {obj_id}, skipping.")
        continue

    with open(gt_path, 'r') as f:
        gt_data = yaml.safe_load(f)

    # An empty YAML file loads as None; there is nothing to convert then
    if not gt_data:
        print(f"Warning: GT file is empty for {obj_id}, skipping.")
        continue

    # LineMod IDs are 1-based (01..15), YOLO needs 0-based indices (0..14)
    target_obj_id = int(obj_id)
    class_id = target_obj_id - 1

    # Get all image IDs that have annotations
    img_ids = list(gt_data.keys())

    # Simple Split: First 15% for validation, rest for training
    split_idx = int(len(img_ids) * 0.15)

    # Use tqdm to show progress bar
    for i, img_key in enumerate(tqdm(img_ids)):
        # Determine split (train or val)
        subset = 'val' if i < split_idx else 'train'

        # Original filename (e.g., 0000.png)
        filename = f"{img_key:04d}.png"
        src_img_path = os.path.join(source_root, obj_id, 'rgb', filename)

        if not os.path.exists(src_img_path):
            continue

        # 2. Pick the annotation that belongs to THIS folder's object.
        # gt_data[img_key] is a list of annotated objects. Some folders
        # (reportedly '02' in the preprocessed LineMod release -- TODO confirm)
        # list several objects per frame, so blindly taking entry [0] can
        # return another object's box. Entries without an 'obj_id' field are
        # accepted as-is, which keeps the old "first entry" behavior for them.
        annotation = next(
            (entry for entry in (gt_data[img_key] or [])
             if entry.get('obj_id', target_obj_id) == target_obj_id),
            None,
        )
        if annotation is None:
            # No box for this object in this frame: skip it entirely rather
            # than writing a label that points at a different object.
            continue

        # Destination filename (e.g., 01_0000.png)
        # CRITICAL: We add the object ID prefix to avoid overwriting files
        # since every folder starts with 0000.png
        dst_filename = f"{obj_id}_{filename}"

        # Paths for image and label
        dst_img_path = os.path.join(dest_root, 'images', subset, dst_filename)
        dst_txt_path = os.path.join(dest_root, 'labels', subset, dst_filename.replace('.png', '.txt'))

        # 3. Copy the image
        shutil.copy(src_img_path, dst_img_path)

        # 4. Convert the box to YOLO format
        raw_box = annotation['obj_bb']  # Format: [x, y, w, h]
        yolo_box = convert_box((img_w, img_h), raw_box)

        # 5. Write the .txt file
        with open(dst_txt_path, 'w') as f:
            # Format: class_id x_center y_center width height
            line = f"{class_id} {yolo_box[0]:.6f} {yolo_box[1]:.6f} {yolo_box[2]:.6f} {yolo_box[3]:.6f}\n"
            f.write(line)

print("Conversion completed! Dataset is ready for YOLO.")

Starting data conversion...
Processing Object ID: 01...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1236/1236 [00:06<00:00, 191.72it/s]


Processing Object ID: 02...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1214/1214 [00:06<00:00, 179.51it/s]


Processing Object ID: 04...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1201/1201 [00:04<00:00, 289.93it/s]


Processing Object ID: 05...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1196/1196 [00:08<00:00, 138.37it/s]


Processing Object ID: 06...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1179/1179 [00:04<00:00, 273.32it/s]


Processing Object ID: 08...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1188/1188 [00:07<00:00, 163.28it/s]


Processing Object ID: 09...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1254/1254 [00:05<00:00, 236.14it/s]


Processing Object ID: 10...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1253/1253 [00:07<00:00, 171.56it/s]


Processing Object ID: 11...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1220/1220 [00:04<00:00, 250.58it/s]


Processing Object ID: 12...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1237/1237 [00:08<00:00, 150.77it/s]


Processing Object ID: 13...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1152/1152 [00:04<00:00, 241.35it/s]


Processing Object ID: 14...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1227/1227 [00:07<00:00, 155.87it/s]


Processing Object ID: 15...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1243/1243 [00:08<00:00, 154.35it/s]

Conversion completed! Dataset is ready for YOLO.





In [4]:
# Text of the dataset description file handed to the YOLO trainer.
# It lists the dataset root, the train/val image folders, the class count
# and the class names (index = LineMod object ID - 1).
yaml_content = """
# Dataset root directory
path: /content/datasets

# Train and Validation subdirectories
train: images/train
val: images/val

# Number of classes (LineMod has 15 objects typically)
nc: 15

# Class names (Order must match the ID - 1)
# 01->0 (Ape), 02->1 (Benchvise), etc.
names: [
  'ape', 'benchvise', 'bowl', 'camera', 'can',
  'cat', 'cup', 'driller', 'duck', 'eggbox',
  'glue', 'holepuncher', 'iron', 'lamp', 'phone'
]
"""

# Persist the configuration in the current working directory
with open('linemod.yaml', 'w') as config_file:
    print(yaml_content, end='', file=config_file)

print("Configuration file 'linemod.yaml' created successfully!")

Configuration file 'linemod.yaml' created successfully!


In [10]:
!pip install ultralytics
from ultralytics import YOLO

# 1. Load a pre-trained model
# Using YOLOv8 Nano for speed. You can change to 'yolo11n.pt' if preferred.
model = YOLO('yolo11m.pt')

# 2. Start Training
print("Starting training process...")
results = model.train(
    data='linemod.yaml', # Path to the config file created above
    epochs=50,           # Number of training epochs (increase to 100 for better results)
    imgsz=640,           # Input image size
    batch=16,            # Batch size (adjust based on GPU memory)
    name='yolo_linemod', # Name of the experiment folder in runs/detect/
    verbose=True         # Print details during training
)

print("Training finished.")

Starting training process...
Ultralytics 8.3.233 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=linemod.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo_linemod2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100

KeyboardInterrupt: 