# 📦 CARD_BOX_V2 (Verified Training)
**Goal:** Train a clean, high-accuracy model using ONLY the 2 high-quality datasets.
**Removed:** The 'Logistics' dataset (source of ghost detections).

### Instructions:
1.  **Runtime -> Change runtime type -> T4 GPU**.
2.  **Run All Cells**.

In [None]:
# 1. SETUP
%pip install ultralytics roboflow
import os
import shutil
from roboflow import Roboflow
from ultralytics import YOLO

# Clean Workspace
# Start from an empty merge target so files left by an earlier run cannot
# leak into this one, then lay out the images/labels folder for each split.
if os.path.exists('final_dataset'):
    shutil.rmtree('final_dataset')

for split_name in ('train', 'valid'):
    for kind in ('images', 'labels'):
        os.makedirs(f'final_dataset/{split_name}/{kind}', exist_ok=True)

In [None]:
# 2. DOWNLOAD (Only the 2 Good Datasets)
# Prefer a key supplied through the ROBOFLOW_API_KEY environment variable.
# SECURITY: the fallback literal below is a credential committed in plain
# text. Rotate it and delete the fallback once the environment variable (or a
# notebook secret exported to it) is in place.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "9Dro6WbBZ9bW4iaU1Z53"))

# Each download() fetches the dataset in YOLOv8 format; the returned object's
# .location attribute is what the merge step reads.
print("Downloading Dataset 1 (Small & Clean)...")
d1 = rf.workspace("project-wfbsj").project("cardboard-box-8uolq").version(1).download("yolov8")

print("Downloading Dataset 2 (Medium & Verified)...")
d2 = rf.workspace("cardboard-box").project("cardboard-box-hql8b").version(1).download("yolov8")

In [None]:
# 3. SAFE MERGE (Renaming files to prevent errors)
def merge_dataset(source_folder):
    """Copy one YOLO-format dataset into ``final_dataset`` under unique names.

    For the ``train`` and ``valid`` splits, every regular file found in
    ``<source_folder>/<split>/images`` is copied to
    ``final_dataset/<split>/images`` as ``<dataset>_<split>_<index><ext>``,
    where ``<dataset>`` is the last component of ``source_folder``. If a label
    file with the same stem exists in ``<source_folder>/<split>/labels``, it is
    rewritten so that every annotation uses class id 0 and saved under the
    matching new stem. Images without a label file, or whose label file holds
    no annotation lines, are copied without a label.

    Args:
        source_folder: Root directory of the dataset. A trailing path
            separator is tolerated.
    """
    # normpath strips a trailing separator; without it basename() returns ''
    # and every dataset would share the same (empty) file prefix, letting a
    # later merge overwrite an earlier one.
    folder_name = os.path.basename(os.path.normpath(source_folder))
    print(f"Merging {folder_name}...")

    for split in ('train', 'valid'):
        src_img = os.path.join(source_folder, split, 'images')
        src_lbl = os.path.join(source_folder, split, 'labels')
        dst_img = f"final_dataset/{split}/images"
        dst_lbl = f"final_dataset/{split}/labels"

        if not os.path.isdir(src_img):
            continue

        # Do not depend on another cell having created the destination tree.
        os.makedirs(dst_img, exist_ok=True)
        os.makedirs(dst_lbl, exist_ok=True)

        # os.listdir order is arbitrary, so sort to make the numbering
        # reproducible; skip sub-directories, which shutil.copy cannot copy.
        image_names = sorted(
            name for name in os.listdir(src_img)
            if os.path.isfile(os.path.join(src_img, name))
        )

        for i, image_name in enumerate(image_names):
            # Short Name Generator (DatasetName_Split_00001.jpg)
            stem, ext = os.path.splitext(image_name)
            new_stem = f"{folder_name}_{split}_{i:05d}"

            # Copy Image
            shutil.copy(
                os.path.join(src_img, image_name),
                os.path.join(dst_img, new_stem + ext),
            )

            # Update & Copy Label (Force Class 0)
            label_path = os.path.join(src_lbl, stem + ".txt")
            if not os.path.exists(label_path):
                continue

            new_lines = []
            with open(label_path, 'r') as label_file:
                for line in label_file:
                    parts = line.strip().split()
                    if not parts:
                        continue
                    # Force everything to Class 0 (Cardboard Box)
                    new_lines.append(f"0 {' '.join(parts[1:])}\n")

            # A label file with no annotation lines produces no output file.
            if new_lines:
                with open(os.path.join(dst_lbl, new_stem + ".txt"), 'w') as out_file:
                    out_file.writelines(new_lines)

# Fold both downloaded datasets into final_dataset/.
merge_dataset(d1.location)
merge_dataset(d2.location)

# Create Config
# Give the dataset root as an absolute `path` and list the splits relative to
# it, so the locations do not depend on the working directory or on how the
# trainer resolves '../' entries.
dataset_root = os.path.abspath("final_dataset")
with open("final_dataset/data.yaml", "w") as f:
    f.write(
        f"path: {dataset_root}\n"
        "train: train/images\n"
        "val: valid/images\n"
        "nc: 1\n"
        "names: ['cardboard_box']\n"
    )
print("Datasets Merged Successfully!")

In [None]:
# 4. TRAIN (20 Epochs)
# Runs the Ultralytics `yolo` CLI as a shell command (IPython `!` syntax):
# detection training on final_dataset/data.yaml, starting from yolov8n.pt,
# for 20 epochs at image size 640. `name` sets the run name that the
# download cell's weights path is built from.
!yolo detect train data=final_dataset/data.yaml model=yolov8n.pt epochs=20 imgsz=640 name=yolov8n_clean_box

In [None]:
# 5. DOWNLOAD
import glob
import os

from google.colab import files

# When the run folder already exists, Ultralytics appends a numeric suffix to
# the run name (yolov8n_clean_box2, ...). Pick the most recently written
# best.pt so a re-run does not download stale weights from the first run.
candidates = glob.glob('/content/runs/detect/yolov8n_clean_box*/weights/best.pt')
if not candidates:
    raise FileNotFoundError(
        "No best.pt found under /content/runs/detect/yolov8n_clean_box*/weights/ "
        "- run the training cell first."
    )
files.download(max(candidates, key=os.path.getmtime))