# 📦 MEGA-MODEL TRAINING (Merged Datasets)
This notebook combines 3 datasets into one SUPER dataset:
1.  Cardboard Box (Small)
2.  Cardboard Box (Medium)
3.  Logistics (Massive - Filtered for Boxes only)

### Steps:
1.  **Runtime -> Change runtime type -> T4 GPU**.
2.  Run all cells.

In [None]:
# 1. Install & Setup
%pip install ultralytics roboflow
import os
import shutil
from roboflow import Roboflow
from ultralytics import YOLO

# Start from an empty 'mega_dataset' tree: remove any previous copy, then
# recreate the images/labels folders for the train and valid splits.
if os.path.exists('mega_dataset'):
    shutil.rmtree('mega_dataset')
for split_name in ('train', 'valid'):
    for kind in ('images', 'labels'):
        os.makedirs(f'mega_dataset/{split_name}/{kind}', exist_ok=True)

In [None]:
# 2. Download Datasets (User Provided Codes)
# The Roboflow API key is a credential and must not be stored in the notebook.
# It is read from the ROBOFLOW_API_KEY environment variable; when that is not
# set, the user is prompted for it (input is not echoed).
import getpass

roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass.getpass("Roboflow API key: ")
rf = Roboflow(api_key=roboflow_api_key)

# Each download returns a dataset object; its `.location` (the local folder)
# is what the merge step consumes, so d1/d2/d3 must stay defined.

# Dataset 1: Cardboard Box 1
print("Downloading Dataset 1...")
d1 = rf.workspace("project-wfbsj").project("cardboard-box-8uolq").version(1).download("yolov8")

# Dataset 2: Cardboard Box 2
print("Downloading Dataset 2...")
d2 = rf.workspace("cardboard-box").project("cardboard-box-hql8b").version(1).download("yolov8")

# Dataset 3: Logistics (Big One)
print("Downloading Dataset 3 (Logistics)...")
d3 = rf.workspace("roboflow-ngkro").project("logistics-h0uec").version(10).download("yolov8")

In [None]:
# 3. MERGE LOGIC (The Magic Step)
# We copy all files into 'mega_dataset'.
# Key: The Logistics dataset keeps class names in 'data.yaml'. 
# We will SIMPLIFY everything to just 1 class: 'box' (Index 0).

def merge_dataset(source_folder, is_logistics=False, box_class_id=2,
                  dest_root="mega_dataset"):
    """Copy one YOLO-format dataset into the merged dataset as a single class.

    For the 'train' and 'valid' splits, every file in
    ``<source_folder>/<split>/images`` is copied to
    ``<dest_root>/<split>/images`` and every label file in
    ``<source_folder>/<split>/labels`` is rewritten into
    ``<dest_root>/<split>/labels`` with all kept annotations set to class 0.
    Output file names are prefixed with the source folder's base name so
    files from different datasets cannot overwrite each other.

    Args:
        source_folder: Root folder of the downloaded dataset. May be an
            absolute path; only its last component is used as the prefix.
        is_logistics: When True, keep only annotations whose class id equals
            ``box_class_id`` and drop the rest. When False, every annotation
            is kept and treated as a box.
        box_class_id: Class id that denotes a box in the filtered dataset.
        dest_root: Root folder of the merged dataset.

    Note:
        A label file is written only if at least one annotation is kept;
        the matching image is still copied either way.
    """
    # An absolute path cannot be embedded in a file name, so reduce it to its
    # final component (normpath also strips a trailing separator).
    prefix = os.path.basename(os.path.normpath(source_folder))

    for split in ('train', 'valid'):
        # Images
        src_img = os.path.join(source_folder, split, 'images')
        dst_img = os.path.join(dest_root, split, 'images')
        if os.path.isdir(src_img):
            os.makedirs(dst_img, exist_ok=True)
            for name in os.listdir(src_img):
                shutil.copy(os.path.join(src_img, name),
                            os.path.join(dst_img, f"{prefix}_{name}"))

        # Labels (rewriting the class id to 0)
        src_lbl = os.path.join(source_folder, split, 'labels')
        dst_lbl = os.path.join(dest_root, split, 'labels')
        if not os.path.isdir(src_lbl):
            continue
        os.makedirs(dst_lbl, exist_ok=True)

        for name in os.listdir(src_lbl):
            with open(os.path.join(src_lbl, name), 'r') as src_file:
                rows = [line.split() for line in src_file]

            # Blank lines yield an empty token list and are skipped. The
            # original class id is replaced by the unified class 0.
            kept = [
                f"0 {' '.join(parts[1:])}\n"
                for parts in rows
                if parts and (not is_logistics or int(parts[0]) == box_class_id)
            ]

            if kept:  # Only save if we found a box
                with open(os.path.join(dst_lbl, f"{prefix}_{name}"), 'w') as dst_file:
                    dst_file.writelines(kept)

print("Merging Datasets...")
# The two cardboard-box datasets are taken as-is; only the Logistics dataset
# is filtered down to its box class.
merge_dataset(d1.location, is_logistics=False)
merge_dataset(d2.location, is_logistics=False)
merge_dataset(d3.location, is_logistics=True)
print("Merge Complete!")

# Create data.yaml
# `path` is the absolute dataset root and `train`/`val` are relative to it, so
# the splits resolve to mega_dataset/train/images and mega_dataset/valid/images
# regardless of the working directory or Ultralytics' datasets_dir setting.
dataset_root = os.path.abspath("mega_dataset")
yaml_content = f"""
path: {dataset_root}
train: train/images
val: valid/images

nc: 1
names: ['cardboard_box']
"""
with open("mega_dataset/data.yaml", "w") as f:
    f.write(yaml_content)

In [None]:
# 4. TRAIN (The Result)
!yolo detect train data=mega_dataset/data.yaml model=yolov8n.pt epochs=20 imgsz=640 name=yolov8n_mega_box

In [None]:
# 5. Download
# Hand the best.pt weights file of the 'yolov8n_mega_box' run to the browser.
from google.colab import files

best_weights_path = '/content/runs/detect/yolov8n_mega_box/weights/best.pt'
files.download(best_weights_path)