# Direct Auto-Labeling Pipeline (No Drive)

**Description**: Runs the filtered auto-labeling pipeline directly in this session.
**Requirement**: You must upload your `best.pt` and a zip of your images.

**Steps**:
1. Upload Files.
2. Auto-Label (Hardhat, Person, Safety Vest).
3. Download Result Zip.

In [None]:
# Step 1: Install Dependencies
# Installs the packages the later cells import: ultralytics (YOLO),
# segment-anything (SAM) and opencv-python-headless (cv2).
# NOTE(review): roboflow is installed here but no cell visible in this
# notebook imports it — confirm it is still needed.
!pip install ultralytics segment-anything opencv-python-headless roboflow

In [None]:
# Step 2: Upload Files
import os
from google.colab import files
import zipfile

# Working directories: extracted images go to IMAGES_DIR, model weights to
# MODELS_DIR. Both are created up front so the later moves/extractions succeed.
IMAGES_DIR = "input_images"
MODELS_DIR = "models"
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

print("--- UPLOAD INSTRUCTIONS ---")
print("1. Upload your 'best.pt' model file.")
print("2. Upload a ZIP file containing your images (e.g., images.zip).")

# The loop below relies on each uploaded file being present in the current
# working directory under the name used as its key in `uploaded`.
uploaded = files.upload()

# Path of the YOLO weights used by the following cells. If several .pt files
# are uploaded, the last one seen wins.
MODEL_PATH = ""
images_zip_found = False

for filename in uploaded:
    # Compare extensions case-insensitively so 'BEST.PT' / 'IMAGES.ZIP' are
    # recognised instead of being silently ignored.
    lowered = filename.lower()
    if lowered.endswith('.pt'):
        print(f"Model detected: {filename}")
        MODEL_PATH = os.path.join(MODELS_DIR, filename)
        # os.replace overwrites a model left over from a previous run.
        os.replace(filename, MODEL_PATH)
    elif lowered.endswith('.zip'):
        print(f"Images zip detected: {filename}")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(IMAGES_DIR)
        images_zip_found = True
    else:
        # Tell the user instead of dropping the file without a trace.
        print(f"WARNING: Ignoring unsupported file: {filename}")

if not MODEL_PATH:
    print("WARNING: No .pt file uploaded. Using default 'yolov8n.pt' (Not recommended for custom classes).")
    MODEL_PATH = "yolov8n.pt"

if not images_zip_found:
    print(f"WARNING: No .zip file uploaded. '{IMAGES_DIR}' may contain no images to label.")

In [None]:
# Step 3: Load Models
from ultralytics import YOLO
from segment_anything import sam_model_registry, SamPredictor
import torch

# Load YOLO
yolo = YOLO(MODEL_PATH)

# Load SAM
SAM_CHECKPOINT = "sam_vit_h_4b8939.pth"
if not os.path.exists(SAM_CHECKPOINT):
    !wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry["vit_h"](checkpoint=SAM_CHECKPOINT)
sam.to(device=DEVICE)
predictor = SamPredictor(sam)

print("Models Loaded!")

In [None]:
# Step 4: Run Filtered Auto-Labeling
import cv2
import glob
import numpy as np
import shutil

# Output layout: <OUTPUT_ROOT>/images, <OUTPUT_ROOT>/labels and a classes.txt
# listing the kept class names, one per line.
OUTPUT_ROOT = "output_data"
IMAGES_OUT = os.path.join(OUTPUT_ROOT, "images")
LABELS_OUT = os.path.join(OUTPUT_ROOT, "labels")

os.makedirs(IMAGES_OUT, exist_ok=True)
os.makedirs(LABELS_OUT, exist_ok=True)

# Classes kept in the output; a name's position in this list is its new id.
NEW_CLASSES = ['Hardhat', 'Person', 'Safety Vest']
with open(os.path.join(OUTPUT_ROOT, "classes.txt"), "w", encoding="utf-8") as f:
    f.writelines(f"{c}\n" for c in NEW_CLASSES)

# Name -> new id, derived from NEW_CLASSES so the ids written to the label
# files always agree with classes.txt.
NEW_CLASS_IDS = {name: idx for idx, name in enumerate(NEW_CLASSES)}

# Class-id -> class-name table of the loaded detector.
OLD_NAMES = yolo.names

IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def _unique_stem(stem, used_stems):
    """Return `stem`, or `stem_<n>` if it is taken, and record it in `used_stems`.

    The output folders are flat while the input is walked recursively, so two
    inputs with the same base name (e.g. a/img.jpg and b/img.jpg, or img.jpg
    and img.png) would otherwise overwrite each other's image and label.
    """
    candidate = stem
    suffix = 1
    while candidate in used_stems:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used_stems.add(candidate)
    return candidate


# Collect every image under IMAGES_DIR, including sub-folders. The list is
# sorted so processing order (and any collision suffixes) is reproducible.
files_list = sorted(
    os.path.join(dir_path, file_name)
    for dir_path, _dir_names, file_names in os.walk(IMAGES_DIR)
    for file_name in file_names
    if file_name.lower().endswith(IMAGE_EXTENSIONS)
)

print(f"Processing {len(files_list)} images...")

count = 0
used_stems = set()

for img_path in files_list:
    # cv2.imread returns None for files it cannot decode; skip those.
    if cv2.imread(img_path) is None:
        continue

    # Predict
    boxes = yolo.predict(img_path, conf=0.25, verbose=False)[0].boxes

    # --- FILTERING LOGIC ---
    # Keep only detections whose class name is in NEW_CLASSES and remap them
    # to the new ids; everything else is dropped.
    valid_lines = []
    for box in boxes:
        new_id = NEW_CLASS_IDS.get(OLD_NAMES[int(box.cls[0])])
        if new_id is None:
            continue
        # One label line per box: new id followed by the four xywhn values.
        bx = box.xywhn[0].cpu().numpy()
        valid_lines.append(f"{new_id} {bx[0]:.6f} {bx[1]:.6f} {bx[2]:.6f} {bx[3]:.6f}")

    # Only save if we found valid objects
    if not valid_lines:
        continue

    # The image and its label share one collision-free stem. Without a
    # collision this is exactly the original file name.
    stem, ext = os.path.splitext(os.path.basename(img_path))
    out_stem = _unique_stem(stem, used_stems)

    # 1. Copy Image
    shutil.copy(img_path, os.path.join(IMAGES_OUT, out_stem + ext))

    # 2. Write Label
    with open(os.path.join(LABELS_OUT, out_stem + ".txt"), "w", encoding="utf-8") as f:
        f.write("\n".join(valid_lines))

    count += 1
    if count % 10 == 0:
        print(f"Saved {count} labeled images...")

print(f"Done! {count} valid images.")

In [None]:
# Step 5: Zip and Download
# Build auto_labeled_data.zip from the output_data folder. make_archive
# rewrites the archive on every run, so entries left over from an earlier run
# cannot linger in the download (adding to an existing zip would keep them).
# Entries are stored under the "output_data/" prefix.
import shutil

shutil.make_archive("auto_labeled_data", "zip", root_dir=".", base_dir="output_data")

# Send the archive to the user's browser.
from google.colab import files
files.download('auto_labeled_data.zip')