# Production Auto-Labeling Pipeline (Filtered)

**Input**: `2_processed_images/` (Extracted frames)
**Output**: `3_roboflow_upload/` (Ready for Roboflow)

**Logic**:
1.  Detects objects using YOLO. (SAM is also loaded in Step 3, but the labeling loop in Step 4 only uses the YOLO bounding boxes.)
2.  **FILTERS** strictly for: Hardhat (0), Person (1), Safety Vest (2).
3.  Ignores all other classes (Masks, Vehicles, Cones, etc.).
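4.  Saves YOLO-format `.txt` label files (one line per box: `class x_center y_center width height`, normalized to 0-1) together with a copy of each image that contains at least one kept object.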

In [None]:
# Step 1: Install Dependencies
# Installs the packages the later cells import: ultralytics (YOLO),
# segment-anything (SAM) and opencv-python-headless (cv2).
# NOTE(review): roboflow is installed as well, but none of the cells in this
# section import it - confirm whether a later upload step needs it.
!pip install ultralytics segment-anything opencv-python-headless roboflow

In [None]:
# Step 2: Setup Paths
# Mounts Google Drive, switches into the project folder, defines every path
# used by the later cells, creates the output folders and writes classes.txt.
import os
import shutil
from google.colab import drive

# Mount Drive so the project folder is reachable under /content/drive.
drive.mount('/content/drive')

# Define Root Path. os.chdir raises if the folder does not exist, which stops
# the notebook before anything is written to the wrong place.
REPO_ROOT = "/content/drive/MyDrive/Vision-Project-Prod-Pipeline"
os.chdir(REPO_ROOT)
print(f"Working Directory: {os.getcwd()}")

# Paths
MODEL_PATH = os.path.join(REPO_ROOT, "models/best.pt")
INPUT_DIR = os.path.join(REPO_ROOT, "2_processed_images")
OUTPUT_ROOT = os.path.join(REPO_ROOT, "3_roboflow_upload")
IMAGES_OUT = os.path.join(OUTPUT_ROOT, "images")
LABELS_OUT = os.path.join(OUTPUT_ROOT, "labels")

# Surface missing inputs right away. Without this, a missing input folder only
# shows up later as "0 images" and a missing model as a load error in Step 3.
for _label, _path in (("Model weights", MODEL_PATH), ("Input folder", INPUT_DIR)):
    if not os.path.exists(_path):
        print(f"WARNING: {_label} not found at {_path}")

# Create output dirs if they do not exist yet.
# NOTE: existing contents are NOT removed, so images/labels written by an
# earlier run stay in the upload folder unless a new run overwrites them.
os.makedirs(IMAGES_OUT, exist_ok=True)
os.makedirs(LABELS_OUT, exist_ok=True)

# Write classes.txt: one class name per line, in class-id order
# (Hardhat = 0, Person = 1, Safety Vest = 2).
NEW_CLASSES = ['Hardhat', 'Person', 'Safety Vest']
with open(os.path.join(OUTPUT_ROOT, "classes.txt"), "w", encoding="utf-8") as f:
    f.writelines(f"{name}\n" for name in NEW_CLASSES)

In [None]:
# Step 3: Load Models
from ultralytics import YOLO
from segment_anything import sam_model_registry, SamPredictor
import torch

# Load YOLO
yolo = YOLO(MODEL_PATH)

# Load SAM
SAM_CHECKPOINT = "sam_vit_h_4b8939.pth"
if not os.path.exists(SAM_CHECKPOINT):
    !wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry["vit_h"](checkpoint=SAM_CHECKPOINT)
sam.to(device=DEVICE)
predictor = SamPredictor(sam)

print("Models Loaded!")

In [None]:
# Step 4: Run Filtered Auto-Labeling
# For every image in INPUT_DIR: run YOLO, keep only the boxes whose class is
# listed in NEW_CLASSES, renumber them to the new class ids, then write a YOLO
# label file and copy the image. Images without any kept box are not exported.
import cv2
import glob
import numpy as np

# OLD CLASS MAP (From your current best.pt)
# 0:Hardhat, 1:Mask, 2:NO-Hardhat, 3:NO-Mask, 4:NO-Safety Vest, 5:Person, 6:Safety Cone, 7:Safety Vest, ...
OLD_NAMES = yolo.names

# Class name -> new class id, derived from NEW_CLASSES so the ids written to
# the label files always agree with the line order of classes.txt.
# Assumes the kept classes carry the same names in the old model.
NAME_TO_NEW_ID = {name: new_id for new_id, name in enumerate(NEW_CLASSES)}

# A kept class that the model does not know can never be emitted; say so up
# front instead of silently producing a dataset without it.
_known_names = set(OLD_NAMES.values()) if isinstance(OLD_NAMES, dict) else set(OLD_NAMES)
_missing = [name for name in NEW_CLASSES if name not in _known_names]
if _missing:
    print(f"WARNING: model has no class named {_missing}; no labels will be produced for it.")

IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# Filter by extension before counting so the message reflects real images, and
# sort because glob returns entries in arbitrary order.
files = sorted(
    path
    for path in glob.glob(os.path.join(INPUT_DIR, "*"))
    if path.lower().endswith(IMAGE_EXTENSIONS)
)
print(f"Processing {len(files)} images...")

count = 0
# Label files are named after the image stem, so "a.jpg" and "a.png" would
# both map to "a.txt"; remember the stems already written to avoid clobbering.
written_stems = set()

for img_path in files:
    filename = os.path.basename(img_path)
    stem = os.path.splitext(filename)[0]

    if stem in written_stems:
        print(f"Skipping {filename}: {stem}.txt was already written for another image.")
        continue

    # Decode only to check that the file is a readable image before running
    # the detector on it.
    if cv2.imread(img_path) is None:
        print(f"Skipping unreadable image: {filename}")
        continue

    # Predict
    results = yolo.predict(img_path, conf=0.25, verbose=False)
    boxes = results[0].boxes
    if len(boxes) == 0:
        continue

    # Bring all detections to host memory once per image instead of per box.
    class_ids = boxes.cls.cpu().numpy().astype(int)
    coords = boxes.xywhn.cpu().numpy()  # normalized x_center, y_center, w, h

    # --- FILTERING LOGIC ---
    valid_lines = []
    for old_id, (cx, cy, bw, bh) in zip(class_ids, coords):
        new_id = NAME_TO_NEW_ID.get(OLD_NAMES[int(old_id)])
        if new_id is None:
            continue  # class is not one of the kept classes
        valid_lines.append(f"{new_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}")

    # Only save if we found valid objects
    if not valid_lines:
        continue

    # 1. Copy Image
    shutil.copy(img_path, os.path.join(IMAGES_OUT, filename))

    # 2. Write Label
    with open(os.path.join(LABELS_OUT, stem + ".txt"), "w", encoding="utf-8") as f:
        f.write("\n".join(valid_lines))
    written_stems.add(stem)

    count += 1
    if count % 10 == 0:
        print(f"Saved {count} labeled images...")

print(f"Done! {count} valid images ready for Roboflow in '3_roboflow_upload'.")