In [None]:
import sys
import os
# Make the parent of the notebook's working directory importable so that the
# `src.*` imports below can be resolved from there. The membership check keeps
# the cell idempotent: re-running it does not append duplicate sys.path entries.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [22]:
import cv2
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from src.classifier.model import get_model
from src.classifier.utils import get_transforms


## Load Model & Transforms

In [25]:
# Setup path: the checkpoint is expected under <project_root>/saved_models,
# where project_root is the parent of the notebook's working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
model_path = os.path.join(project_root, 'saved_models', 'best_model.pth')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fail early with an explicit message instead of an error from deep inside
# torch.load when the checkpoint has not been produced/downloaded yet.
if not os.path.isfile(model_path):
    raise FileNotFoundError(f"Model checkpoint not found: {model_path}")

# Load model
model = get_model(num_classes=5)  # adjust if needed
# NOTE(security): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source. map_location lets a checkpoint saved on another
# device be loaded onto the device selected above.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # inference mode for layers that behave differently in training

# Load transforms: only the second transform returned by get_transforms is
# used here, as the preprocessing applied to each frame before inference.
_, preprocess = get_transforms(224)

## Live Webcam Inference

In [None]:
# Live classification of a fixed region of interest (ROI) in the webcam feed.
# Each frame: crop the ROI, preprocess it, run the model, then draw the ROI
# rectangle and the predicted class name on the frame and display it.
class_names = ['cardboard', 'glass', 'metal', 'plastic', 'trash']

CAMERA_INDEX = 1  # device index passed to cv2.VideoCapture; adjust for your machine
roi_box = (800, 500, 500, 500)  # (x, y, w, h)

cap = cv2.VideoCapture(CAMERA_INDEX)
try:
    # Report a camera that could not be opened instead of exiting silently.
    if cap.isOpened():
        print("Press 'q' to quit.")
    else:
        print(f"Could not open camera index {CAMERA_INDEX}.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to read a frame from the camera; stopping.")
            break

        # Clamp the ROI to the actual frame size. A ROI that falls completely
        # outside the frame would give an empty crop, which cannot be
        # colour-converted or classified, so stop with a hint instead.
        frame_h, frame_w = frame.shape[:2]
        x, y, w, h = roi_box
        x0, y0 = max(0, x), max(0, y)
        x1, y1 = min(frame_w, x + w), min(frame_h, y + h)
        if x1 <= x0 or y1 <= y0:
            print(f"ROI {roi_box} lies outside the {frame_w}x{frame_h} frame; "
                  "adjust roi_box.")
            break
        roi = frame[y0:y1, x0:x1]

        # Convert ROI from BGR to RGB, wrap it as a PIL image and preprocess
        img = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(img)
        input_tensor = preprocess(pil_img).unsqueeze(0).to(device)

        # Inference (no gradients needed); pick the highest-scoring class
        with torch.no_grad():
            outputs = model(input_tensor)
        pred_idx = outputs.argmax(dim=1).item()
        pred_class = class_names[pred_idx]

        # Draw ROI and prediction
        cv2.rectangle(frame, (x0, y0), (x1, y1), (255, 0, 0), 2)
        cv2.putText(frame, f"Pred: {pred_class}", (x0, y0 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        # Show frame
        cv2.imshow('Webcam Inference', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

Press 'q' to quit.
