In [2]:
#using resnet
import os
import cv2
import numpy as np
def generate_dataset(user_id="Joshua",
                     target_dir=r"C:\Users\OMEN\Desktop\PJT\images1",
                     max_images=500,
                     camera_index=0):
    """Capture face crops from a webcam and save them as training images.

    Every frame is passed through the OpenCV DNN face detector. The first
    detection above the confidence threshold is cropped, resized to 200x200,
    converted to grayscale and written to ``target_dir`` as
    ``user.<user_id>.<n>.jpg``. Capture stops when Enter is pressed in the
    preview window, when ``max_images`` images have been saved, or when a
    frame cannot be read.

    Args:
        user_id: Label embedded in each file name (second dot-separated field).
        target_dir: Directory the images are written to; created if missing.
        max_images: Number of saved images after which capture stops.
        camera_index: Device index handed to ``cv2.VideoCapture``.
    """
    # Load the pre-trained ResNet-based face detection model
    model_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
    weights_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"
    face_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)

    def face_cropped(img):
        """Return the first confident, non-empty face crop of ``img`` or None."""
        h, w = img.shape[:2]
        blob = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (104.0, 177.0, 123.0))
        face_net.setInput(blob)
        detections = face_net.forward()

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if not confidence > 0.7:  # Confidence threshold
                continue
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            x, y, x1, y1 = box.astype("int")
            # The detector may report coordinates outside the frame; clamp
            # them so the slice below never wraps around or comes back empty.
            x, y = max(0, x), max(0, y)
            x1, y1 = min(w, x1), min(h, y1)
            if x1 <= x or y1 <= y:
                continue  # degenerate box, cv2.resize would raise on it
            return img[y:y1, x:x1]
        return None

    # Create the target directory if it doesn't exist
    os.makedirs(target_dir, exist_ok=True)

    cap = cv2.VideoCapture(camera_index)
    img_id = 0  # Counter for the number of images captured

    print("Starting image capture. Press 'Enter' to stop.")

    # try/finally guarantees the camera and preview window are released even
    # if the loop is interrupted or an OpenCV call raises.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture image. Exiting...")
                break

            cropped_face = face_cropped(frame)
            if cropped_face is not None:
                img_id += 1
                # Resize and convert face to grayscale
                face = cv2.resize(cropped_face, (200, 200))
                face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

                # Save the face image before the counter overlay is drawn on it
                file_name_path = os.path.join(target_dir, f"user.{user_id}.{img_id}.jpg")
                cv2.imwrite(file_name_path, face)

                # Display the face with its running number on the screen
                cv2.putText(face, str(img_id), (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Cropped Face", face)

            # Mask to the low byte so the Enter check (ASCII 13) also works on
            # platforms where waitKey sets higher bits.
            if (cv2.waitKey(1) & 0xFF) == 13 or img_id >= max_images:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    print("Image capture complete. Data saved to:", target_dir)



# Start dataset generation: opens the webcam and saves face crops until
# Enter is pressed or the image limit is reached.
generate_dataset()

Starting image capture. Press 'Enter' to stop.
Image capture complete. Data saved to: C:\Users\OMEN\Desktop\PJT\images1


In [1]:
import os
import cv2
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from keras_facenet import FaceNet  # Importing keras-facenet library
import tensorflow

# Load FaceNet using keras-facenet; used below to turn face crops into embeddings
facenet_model = FaceNet()

# Paths to ResNet face detection model files.
# Note: in this cell `model_path` is the network definition (.prototxt) and
# `weights_path` is the trained weights (.caffemodel).
model_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
weights_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"

# Load ResNet-based face detector (read as a global by extract_face_features)
face_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)

def extract_face_features(image_path):
    """Return one FaceNet embedding per confident face found in an image file.

    The image is read with OpenCV, passed through the module-level ``face_net``
    detector, and every detection with confidence above 0.7 is cropped,
    resized to 160x160 and embedded with the module-level ``facenet_model``.

    Args:
        image_path: Path of the image file to process.

    Returns:
        A list of embedding vectors; empty when no usable face was detected.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Cannot read image at path: {image_path}")

    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    face_net.setInput(blob)
    detections = face_net.forward()

    features = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > 0.7:  # Confidence threshold
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        x, y, x1, y1 = box.astype("int")
        # Clamp to the image, as the live-recognition code does. Without this
        # a negative coordinate makes the slice wrap around and the face is
        # silently dropped as an empty crop.
        x, y = max(0, x), max(0, y)
        x1, y1 = min(w, x1), min(h, y1)
        if x1 <= x or y1 <= y:
            continue  # degenerate box

        face = image[y:y1, x:x1]
        face_resized = cv2.resize(face, (160, 160))  # FaceNet input size
        face_array = np.expand_dims(face_resized, axis=0)  # Add batch dimension
        embeddings = facenet_model.embeddings(face_array)
        features.append(embeddings[0])  # Single-image batch -> first row
    return features

def train_classifier(data_dir, classifier_path="classifier.pkl", encoder_path="label_encoder.pkl"):
    """Train an SVM on FaceNet embeddings of the images in ``data_dir``.

    File names are expected to look like ``user.<label>.<n>.<ext>``; the
    label is the second dot-separated field. The fitted classifier and the
    label encoder are pickled to ``classifier_path`` and ``encoder_path``,
    and training/test accuracy is printed.

    Args:
        data_dir: Directory containing the face images.
        classifier_path: Output file for the pickled SVM.
        encoder_path: Output file for the pickled LabelEncoder.

    Raises:
        ValueError: If fewer than two distinct labels yield any embeddings.
    """
    features = []
    labels = []
    valid_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")

    # os.listdir returns entries in arbitrary order; sorting keeps the seeded
    # train/test split reproducible from run to run.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.lower().endswith(valid_extensions):
            continue

        # A name such as "photo.jpg" has no label field; taking index 1 would
        # wrongly use the extension as the class label.
        parts = file_name.split(".")
        if len(parts) < 3 or not parts[1]:
            print(f"Skipping {file_name}: expected a name like 'user.<label>.<n>.jpg'")
            continue
        label = parts[1]

        image_path = os.path.join(data_dir, file_name)
        try:
            face_features = extract_face_features(image_path)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            continue
        features.extend(face_features)
        labels.extend([label] * len(face_features))

    # Ensure at least two unique labels for training
    print("Labels found:", set(labels))
    if len(set(labels)) <= 1:
        raise ValueError("The dataset must contain at least two unique classes.")

    # Encode string labels into integers
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)
    feature_matrix = np.asarray(features)

    # Stratify so every class is represented in the training set; fall back
    # to a plain split when a class is too small for stratification.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            feature_matrix, labels_encoded, test_size=0.2, random_state=42,
            stratify=labels_encoded,
        )
    except ValueError as e:
        print(f"Stratified split not possible ({e}); using an unstratified split.")
        X_train, X_test, y_train, y_test = train_test_split(
            feature_matrix, labels_encoded, test_size=0.2, random_state=42
        )

    # Train an SVM classifier (probability=True enables predict_proba)
    clf = SVC(kernel='rbf', probability=True)
    clf.fit(X_train, y_train)

    # Save the trained classifier and label encoder
    with open(classifier_path, "wb") as model_file:
        pickle.dump(clf, model_file)

    with open(encoder_path, "wb") as encoder_file:
        pickle.dump(label_encoder, encoder_file)

    print("Model trained and saved successfully.")
    print(f"Training accuracy: {clf.score(X_train, y_train):.2f}")
    print(f"Test accuracy: {clf.score(X_test, y_test):.2f}")

# Directory containing the captured face images (update path accordingly).
# The class label is parsed from the second dot-separated field of each file
# name, e.g. "user.Joshua.12.jpg" -> "Joshua".
data_directory = r"C:\Users\OMEN\Desktop\PJT\images1"

# Train the classifier; writes classifier.pkl and label_encoder.pkl to the
# current working directory.
train_classifier(data_directory)





Labels found: {'Athul', 'Dawn', 'Joshua', 'Sura'}
Model trained and saved successfully.
Training accuracy: 1.00
Test accuracy: 1.00


In [4]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Activation, BatchNormalization
import os
import cv2
import numpy as np
import pickle
from keras_facenet import FaceNet  # Importing keras-facenet library

# Load the FaceNet model using keras-facenet; read as a global by
# draw_boundary_with_resnet to compute face embeddings.
facenet_model = FaceNet()

def preprocess_face(image, target_size=(160, 160)):
    """Resize a face crop and return it as a float32 batch of one image."""
    resized = cv2.resize(image, target_size)  # FaceNet input size
    as_float = resized.astype("float32")
    # Prepend the batch axis expected by the embedding model
    return as_float[np.newaxis, ...]

def draw_boundary_with_resnet(img, face_net, threshold, clf, label_encoder):
    """Detect faces, classify each one and annotate ``img`` in place.

    Args:
        img: Frame to analyse; rectangles and labels are drawn onto it.
        face_net: OpenCV DNN face detector.
        threshold: Minimum detector confidence for a detection to be used.
        clf: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        label_encoder: Encoder used to map class ids back to names.

    Returns:
        The same ``img`` object, annotated for every face recognised with
        more than 70% probability. Recognised faces are also printed.
    """
    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0))
    face_net.setInput(blob)
    detections = face_net.forward()

    results = []  # One message per recognised face

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > threshold:  # Filter out low-confidence detections
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        startX, startY, endX, endY = (int(v) for v in box.astype("int"))

        # Ensure the box is within the frame
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(w, endX), min(h, endY)
        if endX <= startX or endY <= startY:
            continue  # empty crop: nothing to embed, and cv2.resize would raise

        face = img[startY:endY, startX:endX]
        try:
            face_preprocessed = preprocess_face(face)
            embeddings = facenet_model.embeddings(face_preprocessed)

            # predict_proba columns follow clf.classes_, so the winning column
            # must be mapped through classes_ before decoding the label.
            probabilities = clf.predict_proba(embeddings)[0]
            best = int(np.argmax(probabilities))
            predicted_confidence = probabilities[best] * 100
            name = label_encoder.inverse_transform([clf.classes_[best]])[0]

            # Only annotate confident predictions
            if predicted_confidence > 70:
                cv2.rectangle(img, (startX, startY), (endX, endY), (255, 255, 255), 2)
                # Keep the label inside the frame when the face touches the top edge
                text_y = max(startY - 10, 20)
                cv2.putText(
                    img, f"{name}: {predicted_confidence:.2f}%",
                    (startX, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2
                )
                results.append(f"Face: {name}, Confidence: {predicted_confidence:.2f}%")
        except Exception as e:
            # Best effort: one bad face must not stop the video loop
            print(f"Error processing face: {e}")

    # Print all results for detected faces
    if results:
        print("\n".join(results))

    return img

# Load the ResNet-based face detection model
prototxt_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
model_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

# Load the SVM classifier and label encoder.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# the training cell of this notebook.
with open("classifier.pkl", "rb") as model_file:
    clf = pickle.load(model_file)
with open("label_encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Start video capture
video_capture = cv2.VideoCapture(0)

# try/finally releases the camera and closes the window even when the loop is
# interrupted (e.g. kernel interrupt) or an OpenCV call raises.
try:
    while True:
        ret, img = video_capture.read()
        if not ret:
            print("Failed to capture video frame.")
            break

        img = draw_boundary_with_resnet(img, face_net, threshold=0.7, clf=clf, label_encoder=label_encoder)
        cv2.imshow("Face Detection", img)

        # Mask to the low byte so the Enter check (ASCII 13) is portable
        if (cv2.waitKey(1) & 0xFF) == 13:  # Press Enter to exit
            break
finally:
    video_capture.release()
    cv2.destroyAllWindows()

Face: Joshua, Confidence: 97.70%
Face: Joshua, Confidence: 95.45%
Face: Joshua, Confidence: 94.30%
Face: Joshua, Confidence: 96.37%
Face: Joshua, Confidence: 96.93%
Face: Joshua, Confidence: 97.59%
Face: Joshua, Confidence: 96.66%
Face: Joshua, Confidence: 96.25%
Face: Joshua, Confidence: 96.06%
Face: Joshua, Confidence: 95.46%
Face: Joshua, Confidence: 98.46%
Face: Joshua, Confidence: 97.08%
Face: Joshua, Confidence: 95.82%
Face: Joshua, Confidence: 98.44%
Face: Joshua, Confidence: 98.47%
Face: Joshua, Confidence: 98.86%
Face: Joshua, Confidence: 94.00%
Face: Joshua, Confidence: 94.13%
Face: Joshua, Confidence: 96.62%
Face: Joshua, Confidence: 93.66%
Face: Joshua, Confidence: 83.72%
Face: Joshua, Confidence: 92.95%
Face: Joshua, Confidence: 96.55%
Face: Joshua, Confidence: 86.41%
Face: Joshua, Confidence: 94.64%
Face: Joshua, Confidence: 94.64%
Face: Joshua, Confidence: 94.53%
Face: Joshua, Confidence: 98.14%
Face: Joshua, Confidence: 88.44%
Face: Joshua, Confidence: 94.03%
Face: Josh

In [5]:
import os
import cv2
import numpy as np
import torch
import pickle
from keras_facenet import FaceNet  # Import FaceNet for facial recognition
from ultralytics import YOLO

# Load FaceNet for face embeddings (read as a global by detect_faces_and_recognize)
facenet_model = FaceNet()

# Load YOLOv11 model for weapon detection (read as a global by detect_weapons)
# Alternative weights file, currently unused:
#yolo_model_path = r"C:\Users\OMEN\Desktop\PJT\best (1).pt"
model = YOLO(r"C:\Users\OMEN\Desktop\PJT\best.pt")

# Load ResNet-based face detection model.
# Note: here `model_path` holds the .caffemodel weights, not the .prototxt.
prototxt_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
model_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

# Load the trained SVM model and label encoder for face recognition.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("classifier.pkl", "rb") as model_file:
    clf = pickle.load(model_file)
with open("label_encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

def preprocess_face(image, target_size=(160, 160)):
    """Return ``image`` resized to ``target_size`` as a one-image float32 batch."""
    scaled = cv2.resize(image, target_size)  # Resize to FaceNet input size
    batch = np.expand_dims(scaled.astype("float32"), 0)  # Leading batch axis
    return batch

def detect_faces_and_recognize(img):
    """Detect faces in ``img``, classify them and draw the results in place.

    Uses the module-level ``face_net`` detector, ``facenet_model`` embedder,
    ``clf`` classifier and ``label_encoder``. A green box and a name label are
    drawn for every face recognised with more than 70% probability.

    Args:
        img: Frame to analyse; it is annotated in place.
    """
    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0))
    face_net.setInput(blob)
    detections = face_net.forward()

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > 0.7:  # Confidence threshold
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        startX, startY, endX, endY = (int(v) for v in box.astype("int"))

        # Ensure bounding box is within frame
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(w, endX), min(h, endY)
        if endX <= startX or endY <= startY:
            continue  # empty crop: nothing to embed, and cv2.resize would raise

        face = img[startY:endY, startX:endX]
        try:
            face_preprocessed = preprocess_face(face)
            embeddings = facenet_model.embeddings(face_preprocessed)

            # predict_proba columns follow clf.classes_, so map the winning
            # column through classes_ before decoding it to a name.
            probabilities = clf.predict_proba(embeddings)[0]
            best = int(np.argmax(probabilities))
            predicted_confidence = probabilities[best] * 100
            name = label_encoder.inverse_transform([clf.classes_[best]])[0]

            # Display bounding box if confidence is high
            if predicted_confidence > 70:
                cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)
                # Keep the label inside the frame when the face touches the top edge
                text_y = max(startY - 10, 20)
                cv2.putText(
                    img, f"{name}: {predicted_confidence:.2f}%",
                    (startX, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2
                )
        except Exception as e:
            # Best effort: one bad face must not stop the video loop
            print(f"Error processing face: {e}")

def detect_weapons(img):
    """Run the YOLO model on ``img`` and outline detections above 0.5 in red.

    The frame is annotated in place and also returned.
    """
    red = (0, 0, 255)  # BGR red for weapons
    detected_boxes = model.predict(img)[0].boxes

    for det in detected_boxes:
        corners = det.xyxy[0]  # (x1, y1, x2, y2)
        score = det.conf[0]
        class_idx = det.cls[0]

        if not score > 0.5:  # Confidence threshold
            continue

        class_name = model.names[int(class_idx)]  # e.g. handgun/knife
        top_left = (int(corners[0]), int(corners[1]))
        bottom_right = (int(corners[2]), int(corners[3]))

        cv2.rectangle(img, top_left, bottom_right, red, 2)
        cv2.putText(
            img, f"{class_name}: {score:.2f}",
            (int(corners[0]), int(corners[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, red, 2
        )
    return img




# Start video capture
video_capture = cv2.VideoCapture(0)

# try/finally releases the camera and closes the window even when the loop is
# interrupted (e.g. kernel interrupt) or a detector raises.
try:
    while True:
        ret, img = video_capture.read()
        if not ret:
            print("Failed to capture video frame.")
            break

        # Run both detectors on the same frame, one after the other; each
        # draws its annotations onto img in place.
        detect_faces_and_recognize(img)
        detect_weapons(img)

        # Display results
        cv2.imshow("PRISONSECURE: Real-Time Face & Weapon Detection", img)

        # Mask to the low byte so the Enter check (ASCII 13) is portable
        if (cv2.waitKey(1) & 0xFF) == 13:  # Press Enter to exit
            break
finally:
    video_capture.release()
    cv2.destroyAllWindows()


0: 576x736 (no detections), 102.1ms
Speed: 8.6ms preprocess, 102.1ms inference, 39.1ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 14.9ms
Speed: 3.2ms preprocess, 14.9ms inference, 0.9ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 16.4ms
Speed: 3.3ms preprocess, 16.4ms inference, 0.9ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.9ms
Speed: 3.5ms preprocess, 12.9ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 15.6ms
Speed: 2.9ms preprocess, 15.6ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.3ms
Speed: 2.9ms preprocess, 12.3ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 11.7ms
Speed: 2.6ms preprocess, 11.7ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.2ms
Speed: 2.7ms preprocess, 12.2m

In [None]:
import os
import cv2
import numpy as np
import torch
import pickle
import threading
import simpleaudio as sa  # Plays sound without external players
from pydub import AudioSegment
from pydub.playback import play
from keras_facenet import FaceNet
from ultralytics import YOLO

# Load FaceNet for face embeddings
facenet_model = FaceNet()

# Load YOLOv11 model for weapon detection (read as a global by detect_weapons)
model = YOLO(r"C:\Users\OMEN\Desktop\PJT\best.pt")

# Load ResNet-based face detection model.
# Note: here `model_path` holds the .caffemodel weights, not the .prototxt.
prototxt_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
model_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

# Load the trained SVM model and label encoder for face recognition.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("classifier.pkl", "rb") as model_file:
    clf = pickle.load(model_file)
with open("label_encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Paths to alert sounds (update these paths to where the MP3 files live)
gun_sound_path = r"C:\Users\OMEN\Desktop\PJT\alert-GUN.mp3"
knife_sound_path = r"C:\Users\OMEN\Desktop\PJT\alarm-KNIFE.mp3"

# Decode the MP3 files into pydub AudioSegment objects so they can be
# trimmed and handed to simpleaudio as raw sample data
gun_sound = AudioSegment.from_mp3(gun_sound_path)
knife_sound = AudioSegment.from_mp3(knife_sound_path)

# Flags & tracking variables
gun_detected = False    # a gun alert has been started for the current sighting
knife_detected = False  # a knife alert has been started for the current sighting
current_sound = None  # Playback object of the sound currently playing, if any
stop_event = threading.Event()  # Set by the main loop on exit to stop alerts

def play_sound(sound, duration=5):
    """Start non-blocking playback of the first ``duration`` seconds of ``sound``.

    Any sound started earlier through this function is stopped first. The new
    playback object is stored in the module-level ``current_sound``.
    """
    global current_sound

    # Slice in milliseconds, then pull out what simpleaudio needs
    clip = sound[:duration * 1000]
    pcm_bytes = clip.raw_data
    rate = clip.frame_rate
    channels = clip.channels
    width = clip.sample_width

    # Silence whatever was playing before starting the new clip
    stop_sound()

    current_sound = sa.play_buffer(pcm_bytes, channels, width, rate)

def stop_sound():
    """Stop the sound tracked in ``current_sound`` and clear the reference."""
    global current_sound
    playing = current_sound
    if not playing:
        return  # nothing is playing
    playing.stop()
    current_sound = None

# Cancellation handle of the alternating alert that is currently running
# (None when no alternating alert is active).
_toggle_cancel = None


def _cancel_toggle_alert():
    """Tell the running alternating alert, if any, to stop after its current wait."""
    global _toggle_cancel
    if _toggle_cancel is not None:
        _toggle_cancel.set()
        _toggle_cancel = None


def detect_weapons(img):
    """Detect weapons in ``img``, annotate them and manage the alert sounds.

    Detections with confidence above 0.6 are outlined in red. An alert is
    started only when the detected situation changes (gun only, knife only,
    or both), not on every frame. All sound is stopped as soon as no weapon
    is visible.

    Args:
        img: Frame to analyse; it is annotated in place.

    Returns:
        The same ``img`` object.
    """
    global gun_detected, knife_detected, _toggle_cancel
    results = model.predict(img)

    gun_found = False
    knife_found = False

    for result in results[0].boxes:
        box = result.xyxy[0]
        conf = float(result.conf[0])
        cls = result.cls[0]

        if conf > 0.6:  # Only confident detections are drawn and alerted on
            label = model.names[int(cls)]
            color = (0, 0, 255)

            cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
            cv2.putText(
                img, f"{label}: {conf:.2f}",
                (int(box[0]), int(box[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2
            )

            # Track detections
            lowered = label.lower()
            if "handgun" in lowered:
                gun_found = True
            elif "knife" in lowered:
                knife_found = True

    # (gun_detected, knife_detected) records which alert was last started:
    # (True, True) = alternating, (True, False) = gun, (False, True) = knife.
    if gun_found and knife_found:
        # Start the alternating alert once per sighting; starting it on every
        # frame would restart the audio continuously and pile up threads.
        if not (gun_detected and knife_detected):
            _cancel_toggle_alert()
            stop_sound()
            _toggle_cancel = threading.Event()
            threading.Thread(
                target=play_toggle_alert, args=(_toggle_cancel,), daemon=True
            ).start()
            gun_detected, knife_detected = True, True
    elif gun_found:
        if not gun_detected or knife_detected:
            _cancel_toggle_alert()
            stop_sound()
            threading.Thread(target=play_sound, args=(gun_sound,), daemon=True).start()
            gun_detected, knife_detected = True, False
    elif knife_found:
        if not knife_detected or gun_detected:
            _cancel_toggle_alert()
            stop_sound()
            threading.Thread(target=play_sound, args=(knife_sound,), daemon=True).start()
            knife_detected, gun_detected = True, False
    else:
        _cancel_toggle_alert()
        stop_sound()
        gun_detected, knife_detected = False, False

    return img


def play_toggle_alert(cancel_event=None):
    """Alternate the gun and knife alerts in 1-second clips for five cycles.

    ``play_sound`` returns immediately, so this function waits one second
    after starting each clip; otherwise every clip would be cut off by the
    next one at once.

    Args:
        cancel_event: Optional ``threading.Event``; when set, the alert stops
            before the next clip. The module-level ``stop_event`` is always
            honoured as well.
    """
    def cancelled():
        return stop_event.is_set() or (cancel_event is not None and cancel_event.is_set())

    # Wait on the most specific event available so cancellation is prompt
    waiter = cancel_event if cancel_event is not None else stop_event

    for _ in range(5):  # five gun/knife cycles
        for clip in (gun_sound, knife_sound):
            if cancelled():
                return
            play_sound(clip, duration=1)
            # Event.wait returns True if the event was set during the wait
            if waiter.wait(timeout=1):
                return

# Start video capture
video_capture = cv2.VideoCapture(0)

# try/finally stops the alert and releases the camera and window however the
# loop ends (Enter, camera failure, exception or kernel interrupt).
try:
    while True:
        ret, img = video_capture.read()
        if not ret:
            print("Failed to capture video frame.")
            break

        detect_weapons(img)

        cv2.imshow("PRISONSECURE: Real-Time Weapon Detection", img)

        # Mask to the low byte so the Enter check (ASCII 13) is portable
        if (cv2.waitKey(1) & 0xFF) == 13:  # 13 is the Enter key
            break
finally:
    stop_event.set()  # Stop any ongoing alert sounds
    stop_sound()
    video_capture.release()
    cv2.destroyAllWindows()





0: 576x736 (no detections), 77.2ms
Speed: 4.4ms preprocess, 77.2ms inference, 26.2ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 25.7ms
Speed: 3.6ms preprocess, 25.7ms inference, 0.8ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.7ms
Speed: 3.5ms preprocess, 12.7ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 14.1ms
Speed: 3.4ms preprocess, 14.1ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.1ms
Speed: 3.6ms preprocess, 12.1ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 19.1ms
Speed: 4.5ms preprocess, 19.1ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 14.8ms
Speed: 3.9ms preprocess, 14.8ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 17.1ms
Speed: 3.4ms preprocess, 17.1

In [2]:
import os
import cv2
import numpy as np
import torch
import pickle
import threading
import time
import simpleaudio as sa
from pydub import AudioSegment
from keras_facenet import FaceNet
from ultralytics import YOLO

# ————— Model Loading —————
# FaceNet for face embeddings (read as a global by detect_faces_and_recognize)
facenet_model = FaceNet()

# YOLOv11 for weapon detection (read as a global by detect_weapons)
model = YOLO(r"C:\Users\OMEN\Desktop\PJT\best.pt")

# ResNet-based face detector (OpenCV DNN)
prototxt_path = r"C:\Users\OMEN\Desktop\PJT\deploy.prototxt"
caffemodel_path = r"C:\Users\OMEN\Desktop\PJT\res10_300x300_ssd_iter_140000.caffemodel"
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# SVM classifier and label encoder for face recognition.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("classifier.pkl", "rb") as f:
    clf = pickle.load(f)
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# ————— Alert Sound Setup —————
# A single alert sound is used for every weapon class in this cell
alert_sound = AudioSegment.from_mp3(r"C:\Users\OMEN\Desktop\PJT\alert-GUN.mp3")

# ————— Globals for Alert Control —————
alert_active = False  # True while a play_alert thread is running
stop_event = threading.Event()  # Set by the main loop to cut the alert short
play_obj = None  # Playback object of the most recent alert

def play_alert():
    """Play up to 5 seconds of the alert sound, stopping early on ``stop_event``.

    Intended to run in a background thread. ``alert_active`` is reset to
    False when the function ends, including when playback fails, so that
    later detections can raise a new alert.
    """
    global alert_active, play_obj
    stop_event.clear()
    try:
        segment = alert_sound[:5 * 1000]  # first 5 seconds

        # Start playback (non-blocking)
        play_obj = sa.play_buffer(
            segment.raw_data,
            num_channels=segment.channels,
            bytes_per_sample=segment.sample_width,
            sample_rate=segment.frame_rate
        )

        # Block for the 5-second alert window; wait() returns True as soon as
        # stop_event is set, in which case playback is cut short.
        if stop_event.wait(timeout=5):
            play_obj.stop()
    finally:
        # Without this, an audio error would leave alert_active stuck at True
        # and silently disable all future alerts.
        alert_active = False

def preprocess_face(image, target_size=(160, 160)):
    """Scale a face crop to ``target_size`` and wrap it in a float32 batch of one."""
    face_f32 = cv2.resize(image, target_size).astype("float32")
    return face_f32[None, ...]  # add the leading batch axis

def detect_faces_and_recognize(img):
    """Detect faces in ``img``, classify them and draw the results in place.

    Uses the module-level ``face_net`` detector, ``facenet_model`` embedder,
    ``clf`` classifier and ``label_encoder``. A green box and a name label are
    drawn for every face recognised with more than 70% probability.

    Args:
        img: Frame to analyse; it is annotated in place.
    """
    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (104, 177, 123))
    face_net.setInput(blob)
    detections = face_net.forward()

    for i in range(detections.shape[2]):
        conf = detections[0, 0, i, 2]
        if not conf > 0.7:
            continue

        box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
        startX, startY, endX, endY = (int(v) for v in box)
        # Clamp the box to the frame and skip crops that end up empty, which
        # would otherwise make cv2.resize raise on every frame.
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(w, endX), min(h, endY)
        if endX <= startX or endY <= startY:
            continue

        face = img[startY:endY, startX:endX]
        try:
            face_input = preprocess_face(face)
            embeddings = facenet_model.embeddings(face_input)
            probs = clf.predict_proba(embeddings)[0]
            idx = int(np.argmax(probs))
            confidence = probs[idx] * 100
            # predict_proba columns follow clf.classes_, so decode the class
            # id stored there rather than the raw column index.
            name = label_encoder.inverse_transform([clf.classes_[idx]])[0]

            if confidence > 70:
                cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)
                # Keep the label inside the frame when the face touches the top edge
                text_y = max(startY - 10, 20)
                cv2.putText(
                    img, f"{name}: {confidence:.1f}%",
                    (startX, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2
                )
        except Exception as e:
            # Best effort: one bad face must not stop the video loop
            print(f"Face recognition error: {e}")

def detect_weapons(img):
    """Outline weapons found by the YOLO model and start the alert if idle.

    Detections above 0.5 confidence are drawn in red on ``img``. When at
    least one is found and no alert is running, ``play_alert`` is started in
    a daemon thread. The annotated frame is returned.
    """
    global alert_active
    red = (0, 0, 255)
    weapon_found = False

    for det in model.predict(img)[0].boxes:
        score = float(det.conf[0])
        if not score > 0.5:
            continue

        weapon_found = True
        left, top, right, bottom = det.xyxy[0].cpu().numpy().astype(int)
        class_name = model.names[int(det.cls[0])]
        cv2.rectangle(img, (left, top), (right, bottom), red, 2)
        cv2.putText(
            img, f"{class_name}: {score:.2f}",
            (left, top - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, red, 2
        )

    # Nothing to do when no weapon was seen or an alert is already playing
    if not weapon_found or alert_active:
        return img

    alert_active = True
    threading.Thread(target=play_alert, daemon=True).start()
    return img

# ————— Main Loop —————
video_capture = cv2.VideoCapture(0)

# try/finally stops the alert and releases the camera and window however the
# loop ends (Enter, camera failure, exception or kernel interrupt).
try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            print("Failed to capture frame.")
            break

        # Both detectors annotate the same frame in place
        detect_faces_and_recognize(frame)
        detect_weapons(frame)

        cv2.imshow("PRISONSECURE: Real-Time Detection", frame)
        key = cv2.waitKey(1)

        # Mask to the low byte so the Enter check (ASCII 13) is portable
        if (key & 0xFF) == 13:  # Enter key
            break
finally:
    stop_event.set()  # stop any ongoing alert immediately
    video_capture.release()
    cv2.destroyAllWindows()



0: 576x736 (no detections), 88.9ms
Speed: 6.4ms preprocess, 88.9ms inference, 41.3ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 11.8ms
Speed: 3.1ms preprocess, 11.8ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 11.8ms
Speed: 3.2ms preprocess, 11.8ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.2ms
Speed: 3.0ms preprocess, 12.2ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.2ms
Speed: 3.0ms preprocess, 12.2ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.0ms
Speed: 2.9ms preprocess, 12.0ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 12.2ms
Speed: 3.1ms preprocess, 12.2ms inference, 0.6ms postprocess per image at shape (1, 3, 576, 736)

0: 576x736 (no detections), 11.9ms
Speed: 3.0ms preprocess, 11.9ms 