### Uday Singh (22B1262) 
### Ankit Maurya (22B1266)

### Github Link: https://github.com/Uday1Singh/AI_Guard_Agent/tree/master

In [2]:
pip install deepface opencv-python speechrecognition pyttsx3

Collecting deepface
  Downloading deepface-0.0.95-py3-none-any.whl.metadata (35 kB)
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Collecting speechrecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl.metadata (28 kB)
Collecting pyttsx3
  Downloading pyttsx3-2.99-py3-none-any.whl.metadata (6.2 kB)
Collecting gdown>=3.10.1 (from deepface)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting Flask>=1.1.2 (from deepface)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-0.1.1-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.14 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.1-py3-none-any.whl.metadata (5

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.1.2 requires fsspec, which is not installed.
sqlalchemy 2.0.34 requires typing-extensions>=4.6.0, but you have typing-extensions 4.5.0 which is incompatible.


In [4]:
pip install pyaudio

Collecting pyaudioNote: you may need to restart the kernel to use updated packages.

  Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl.metadata (2.7 kB)
Downloading PyAudio-0.2.14-cp38-cp38-win_amd64.whl (164 kB)
   ---------------------------------------- 0.0/164.1 kB ? eta -:--:--
   -- ------------------------------------- 10.2/164.1 kB ? eta -:--:--
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   ---- ---------------------------------- 20.5/164.1 kB 217.9 kB/s eta 0:00:01
   --------- ----------------------------- 41.0/164.1 kB 217.9 kB/s eta 0:00:01
   -------------- ------------------------ 61.4/164.1 kB 251.0 kB/s eta 0:00:01
   -------------------------------------  163.8/164.1 kB 653.6 kB/s eta 0:00:01
   -------------------------------------- 164.1/164.1 kB 578.3 kB/s eta 0:00:00
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [17]:
import tensorflow as tf
print(tf.__version__)

2.13.0


In [6]:
import os
import cv2
import numpy as np
from deepface import DeepFace

MODEL = "Facenet512"
DETECTOR = "retinaface"

# ------------------------------
# Generate augmented images
# ------------------------------
def augment_image(img):
    h, w = img.shape[:2]
    augmented = [img]

    # Flip horizontally
    augmented.append(cv2.flip(img, 1))

    # Brightness changes
    for alpha in [0.8, 1.2]:  # darker, brighter
        bright = cv2.convertScaleAbs(img, alpha=alpha, beta=0)
        augmented.append(bright)

    # Rotations
    for angle in [-10, 10]:
        M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0)
        rotated = cv2.warpAffine(img, M, (w, h))
        augmented.append(rotated)

    # Scaling (zoom in/out)
    for scale in [0.9, 1.1]:
        resized = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        # center crop back to original size
        y1 = max(0, (resized.shape[0] - h)//2)
        x1 = max(0, (resized.shape[1] - w)//2)
        cropped = resized[y1:y1+h, x1:x1+w]
        cropped = cv2.resize(cropped, (w, h))
        augmented.append(cropped)

    return augmented


# ------------------------------
# Compute embedding for an image
# ------------------------------
def get_embedding_from_image(img):
    rep = DeepFace.represent(
        img_path=img,
        model_name=MODEL,
        detector_backend=DETECTOR,
        enforce_detection=False
    )
    emb = np.array(rep[0]["embedding"])
    return emb / np.linalg.norm(emb)


def get_embedding(img_path):
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Cannot read {img_path}")

    augmented_images = augment_image(img)
    embeddings = []

    for aug in augmented_images:
        try:
            emb = get_embedding_from_image(aug)
            embeddings.append(emb)
        except Exception as e:
            print(f"⚠️ Augmentation skipped: {e}")
            continue

    # Average embedding across all augmentations
    if len(embeddings) == 0:
        raise ValueError("No valid embeddings generated.")
    mean_embedding = np.mean(embeddings, axis=0)
    return mean_embedding / np.linalg.norm(mean_embedding)


# ------------------------------
# Compute embeddings for all images
# ------------------------------
def compute_all(folder):
    embeddings, names = [], []
    for f in os.listdir(folder):
        if f.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
            path = os.path.join(folder, f)
            try:
                emb = get_embedding(path)
                embeddings.append(emb)
                names.append(f)
                print(f"✅ {f} processed with augmentation.")
            except Exception as e:
                print(f"⚠️ {f} skipped: {e}")
    return np.array(embeddings), names


# ------------------------------
# Main
# ------------------------------
if __name__ == "__main__":
    trusted_embeddings, _ = compute_all("trusted_faces")
    random_embeddings, _ = compute_all("random_faces")

    np.savez("embeddings.npz", trusted=trusted_embeddings, random=random_embeddings)
    print("✅ Embeddings saved to embeddings.npz")


✅ Uday_Singh.jpg processed with augmentation.
✅ 1.jpg processed with augmentation.
✅ 2.jpg processed with augmentation.
✅ 3.jpg processed with augmentation.
✅ 4.webp processed with augmentation.
✅ 5.webp processed with augmentation.
✅ 6.jpeg processed with augmentation.
✅ Embeddings saved to embeddings.npz


In [7]:
trusted_embeddings

array([[ 4.28407786e-02,  9.43758612e-02,  6.14525407e-03,
         2.22708325e-02,  3.98800564e-02, -1.84416585e-02,
         4.60061828e-02,  3.10852949e-03,  3.24337335e-02,
        -2.53494142e-02, -1.30162903e-02, -2.92052044e-02,
        -8.23839804e-02,  4.81789380e-02,  6.19577349e-02,
        -3.23829688e-02,  3.92418648e-02,  6.64720354e-02,
        -2.20090428e-02,  2.63145139e-03, -8.79192306e-02,
        -1.29556966e-02,  4.39562625e-02, -2.71157954e-02,
        -1.21709071e-02, -2.27222263e-02, -4.69552221e-03,
        -6.93889172e-02,  3.33963229e-03, -2.03577709e-02,
        -3.61025960e-02,  2.19211477e-02, -9.23424928e-03,
         2.89601424e-02,  3.44935050e-02,  2.58279721e-02,
         2.75875055e-02, -2.45519243e-02, -1.94094841e-02,
         2.82676615e-02,  1.39163203e-02, -1.36141110e-02,
        -1.68047280e-02, -1.30647817e-02, -3.71060843e-02,
        -6.41474148e-02, -4.89022878e-02,  9.07705282e-02,
        -7.63883167e-02, -3.54672826e-03,  2.05125787e-0

In [1]:
pip install openai

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install pygame

Defaulting to user installation because normal site-packages is not writeable
Collecting pygame
  Using cached pygame-2.6.1-cp313-cp313-win_amd64.whl.metadata (13 kB)
Downloading pygame-2.6.1-cp313-cp313-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   ------- -------------------------------- 2.1/10.6 MB 11.2 MB/s eta 0:00:01
   ----------- ---------------------------- 3.1/10.6 MB 10.6 MB/s eta 0:00:01
   ------------------- -------------------- 5.2/10.6 MB 9.0 MB/s eta 0:00:01
   ------------------------- -------------- 6.8/10.6 MB 8.5 MB/s eta 0:00:01
   -------------------------------- ------- 8.7/10.6 MB 8.5 MB/s eta 0:00:01
   ---------------------------------------  10.5/10.6 MB 8.6 MB/s eta 0:00:01
   ---------------------------------------- 10.6/10.6 MB 8.2 MB/s  0:00:01
Installing collected packages: pygame
Successfully installed pygame-2.6.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install -U google-genai


Defaulting to user installation because normal site-packages is not writeable
Collecting google-genai
  Using cached google_genai-1.45.0-py3-none-any.whl.metadata (45 kB)
Downloading google_genai-1.45.0-py3-none-any.whl (238 kB)
Downloading google_genai-1.45.0-py3-none-any.whl (238 kB)
Downloading google_genai-1.45.0-py3-none-any.whl (238 kB)
Installing collected packages: google-genai
Successfully installed google-genai-1.45.0
Note: you may need to restart the kernel to use updated packages.




In [2]:
import cv2
import numpy as np
import time
import datetime
import os
import speech_recognition as sr
import threading
import pygame
from deepface import DeepFace
import difflib
from gtts import gTTS
import tempfile
from google import genai

# ========== GEMINI API KEY ==========

def query_gemini(prompt):
    # Initialize GenAI client with your API key
    client = genai.Client(api_key="AIzaSyDIFttbAx7aMskpQtroxB7SsMRBpL15Q_Y")

    # Query Gemini 2.5 Pro
    response = client.models.generate_content(
        model="gemini-2.5-pro",
        contents="Generate a very short, polite but firm verbal warning for an unknown intruder detected in a private room. Give only what to say as it will be read aloud as it is."
    )
    return response.text

# Initialize pygame mixer once
pygame.mixer.init()
buzzer_sound = pygame.mixer.Sound("buzzer.mp3")  # Ensure this mp3 file exists locally
buzzer_playing = False

def start_buzzer():
    global buzzer_playing
    if not buzzer_playing:
        buzzer_playing = True
        buzzer_sound.play(-1)

def stop_buzzer():
    global buzzer_playing
    if buzzer_playing:
        buzzer_sound.stop()
        buzzer_playing = False

def speak(text):
    def _play_audio():
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            filename = tmp.name
        tts = gTTS(text=text, lang='en')
        tts.save(filename)
        pygame.mixer.music.load(filename)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            pygame.time.Clock().tick(10)
        for _ in range(5):
            try:
                os.remove(filename)
                break
            except PermissionError:
                time.sleep(0.1)
    threading.Thread(target=_play_audio, daemon=True).start()

def cosine_sim(a, b):
    return np.dot(a, b) / (np.linalg.norm(a)*np.linalg.norm(b))

# Load embeddings and calculate threshold
data = np.load("embeddings.npz")
trusted_embeddings = data["trusted"]
random_embeddings = data["random"]
trusted_centroid = np.mean(trusted_embeddings, axis=0)
trusted_centroid /= np.linalg.norm(trusted_centroid)
trusted_sims = [cosine_sim(trusted_centroid, t) for t in trusted_embeddings]
random_sims = [cosine_sim(trusted_centroid, r) for r in random_embeddings]
THRESHOLD = (np.mean(trusted_sims) + np.mean(random_sims))/2
print(f"🔹 Auto-calibrated threshold: {THRESHOLD:.3f}")

def wait_for_activation():
    recognizer = sr.Recognizer()
    mic = sr.Microphone()
    target_phrase = "guard my room"
    print("🎙️ Say 'Guard my room' to activate.")
    speak("Say guard my room to activate.")
    while True:
        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            try:
                audio = recognizer.listen(source, timeout=5)
                command = recognizer.recognize_google(audio).lower()
                print(f"🗣️ Heard: {command}")
                if difflib.get_close_matches(command, [target_phrase], n=1, cutoff=0.6):
                    speak("Guarding mode activated.")
                    print("🛡️ Guarding mode ON")
                    return
                else:
                    print("🤔 Didn't match activation phrase.")
            except (sr.WaitTimeoutError, sr.UnknownValueError):
                pass
            except Exception as e:
                print(f"⚠️ Recognizer error: {e}")

def start_guarding():
    cap = cv2.VideoCapture(0)
    frame_count = 0
    unknown_start_time = None
    last_unknown_save = 0
    SAVE_COOLDOWN = 10
    UNKNOWN_ALERT_TIME = 5
    unknown_dir = "unknown_faces"
    os.makedirs(unknown_dir, exist_ok=True)

    print("🎥 Camera active. Press 'q' to quit.")

    while True:
        ret, frame = cap.read()
        if not ret:
            continue

        frame_count += 1
        if frame_count % 3 != 0:
            continue

        frame_resized = cv2.resize(frame, (480, 360))
        try:
            detections = DeepFace.extract_faces(
                img_path=frame_resized,
                detector_backend="opencv",
                enforce_detection=False
            )
        except Exception as e:
            print(f"⚠️ Face extraction error: {e}")
            detections = []

        intruder_found = False

        for det in detections:
            face = det.get("face")
            area = det.get("facial_area", {})
            if face is None or not area:
                continue

            x, y, w, h = int(area.get("x", 0)), int(area.get("y", 0)), int(area.get("w", 0)), int(area.get("h", 0))

            try:
                rep = DeepFace.represent(
                    img_path=face,
                    model_name="Facenet512",
                    detector_backend="skip",
                    enforce_detection=False
                )
                emb = np.array(rep[0]["embedding"])
                emb /= np.linalg.norm(emb)
            except Exception as e:
                print(f"⚠️ Embedding extraction error: {e}")
                continue

            sim = cosine_sim(emb, trusted_centroid)
            now = time.time()

            if sim > THRESHOLD:
                label = f"TRUSTED ({sim:.2f})"
                color = (0, 255, 0)
                unknown_start_time = None
                stop_buzzer()
            else:
                label = f"UNKNOWN ({sim:.2f})"
                color = (0, 0, 255)
                intruder_found = True
                if unknown_start_time is None:
                    unknown_start_time = now
                elif now - unknown_start_time > UNKNOWN_ALERT_TIME:
                    if now - last_unknown_save > SAVE_COOLDOWN:
                        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                        fname = os.path.join(unknown_dir, f"unknown_{ts}.jpg")

                        # Convert face to BGR uint8 for saving
                        face_bgr = cv2.cvtColor((face * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
                        cv2.imwrite(fname, face_bgr)
                        print(f"💾 Unknown face saved: {fname}")

                        if 0:
                            warning_msg = query_gemini("Generate a very short, polite but firm verbal warning for an unknown intruder detected in a private room. Give only what to say as it will be read aloud as it is.")
                        else:
                            warning_msg = "Warning! You are not authorized to be here. Please leave immediately. Owner has been notified and you are being recorded."
                        print("🤖 LLM warning:", warning_msg)

                        speak(warning_msg)
                        start_buzzer()
                        last_unknown_save = now

            # Draw bounding box and label on original frame (resized, so coordinates must fit)
            cv2.rectangle(frame_resized, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame_resized, label, (x, max(y - 10, 0)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        if not intruder_found:
            unknown_start_time = None
            stop_buzzer()

        cv2.imshow("Face Verification", frame_resized)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    stop_buzzer()

if __name__ == "__main__":
    wait_for_activation()
    start_guarding()


🔹 Auto-calibrated threshold: 0.603
🎙️ Say 'Guard my room' to activate.
🗣️ Heard: guard my room
🛡️ Guarding mode ON
🎥 Camera active. Press 'q' to quit.
💾 Unknown face saved: unknown_faces\unknown_20251018_230952.jpg
💾 Unknown face saved: unknown_faces\unknown_20251018_231002.jpg
