In [None]:
pip install tensorflow keras-facenet


In [None]:
import cv2
import mediapipe as mp
import os
import numpy as np
from keras_facenet import FaceNet
import subprocess  


# FaceNet model; used below to turn a 160x160 RGB image into an embedding vector.
embedder = FaceNet()

# Directory of reference images. Each image file's name (without extension)
# becomes the display name for that person.
dataset_path = "picuus"

# Parallel lists: known_face_encodings[i] is the embedding for known_face_names[i].
known_face_encodings = []
known_face_names = []

# sorted() makes the load order reproducible; os.listdir() order is arbitrary.
for filename in sorted(os.listdir(dataset_path)):
    # Compare the extension case-insensitively so e.g. "photo.JPG" is not skipped.
    if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    image_path = os.path.join(dataset_path, filename)
    image = cv2.imread(image_path)
    # cv2.imread() signals failure by returning None rather than raising;
    # skip such files instead of crashing in cvtColor below.
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue

    # OpenCV loads BGR; convert to RGB and resize to the 160x160 input
    # used for every embedding in this notebook. The whole image is embedded
    # as-is (no face crop is applied here).
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (160, 160))
    face_encoding = embedder.embeddings([image])[0]

    known_face_encodings.append(face_encoding)
    known_face_names.append(os.path.splitext(filename)[0])

print(f"Loaded {len(known_face_names)} faces from dataset.")


mp_face_detection = mp.solutions.face_detection

# Largest Euclidean distance between embeddings that still counts as a match.
MATCH_THRESHOLD = 1.1

# Stack the reference embeddings once instead of re-coercing the list on
# every detected face inside the loop.
known_encodings_matrix = np.asarray(known_face_encodings)

video_capture = cv2.VideoCapture(0)
final_detected_name = "Unknown"
# Set when "Aayush" is recognised; the notebook is launched only after the
# camera and preview window have been released.
launch_notebook = False

try:
    with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.7) as face_detection:
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            if not ret:
                break

            frame_h, frame_w = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(rgb_frame)

            for detection in results.detections or []:
                bbox = detection.location_data.relative_bounding_box

                # Convert the relative box to pixel corners, then clamp the
                # corners to the frame. Clamping corners (rather than x/y and
                # width/height separately) keeps the crop correct when the box
                # starts outside the frame.
                x = int(bbox.xmin * frame_w)
                y = int(bbox.ymin * frame_h)
                x2 = min(x + int(bbox.width * frame_w), frame_w)
                y2 = min(y + int(bbox.height * frame_h), frame_h)
                x, y = max(x, 0), max(y, 0)

                # Nothing of the box lies inside the frame.
                if x2 <= x or y2 <= y:
                    continue

                face = frame[y:y2, x:x2]
                face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = cv2.resize(face, (160, 160))
                face_encoding = embedder.embeddings([face])[0]

                # Nearest reference embedding wins if it is close enough.
                name = "Unknown"
                if len(known_face_encodings) > 0:
                    distances = np.linalg.norm(known_encodings_matrix - face_encoding, axis=1)
                    best_match = np.argmin(distances)

                    if distances[best_match] < MATCH_THRESHOLD:
                        name = known_face_names[best_match]

                final_detected_name = name

                cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                if name == "Aayush":
                    launch_notebook = True
                    break

            # Leave the capture loop as soon as the target person is seen.
            if launch_notebook:
                break

            cv2.imshow("Face Recognition", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the preview window, even on error.
    video_capture.release()
    cv2.destroyAllWindows()

if launch_notebook:
    print(f"Recognized {final_detected_name}, opening Jupyter Notebook...")
    # Popen starts the server without blocking this cell. The launch happens
    # after cleanup so the camera is not held while the server runs, and no
    # exit() is used because that would shut down the notebook kernel.
    subprocess.Popen(["jupyter", "notebook", "pdf_extract.ipynb"])

print(f"Final detected person: {final_detected_name}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [3]:
import cv2
import mediapipe as mp
import os
import numpy as np
from keras_facenet import FaceNet
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
from comtypes import CLSCTX_ALL

# FaceNet model; used below to turn a 160x160 RGB image into an embedding vector.
embedder = FaceNet()
# Directory of reference images; the file name (without extension) is the person's name.
dataset_path = "picuus"

# Parallel lists: known_face_encodings[i] is the embedding for known_face_names[i].
known_face_encodings = []
known_face_names = []

# sorted() makes the load order reproducible; os.listdir() order is arbitrary.
for filename in sorted(os.listdir(dataset_path)):
    # Case-insensitive extension check so e.g. "photo.JPG" is not skipped.
    if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    image_path = os.path.join(dataset_path, filename)
    image = cv2.imread(image_path)
    # cv2.imread() returns None on failure instead of raising; skip such
    # files rather than crashing in cvtColor below.
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue

    # BGR -> RGB, then resize to the 160x160 input used for every embedding here.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (160, 160))
    face_encoding = embedder.embeddings([image])[0]
    known_face_encodings.append(face_encoding)
    known_face_names.append(os.path.splitext(filename)[0])

print(f"Loaded {len(known_face_names)} faces from dataset")


mp_face_detection = mp.solutions.face_detection
mp_hands = mp.solutions.hands

# Master-volume interface of the default speaker endpoint (via pycaw).
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = interface.QueryInterface(IAudioEndpointVolume)

# Largest Euclidean distance between embeddings that still counts as a match.
MATCH_THRESHOLD = 1.1
# Thumb-to-index distance (in normalised image coordinates) treated as a pinch.
PINCH_THRESHOLD = 0.05
# Volume change applied per processed frame, as a fraction of full scale.
VOLUME_STEP = 0.05

# Stack the reference embeddings once instead of on every detected face.
known_encodings_matrix = np.asarray(known_face_encodings)

video_capture = cv2.VideoCapture(0)
final_detected_name = "Unknown"
# True once the most recent *matched* face was "Aayush". Faces that match
# nobody leave this flag unchanged.
# NOTE(review): confirm whether unknown faces should reset verification.
is_verified = False

try:
    # Both MediaPipe solutions are opened as context managers so their
    # resources are released when the loop ends.
    with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.7) as face_detection, \
            mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7) as hands:
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            if not ret:
                break

            frame_h, frame_w = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(rgb_frame)

            for detection in results.detections or []:
                bbox = detection.location_data.relative_bounding_box

                # Convert the relative box to pixel corners and clamp the
                # corners to the frame, so a box that starts outside the
                # frame is cropped correctly.
                box_x = int(bbox.xmin * frame_w)
                box_y = int(bbox.ymin * frame_h)
                box_x2 = min(box_x + int(bbox.width * frame_w), frame_w)
                box_y2 = min(box_y + int(bbox.height * frame_h), frame_h)
                box_x, box_y = max(box_x, 0), max(box_y, 0)

                # Nothing of the box lies inside the frame.
                if box_x2 <= box_x or box_y2 <= box_y:
                    continue

                face = frame[box_y:box_y2, box_x:box_x2]
                face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = cv2.resize(face, (160, 160))
                face_encoding = embedder.embeddings([face])[0]

                # Nearest reference embedding wins if it is close enough.
                name = "Unknown"
                if len(known_face_encodings) > 0:
                    distances = np.linalg.norm(known_encodings_matrix - face_encoding, axis=1)
                    best_match = np.argmin(distances)

                    if distances[best_match] < MATCH_THRESHOLD:
                        name = known_face_names[best_match]
                        is_verified = name == "Aayush"

                final_detected_name = name

                cv2.rectangle(frame, (box_x, box_y), (box_x2, box_y2), (0, 255, 0), 2)
                cv2.putText(frame, name, (box_x, box_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            # Hand gestures only control the volume after verification.
            if is_verified:
                hand_results = hands.process(rgb_frame)
                for hand_landmarks in hand_results.multi_hand_landmarks or []:
                    # Landmarks 4 and 8 are the thumb tip and index fingertip;
                    # coordinates are normalised, with y growing downwards.
                    x_thumb = hand_landmarks.landmark[4].x
                    y_thumb = hand_landmarks.landmark[4].y
                    x_index = hand_landmarks.landmark[8].x
                    y_index = hand_landmarks.landmark[8].y

                    pinch_distance = np.linalg.norm(np.array([x_thumb, y_thumb]) - np.array([x_index, y_index]))

                    # Fall back to 50% only when no value is returned. A real
                    # reading of 0.0 must be kept, otherwise the volume can
                    # never stay at zero.
                    current_volume = volume.GetMasterVolumeLevelScalar()
                    if current_volume is None:
                        current_volume = 0.5

                    if pinch_distance < PINCH_THRESHOLD:
                        # Pinch: reset to 50%.
                        new_volume = 0.5
                    elif y_index < y_thumb:
                        # Index fingertip above the thumb tip: volume up.
                        new_volume = min(1.0, current_volume + VOLUME_STEP)
                    elif y_index > y_thumb:
                        # Index fingertip below the thumb tip: volume down.
                        new_volume = max(0.0, current_volume - VOLUME_STEP)
                    else:
                        new_volume = current_volume

                    if new_volume != current_volume:
                        volume.SetMasterVolumeLevelScalar(new_volume, None)

                    # On-screen volume read-out and level bar.
                    cv2.putText(frame, f"Volume: {int(new_volume * 100)}%", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.rectangle(frame, (50, 70), (50 + int(new_volume * 200), 100), (0, 255, 0), -1)

            cv2.imshow("Face & Hand Control", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the preview window, even on error.
    video_capture.release()
    cv2.destroyAllWindows()

print(f"Final detected person: {final_detected_name}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

In [None]:
pip install pycaw
