In [1]:
import cv2
from PIL import Image, ImageDraw
import torch
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
from deepface import DeepFace

  from .autonotebook import tqdm as notebook_tqdm


# Capture and Save Owner Embedding

In [4]:
def capture_and_save_owner_embedding():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
    
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")
    
    while True:
        ret, frame = cap.read()
        if not ret:
            continue
        
        # Convert to RGB and detect faces
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(frame_rgb)
        boxes, _ = mtcnn.detect(pil_img)
        
        if boxes is not None:
            # Extract the face with the largest area (most likely the main subject)
            areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in boxes]
            largest_face_index = areas.index(max(areas))
            largest_face = mtcnn.extract(pil_img, [boxes[largest_face_index]], None)
            
            # Save the embedding of the largest detected face
            owner_embedding = resnet(largest_face).detach()
            torch.save(owner_embedding, 'owner_embedding.pt')
            
            print("Owner's face embedding captured and saved.")
            break
    
    cap.release()
    cv2.destroyAllWindows()
    return owner_embedding 

Running on device: cpu


# Recognize and Highlight Owner

In [None]:
def recognize_and_highlight():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
    owner_embedding = torch.load('owner_embedding.pt')

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(frame_rgb)

            # Detect faces
            boxes, _ = mtcnn.detect(pil_img)
            draw = ImageDraw.Draw(pil_img)
            if boxes is not None:
                faces = mtcnn.extract(pil_img, boxes, None)
                embeddings = resnet(faces).detach()

                for i, box in enumerate(boxes):
                    # Calculate distance to the owner's embedding
                    distance = (embeddings[i] - owner_embedding).norm().item()
                    if distance < 0.6:  # threshold for recognition, tune based on your dataset
                        outline_color = (0, 255, 0)  # Green for owner
                    else:
                        outline_color = (255, 0, 0)  # Red for others

                    draw.rectangle(box.tolist(), outline=outline_color, width=6)

            # Display the image
            cv_frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
            cv2.imshow('Webcam', cv_frame)
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Is Owner Present?

In [None]:
def is_owner_present():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
    owner_embedding = torch.load('owner_embedding.pt')

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    owner_detected = False  # Flag to indicate if the owner is detected

    try:
        # Capture a single frame
        ret, frame = cap.read()
        if ret:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(frame_rgb)

            # Detect faces
            boxes, _ = mtcnn.detect(pil_img)
            if boxes is not None:
                faces = mtcnn.extract(pil_img, boxes, None)
                embeddings = resnet(faces).detach()

                for embedding in embeddings:
                    # Calculate distance to the owner's embedding
                    distance = (embedding - owner_embedding).norm().item()
                    if distance < 0.6:  # threshold for recognition, tune based on your dataset
                        owner_detected = True
                        break

        # Optionally, display the frame with or without marking
        # Comment out if not needed
        cv_frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        cv2.imshow('Webcam', cv_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            pass

    finally:
        cap.release()
        cv2.destroyAllWindows()

    return owner_detected

# Example of using the function
owner_present = is_owner_present()
print("Owner present:", owner_present)

In [3]:
import time

def is_owner_present():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
    owner_embedding = torch.load('owner_embedding.pt')

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    owner_detected = False  # Flag to indicate if the owner is detected
    start_time = time.time()

    try:
        while time.time() - start_time < 5:  # Check for 5 seconds
            ret, frame = cap.read()
            if not ret:
                continue  # Skip this loop if frame is not captured correctly

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(frame_rgb)

            # Detect faces
            boxes, _ = mtcnn.detect(pil_img)
            if boxes is not None:
                faces = mtcnn.extract(pil_img, boxes, None)
                embeddings = resnet(faces).detach()

                for embedding in embeddings:
                    # Calculate distance to the owner's embedding
                    distance = (embedding - owner_embedding).norm().item()
                    if distance < 0.6:  # threshold for recognition, tune based on your dataset
                        owner_detected = True
                        break

            if owner_detected:
                break  # Stop checking if owner is already detected

            # Display the frame (optional)
            cv_frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
            cv2.imshow('Webcam', cv_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()

    return owner_detected

In [5]:
owner_present = is_owner_present()
print("Owner present:", owner_present)

Owner present: True


# Emotion Detection

In [12]:
# Load face cascade classifier
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Start capturing video
cap = cv2.VideoCapture(0)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Convert frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Convert grayscale frame to RGB format
    rgb_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2RGB)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    for (x, y, w, h) in faces:
        # Extract the face ROI (Region of Interest)
        face_roi = rgb_frame[y:y + h, x:x + w]

        
        # Perform emotion analysis on the face ROI
        result = DeepFace.analyze(face_roi, actions=['emotion'], enforce_detection=False)

        # Determine the dominant emotion
        emotion = result[0]['dominant_emotion']

        # Draw rectangle around face and label with predicted emotion
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.putText(frame, emotion, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

    # Display the resulting frame
    cv2.imshow('Real-time Emotion Detection', frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close all windows
cap.release()
cv2.destroyAllWindows()

In [16]:
import cv2
from deepface import DeepFace

def continuous_emotion_detection():
    # Load face cascade classifier
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Start capturing video
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                continue  # If no frame is captured, skip to the next iteration

            # Convert frame to grayscale for face detection
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces in the frame
            faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            for (x, y, w, h) in faces:
                # Extract the face ROI (Region of Interest)
                face_roi = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2RGB)  # Use original frame to get RGB ROI

                # Perform emotion analysis on the face ROI
                try:
                    # Perform emotion analysis on the face ROI
                    result = DeepFace.analyze(face_roi, actions=['emotion'], enforce_detection=False)

                    # Determine the dominant emotion
                    emotion = result[0]['dominant_emotion']
                    yield emotion  # Yield the dominant emotion detected
                except Exception as e:
                    print(f"Error in emotion analysis: {e}")

    finally:
        # Release the capture and close all windows
        cap.release()

# Example usage
for detected_emotion in continuous_emotion_detection():
    print("Detected Emotion:", detected_emotion)
    # Additional logic can be added here based on the application's needs


Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: angry
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: sad
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: angry
Detected Emotion: angry
Detected Emotion: happy
Detected Emotion: sad
Detected Emotion: happy
Detected Emotion: happy
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: angry
Detected Emotion: sad
Detected Emotion: happy
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: angry
Detected Emotion: angry
Detected Emotion: sad
Detected Emotion: sad
Detected Emotion: 

KeyboardInterrupt: 

# Chatbot

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def chat():
    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium", padding_side='left')
    model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

    # Chatbot initialization message
    print("Woof! I'm your chat pet. Type 'quit' to stop playing with me.")
    
    chat_history_ids = None

    while True:
        user_input = input("You: ")
        if user_input.lower() == 'quit':
            print("Woof woof! Bye!")
            break

        # Custom responses for pet-like behavior
        if "what are you" in user_input.lower():
            print("Bot: I'm your friendly chat pet! I like pats and treats!")
            continue
        elif "do you like" in user_input.lower():
            print("Bot: I love everything about you!")
            continue

        # Encode and generate response
        new_user_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
        bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=1) if chat_history_ids is not None else new_user_input_ids
        chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
        bot_output = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)

        print("Bot: ", bot_output)

if __name__ == "__main__":
    chat()

KeyboardInterrupt: 

In [9]:
import cv2
from deepface import DeepFace

def emotion_detection():
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    cap = cv2.VideoCapture(0)
    
    if not cap.isOpened():
        raise IOError("Cannot open webcam")
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                continue

            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            rgb_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2RGB)
            faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            for (x, y, w, h) in faces:
                face_roi = rgb_frame[y:y+h, x:x+w]
                try:
                    result = DeepFace.analyze(face_roi, actions=['emotion'], enforce_detection=False)
                    emotion = result['dominant_emotion']
                    yield emotion
                except Exception as e:
                    print(f"Error in emotion detection: {e}")
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [11]:
emotions = emotion_detection()

for detected_emotion in emotions:
    user_input = f"I sense you're {detected_emotion}. What happened?" if detected_emotion == 'sad' else \
        "Did I do something wrong?" if detected_emotion == 'angry' else \
        "You seem happy! Want to play?" if detected_emotion == 'happy' else \
        "How are you feeling?"

    print(f"Pet senses: {user_input}")

Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str
Error in emotion detection: list indices must be integers or slices, not str

KeyboardInterrupt: 

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def chat():
    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium", padding_side='left')
    model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

    print("Woof! I'm your chat pet. Type 'quit' to stop playing with me.")
    chat_history_ids = None

    # Start the emotion detection generator
    emotions = emotion_detection()

    try:
        for detected_emotion in emotions:
            user_input = f"I sense you're {detected_emotion}. What happened?" if detected_emotion == 'sad' else \
                         "Did I do something wrong?" if detected_emotion == 'angry' else \
                         "You seem happy! Want to play?" if detected_emotion == 'happy' else \
                         "How are you feeling?"

            print(f"Pet senses: {user_input}")

            new_user_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
            bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=1) if chat_history_ids is not None else new_user_input_ids
            chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
            bot_output = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
            print("Bot: ", bot_output)
    except KeyboardInterrupt:
        print("Chat interrupted.")

if __name__ == "__main__":
    chat()