# In [None]:
import cv2
import torch
import mediapipe as mp
import time
import torchvision.transforms as transforms
from PIL import Image


# Pick the compute device first so the checkpoint can be mapped directly onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the saved model and move it to the GPU or CPU.
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only machine.
# NOTE: torch.load unpickles arbitrary Python objects -- only load trusted files.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects fully
# pickled models; pass weights_only=False there if loading fails -- confirm version.
model = torch.load(
    'C:/Users/Bruss/Desktop/Speciale/models/deployments/finalmodel_efficientnet_landmarks.pt',
    map_location=device,
)
model = model.to(device)

# Switch dropout / batch-norm layers to inference behaviour; without this a
# model saved in training mode gives unstable predictions on single frames.
model.eval()


# Frame preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean / std values below.
# NOTE(review): the statistics are presumably from the training data -- confirm.
_TARGET_SIZE = (224, 224)
_CHANNEL_MEAN = [0.4429, 0.3043, 0.2806]
_CHANNEL_STD = [0.1187, 0.0874, 0.0728]

preprocess = transforms.Compose([
    transforms.Resize(_TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Open camera feed (device index 0) and fail early with a clear message
# instead of crashing later on the first unreadable frame.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open camera at index 0')

# Initialize the frame counter and the start time for FPS.
# NOTE(review): neither value is read in the visible code (start_time is
# reassigned at the top of every loop iteration) -- kept in case other
# cells rely on them.
frame_count = 0
start_time = time.time()

# Shortcuts to the MediaPipe solution modules
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection

# Create the MediaPipe Hands and Face Detection objects with default settings
hands = mp_hands.Hands()
face_detection = mp_face_detection.FaceDetection()


# Class labels, indexed by the position of the model's highest output.
# NOTE(review): the order presumably has to match the training labels -- confirm.
class_names = ['afraid',  'alone', 'boss', 'hello', 'tough']

# Loop through each frame in the video stream: annotate it with MediaPipe
# hand / face detections, classify the annotated frame and show the result.
# The try/finally guarantees the camera, the MediaPipe objects and the
# OpenCV window are released even if an exception interrupts the loop.
try:
    while True:
        # Time each iteration from the frame grab up to the prediction
        start_time = time.time()
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged or stream ended): stop
            # cleanly instead of passing None to cv2.flip.
            break

        # Flip the frame horizontally for a mirror-like effect
        frame = cv2.flip(frame, 1)

        # MediaPipe is fed an RGB copy; OpenCV frames are BGR
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Detect hands and faces
        hands_results = hands.process(frame_rgb)
        face_detection_results = face_detection.process(frame_rgb)

        # Draw landmarks for hands
        if hands_results.multi_hand_landmarks:
            for hand_landmarks in hands_results.multi_hand_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS)

        # Draw detections for faces
        if face_detection_results.detections:
            for detection in face_detection_results.detections:
                mp.solutions.drawing_utils.draw_detection(
                    frame,
                    detection)

        # Preprocess the annotated frame into a 1-image batch on the device.
        # NOTE(review): `frame` is still in BGR channel order here, while PIL
        # treats the array as RGB -- verify this matches how the training
        # images were produced, otherwise convert with cv2.cvtColor first.
        pil_image = Image.fromarray(frame)
        image_tensor = preprocess(pil_image).unsqueeze(0).to(device)

        # Pass the image through the model without tracking gradients
        with torch.no_grad():
            output = model(image_tensor)

        # Index of the highest score along the class dimension
        predicted_class = torch.argmax(output, dim=1).item()
        predicted_class_name = class_names[predicted_class]
        stop_time = time.time()

        # Overlay the predicted class, the model label and the elapsed time
        cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.putText(frame, "EffecientNet with landmarks", (10, 450), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
        cv2.putText(frame, f"{round((stop_time - start_time) * 1000,3)} ms", (460, 450), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Display the resulting image
        cv2.imshow('frame', frame)

        # Exit if the user presses the 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the VideoCapture object, free the MediaPipe graphs and close
    # the OpenCV window
    cap.release()
    hands.close()
    face_detection.close()
    cv2.destroyAllWindows()
