In [33]:
import cv2
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import tensorflow as tf
import numpy as np

# Load MTCNN model for face detection
detector = MTCNN()

# Load FaceNet model for face embedding extraction
facenet_model = tf.keras.applications.ResNet50(weights='imagenet')  # Provide the path to your FaceNet model

def preprocess_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img / 255.0  # Normalize pixel values
    return img

def extract_face_embeddings(frame):
    faces = detector.detect_faces(frame)
    face_embeddings = []
    for face in faces:
        x, y, w, h = face['box']
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, frame.shape[1]), min(y + h, frame.shape[0])
        cropped_face = frame[y1:y2, x1:x2]
        
        # Preprocess the face for FaceNet model
        preprocessed_face = preprocess_image(cropped_face)
        preprocessed_face = np.expand_dims(preprocessed_face, axis=0)
        
        # Get face embeddings using FaceNet model
        face_embedding = facenet_model.predict(preprocessed_face)
        face_embeddings.append(face_embedding)
    return face_embeddings

# Read video
video_capture = cv2.VideoCapture('MELD.Raw\dev_splits_complete\dia0_utt1.mp4')  # Provide the path to your input video

while True:
    ret, frame = video_capture.read()
    if not ret:
        break
    
    # Extract face embeddings from each frame
    extracted_embeddings = extract_face_embeddings(frame)
    
    # Perform further processing or save the embeddings for later use
    
    # Display the frame with bounding boxes around detected faces
    for face in detector.detect_faces(frame):
        x, y, w, h = face['box']
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    
    cv2.imshow('Video', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()




In [39]:
import cv2
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import numpy as np

# Load MTCNN model for face detection
detector = MTCNN()

# Load FaceNet model for face embedding extraction
facenet_model = tf.keras.applications.ResNet50(weights='imagenet')  # Provide the path to your FaceNet model

def preprocess_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img / 255.0  # Normalize pixel values
    return img

def extract_face_embeddings(frame):
    faces = detector.detect_faces(frame)
    face_embeddings = []
    for face in faces:
        x, y, w, h = face['box']
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, frame.shape[1]), min(y + h, frame.shape[0])
        cropped_face = frame[y1:y2, x1:x2]
        
        # Preprocess the face for FaceNet model
        preprocessed_face = preprocess_image(cropped_face)
        preprocessed_face = np.expand_dims(preprocessed_face, axis=0)
        
        # Get face embeddings using FaceNet model
        face_embedding = facenet_model.predict(preprocessed_face)
        face_embeddings.append(face_embedding)
    return face_embeddings

# Read video
video_capture = cv2.VideoCapture('MELD.Raw\dev_splits_complete\dia0_utt1.mp4')  # Provide the path to your input video

all_embeddings = []  # List to store embeddings for all frames

while True:
    ret, frame = video_capture.read()
    if not ret:
        break
    
    # Extract face embeddings from each frame
    extracted_embeddings = extract_face_embeddings(frame)
    
    all_embeddings.append(extracted_embeddings)  # Append embeddings for this frame
    
    # Display the frame with bounding boxes around detected faces
    for face in detector.detect_faces(frame):
        x, y, w, h = face['box']
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    
    cv2.imshow('Video', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()

# 'all_embeddings' now contains embeddings for all frames




In [73]:
len(all_embeddings)

32

In [62]:
video_capture = cv2.VideoCapture('MELD.Raw\dev_splits_complete\dia0_utt1.mp4')  # Provide the path to your input video
a = video_capture.read()

In [68]:
frame = a[1]
d = extract_face_embeddings(frame)

d



[array([[1.65067715e-04, 3.08962568e-04, 5.68546639e-05, 1.02821868e-04,
         4.97376241e-05, 4.95207147e-04, 8.41247311e-06, 3.32195341e-05,
         1.58717867e-05, 1.36021088e-04, 7.17857212e-04, 1.34599759e-04,
         5.56620253e-05, 7.73491920e-05, 1.53545006e-05, 4.53807406e-05,
         1.18944103e-04, 2.67633804e-05, 5.47217460e-05, 4.72638530e-05,
         1.86047895e-04, 1.60284457e-03, 9.36483731e-04, 2.12545114e-04,
         8.22547445e-05, 1.05596388e-04, 3.12896213e-04, 2.24479329e-04,
         1.62987373e-04, 1.86084188e-04, 3.98459597e-05, 3.10626492e-04,
         8.45354880e-05, 1.10271110e-04, 2.67886237e-04, 1.48533945e-05,
         1.20097386e-04, 1.02365484e-05, 5.76063897e-03, 3.49752736e-05,
         4.43581739e-05, 1.94220644e-04, 2.31418599e-04, 1.62499506e-04,
         5.20346621e-05, 1.19800199e-04, 3.74639894e-05, 2.21135502e-04,
         3.51638337e-05, 3.51263297e-05, 1.63033081e-04, 4.09773667e-04,
         2.76751205e-04, 4.93040599e-04, 2.46404437

In [71]:
detector.detect_faces(frame)



[{'box': [858, 264, 153, 197],
  'confidence': 0.9999891519546509,
  'keypoints': {'left_eye': (908, 341),
   'right_eye': (979, 339),
   'nose': (947, 384),
   'mouth_left': (914, 419),
   'mouth_right': (973, 418)}},
 {'box': [381, 155, 134, 175],
  'confidence': 0.9998505115509033,
  'keypoints': {'left_eye': (419, 220),
   'right_eye': (483, 218),
   'nose': (451, 261),
   'mouth_left': (425, 293),
   'mouth_right': (478, 292)}},
 {'box': [249, 80, 60, 80],
  'confidence': 0.7647380232810974,
  'keypoints': {'left_eye': (265, 101),
   'right_eye': (291, 102),
   'nose': (275, 118),
   'mouth_left': (265, 134),
   'mouth_right': (289, 135)}}]