In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from mtcnn import MTCNN
import cv2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input

import os
import re

In [2]:
df = pd.read_csv('MELD.Raw/dev_sent_emo.csv',encoding='utf-8')
df['Utterance'] = df.Utterance.str.replace('',"'")
# df['gender'] = df.Speaker.apply(lambda x: 'male' if x == 'Ross' or x == 'Joey' or x == 'Chandler' else 'female')
df = df.sample(frac=0.01,ignore_index=True)
df

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,64,What?! They took mine to give to you!,Phoebe,surprise,negative,6,8,8,3,"0:03:33,589","0:03:33,746"
1,535,There are children coming into the world in th...,Phoebe,anger,negative,49,10,1,23,"00:09:38,578","00:09:45,458"
2,900,We're really good.,Chandler,joy,positive,83,7,8,23,"00:10:53,474","00:10:56,143"
3,640,What?,Phoebe,neutral,neutral,61,3,3,21,"00:12:49,393","00:12:50,393"
4,214,"Well for the regular guy, it's bad, but Chandl...",Phoebe,fear,negative,18,7,6,22,"00:16:28,946","00:16:33,074"
5,315,That is not true!,Monica,anger,negative,26,13,2,2,"00:11:36,445","00:11:37,696"
6,813,"And your like 'Mom, get outta here!'",Chandler,anger,negative,76,10,3,1,"00:16:54,054","00:16:57,724"
7,139,How scary is that?,Frank,surprise,positive,13,7,5,3,"00:16:28,529","00:16:30,405"
8,242,"It's just, it's just the luckiest baby in the ...",Phoebe,joy,positive,19,21,1,23,"00:15:28,010","00:15:30,553"
9,487,"Well, they never have any paper in there y'kno...",Rachel,neutral,neutral,45,7,3,17,"00:09:10,591","00:09:24,479"


In [11]:
face_model = tf.keras.models.Sequential()

face_model.add(tf.keras.layers.Conv2D(75, (3, 3), strides=1, padding="same", activation="relu", 
                 input_shape=(224, 224, 3)))



In [3]:
from keras_vggface.vggface import VGGFace
# pip install git+https://github.com/rcmalli/keras-vggface

face_model = VGGFace(model='resnet50')


Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_resnet50.h5


In [5]:
detector = MTCNN()

def preprocess_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img / 255.0 
    return img

def extract_face_embeddings(frame):
    global preprocessed_face
    faces = detector.detect_faces(frame)
    face_embeddings = []
    for face in faces:
        x, y, w, h = face['box']
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, frame.shape[1]), min(y + h, frame.shape[0])
        cropped_face = frame[y1:y2, x1:x2]
        
        # Preprocess
        preprocessed_face = preprocess_image(cropped_face)
        preprocessed_face = np.expand_dims(preprocessed_face, axis=0)

        face_embedding = face_model.predict(preprocessed_face)
        face_embeddings.append(np.squeeze(face_embedding))
        
        return face_embeddings

# Read video
folder_path = 'MELD.Raw/dev_splits_complete/'
one_face_videos = {}

for idx, row in df.iterrows():
    file_name = 'dia' + str(row['Dialogue_ID']) + '_utt' + str(row['Utterance_ID']) + '.mp4'
    video_path = folder_path + file_name
    if os.path.isfile(video_path):
        video_capture = cv2.VideoCapture(video_path)

        single_video_embeddings = []  # List to store embeddings for all frames

        frame_counter = 0

        while frame_counter < 2:
            ret, frame = video_capture.read()
            if not ret:
                break

            # check for more than 1 face
            if len(detector.detect_faces(frame)) == 1:

                # extract face embeddings from each frame
                extracted_embeddings = extract_face_embeddings(frame)

                single_video_embeddings.append(extracted_embeddings)  # Append embeddings for this frame

                # bounding boxes
                for face in detector.detect_faces(frame):
                    x, y, w, h = face['box']
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

                cv2.imshow('Video', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                frame_counter += 1

            else:
                continue

        video_capture.release()
        cv2.destroyAllWindows()

        if single_video_embeddings != []:            
            one_face_videos[file_name] = single_video_embeddings
        else:
            one_face_videos[file_name] = 'too many faces'




In [14]:
len(single_video_embeddings[0][0])

8631

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import numpy as np

# Load pre-trained FaceNet model
model = load_model('facenet_keras_weights.h5')  # Replace with the actual path

# Function to preprocess and extract embeddings from an image
def get_face_embeddings(img_path, model):
    img = image.load_img(img_path, target_size=(160, 160))  # Resizing the image to match FaceNet's input size
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)

    embedding = model.predict(img)
    return embedding

# Path to the image you want to extract embeddings from
image_path = 'path_to_image.jpg'  # Replace with the actual image path

# Get the embeddings for the image
embeddings = get_face_embeddings(image_path, model)
print(embeddings)
