In [1]:
import pandas as pd
import json
import cv2
import numpy as np

In [2]:
# Load dataset from train sample videos json file
def load_json_into_df(file:str):
    files = []
    file_path = f"../../train_sample_videos/{file}"
    with open(file_path) as f:
        data = json.load(f)
        for x in data:
            files.append({
                "name":x,
                "label":data[x]["label"],
                "split":data[x]["split"],
                "original":data[x]["original"]
            })
    return pd.DataFrame(files)

In [3]:
train_df = load_json_into_df("metadata.json")

In [4]:
train_df.head()

Unnamed: 0,name,label,split,original
0,aagfhgtpmv.mp4,FAKE,train,vudstovrck.mp4
1,aapnvogymq.mp4,FAKE,train,jdubbvfswz.mp4
2,abarnvbtwb.mp4,REAL,train,
3,abofeumbvv.mp4,FAKE,train,atvmxvwyns.mp4
4,abqwwspghj.mp4,FAKE,train,qzimuostzz.mp4


In [5]:
def extract_frames(video_path:str, num_frames:int=10):
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    # Get the total number of frames
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Determine no of frames to extract
    num_frames = total_frames if num_frames > total_frames else num_frames


    # Compute the frame indices to extract
    frame_indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)


    # Initialize an empty list to store the frames
    frames = []


    # Loop through the selected frame indices and extract the frames
    for index in frame_indices:
        # Set the frame index
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)

        # Read the frame from the video file
        ret, frame = cap.read()

        # Convert the frame to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Add the frame to the list
        frames.append(frame)


    # Release the video file
    cap.release()


    # Return the list of frames as a NumPy array
    return np.array(frames)

In [6]:
def extract_faces(frame):
    face_cascade = cv2.CascadeClassifier('../../models/haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(
        frame,
        scaleFactor= 1.1,
        minNeighbors= 20,
        minSize=(10, 10)
    )
    faces_detected = format(len(faces)) + " faces detected!"
    print(faces_detected)
    # Draw a rectangle around the faces
    faces_frames = []
    for (x,y,w,h) in faces:
        face = frame[y:y+h, x:x+w]
        faces_frames.append(face)
    return faces_frames

In [7]:
def extract_frames_faces(file_name:str):
    video_path = f"../../train_sample_videos/{file_name}"
    fms = extract_frames(video_path)
    faces_list = []
    for fm in fms:
        faces = extract_faces(fm)
        faces_list.append(faces)
    return np.array(faces_list)

In [None]:
train_df["faces"] = train_df["name"].apply(lambda x: extract_frames_faces(x))

1 faces detected!
1 faces detected!
1 faces detected!
2 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!


  return np.array(faces_list)


0 faces detected!
0 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
0 faces detected!
0 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
0 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
0 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
0 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces detected!
1 faces de