### Import libs

In [1]:
import json
import pandas as pd
import cv2
import numpy as np
import argparse

In [2]:
import os
# Ask OpenCV's FFmpeg capture backend to use UDP as the RTSP transport.
# NOTE(review): this option concerns RTSP streams; the cells below open local
# .mp4 files, so it presumably has no effect here -- confirm before relying on it.
os.environ.update({"OPENCV_FFMPEG_CAPTURE_OPTIONS": "rtsp_transport;udp"})

### Extract face frames from video files

In [3]:
# Haar cascade classifier shared by detect_faces(). The path is relative to the
# notebook's working directory.
face_cascade = cv2.CascadeClassifier('models/haarcascades/haarcascade_frontalface_alt.xml')

# CascadeClassifier does not raise when the XML file is missing or unreadable;
# it just stays empty and fails later inside detectMultiScale. Fail fast here
# with a message that names the actual problem.
if face_cascade.empty():
    raise FileNotFoundError(
        'could not load Haar cascade from '
        'models/haarcascades/haarcascade_frontalface_alt.xml'
    )


def detect_faces(frame):
    """Detect faces in a single-channel (grayscale) frame.

    The frame is histogram-equalized and then handed to the module-level
    ``face_cascade`` classifier.

    Args:
        frame: Grayscale image; the caller in this notebook passes the output
            of ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        Whatever ``face_cascade.detectMultiScale`` returns for the equalized
        frame; callers unpack each entry as ``(x, y, w, h)``.
    """
    equalized = cv2.equalizeHist(frame)
    return face_cascade.detectMultiScale(equalized)
 
def get_faces_from_video_file(file_path, fake, frame_values=(80, 100, 150, 200), show=True):
    """Extract one face crop per sampled frame of a video and save it as a JPEG.

    For each frame index in ``frame_values`` the frame is read, converted to
    grayscale and passed to ``detect_faces``. When at least one face is found,
    the crop of the LAST detected face is written to
    ``data/{fake}/{file_name}-f{frame_number}.jpg``, where ``file_name`` is the
    base name of ``file_path`` (including its extension).

    Args:
        file_path: Path to the video file.
        fake: Label used as the output sub-directory under ``data/``
            (the caller in this notebook passes ``'fake'`` or ``'real'``).
        frame_values: Frame indices to sample. Defaults to the indices that
            were previously hard-coded.
        show: When True (default, the original behaviour) each frame and face
            crop is shown in an OpenCV window and the function blocks on
            ``cv2.waitKey()`` before returning. Pass False for unattended
            batch extraction.

    Returns:
        None. Problems (unopenable video, unreadable frame, no face found,
        failed write) are reported with ``print`` and the affected frame is
        skipped.
    """
    file_name = os.path.basename(file_path)

    # cv2.imwrite returns False instead of raising when the target directory
    # is missing, so make sure it exists up front.
    out_dir = f'data/{fake}'
    os.makedirs(out_dir, exist_ok=True)

    cap = cv2.VideoCapture(file_path)
    try:
        if not cap.isOpened():
            print(f'could not open video file {file_path}')
            return

        for frame_number in frame_values:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()

            # Videos shorter than frame_number (or decode errors) yield no frame.
            if not ret or frame is None:
                print(f'could not read frame {frame_number} from file {file_path}')
                continue

            if show:
                cv2.imshow(f'img-{frame_number}', frame)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detect_faces(gray)

            if faces is None or len(faces) == 0:
                print(f'face not found for file {file_path}, frame {frame_number}')
                continue

            for (x, y, w, h) in faces:
                faceROI = frame[y:y+h, x:x+w]
                if show:
                    cv2.imshow(f'face-{frame_number}', faceROI)

            # Only the last face found in the frame is saved (unchanged behaviour).
            out_path = f'{out_dir}/{file_name}-f{frame_number}.jpg'
            if not cv2.imwrite(out_path, faceROI):
                print(f'could not write face image {out_path}')
    finally:
        # Always free the decoder handle, even if detection raises.
        cap.release()

    if show:
        cv2.waitKey()
        # Close the preview windows so they do not pile up across videos.
        cv2.destroyAllWindows()

In [4]:
# Directory holding the challenge videos and their metadata.json. The trailing
# slash matters: paths below are built by plain string concatenation.
data_path = 'deepfake-detection-challenge-data/'

# metadata.json maps each video file name to a dict that contains a 'label'
# entry (read by the extraction loop). JSON is UTF-8, so state the encoding
# explicitly instead of relying on the platform's locale default.
with open(f'{data_path}metadata.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [5]:
# real_path = 'data/real/'
# fake_path = 'data/fake/'

# Walk every video listed in the metadata and extract its face crops. The
# lower-cased label is passed on as the output sub-directory name.
for file_name, video_meta in data.items():
    fake = video_meta['label'].lower()
    print('extracting images for file',file_name,':',fake)
    get_faces_from_video_file(data_path + file_name, fake)

extracting images for file aagfhgtpmv.mp4 : fake
extracting images for file aapnvogymq.mp4 : fake
extracting images for file abarnvbtwb.mp4 : real
extracting images for file abofeumbvv.mp4 : fake
extracting images for file abqwwspghj.mp4 : fake
extracting images for file acifjvzvpm.mp4 : fake
extracting images for file acqfdwsrhi.mp4 : fake
extracting images for file acxnxvbsxk.mp4 : fake
extracting images for file acxwigylke.mp4 : fake
extracting images for file aczrgyricp.mp4 : fake
extracting images for file adhsbajydo.mp4 : fake
face not found for file deepfake-detection-challenge-data/adhsbajydo.mp4, frame 80
extracting images for file adohikbdaz.mp4 : fake
extracting images for file adylbeequz.mp4 : fake
face not found for file deepfake-detection-challenge-data/adylbeequz.mp4, frame 200
extracting images for file aelfnikyqj.mp4 : real
extracting images for file aelzhcnwgf.mp4 : fake
extracting images for file aettqgevhz.mp4 : fake
extracting images for file aevrfsexku.mp4 : fake


extracting images for file bctvsmddgq.mp4 : fake
extracting images for file bdbhekrrwo.mp4 : fake
extracting images for file bddjdhzfze.mp4 : real
extracting images for file bdgipnyobr.mp4 : fake
extracting images for file bdnaqemxmr.mp4 : real
extracting images for file bdxuhamuqx.mp4 : fake
extracting images for file beboztfcme.mp4 : real
extracting images for file bejhvclboh.mp4 : real
face not found for file deepfake-detection-challenge-data/bejhvclboh.mp4, frame 150
face not found for file deepfake-detection-challenge-data/bejhvclboh.mp4, frame 200
extracting images for file benmsfzfaz.mp4 : fake
face not found for file deepfake-detection-challenge-data/benmsfzfaz.mp4, frame 200
extracting images for file beyebyhrph.mp4 : real
extracting images for file bffwsjxghk.mp4 : real
extracting images for file bgaogsjehq.mp4 : fake
extracting images for file bggsurpgpr.mp4 : fake
extracting images for file bghphrsfxf.mp4 : fake
extracting images for file bgmlwsoamc.mp4 : fake
face not foun

extracting images for file cepxysienc.mp4 : fake
extracting images for file cettndmvzl.mp4 : fake
face not found for file deepfake-detection-challenge-data/cettndmvzl.mp4, frame 200
extracting images for file ceymbecxnj.mp4 : fake
extracting images for file cferslmfwh.mp4 : fake
face not found for file deepfake-detection-challenge-data/cferslmfwh.mp4, frame 100
face not found for file deepfake-detection-challenge-data/cferslmfwh.mp4, frame 150
extracting images for file cffffbcywc.mp4 : fake
extracting images for file cfxkpiweqt.mp4 : real
extracting images for file cfyduhpbps.mp4 : fake
extracting images for file cglxirfaey.mp4 : fake
face not found for file deepfake-detection-challenge-data/cglxirfaey.mp4, frame 200
extracting images for file cgvrgibpfo.mp4 : fake
face not found for file deepfake-detection-challenge-data/cgvrgibpfo.mp4, frame 150
extracting images for file chtapglbcj.mp4 : real
extracting images for file chviwxsfhg.mp4 : real
face not found for file deepfake-detectio

extracting images for file dhjmzhrcav.mp4 : fake
extracting images for file dhkwmjxwrn.mp4 : fake
extracting images for file dhoqofwoxa.mp4 : fake
extracting images for file dhxctgyoqj.mp4 : real
face not found for file deepfake-detection-challenge-data/dhxctgyoqj.mp4, frame 200
extracting images for file diomeixhrg.mp4 : fake
extracting images for file diopzaywor.mp4 : fake
extracting images for file diqraixiov.mp4 : fake
face not found for file deepfake-detection-challenge-data/diqraixiov.mp4, frame 80
face not found for file deepfake-detection-challenge-data/diqraixiov.mp4, frame 100
face not found for file deepfake-detection-challenge-data/diqraixiov.mp4, frame 150
extracting images for file diuzrpqjli.mp4 : fake
face not found for file deepfake-detection-challenge-data/diuzrpqjli.mp4, frame 80
face not found for file deepfake-detection-challenge-data/diuzrpqjli.mp4, frame 150
face not found for file deepfake-detection-challenge-data/diuzrpqjli.mp4, frame 200
extracting images for 

face not found for file deepfake-detection-challenge-data/ellavthztb.mp4, frame 80
face not found for file deepfake-detection-challenge-data/ellavthztb.mp4, frame 150
face not found for file deepfake-detection-challenge-data/ellavthztb.mp4, frame 200
extracting images for file elvvackpjh.mp4 : fake
extracting images for file emaalmsonj.mp4 : fake
extracting images for file emfbhytfhc.mp4 : fake
extracting images for file emgjphonqb.mp4 : fake
extracting images for file ensyyivobf.mp4 : fake
face not found for file deepfake-detection-challenge-data/ensyyivobf.mp4, frame 80
face not found for file deepfake-detection-challenge-data/ensyyivobf.mp4, frame 100
face not found for file deepfake-detection-challenge-data/ensyyivobf.mp4, frame 150
extracting images for file eoewqcpbgt.mp4 : fake
extracting images for file eprybmbpba.mp4 : fake
extracting images for file epymyyiblu.mp4 : fake
extracting images for file eqjscdagiv.mp4 : fake
extracting images for file eqnoqyfquo.mp4 : real
face not

### Single file test

In [5]:
#get_faces_from_video_file(data_path + 'agqphdxmwt.mp4','fake')

face not found for file deepfake-detection-challenge-data/agqphdxmwt.mp4, frame 100
face not found for file deepfake-detection-challenge-data/agqphdxmwt.mp4, frame 150
