In [1]:
import time
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

### Load dataframe

In [2]:
# Load the master dataframe (one row per video file) from a local pickle.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory. Unpickling can execute arbitrary code, so only
# load pickle files from a trusted source.
df = pd.read_pickle('/home/mc/dev/deepfake-detection-challenge/notebooks/master_dataframe.pkl')

In [3]:
# Display the loaded dataframe; the captured output below is the truncated
# repr (head and tail rows only).
df

Unnamed: 0,file,label,original,split,filepath,fakes
0,aadqbokerz.mp4,FAKE,uqxeoibzvv.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_25/aad...,
1,abingyvkus.mp4,FAKE,ejnleiwyex.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_25/abi...,
2,abmnkljiny.mp4,FAKE,mcmztvtjaz.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_25/abm...,
3,abpynckiti.mp4,FAKE,esjrjzrfeg.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_25/abp...,
4,abroglvegl.mp4,FAKE,kgafyxvlhz.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_25/abr...,
...,...,...,...,...,...,...
119149,zylgvbqkte.mp4,FAKE,gvycszbsqv.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_5/zylg...,
119150,zyndcjxfwf.mp4,FAKE,ttxuegkaew.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_5/zynd...,
119151,zywnhhdcku.mp4,FAKE,lhglvsajka.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_5/zywn...,
119152,zzafzthowz.mp4,FAKE,osnuxyotnv.mp4,train,/home/mc/dev/dfdc_train/dfdc_train_part_5/zzaf...,


### Resize an image while maintaining aspect ratio

In [4]:
def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to a target width or height, preserving aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (rows, columns).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the output of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; derive the matching height.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Height drives the scale; derive the matching width.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, target_size, interpolation=inter)

### Sample frames from an mp4

In [5]:
def sample_from_mp4(p, num_samples=5):
    """Sample evenly spaced frames from a video file.

    Frame indices are chosen with ``np.linspace`` over
    ``[0, num_frames - 1]``. Each sampled frame is downscaled with
    ``image_resize`` so its longer side is 512 pixels whenever the reported
    width or height exceeds 512.

    Args:
        p: Path to the video file (string or path-like).
        num_samples: Number of indices to request. If the video has fewer
            frames than this, ``idxs`` contains repeated indices and each
            distinct frame appears only once in the returned stack.

    Returns:
        Tuple ``(width, height, fps, num_frames, idxs, frames)`` where
        ``width``, ``height`` and ``fps`` are the values reported by
        ``cap.get``, ``num_frames`` is the reported frame count as an int,
        ``idxs`` is the array of requested frame indices, and ``frames`` is
        the ``np.stack`` of the sampled (possibly resized) frames.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the container reports no frames, or no frames were
            sampled (``np.stack`` on an empty list).
        RuntimeError: If a frame cannot be grabbed or decoded.
    """
    cap = cv2.VideoCapture(str(p))
    try:
        if not cap.isOpened():
            raise IOError(f'Could not open video: {p}')

        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if num_frames <= 0:
            raise ValueError(f'Video reports no frames: {p}')

        idxs = np.linspace(0, num_frames - 1, num_samples, dtype=int)
        # A set gives O(1) membership tests instead of scanning the array
        # once per decoded frame.
        wanted = set(idxs.tolist())
        last_wanted = max(wanted, default=-1)

        frames = []
        # Frames are read sequentially; stop once the last requested index
        # has been retrieved instead of decoding the rest of the file.
        for i in range(last_wanted + 1):
            # grab() only advances the stream; the costly decode happens in
            # retrieve(), so skipped frames stay cheap.
            if not cap.grab():
                raise RuntimeError(f'Could not grab frame {i} of {p}')
            if i not in wanted:
                continue
            ret, frame = cap.retrieve()
            if not ret or frame is None:
                raise RuntimeError(f'Could not decode frame {i} of {p}')
            if width > 512 or height > 512:
                # Constrain the longer side to 512 pixels.
                if width > height:
                    frame = image_resize(frame, width=512)
                else:
                    frame = image_resize(frame, height=512)
            frames.append(frame)
    finally:
        # Always free the capture handle, even when decoding fails.
        cap.release()
    return width, height, fps, num_frames, idxs, np.stack(frames)

### Sample frames and save to disk

In [None]:
# Sample frames from every video listed in ``df``, save them as a compressed-
# free .npz archive (key ``img``) under ``save_dir``, and record per-video
# metadata back into ``df``.
# NOTE(review): ``df.loc[i, ...]`` with ``range(len(df))`` assumes ``df`` has
# the default 0..n-1 integer index — confirm if the frame is ever filtered.
save_dir = '/media/mc/2TBNVMESSD/sampled_frames/'
start = time.time()
for i in range(len(df)):
    # Report the time taken by each batch of 1000 videos.
    if i != 0 and i % 1000 == 0:
        elapsed = time.time() - start
        start = time.time()
        print(f'{i}/{len(df)} Elapsed: {elapsed:.1f}s')
    # Reset per iteration so the error message never shows the previous
    # row's path if the lookup itself fails.
    in_path = None
    try:
        in_path = df.loc[i, 'filepath']
        width, height, fps, num_frames, idxs, imgs = sample_from_mp4(in_path)
        # Save first, so metadata is only recorded for videos whose frames
        # actually reached the disk. np.savez appends '.npz' to the name.
        save_path = (Path(save_dir) / Path(in_path).name).as_posix()
        np.savez(save_path, img=imgs)
        df.at[i, 'orig_width'] = width
        df.at[i, 'orig_height'] = height
        df.at[i, 'new_width'] = imgs.shape[2]
        df.at[i, 'new_height'] = imgs.shape[1]
        df.at[i, 'fps'] = fps
        df.at[i, 'num_frames'] = num_frames
        df.at[i, 'idxs'] = np.array_str(idxs)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so Ctrl-C
        # (KeyboardInterrupt) can still stop this multi-hour job, and report
        # the cause instead of hiding it.
        print(f'Error with {in_path}: {exc!r}')

1000/119154 Elapsed: 231.4s
2000/119154 Elapsed: 233.9s
3000/119154 Elapsed: 230.4s
4000/119154 Elapsed: 231.3s
5000/119154 Elapsed: 231.6s
6000/119154 Elapsed: 231.3s
7000/119154 Elapsed: 233.1s
8000/119154 Elapsed: 226.6s
9000/119154 Elapsed: 225.5s
10000/119154 Elapsed: 226.0s
11000/119154 Elapsed: 241.5s
12000/119154 Elapsed: 239.0s
13000/119154 Elapsed: 238.9s
14000/119154 Elapsed: 240.1s
15000/119154 Elapsed: 238.7s
16000/119154 Elapsed: 238.8s
17000/119154 Elapsed: 239.9s
18000/119154 Elapsed: 228.3s
19000/119154 Elapsed: 231.1s
20000/119154 Elapsed: 233.2s
21000/119154 Elapsed: 234.0s
22000/119154 Elapsed: 236.5s
23000/119154 Elapsed: 240.0s
24000/119154 Elapsed: 239.1s
25000/119154 Elapsed: 246.1s
26000/119154 Elapsed: 244.9s
27000/119154 Elapsed: 232.6s
28000/119154 Elapsed: 225.9s
29000/119154 Elapsed: 223.5s
30000/119154 Elapsed: 224.9s
31000/119154 Elapsed: 228.8s
32000/119154 Elapsed: 232.5s
33000/119154 Elapsed: 232.6s
34000/119154 Elapsed: 242.1s
