In [1]:
import os
import sys
import time
import json
import glob
import subprocess
from multiprocessing import Pool, cpu_count
from concurrent.futures import ProcessPoolExecutor
from tqdm.notebook import tqdm
import torch
import cv2 as cv
from PIL import Image
import numpy as np
import pandas as pd
from numba import njit
%matplotlib inline
from matplotlib import pyplot as plt
import librosa
import librosa.display

from dfdc.utils import fuzzy_join
from dfdc.utils import fuzzy_diff
from dfdc.dataset_utils import fraction2float
from dfdc.dataset_utils import fraction2tuple
from dfdc.dataset_utils import search_for_record
from dfdc.dataset_utils import create_metadata_master_raw
%reload_ext autoreload
%autoreload 2

In [2]:
def extract_metadata(path, file):
    """Read basic properties of one video with OpenCV.

    Opens ``os.path.join(path, file)``, decodes the first frame and queries
    the frame rate and the frame count reported by the capture.

    Args:
        path: Directory that contains the video.
        file: File name of the video inside ``path``.

    Returns:
        Tuple ``(frame_shape, frame_count, duration)``: the ``.shape`` of the
        first decoded frame, the reported frame count as ``int`` and
        ``int(frame_count / fps)`` (truncated). ``duration`` is 0 when no
        positive frame rate is reported.

    Raises:
        IOError: If no frame could be read from the video.
    """
    video_path = os.path.join(path, file)
    cap = cv.VideoCapture(video_path)
    try:
        ret, frame = cap.read()
        fps = cap.get(cv.CAP_PROP_FPS)
        frame_count = cap.get(cv.CAP_PROP_FRAME_COUNT)
    finally:
        # Always give the capture back, even if reading raised.
        cap.release()
    if not ret or frame is None:
        # Previously this surfaced as AttributeError on ``None.shape``.
        raise IOError(f'could not read a frame from {video_path}')
    # Guard against a missing/zero frame rate instead of dividing by zero.
    duration = int(frame_count / fps) if fps > 0 else 0
    return frame.shape, int(frame_count), duration

def extract_metadata2(path, files):
    """Collect the metadata of several videos into one DataFrame.

    Calls ``extract_metadata(path, file)`` for every entry of ``files``
    (wrapped in a tqdm progress bar) and returns a frame indexed by ``files``
    with the columns ``frame_shape``, ``frame_count`` and ``duration``.
    """
    records = [extract_metadata(path, name) for name in tqdm(files)]
    columns = {
        'frame_shape': [shape for shape, _, _ in records],
        'frame_count': [count for _, count, _ in records],
        'duration': [seconds for _, _, seconds in records],
    }
    return pd.DataFrame(data=columns, index=files)

In [3]:
# Maps the class names to integers: 'FAKE' -> 1, 'REAL' -> 0.
label = dict(FAKE=1, REAL=0)

In [4]:
# filenames = glob.glob('/kaggle/input/deepfake-detection-challenge/test_videos/*.mp4')
# Root folder of the local data set. Set the DFDC_DATA_DIR environment variable
# to use another location without editing the notebook; the default is the
# original hard-coded path.
data_dir = os.environ.get('DFDC_DATA_DIR', '/media/Aorus/DATA/dfdc')
# Folder that receives one ffprobe JSON file per video (see write_metadata).
meta_dir = os.path.join(data_dir, 'metadata', 'ffprobe')
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the folder in between.
os.makedirs(meta_dir, exist_ok=True)

In [5]:
def write_metadata(infile):
    """Write ffprobe's JSON description of ``infile`` into ``meta_dir``.

    The output file is named after the base name of ``infile`` with its
    extension replaced by ``.json``. If that file already exists the call is a
    no-op, so the function can be re-run to resume an interrupted batch.

    ffprobe is invoked with an argument list (no shell), so paths containing
    spaces or shell metacharacters are passed through unchanged. Output goes
    to a temporary file first and is only moved into place when ffprobe exits
    with status 0; a failed run therefore never leaves an empty or truncated
    ``.json`` behind that later runs would skip.

    Args:
        infile: Path of the video file to inspect.

    Returns:
        None.

    Raises:
        OSError: If the ffprobe executable cannot be started or the output
            file cannot be written.
    """
    stem = os.path.splitext(os.path.basename(infile))[0]
    outfile = os.path.join(meta_dir, stem + '.json')
    if os.path.exists(outfile):
        return
    command = ['ffprobe', '-v', 'error', '-print_format', 'json',
               '-show_format', '-show_streams', infile]
    # The pid suffix keeps concurrent workers from sharing a temp file.
    tmpfile = f'{outfile}.{os.getpid()}.tmp'
    try:
        with open(tmpfile, 'w') as out:
            returncode = subprocess.call(command, stdout=out)
        if returncode == 0:
            os.replace(tmpfile, outfile)
        else:
            # Best effort, as before: report and carry on with the next file.
            print(f'ffprobe failed ({returncode}) for {infile}', file=sys.stderr)
    finally:
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

# Collect the full paths of all .mp4 files found in the folders
# dfdc_train_part_0 ... dfdc_train_part_49 below data_dir.
filenames = []
for i in range(50):
    train_dir = os.path.join(data_dir, f'dfdc_train_part_{i}')
    filenames.extend(
        os.path.join(train_dir, name)
        for name in os.listdir(train_dir)
        if name.endswith('.mp4')
    )
print(len(filenames))

119146


In [None]:
# Run write_metadata for every collected video in a process pool. The loop
# body is empty on purpose: iterating drains the lazy result iterator so that
# tqdm can show the progress.
with ProcessPoolExecutor() as executor:
    results = executor.map(write_metadata, filenames)
    for _ in tqdm(results, total=len(filenames)):
        pass

In [5]:
# Build the raw metadata collection for the data set with the project helper.
# NOTE(review): presumably this combines the per-video ffprobe JSON files with
# the labels — confirm in dfdc.dataset_utils.
metadata_raw = create_metadata_master_raw(data_dir)

In [6]:
# One row per key of metadata_raw (orient='index'); the keys become the index.
df = pd.DataFrame.from_dict(metadata_raw, orient='index')
# Cache the table in the working directory; the index is written as the first CSV column.
df.to_csv('full_raw_metadata.csv')

In [7]:
def _parse_int_tuple(value):
    """Turn CSV text such as ``"(1, 15360)"`` back into ``(1, 15360)``.

    Values that are not a parenthesised, comma-separated list of integers
    (including NaN and values that already are tuples) are returned unchanged.
    """
    if not (isinstance(value, str) and value.startswith('(') and value.endswith(')')):
        return value
    try:
        return tuple(int(part) for part in value[1:-1].split(',') if part.strip())
    except ValueError:
        return value


# index_col=0 restores the index that to_csv wrote as the first column; without
# it the file names end up in an 'Unnamed: 0' column and lookups such as
# df[df.index == 'name.mp4'] further down never match.
df = pd.read_csv('full_raw_metadata.csv', index_col=0, low_memory=False)
# Tuple-valued columns come back from CSV as text; later cells index them with
# x[0] / x[1], so convert them back to tuples of ints.
for _col in ('video_time_base', 'video_codec_time_base', 'video_r_frame_rate', 'audio_time_base'):
    if _col in df.columns:
        df[_col] = df[_col].map(_parse_int_tuple)

In [7]:
df.shape

(119146, 21)

In [8]:
df.head()

Unnamed: 0,label,split,original,zipfile_id,format_start_time,format_duration,format_size,format_bit_rate,video_width,video_height,...,video_r_frame_rate,video_duration_ts,video_duration,video_nb_frames,audio_sample_rate,audio_start_pts,audio_start_time,audio_duration_ts,audio_duration,audio_nb_frames
owxbbpjpch.mp4,1,train,wynotylpnm.mp4,0,-0.02322,10.024,8118089,6478921,1080,1920,...,"(37461, 1250)",375000,10.010411,300,44100,-1024,-0.02322,442024,10.02322,432
vpmyeepbep.mp4,0,train,,0,-0.02322,10.024,4287242,3421581,1080,1920,...,"(299687, 10000)",3000000,10.010444,300,44100,-1024,-0.02322,442024,10.02322,432
fzvpbrzssi.mp4,0,train,,0,-0.02322,10.024,12673703,10114687,1080,1920,...,"(29969, 1000)",300000,10.010344,300,44100,-1024,-0.02322,442024,10.02322,432
htorvhbcae.mp4,1,train,wclvkepakb.mp4,0,-0.02322,10.024,12862531,10265387,1080,1920,...,"(29969, 1000)",300000,10.010344,300,44100,-1024,-0.02322,442024,10.02322,432
fckxaqjbxk.mp4,1,train,vpmyeepbep.mp4,0,-0.02322,10.024,4297751,3429968,1080,1920,...,"(299687, 10000)",3000000,10.010444,300,44100,-1024,-0.02322,442024,10.02322,432


In [None]:
# Each of the following cells tabulates how often every distinct value of one
# metadata column occurs per value of df['label'].
pd.crosstab(df['format_start_time'], df['label'])

In [None]:
pd.crosstab(df['format_duration'], df['label'])

In [None]:
pd.crosstab(df['video_nb_frames'], df['label'])

In [None]:
pd.crosstab(df['video_duration_ts'], df['label'])

In [None]:
pd.crosstab(df['video_duration'], df['label'])

In [None]:
pd.crosstab(df['video_time_base'], df['label'])

In [None]:
pd.crosstab(df['video_codec_time_base'], df['label'])

In [None]:
pd.crosstab(df['video_r_frame_rate'], df['label'])

In [None]:
# Joint table: one row per observed combination of the three timing columns.
pd.crosstab([df['video_time_base'], df['video_codec_time_base'], df['video_r_frame_rate']], df['label'])

In [83]:
# Split each two-element timing column into '<name>0' (element 0) and
# '<name>1' (element 1) so the parts can be used in arithmetic.
for _name in ('video_time_base', 'video_codec_time_base', 'video_r_frame_rate'):
    df[_name + '0'] = df[_name].apply(lambda x: x[0])
    df[_name + '1'] = df[_name].apply(lambda x: x[1])

In [111]:
# video_duration multiplied by element 1 of video_time_base, minus the reported
# video_duration_ts.
# assumes video_time_base is (1, ticks_per_second), so the product is the
# duration in ticks — TODO confirm
df['video_timing_diff'] = df['video_duration'] * df['video_time_base1'] - df['video_duration_ts']

In [109]:
# Reported duration minus the duration implied by the frame count:
# nb_frames / fps with fps = r_frame_rate0 / r_frame_rate1.
# (The previous expression divided fps by nb_frames. Check: 300 frames at
# 37461/1250 fps give 10.010411 s, matching video_duration in df.head().)
df['video_duration_diff'] = df['video_duration'] - df['video_nb_frames'] / (df['video_r_frame_rate0'] / df['video_r_frame_rate1'])

In [None]:
# Label counts per distinct value of the two derived difference columns.
pd.crosstab(df['video_timing_diff'], df['label'])

In [None]:
pd.crosstab(df['video_duration_diff'], df['label'])

In [None]:
# Distribution of the tick difference over all rows.
df['video_timing_diff'].hist(bins=200)

In [None]:
# Number of rows whose video_time_base does not have 1 as element 0.
sum(df['video_time_base0'] != 1)

In [None]:
# The next cells count rows for pairwise relations between the elements of
# video_time_base, video_codec_time_base and video_r_frame_rate.
sum(df['video_time_base1'] != df['video_codec_time_base1'])

In [None]:
sum(df['video_time_base1'] != df['video_r_frame_rate0'])

In [None]:
sum(df['video_codec_time_base1'] != df['video_r_frame_rate0'])

In [None]:
sum(df['video_codec_time_base1'] == df['video_r_frame_rate0'] * 2)

In [None]:
sum(df['video_codec_time_base0'] == df['video_r_frame_rate1'])

In [None]:
sum(df['video_codec_time_base0'] * 2 != df['video_r_frame_rate1'])

In [None]:
# Rows where the product of the two ratios is not exactly 2.
# NOTE(review): this is an exact floating-point comparison; rounding may
# count rows that are 2 up to precision — confirm.
sum((df['video_codec_time_base1'] / df['video_r_frame_rate0']) * (df['video_r_frame_rate1'] / df['video_codec_time_base0']) != 2)

In [None]:
# Subset of rows where video_codec_time_base0 * 2 differs from video_r_frame_rate1.
df2 = df[df['video_codec_time_base0'] * 2 != df['video_r_frame_rate1']]

In [None]:
pd.crosstab([df2['video_codec_time_base1'], df2['video_r_frame_rate0'], df2['video_time_base1']], df2['label'])

Size and aspect ratio

In [None]:
# Label counts per display aspect ratio, level and frame size, first one column
# at a time and then in combination.
pd.crosstab(df['video_display_aspect_ratio'], df['label'])

In [None]:
pd.crosstab(df['video_level'], df['label'])

In [None]:
pd.crosstab([df['video_level'], df['video_display_aspect_ratio']], df['label'])

In [None]:
pd.crosstab([df['video_height'], df['video_display_aspect_ratio']], df['label'])

In [None]:
pd.crosstab([df['video_width'], df['video_display_aspect_ratio']], df['label'])

In [None]:
pd.crosstab([df['video_width'], df['video_height'], df['video_display_aspect_ratio']], df['label'])

In [None]:
# NOTE: despite the names, ar_169 is True when the ratio is NOT '16:9' and
# ar_916 is True when it is NOT '9:16'.
df['ar_169'] = df['video_display_aspect_ratio'] != '16:9'
df['ar_916'] = df['video_display_aspect_ratio'] != '9:16'

In [None]:
# The two flags can only be equal when both are True, so this keeps the rows
# whose ratio is neither '16:9' nor '9:16'.
# NOTE: rebinds df2, which an earlier cell used for a different subset.
df2 = df[df['ar_169'] == df['ar_916']]

In [None]:
pd.crosstab(df2['video_display_aspect_ratio'], df2['label'])

In [None]:
pd.crosstab([df2['video_display_aspect_ratio'], df2['video_height']], df2['label'])

In [None]:
pd.crosstab([df2['video_display_aspect_ratio'], df2['video_width']], df2['label'])

In [None]:
pd.crosstab([df2['video_display_aspect_ratio'], df2['video_nb_frames']], df2['label'])

In [None]:
# Decimal values of three fractions, shown together as a tuple for comparison.
9/16, 135/278, 1080/2224

Audio

In [None]:
# f = k*(s/N)
# f - frequency
# k - cycles per window
# s - sample_rate
# N - FFT window length

In [115]:
# Split audio_time_base into its two elements, like the video timing columns.
df['audio_time_base0'] = df['audio_time_base'].apply(lambda x: x[0])
df['audio_time_base1'] = df['audio_time_base'].apply(lambda x: x[1])

In [None]:
# Number of rows whose audio_time_base does not have 1 as element 0.
sum(df['audio_time_base0'] != 1)

In [None]:
# Label counts per distinct value of each audio column.
pd.crosstab(df['audio_sample_rate'], df['label'])

In [None]:
pd.crosstab(df['audio_start_time'], df['label'])

In [None]:
pd.crosstab(df['audio_duration_ts'], df['label'])

In [None]:
pd.crosstab(df['audio_duration'], df['label'])

In [None]:
pd.crosstab(df['audio_nb_frames'], df['label'])

In [125]:
# Joint table: one row per observed combination of the five audio columns.
pd.crosstab([df['audio_sample_rate'], df['audio_start_time'], df['audio_duration'], df['audio_duration_ts'], df['audio_nb_frames']], df['label'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,label,0,1
audio_sample_rate,audio_start_time,audio_duration,audio_duration_ts,audio_nb_frames,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0,0,13,17
16000,-0.064,0.604,9664,10,0,1
16000,-0.064,10.064,161024,158,0,5248
44100,-0.02322,10.02322,442024,432,1905,16568
48000,-0.021333,0.562667,27008,27,7,8
48000,-0.021333,4.578667,219776,215,1,1
48000,-0.021333,10.021333,481024,470,17228,78149


In [None]:
# audio_duration_ts minus audio_duration * audio_sample_rate.
# assumes the audio time base is 1 / sample_rate, so both terms are in
# samples — TODO confirm against audio_time_base1
df['audio_duration_diff'] = df['audio_duration_ts'] - df['audio_duration'] * df['audio_sample_rate']
pd.crosstab(df['audio_duration_diff'], df['label'])

In [None]:
# Same comparison for the start offset: audio_start_pts vs. audio_start_time * audio_sample_rate.
df['audio_start_diff'] = df['audio_start_pts'] - df['audio_start_time'] * df['audio_sample_rate']
pd.crosstab(df['audio_start_diff'], df['label'])

In [127]:
# Both comparisons combined: (duration_ts + start_pts) vs. (duration + start_time) * sample_rate.
df['audio_timing_diff'] = (df['audio_duration_ts'] + df['audio_start_pts']) - (df['audio_duration'] + df['audio_start_time']) * df['audio_sample_rate']
pd.crosstab(df['audio_timing_diff'], df['label'])

label,0,1
audio_timing_diff,Unnamed: 1_level_1,Unnamed: 2_level_1
-0.032,1,1
-0.032,7,8
0.0,19146,99983


In [None]:
# (audio_duration_ts + audio_start_pts) minus audio_sample_rate * 10.
# NOTE(review): the 10 is presumably the nominal clip length in seconds — confirm.
df['audio_ts_diff'] = df['audio_duration_ts'] + df['audio_start_pts'] - df['audio_sample_rate'] * 10
pd.crosstab(df['audio_ts_diff'], df['label'])

In [None]:
# (audio_duration_ts + audio_start_pts) per audio frame.
# NOTE(review): the joint audio table in this notebook lists rows with
# audio_nb_frames == 0; for those rows this is a division by zero.
df['audio_duration_frames_ratio'] = (df['audio_duration_ts'] + df['audio_start_pts']) / (df['audio_nb_frames'])
pd.crosstab(df['audio_duration_frames_ratio'], df['label'])

In [None]:
# NOTE: overwrites the 'audio_duration_frames_ratio' column assigned in the
# previous cell with a different formula.
# NOTE(review): 1024 is presumably the number of samples per audio frame — confirm.
df['audio_duration_frames_ratio'] = df['audio_duration'] * df['audio_sample_rate'] / (1024 * df['audio_nb_frames'])
pd.crosstab(df['audio_duration_frames_ratio'], df['label'])

In [None]:
# `infile` was never assigned at notebook level (it is only the parameter name
# of write_metadata), so this cell raised NameError on a fresh kernel. Use the
# first collected training video as the sample clip.
# NOTE(review): pick a specific file here if a particular clip was intended.
infile = filenames[0]
# Decode the audio track, resampled to 22050 Hz.
af, sr = librosa.load(infile, sr=22050)

In [None]:
def read_frames(mov_id, grayscale=True):
    """Decode all frames of one video into a single uint8 array.

    The row of the global ``df`` whose index equals ``mov_id`` supplies the
    folder (``mp4_dir``), the number of frames (``video_nb_frames``) and the
    frame size (``video_height``, ``video_width``). The video is read from
    ``os.path.join(data_dir, mp4_dir, mov_id)``.

    Args:
        mov_id: Index label of the video in ``df``; also used as file name.
        grayscale: If True (default) frames are converted with
            ``cv.COLOR_BGR2GRAY`` and the result has shape
            ``(num_frames, height, width)``; otherwise they are converted
            with ``cv.COLOR_BGR2RGB`` and the result has shape
            ``(num_frames, height, width, 3)``.

    Returns:
        ``np.ndarray`` of dtype ``uint8``. Frames that could not be read are
        reported with ``print('bad frame', i)`` and left all-zero.

    Raises:
        IndexError: If ``mov_id`` is not in the index of ``df``.
    """
    # Look the row up once instead of filtering df for every field.
    record = df[df.index == mov_id].iloc[0]
    movie_path = os.path.join(data_dir, record['mp4_dir'], mov_id)
    num_frames = int(record['video_nb_frames'])
    height = int(record['video_height'])
    width = int(record['video_width'])

    shape = (num_frames, height, width) if grayscale else (num_frames, height, width, 3)
    # zeros rather than empty: a frame that fails to decode stays black
    # instead of exposing uninitialised memory.
    frames = np.zeros(shape, dtype=np.uint8)
    conversion = cv.COLOR_BGR2GRAY if grayscale else cv.COLOR_BGR2RGB
    capture = cv.VideoCapture(movie_path)
    try:
        for i in range(num_frames):
            ret, frame = capture.read()
            if not ret:
                print('bad frame', i)
                continue
            frames[i] = cv.cvtColor(frame, conversion)
    finally:
        # Release the capture even if a conversion or assignment raised.
        capture.release()
    return frames

def splot(fake_frames, orig_frames, i_frame):
    """Show frame ``i_frame`` of both stacks in one image.

    The two frames are concatenated along axis 1 (fake first, original
    second) and passed to ``plt.imshow``.
    """
    pair = (fake_frames[i_frame], orig_frames[i_frame])
    plt.imshow(np.concatenate(pair, axis=1))
    
def _show_scaled_join(first, second):
    """Display ``fuzzy_join(first, second)`` rescaled so its maximum becomes 255."""
    img = fuzzy_join(first, second)
    peak = np.max(img)
    if peak == 0:
        # Nothing to scale (e.g. identical frames): show a black image
        # instead of dividing by zero and casting NaN/inf to uint8.
        img = np.zeros_like(img, dtype='uint8')
    else:
        img = (img * (255.0/peak)).astype('uint8')
    plt.imshow(img)


def dplot(fake_frames, orig_frames, i_frame):
    """Show the fuzzy_join of frame ``i_frame`` from both stacks.

    Args:
        fake_frames: Frame stack indexed by frame number.
        orig_frames: Second frame stack, indexed the same way.
        i_frame: Index of the frame taken from both stacks.
    """
    _show_scaled_join(fake_frames[i_frame], orig_frames[i_frame])


def tplot(fake_frames, orig_frames, i_fake_frame, i_orig_frame):
    """Show the fuzzy_join of two frames with independent indices.

    Same as ``dplot`` but the frame index can differ between the stacks,
    e.g. to compare a frame with its successor in the same stack.

    Args:
        fake_frames: Frame stack indexed by frame number.
        orig_frames: Second frame stack.
        i_fake_frame: Index into ``fake_frames``.
        i_orig_frame: Index into ``orig_frames``.
    """
    _show_scaled_join(fake_frames[i_fake_frame], orig_frames[i_orig_frame])

def sizeof(obj):
    """Recursively add up ``sys.getsizeof`` for ``obj`` and its contents.

    Dicts contribute their keys and values, lists/tuples/sets/frozensets
    contribute their items; any other object counts only its own size.
    Source: https://github.com/mwojnars/nifty/blob/master/util.py
    """
    total = sys.getsizeof(obj)
    if isinstance(obj, dict):
        for key in obj.keys():
            total += sizeof(key)
        for value in obj.values():
            total += sizeof(value)
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for item in obj:
            total += sizeof(item)
    return total

In [None]:
# fake_file = 'skfnhumwpn.mp4'
# fake_file = 'gobvnzkjaf.mp4'
# fake_file = 'ctpexqamtx.mp4'
# fake_file = 'ebffuhuesn.mp4'
# fake_file = 'tolotyowoy.mp4'
# fake_file = 'lymrhwqqyw.mp4'
fake_file = 'ahbweevwpv.mp4'
# Row(s) of the chosen fake video; filtered once and reused below.
fake_rows = df[df.index == fake_file]
# Milliseconds per frame (1000 / frame rate); used later as animation interval.
fake_interval = 1000 / fraction2float(fake_rows['video_r_frame_rate'].iloc[0])
mp4_dir = fake_rows['mp4_dir'].iloc[0]
# The 'original' column names the video this fake was derived from.
orig_file = fake_rows['original'].iloc[0]
orig_rows = df[df.index == orig_file]
orig_interval = 1000 / fraction2float(orig_rows['video_r_frame_rate'].iloc[0])
# Both videos must play at the same rate for a frame-by-frame comparison.
assert fake_interval == orig_interval
print(fake_interval)
fake_rows.T

In [None]:
# Cross-check the selected video with the OpenCV-based reader.
extract_metadata(os.path.join(data_dir, mp4_dir), fake_file)

In [None]:
from IPython.display import Video

In [None]:
# Embed the fake video in the notebook output.
mp4_dir = df[df.index==fake_file]['mp4_dir'].iloc[0]
full_file = os.path.join(data_dir, mp4_dir, fake_file)
print(full_file)
Video(full_file, embed=True)

In [None]:
# Decode both videos with read_frames' default (grayscale=True).
fake_frames = read_frames(fake_file)
print(fake_file, fake_frames.shape)

In [None]:
orig_frames = read_frames(orig_file)
print(orig_file, orig_frames.shape)

In [None]:
# fuzzy_join of fake and original, skipping frame 0 so the result has the
# same length as the trimmed stacks produced in the next two cells.
fakeorig_frames = fuzzy_join(fake_frames[1:], orig_frames[1:])

In [None]:
# fuzzy_join of every fake frame with its successor, then drop frame 0.
# NOTE: this rebinds fake_frames, so the cell is not idempotent — every
# re-run removes one more leading frame.
dfake_frames = fuzzy_join(fake_frames[:-1], fake_frames[1:])
fake_frames = fake_frames[1:]

In [None]:
# Same for the original video; also not idempotent (rebinds orig_frames).
dorig_frames = fuzzy_join(orig_frames[:-1], orig_frames[1:])
orig_frames = orig_frames[1:]

In [None]:
# Show the last frame of the combined `video` stack.
# NOTE(review): `video` is first assigned in the animation cell that follows,
# so running the notebook top to bottom on a fresh kernel raises NameError here.
fig = plt.figure(figsize=(13, 13))
plt.imshow(video[-1,:,:])

In [None]:
from matplotlib import animation
from IPython.display import HTML

# `video` is a stack of frames indexed as video[i, :, :]; built from the
# grayscale stacks read above it has shape (frames, height, width).

# Tile the three stacks along the shorter image side: along the width (axis=2)
# when the frames are taller than wide, otherwise along the height (axis=1).
axis = 2 if fake_frames.shape[1] > fake_frames.shape[2] else 1
video = np.concatenate((dfake_frames, fake_frames, fakeorig_frames), axis=axis)

fig = plt.figure(figsize=(13, 13))
# The image artist is created once; the callbacks below only swap its data.
im = plt.imshow(video[0,:,:])

plt.close() # this is required to not display the generated image

def init():
    """Reset the image artist to the first frame of ``video``.

    Returns the modified artist as a one-element tuple, which is what
    ``FuncAnimation`` expects from ``init_func`` (the value is only used
    when blitting is enabled).
    """
    im.set_data(video[0,:,:])
    return (im,)

def animate(i):
    """Show frame ``i`` of ``video`` in the image artist.

    Returns the modified artist as a one-element tuple; ``FuncAnimation``
    documents an iterable of artists as the return value of the frame
    callback (the value is only used when blitting is enabled).
    """
    im.set_data(video[i,:,:])
    return (im,)

# One animation frame per entry of `video`; interval is the delay between
# frames in milliseconds (fake_interval was computed as 1000 / frame rate).
anim = animation.FuncAnimation(fig, animate, init_func=init, frames=video.shape[0], interval=fake_interval)
# Render the animation as an HTML5 video and display it as the cell output.
HTML(anim.to_html5_video())


In [None]:
# Both stacks must have identical shape for the per-frame comparison below.
assert fake_frames.shape == orig_frames.shape
n_frames, *_ = fake_frames.shape

In [None]:
# One fuzzy_diff value per frame pair at full resolution.
fuzzdiff = np.zeros((n_frames,))
for i in range(n_frames):
    fuzzdiff[i] = fuzzy_diff(fake_frames[i], orig_frames[i])

In [None]:
plt.figure(figsize=(15, 5))
plt.plot(fuzzdiff);

In [None]:
# Index of the frame with the largest fuzzy_diff value.
i_frame = np.argmax(fuzzdiff)
i_frame

In [None]:
new_width, new_height = 120, 120
# cv.resize takes the target size as (width, height).
dim = (new_width, new_height)


def _shrink(frame):
    """Nearest-neighbour resize of one frame to ``dim``."""
    return cv.resize(frame, dim, interpolation=cv.INTER_NEAREST)


# Same per-frame fuzzy_diff as before, but on the downscaled frames.
fuzzdiff = np.zeros((n_frames,))
for i in range(n_frames):
    fuzzdiff[i] = fuzzy_diff(_shrink(fake_frames[i]), _shrink(orig_frames[i]))

In [None]:
# fuzzy_diff per frame, now computed on the downscaled frames.
plt.figure(figsize=(15, 5))
plt.plot(fuzzdiff);

In [None]:
# NOTE: hard-coded frame index; replaces the argmax value computed earlier.
i_frame = 94

In [None]:
# Fake and original in one image for the frame before, at and after i_frame.
plt.figure(figsize=(20, 10))
splot(fake_frames, orig_frames, i_frame-1)

In [None]:
plt.figure(figsize=(20, 10))
splot(fake_frames, orig_frames, i_frame)

In [None]:
plt.figure(figsize=(20, 10))
splot(fake_frames, orig_frames, i_frame+1)

In [None]:
# Rescaled fuzzy_join of fake vs. original at i_frame.
plt.figure(figsize=(10, 10))
dplot(fake_frames, orig_frames, i_frame)

In [None]:
# NOTE: another hard-coded frame index. Joins frame i_frame with its successor
# within the fake video ...
i_frame = 96
plt.figure(figsize=(10, 10))
tplot(fake_frames, fake_frames, i_frame, i_frame+1)

In [None]:
# ... and the same two frame indices within the original video.
plt.figure(figsize=(10, 10))
tplot(orig_frames, orig_frames, i_frame, i_frame+1)