In [1]:
!pip install opencv-python pillow numpy matplotlib

Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.8/54.8 MB[0m [31m933.7 kB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84


In [2]:
!pip install tqdm


Collecting tqdm
  Using cached tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
Using cached tqdm-4.66.5-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.66.5


In [13]:
import argparse
import os
import urllib
import urllib.request
import tempfile
import time
import sys
import json
import random
from tqdm import tqdm
from os.path import join

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5']

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    if 'ipykernel' in sys.modules:
        # Running in Jupyter, bypass argparse
        class Args:
            output_path = 'your_default_output_path'  # Set your default value
            dataset = 'all'
            compression = 'raw'
            type = 'videos'
            num_videos = None
            server = 'EU'
            tos_url = 'https://github.com/ondyari/FaceForensics'  # Set the TOS URL

        return Args()

    else:
        # Regular argparse
        parser = argparse.ArgumentParser(
            description='Downloads FaceForensics v2 public data release.',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )
        parser.add_argument('output_path', type=str, help='Output directory.')
        parser.add_argument('-d', '--dataset', type=str, default='all',
                            choices=list(DATASETS.keys()) + ['all'])
        parser.add_argument('-c', '--compression', type=str, default='raw',
                            choices=COMPRESSION)
        parser.add_argument('-t', '--type', type=str, default='videos',
                            choices=TYPE)
        parser.add_argument('-n', '--num_videos', type=int, default=None)
        parser.add_argument('--server', type=str, default='EU',
                            choices=SERVERS)
        parser.add_argument('--tos_url', type=str,
                            default='https://github.com/ondyari/FaceForensics',
                            help='URL to the terms of service.')

        args, unknown = parser.parse_known_args()
        return args

def download_files(filenames, base_url, output_path, report_progress=True):
    os.makedirs(output_path, exist_ok=True)
    if report_progress:
        filenames = tqdm(filenames)
    for filename in filenames:
        download_file(base_url + filename, join(output_path, filename))

def reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    if not os.path.isfile(out_file):
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp,
                                       reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed ' \
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    _ = input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special cases
        if 'original_youtube_videos' in dataset:
            # Here we download the original youtube videos zip file
            print('Downloading original youtube videos.')
            if not 'info' in dataset_path:
                print('Please be patient, this may take a while (~40gb)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(args.base_url + '/' + dataset_path,
                          out_file=join(output_path,
                                        'downloaded_videos{}.zip'.format(
                                            suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(
            c_type, dataset_path
        ))

        # Get filelists and video lengths list from server
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = json.loads(urllib.request.urlopen(args.base_url + '/' +
                                                          DEEPFEAKES_DETECTION_URL).read().decode("utf-8"))
            if 'actors' in dataset_path:
                filelist = filepaths['actors']
            else:
                filelist = filepaths['DeepFakesDetection']
        elif 'original' in dataset_path:
            # Load filelist from server
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
                                                           FILELIST_URL).read().decode("utf-8"))
            filelist = []
            for pair in file_pairs:
                filelist += pair
        else:
            # Load filelist from server
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
                                                           FILELIST_URL).read().decode("utf-8"))
            # Get filelist
            filelist = []
            for pair in file_pairs:
                filelist.append('_'.join(pair))
                if c_type != 'models':
                    filelist.append('_'.join(pair[::-1]))
        # Maybe limit number of videos for download
        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        # Server and local paths
        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(
            dataset_path, c_compression, c_type)
        dataset_mask_url = args.base_url + '{}/{}/videos/'.format(
            dataset_path, 'masks', c_type)

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path, c_compression,
                                       c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            dataset_output_path = join(output_path, dataset_path, c_type,
                                       'videos')
            print('Output path: {}'.format(dataset_output_path))
            if 'original' in dataset:
                if args.dataset != 'all':
                    print('Only videos available for original data. Aborting.')
                    return
                else:
                    print('Only videos available for original data. '
                          'Skipping original.\n')
                    continue
            if 'FaceShifter' in dataset:
                print('Masks not available for FaceShifter. Aborting.')
                return
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)

        # Else: models for deepfakes
        else:
            if dataset != 'Deepfakes' and c_type == 'models':
                print('Models only available for Deepfakes. Aborting')
                return
            dataset_output_path = join(output_path, dataset_path, c_type)
            print('Output path: {}'.format(dataset_output_path))

            # Get Deepfakes models
            for folder in tqdm(filelist):
                folder_filelist = DEEPFAKES_MODEL_NAMES
                dataset_videos_url = args.base_url + \
                                     '{}/models/{}/'.format(
                                         dataset_path, folder)
                download_files(folder_filelist, dataset_videos_url,
                               join(dataset_output_path, folder))

    print('Done.')

if __name__ == "__main__":
    args = parse_args()
    main(args)


By pressing any key to continue you confirm that you have agreed to the FaceForensics terms of use as described at:
https://github.com/ondyari/FaceForensics
***
Press any key to continue, or CTRL-C to exit.


KeyboardInterrupt: Interrupted by user

In [14]:

import argparse
import os
import urllib.request
import tempfile
import time
import sys
import json
from tqdm import tqdm
from os.path import join

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5']

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics v2 public data release.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('output_path', type=str, help='Output directory.')
    parser.add_argument('-d', '--dataset', type=str, default='all',
                        help='Which dataset to download.',
                        choices=list(DATASETS.keys()) + ['all'])
    parser.add_argument('-c', '--compression', type=str, default='raw',
                        help='Which compression degree.',
                        choices=COMPRESSION)
    parser.add_argument('-t', '--type', type=str, default='videos',
                        help='Which file type.',
                        choices=TYPE)
    parser.add_argument('-n', '--num_videos', type=int, default=None,
                        help='Number of videos to download.')
    parser.add_argument('--server', type=str, default='EU',
                        help='Server to download from.',
                        choices=SERVERS)
    args = parser.parse_args()

    # URLs
    server = args.server
    if server == 'EU':
        args.base_url = 'http://canis.vc.in.tum.de:8100/v3/'
    elif server == 'EU2':
        args.base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
    elif server == 'CA':
        args.base_url = 'http://falas.cmpt.sfu.ca:8100/v3/'
    else:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))

    args.tos_url = args.base_url.replace('v3/', 'webpage/FaceForensics_TOS.pdf')
    args.deepfakes_model_url = args.base_url + 'manipulated_sequences/Deepfakes/models/'

    return args

def download_files(filenames, base_url, output_path, report_progress=True):
    os.makedirs(output_path, exist_ok=True)
    if report_progress:
        filenames = tqdm(filenames)
    for filename in filenames:
        download_file(base_url + filename, join(output_path, filename))

def reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    if not os.path.isfile(out_file):
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp, reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed ' \
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    _ = input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special cases
        if 'original_youtube_videos' in dataset:
            print('Downloading original youtube videos.')
            if not 'info' in dataset_path:
                print('Please be patient, this may take a while (~40gb)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(args.base_url + dataset_path,
                          out_file=join(output_path, 'downloaded_videos{}.zip'.format(suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

        # Get filelists and video lengths list from server
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = json.loads(urllib.request.urlopen(args.base_url + DEEPFAKES_DETECTION_URL).read().decode("utf-8"))
            if 'actors' in dataset_path:
                filelist = filepaths['actors']
            else:
                filelist = filepaths['DeepFakesDetection']
        elif 'original' in dataset_path:
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
            filelist = []
            for pair in file_pairs:
                filelist += pair
        else:
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
            filelist = []
            for pair in file_pairs:
                filelist.append('_'.join(pair))
                if c_type != 'models':
                    filelist.append('_'.join(pair[::-1]))

        # Maybe limit the number of videos for download
        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        # Server and local paths
        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(dataset_path, c_compression, c_type)
        dataset_mask_url = args.base_url + '{}/{}/videos/'.format(dataset_path, 'masks')

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            dataset_output_path = join(output_path, dataset_path, c_type, 'videos')
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.png' for filename in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)
        elif c_type == 'models':
            if 'Deepfakes' in dataset:
                print('Downloading the {} models for Deepfakes'.format(len(DEEPFAKES_MODEL_NAMES)))
                dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                print('Output path: {}'.format(dataset_output_path))
                download_files(DEEPFAKES_MODEL_NAMES, args.deepfakes_model_url, dataset_output_path)
            else:
                print('No models to download for dataset {}'.format(dataset))
                continue

if __name__ == "__main__":
    args = parse_args()
    main(args)


usage: ipykernel_launcher.py [-h]
                             [-d {original_youtube_videos,original_youtube_videos_info,original,DeepFakeDetection_original,Deepfakes,DeepFakeDetection,Face2Face,FaceShifter,FaceSwap,NeuralTextures,all}]
                             [-c {raw,c23,c40}] [-t {videos,masks,models}]
                             [-n NUM_VIDEOS] [--server {EU,EU2,CA}]
                             output_path
ipykernel_launcher.py: error: unrecognized arguments: -f


SystemExit: 2

In [15]:
import os
import argparse
import sys

# Simulated command-line arguments
sys.argv = [
    'download_FaceForensics.py',  # Script name
    '/Users/aniketsaxena/Documents/p/python/project/deepFakeDetection/Implementation',  # output_path
    '-d', 'all',  # dataset
    '-c', 'c23',  # compression
    '-t', 'videos',  # type
    '--server', 'EU',  # server
]

def parse_args():
    parser = argparse.ArgumentParser(description='Download FaceForensics dataset.')
    parser.add_argument('output_path', type=str, help='Path to output directory')
    parser.add_argument('-d', '--dataset', choices=['original_youtube_videos', 'original_youtube_videos_info', 'original', 'DeepFakeDetection_original', 'Deepfakes', 'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures', 'all'], default='all', help='Dataset to download')
    parser.add_argument('-c', '--compression', choices=['raw', 'c23', 'c40'], default='c23', help='Compression quality')
    parser.add_argument('-t', '--type', choices=['videos', 'masks', 'models'], default='videos', help='Type of files to download')
    parser.add_argument('-n', '--num_videos', type=int, default=None, help='Number of videos to download')
    parser.add_argument('--server', choices=['EU', 'EU2', 'CA'], default='EU', help='Server to use')

    args = parser.parse_args()
    return args

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed ' \
          'to the FaceForensics terms of use as described at:')
    print('https://github.com/ondyari/FaceForensics')
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    input()  # Wait for user input

    # Ensure output path exists
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    # Sample processing based on args (replace this with actual download code)
    print(f"Downloading dataset: {args.dataset}")
    print(f"Compression: {args.compression}")
    print(f"File type: {args.type}")
    print(f"Server: {args.server}")
    if args.num_videos:
        print(f"Number of videos: {args.num_videos}")

    # Your actual downloading logic goes here
    # ...

if __name__ == "__main__":
    args = parse_args()
    main(args)


By pressing any key to continue you confirm that you have agreed to the FaceForensics terms of use as described at:
https://github.com/ondyari/FaceForensics
***
Press any key to continue, or CTRL-C to exit.
Downloading dataset: all
Compression: c23
File type: videos
Server: EU


In [16]:
import argparse
import os
import urllib.request
import tempfile
import time
import sys
import json
from tqdm import tqdm
from os.path import join

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5']

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics v2 public data release.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('output_path', type=str, help='Output directory.')
    parser.add_argument('-d', '--dataset', type=str, default='all',
                        help='Which dataset to download.',
                        choices=list(DATASETS.keys()) + ['all'])
    parser.add_argument('-c', '--compression', type=str, default='raw',
                        help='Which compression degree.',
                        choices=COMPRESSION)
    parser.add_argument('-t', '--type', type=str, default='videos',
                        help='Which file type.',
                        choices=TYPE)
    parser.add_argument('-n', '--num_videos', type=int, default=None,
                        help='Number of videos to download.')
    parser.add_argument('--server', type=str, default='EU',
                        help='Server to download from.',
                        choices=SERVERS)
    args = parser.parse_args()

    # URLs
    server = args.server
    if server == 'EU':
        args.base_url = 'http://canis.vc.in.tum.de:8100/v3/'
    elif server == 'EU2':
        args.base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
    elif server == 'CA':
        args.base_url = 'http://falas.cmpt.sfu.ca:8100/v3/'
    else:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))

    args.tos_url = args.base_url.replace('v3/', 'webpage/FaceForensics_TOS.pdf')
    args.deepfakes_model_url = args.base_url + 'manipulated_sequences/Deepfakes/models/'

    return args

def download_files(filenames, base_url, output_path, report_progress=True):
    os.makedirs(output_path, exist_ok=True)
    if report_progress:
        filenames = tqdm(filenames)
    for filename in filenames:
        download_file(base_url + filename, join(output_path, filename))

def reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    if not os.path.isfile(out_file):
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp, reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed ' \
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    _ = input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special cases
        if 'original_youtube_videos' in dataset:
            print('Downloading original youtube videos.')
            if not 'info' in dataset_path:
                print('Please be patient, this may take a while (~40gb)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(args.base_url + dataset_path,
                          out_file=join(output_path, 'downloaded_videos{}.zip'.format(suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

        # Get filelists and video lengths list from server
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = json.loads(urllib.request.urlopen(args.base_url + DEEPFAKES_DETECTION_URL).read().decode("utf-8"))
            if 'actors' in dataset_path:
                filelist = filepaths['actors']
            else:
                filelist = filepaths['DeepFakesDetection']
        elif 'original' in dataset_path:
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
            filelist = []
            for pair in file_pairs:
                filelist += pair
        else:
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
            filelist = []
            for pair in file_pairs:
                filelist.append('_'.join(pair))
                if c_type != 'models':
                    filelist.append('_'.join(pair[::-1]))

        # Maybe limit the number of videos for download
        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        # Server and local paths
        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(dataset_path, c_compression, c_type)
        dataset_mask_url = args.base_url + '{}/{}/videos/'.format(dataset_path, 'masks')

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            dataset_output_path = join(output_path, dataset_path, c_type, 'videos')
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.png' for filename in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)
        elif c_type == 'models':
            if 'Deepfakes' in dataset:
                print('Downloading the {} models for Deepfakes'.format(len(DEEPFAKES_MODEL_NAMES)))
                dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                print('Output path: {}'.format(dataset_output_path))
                download_files(DEEPFAKES_MODEL_NAMES, args.deepfakes_model_url, dataset_output_path)
            else:
                print('No models to download for dataset {}'.format(dataset))
                continue

if __name__ == "__main__":
    args = parse_args()
    main(args)



By pressing any key to continue you confirm that you have agreed to the FaceForensics terms of use as described at:
http://canis.vc.in.tum.de:8100/webpage/FaceForensics_TOS.pdf
***
Press any key to continue, or CTRL-C to exit.
Downloading videos of dataset "original_sequences/youtube"


URLError: <urlopen error [Errno 61] Connection refused>

In [23]:
import argparse
import os
import urllib.request
import tempfile
import time
import sys
import json
from tqdm import tqdm
from os.path import join
from urllib.error import URLError

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5']

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics v2 public data release.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('output_path', type=str, help='Output directory.')
    parser.add_argument('-d', '--dataset', type=str, default='all',
                        help='Which dataset to download.',
                        choices=list(DATASETS.keys()) + ['all'])
    parser.add_argument('-c', '--compression', type=str, default='raw',
                        help='Which compression degree.',
                        choices=COMPRESSION)
    parser.add_argument('-t', '--type', type=str, default='videos',
                        help='Which file type.',
                        choices=TYPE)
    parser.add_argument('-n', '--num_videos', type=int, default=None,
                        help='Number of videos to download.')
    parser.add_argument('--server', type=str, default='EU',
                        help='Server to download from.',
                        choices=SERVERS)
    parser.add_argument('--retry', type=int, default=3,
                        help='Number of retries if a download fails.')
    args = parser.parse_args()

    # URLs
    server = args.server
    if server == 'EU':
        args.base_url = 'http://canis.vc.in.tum.de:8100/v3/'
    elif server == 'EU2':
        args.base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
    elif server == 'CA':
        args.base_url = 'http://falas.cmpt.sfu.ca:8100/v3/'
    else:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))

    args.tos_url = args.base_url.replace('v3/', 'webpage/FaceForensics_TOS.pdf')
    args.deepfakes_model_url = args.base_url + 'manipulated_sequences/Deepfakes/models/'

    return args

def download_files(filenames, base_url, output_path, retry_limit=3):
    os.makedirs(output_path, exist_ok=True)
    filenames = tqdm(filenames)
    for filename in filenames:
        success = False
        for attempt in range(retry_limit):
            try:
                download_file(base_url + filename, join(output_path, filename))
                success = True
                break
            except (URLError, ConnectionRefusedError) as e:
                print(f"Error downloading {filename}: {e}. Retrying ({attempt + 1}/{retry_limit})...")
                time.sleep(2)  # Wait before retrying
        if not success:
            print(f"Failed to download {filename} after {retry_limit} attempts.")

def reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    if not os.path.isfile(out_file):
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp, reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed ' \
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    _ = input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special cases
        if 'original_youtube_videos' in dataset:
            print('Downloading original youtube videos.')
            if not 'info' in dataset_path:
                print('Please be patient, this may take a while (~40gb)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(args.base_url + dataset_path,
                          out_file=join(output_path, 'downloaded_videos{}.zip'.format(suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

        # Get filelists and video lengths list from server
        try:
            if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
                filepaths = json.loads(urllib.request.urlopen(args.base_url + DEEPFAKES_DETECTION_URL).read().decode("utf-8"))
                if 'actors' in dataset_path:
                    filelist = filepaths['actors']
                else:
                    filelist = filepaths['DeepFakesDetection']
            elif 'original' in dataset_path:
                file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
                filelist = []
                for pair in file_pairs:
                    filelist += pair
            else:
                file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
                filelist = []
                for pair in file_pairs:
                    filelist.append('_'.join(pair))
                    if c_type != 'models':
                        filelist.append('_'.join(pair[::-1]))

            # Maybe limit the number of videos for download
            if num_videos is not None and num_videos > 0:
                print('Downloading the first {} videos'.format(num_videos))
                filelist = filelist[:num_videos]

            # Server and local paths
            dataset_videos_url = args.base_url + '{}/{}/{}/'.format(dataset_path, c_compression, c_type)
            dataset_mask_url = args.base_url + '{}/{}/videos/'.format(dataset_path, 'masks')

            if c_type == 'videos':
                dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                print('Output path: {}'.format(dataset_output_path))
                filelist = [filename + '.mp4' for filename in filelist]
                download_files(filelist, dataset_videos_url, dataset_output_path, retry_limit=args.retry)
            elif c_type == 'masks':
                dataset_output_path = join(output_path, dataset_path, c_type, 'videos')
                print('Output path: {}'.format(dataset_output_path))
                filelist = [filename + '.png' for filename in filelist]
                download_files(filelist, dataset_mask_url, dataset_output_path, retry_limit=args.retry)
            elif c_type == 'models':
                if 'Deepfakes' in dataset:
                    print('Downloading the {} models for Deepfakes'.format(len(DEEPFAKES_MODEL_NAMES)))
                    dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                    print('Output path: {}'.format(dataset_output_path))
                    download_files(DEEPFAKES_MODEL_NAMES, args.deepfakes_model_url, dataset_output_path, retry_limit=args.retry)
                else:
                    raise Exception('No model files exist for dataset "{}"'.format(dataset))
            else:
                raise Exception('Unknown data type "{}"'.format(c_type))
        except URLError as e:
            print(f"Failed to retrieve file list: {e}")
            print("Please check your network connection or try a different server.")
            sys.exit(1)

if __name__ == "__main__":
    args = parse_args()
    main(args)


By pressing any key to continue you confirm that you have agreed to the FaceForensics terms of use as described at:
http://canis.vc.in.tum.de:8100/webpage/FaceForensics_TOS.pdf
***
Press any key to continue, or CTRL-C to exit.
Downloading videos of dataset "original_sequences/youtube"
Failed to retrieve file list: <urlopen error [Errno 61] Connection refused>
Please check your network connection or try a different server.


AttributeError: 'tuple' object has no attribute 'tb_frame'

In [24]:
import traceback

def main(args):
    try:
        # TOS
        print('By pressing any key to continue you confirm that you have agreed ' \
              'to the FaceForensics terms of use as described at:')
        print(args.tos_url)
        print('***')
        print('Press any key to continue, or CTRL-C to exit.')
        _ = input('')

        # Extract arguments
        c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
        c_type = args.type
        c_compression = args.compression
        num_videos = args.num_videos
        output_path = args.output_path
        os.makedirs(output_path, exist_ok=True)

        # Check for special dataset cases
        for dataset in c_datasets:
            dataset_path = DATASETS[dataset]
            # Special cases
            if 'original_youtube_videos' in dataset:
                print('Downloading original youtube videos.')
                if not 'info' in dataset_path:
                    print('Please be patient, this may take a while (~40gb)')
                    suffix = ''
                else:
                    suffix = 'info'
                download_file(args.base_url + dataset_path,
                              out_file=join(output_path, 'downloaded_videos{}.zip'.format(suffix)),
                              report_progress=True)
                return

            # Else: regular datasets
            print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

            # Get filelists and video lengths list from server
            try:
                if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
                    filepaths = json.loads(urllib.request.urlopen(args.base_url + DEEPFAKES_DETECTION_URL).read().decode("utf-8"))
                    if 'actors' in dataset_path:
                        filelist = filepaths['actors']
                    else:
                        filelist = filepaths['DeepFakesDetection']
                elif 'original' in dataset_path:
                    file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
                    filelist = []
                    for pair in file_pairs:
                        filelist += pair
                else:
                    file_pairs = json.loads(urllib.request.urlopen(args.base_url + FILELIST_URL).read().decode("utf-8"))
                    filelist = []
                    for pair in file_pairs:
                        filelist.append('_'.join(pair))
                        if c_type != 'models':
                            filelist.append('_'.join(pair[::-1]))

                # Maybe limit the number of videos for download
                if num_videos is not None and num_videos > 0:
                    print('Downloading the first {} videos'.format(num_videos))
                    filelist = filelist[:num_videos]

                # Server and local paths
                dataset_videos_url = args.base_url + '{}/{}/{}/'.format(dataset_path, c_compression, c_type)
                dataset_mask_url = args.base_url + '{}/{}/videos/'.format(dataset_path, 'masks')

                if c_type == 'videos':
                    dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                    print('Output path: {}'.format(dataset_output_path))
                    filelist = [filename + '.mp4' for filename in filelist]
                    download_files(filelist, dataset_videos_url, dataset_output_path, retry_limit=args.retry)
                elif c_type == 'masks':
                    dataset_output_path = join(output_path, dataset_path, c_type, 'videos')
                    print('Output path: {}'.format(dataset_output_path))
                    filelist = [filename + '.png' for filename in filelist]
                    download_files(filelist, dataset_mask_url, dataset_output_path, retry_limit=args.retry)
                elif c_type == 'models':
                    if 'Deepfakes' in dataset:
                        print('Downloading the {} models for Deepfakes'.format(len(DEEPFAKES_MODEL_NAMES)))
                        dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
                        print('Output path: {}'.format(dataset_output_path))
                        download_files(DEEPFAKES_MODEL_NAMES, args.deepfakes_model_url, dataset_output_path, retry_limit=args.retry)
                    else:
                        raise Exception('No model files exist for dataset "{}"'.format(dataset))
                else:
                    raise Exception('Unknown data type "{}"'.format(c_type))
            except URLError as e:
                print(f"Failed to retrieve file list: {e}")
                print("Please check your network connection or try a different server.")
                traceback.print_exc()
                sys.exit(1)
    except Exception as e:
        print(f"An error occurred: {e}")
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    args = parse_args()
    main(args)


By pressing any key to continue you confirm that you have agreed to the FaceForensics terms of use as described at:
http://canis.vc.in.tum.de:8100/webpage/FaceForensics_TOS.pdf
***
Press any key to continue, or CTRL-C to exit.
Downloading videos of dataset "original_sequences/youtube"
Failed to retrieve file list: <urlopen error [Errno 61] Connection refused>
Please check your network connection or try a different server.


Traceback (most recent call last):
  File "/opt/anaconda3/envs/untitled/lib/python3.8/urllib/request.py", line 1354, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 1256, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 1302, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 1251, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 1011, in _send_output
    self.send(msg)
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 951, in send
    self.connect()
  File "/opt/anaconda3/envs/untitled/lib/python3.8/http/client.py", line 922, in connect
    self.sock = self._create_

AttributeError: 'tuple' object has no attribute 'tb_frame'

In [1]:
!pip install yt-dlp


Collecting yt-dlp
  Downloading yt_dlp-2024.8.6-py3-none-any.whl.metadata (170 kB)
Collecting mutagen (from yt-dlp)
  Downloading mutagen-1.47.0-py3-none-any.whl.metadata (1.7 kB)
Collecting pycryptodomex (from yt-dlp)
  Downloading pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_universal2.whl.metadata (3.4 kB)
Collecting websockets>=12.0 (from yt-dlp)
  Downloading websockets-13.0.1-cp38-cp38-macosx_11_0_arm64.whl.metadata (6.7 kB)
Downloading yt_dlp-2024.8.6-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m74.2 kB/s[0m eta [36m0:00:00[0ma [36m0:00:04[0m
[?25hDownloading websockets-13.0.1-cp38-cp38-macosx_11_0_arm64.whl (148 kB)
Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
Downloading pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_universal2.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m69.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:02[0m
[?25hInstalling collected package

In [34]:
!pip uninstall -y youtube-dl


Found existing installation: youtube-dl 2021.12.17
Uninstalling youtube-dl-2021.12.17:
  Successfully uninstalled youtube-dl-2021.12.17


In [29]:
import os

# Ensure the output directory exists
output_dir = "test_videos"
os.makedirs(output_dir, exist_ok=True)

# List of YouTube video URLs to download
video_urls = [
    "https://youtu.be/iqji2dEX7No?si=o9Ga2uGePRJrFRwX",  # Sample Video 1
    "https://youtu.be/FYEGvuVWW0o?si=ty4187Vpb94ghhJt",  # Sample Video 2
    "https://youtu.be/5WECsbqAQSk?si=coQZ0h30kGirwAOh"   # Sample Video 3
]

# Download command using youtube-dl
download_command = "youtube-dl -f mp4 -o '{}/%(title)s.%(ext)s' ".format(output_dir)

# Download each video
for url in video_urls:
    os.system(download_command + url)

print("Videos downloaded to:", output_dir)


[youtube] iqji2dEX7No: Downloading webpage


ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.


[youtube] FYEGvuVWW0o: Downloading webpage


ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.


[youtube] 5WECsbqAQSk: Downloading webpage
Videos downloaded to: test_videos


ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.


In [37]:
import os

# Ensure the output directory exists
output_dir = "test_videos"
os.makedirs(output_dir, exist_ok=True)

# List of YouTube video URLs to download
video_urls = [
    "https://youtu.be/iqji2dEX7No?si=o9Ga2uGePRJrFRwX",  # Sample Video 1
    "https://youtu.be/FYEGvuVWW0o?si=ty4187Vpb94ghhJt",  # Sample Video 2
    "https://youtu.be/5WECsbqAQSk?si=coQZ0h30kGirwAOh"
]

# Download command using yt-dlp
download_command = "yt-dlp -f mp4 -o '{}/%(title)s.%(ext)s' ".format(output_dir)

# Download each video
for url in video_urls:
    os.system(download_command + url)

print("Videos downloaded to:", output_dir)


[youtube] Extracting URL: https://youtu.be/iqji2dEX7No?si=o9Ga2uGePRJrFRwX
[youtube] iqji2dEX7No: Downloading webpage
[youtube] iqji2dEX7No: Downloading ios player API JSON
[youtube] iqji2dEX7No: Downloading web creator player API JSON
[youtube] iqji2dEX7No: Downloading m3u8 information
[info] iqji2dEX7No: Downloading 1 format(s): 18
[download] test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4 has already been downloaded
[download] 100% of    3.25MiB
[youtube] Extracting URL: https://youtu.be/FYEGvuVWW0o?si=ty4187Vpb94ghhJt
[youtube] FYEGvuVWW0o: Downloading webpage
[youtube] FYEGvuVWW0o: Downloading ios player API JSON
[youtube] FYEGvuVWW0o: Downloading web creator player API JSON
[youtube] FYEGvuVWW0o: Downloading m3u8 information
[info] FYEGvuVWW0o: Downloading 1 format(s): 18
[download] Destination: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
[download] 100% of  657.62KiB in 00:00:03 at 187.53KiB/s 
[youtube] Extracting URL: https://youtu.be/

In [47]:
import os
import cv2
import numpy as np
from PIL import Image, ImageChops, ImageEnhance

def resize_frame(frame, scale_factor=0.5):
    height, width = frame.shape[:2]
    new_size = (int(width * scale_factor), int(height * scale_factor))
    resized_frame = cv2.resize(frame, new_size)
    return resized_frame

def apply_ela(image_path, output_path, scale_factor=0.5):
    original = Image.open(image_path)
    original = original.convert("RGB")

    # Resize image
    resized = original.resize((int(original.width * scale_factor), int(original.height * scale_factor)), Image.ANTIALIAS)

    # Save resized image
    resized.save("resized_image.png")

    # Reopen resized image
    resized = Image.open("resized_image.png")

    # Convert to numpy arrays
    original_np = np.array(original)
    resized_np = np.array(resized)

    # Calculate error level
    error_level = ImageChops.difference(Image.fromarray(original_np), Image.fromarray(resized_np))

    # Enhance the error level image
    enhancer = ImageEnhance.Contrast(error_level)
    error_level = enhancer.enhance(2)

    # Save ELA image
    error_level.save(output_path)

def process_videos(video_files, output_folder):
    # Create output directory if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for video_file in video_files:
        cap = cv2.VideoCapture(video_file)

        if not cap.isOpened():
            print(f"Error: Could not open video file {video_file}")
            continue

        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame
            resized_frame = resize_frame(frame)

            # Save resized frame
            frame_filename = f"{output_folder}/frame_{frame_number}.png"
            cv2.imwrite(frame_filename, resized_frame)

            # Apply ELA
            ela_output = f"{output_folder}/ela_frame_{frame_number}.png"
            apply_ela(frame_filename, ela_output)

            frame_number += 1

        cap.release()
        print(f"Processing complete for {video_file}")

# Example usage
video_files = ["test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4", "test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4", "test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4"]
output_folder = "output_frames"

process_videos(video_files, output_folder)


AttributeError: module 'PIL.Image' has no attribute 'ANTIALIAS'

In [48]:
import os
import cv2
import numpy as np
from PIL import Image, ImageChops, ImageEnhance

def resize_frame(frame, scale_factor=0.5):
    height, width = frame.shape[:2]
    new_size = (int(width * scale_factor), int(height * scale_factor))
    resized_frame = cv2.resize(frame, new_size, interpolation=cv2.INTER_LANCZOS4)
    return resized_frame

def apply_ela(image_path, output_path, scale_factor=0.5):
    original = Image.open(image_path)
    original = original.convert("RGB")

    # Resize image
    resized = original.resize((int(original.width * scale_factor), int(original.height * scale_factor)), Image.Resampling.LANCZOS)

    # Save resized image
    resized.save("resized_image.png")

    # Reopen resized image
    resized = Image.open("resized_image.png")

    # Convert to numpy arrays
    original_np = np.array(original)
    resized_np = np.array(resized)

    # Calculate error level
    error_level = ImageChops.difference(Image.fromarray(original_np), Image.fromarray(resized_np))

    # Enhance the error level image
    enhancer = ImageEnhance.Contrast(error_level)
    error_level = enhancer.enhance(2)

    # Save ELA image
    error_level.save(output_path)

def process_videos(video_files, output_folder):
    # Create output directory if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for video_file in video_files:
        cap = cv2.VideoCapture(video_file)

        if not cap.isOpened():
            print(f"Error: Could not open video file {video_file}")
            continue

        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame
            resized_frame = resize_frame(frame)

            # Save resized frame
            frame_filename = f"{output_folder}/frame_{frame_number}.png"
            cv2.imwrite(frame_filename, resized_frame)

            # Apply ELA
            ela_output = f"{output_folder}/ela_frame_{frame_number}.png"
            apply_ela(frame_filename, ela_output)

            frame_number += 1

        cap.release()
        print(f"Processing complete for {video_file}")

# Example usage
video_files = ["test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4", "test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4", "test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4"]
output_folder = "output_frames"

process_videos(video_files, output_folder)


Processing complete for test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Processing complete for test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4
Processing complete for test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4


In [49]:
# for different folder per video
import os
import cv2
import numpy as np
from PIL import Image, ImageChops, ImageEnhance

def resize_frame(frame, scale_factor=0.5):
    height, width = frame.shape[:2]
    new_size = (int(width * scale_factor), int(height * scale_factor))
    resized_frame = cv2.resize(frame, new_size, interpolation=cv2.INTER_LANCZOS4)
    return resized_frame

def apply_ela(image_path, output_path, scale_factor=0.5):
    original = Image.open(image_path)
    original = original.convert("RGB")

    # Resize image
    resized = original.resize((int(original.width * scale_factor), int(original.height * scale_factor)), Image.Resampling.LANCZOS)

    # Save resized image
    resized.save("resized_image.png")

    # Reopen resized image
    resized = Image.open("resized_image.png")

    # Convert to numpy arrays
    original_np = np.array(original)
    resized_np = np.array(resized)

    # Calculate error level
    error_level = ImageChops.difference(Image.fromarray(original_np), Image.fromarray(resized_np))

    # Enhance the error level image
    enhancer = ImageEnhance.Contrast(error_level)
    error_level = enhancer.enhance(2)

    # Save ELA image
    error_level.save(output_path)

def process_videos(video_files, output_folder):
    # Create the output base directory if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for video_file in video_files:
        # Create a subfolder for each video
        video_name = os.path.splitext(os.path.basename(video_file))[0]
        video_output_folder = os.path.join(output_folder, video_name)
        if not os.path.exists(video_output_folder):
            os.makedirs(video_output_folder)

        cap = cv2.VideoCapture(video_file)

        if not cap.isOpened():
            print(f"Error: Could not open video file {video_file}")
            continue

        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame
            resized_frame = resize_frame(frame)

            # Save resized frame
            frame_filename = os.path.join(video_output_folder, f"frame_{frame_number}.png")
            cv2.imwrite(frame_filename, resized_frame)

            # Apply ELA
            ela_output = os.path.join(video_output_folder, f"ela_frame_{frame_number}.png")
            apply_ela(frame_filename, ela_output)

            frame_number += 1

        cap.release()
        print(f"Processing complete for {video_file}")

# Example usage
video_files = ["test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4", "test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4", "test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4"]
output_folder = "output_frames"

process_videos(video_files, output_folder)


Processing complete for test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Processing complete for test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4
Processing complete for test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4


In [50]:
import os
import cv2
import numpy as np
from PIL import Image, ImageChops, ImageEnhance
import matplotlib.pyplot as plt

def resize_frame(frame, scale_factor=0.5):
    height, width = frame.shape[:2]
    new_size = (int(width * scale_factor), int(height * scale_factor))
    resized_frame = cv2.resize(frame, new_size, interpolation=cv2.INTER_LANCZOS4)
    return resized_frame

def apply_ela(image_path, scale_factor=0.5):
    original = Image.open(image_path)
    original = original.convert("RGB")

    # Resize image
    resized = original.resize((int(original.width * scale_factor), int(original.height * scale_factor)), Image.Resampling.LANCZOS)

    # Convert to numpy arrays
    original_np = np.array(original)
    resized_np = np.array(resized)

    # Calculate error level
    error_level = ImageChops.difference(Image.fromarray(original_np), Image.fromarray(resized_np))

    # Convert error level image to numpy array
    error_level_np = np.array(error_level)

    # Calculate the mean error level value
    mean_error = np.mean(error_level_np)

    return mean_error

def process_videos(video_files, output_folder):
    # Create the output base directory if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for video_file in video_files:
        # Create a subfolder for each video
        video_name = os.path.splitext(os.path.basename(video_file))[0]
        video_output_folder = os.path.join(output_folder, video_name)
        if not os.path.exists(video_output_folder):
            os.makedirs(video_output_folder)

        cap = cv2.VideoCapture(video_file)

        if not cap.isOpened():
            print(f"Error: Could not open video file {video_file}")
            continue

        frame_number = 0
        mean_errors = []

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame
            resized_frame = resize_frame(frame)

            # Save resized frame
            frame_filename = os.path.join(video_output_folder, f"frame_{frame_number}.png")
            cv2.imwrite(frame_filename, resized_frame)

            # Calculate ELA and get mean error level
            mean_error = apply_ela(frame_filename)
            mean_errors.append(mean_error)

            frame_number += 1

        cap.release()
        print(f"Processing complete for {video_file}")

        # Plotting the mean error levels
        plt.figure(figsize=(10, 5))
        plt.plot(mean_errors, label=f'Error Level - {video_name}')
        plt.xlabel('Frame Number')
        plt.ylabel('Mean Error Level')
        plt.title(f'Mean Error Level Across Frames for {video_name}')
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(output_folder, video_name, 'error_level_plot.png'))
        plt.close()

# Example usage
video_files = ["test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4", "test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4", "test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4"]
output_folder = "output_frames"

process_videos(video_files, output_folder)


Processing complete for test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Processing complete for test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4
Processing complete for test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4


  plt.savefig(os.path.join(output_folder, video_name, 'error_level_plot.png'))


In [None]:
print(f"Processing complete for {video_files}")

In [52]:
import os
import numpy as np
from PIL import Image

def summarize_error_levels(video_files, output_folder):
    for video_file in video_files:
        video_name = os.path.splitext(os.path.basename(video_file))[0]
        video_output_folder = os.path.join(output_folder, video_name)

        if not os.path.exists(video_output_folder):
            print(f"Output folder for {video_name} does not exist.")
            continue

        frame_files = [f for f in os.listdir(video_output_folder) if f.endswith('.png') and 'ela' in f]

        max_differences = []

        for frame_file in frame_files:
            ela_file = os.path.join(video_output_folder, frame_file)
            error_img = Image.open(ela_file)
            error_np = np.array(error_img)
            max_difference = np.max(error_np)
            max_differences.append(max_difference)

        if not max_differences:
            print(f"No ELA frames found for {video_name}.")
            continue

        max_differences = np.array(max_differences)
        summary = {
            'Video': video_name,
            'Number of Frames': len(max_differences),
            'Maximum Error Level': np.max(max_differences),
            'Minimum Error Level': np.min(max_differences),
            'Average Error Level': np.mean(max_differences),
            'Standard Deviation': np.std(max_differences)
        }

        print(f"Summary for {video_name}:")
        print(f"  Number of Frames: {summary['Number of Frames']}")
        print(f"  Maximum Error Level: {summary['Maximum Error Level']}")
        print(f"  Minimum Error Level: {summary['Minimum Error Level']}")
        print(f"  Average Error Level: {summary['Average Error Level']:.2f}")
        print(f"  Standard Deviation: {summary['Standard Deviation']:.2f}")
        print()

# Example usage
summarize_error_levels(video_files, output_folder)


No ELA frames found for Deepfake Scam Example Featuring Martin Lewis.
No ELA frames found for Deepfake Example Presented by Senator Richard Blumenthal.
No ELA frames found for Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.


In [44]:
# import cv2
# from PIL import Image, ImageChops, ImageEnhance
# import os
# 
# # Function to resize the image according to CNN's input layer
# def resize_image(image, target_size=(224, 224)):
#     return image.resize(target_size, Image.LANCZOS)
# 
# # Function to perform ELA
# def perform_ela(original_image, scale=10):
#     # Save the image in a lossless format
#     temp_filename = "temp_ela.png"
#     original_image.save(temp_filename, "PNG")
# 
#     # Reload the saved image
#     compressed_image = Image.open(temp_filename)
# 
#     # Find the difference between the original and compressed images
#     ela_image = ImageChops.difference(original_image, compressed_image)
# 
#     # Debugging: Save the difference image
#     ela_debug_filename = "ela_debug.png"
#     ela_image.save(ela_debug_filename)
# 
#     # Debugging information
#     extrema = ela_image.getextrema()
#     max_diff = max([ex[1] for ex in extrema])
#     print(f"Max difference: {max_diff}")
# 
#     if max_diff == 0:
#         print("No difference detected.")
#         return None
# 
#     # Adjust scale for better visualization
#     scale = scale / max_diff
#     print(f"Scaling factor: {scale}")
# 
#     # Enhance contrast and scale the image
#     ela_image = ImageEnhance.Contrast(ela_image).enhance(scale)
# 
#     # Convert to grayscale for better visualization
#     ela_image = ela_image.convert("L")
# 
#     return ela_image
# 
# # Function to process videos from the dataset
# def process_video(video_path, output_dir, frame_skip=30):
#     video_capture = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frame_count = 0
# 
#     while True:
#         ret, frame = video_capture.read()
#         if not ret:
#             break
# 
#         # Skip frames to reduce processing load
#         if frame_count % frame_skip == 0:
#             # Convert frame to PIL image
#             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# 
#             # Resize the image
#             resized_image = resize_image(pil_frame)
# 
#             # Perform ELA
#             ela_image = perform_ela(resized_image)
# 
#             if ela_image is not None:
#                 # Save the ELA image
#                 ela_filename = os.path.join(output_dir, f"ela_frame_{saved_frame_count:04d}.png")
#                 ela_image.save(ela_filename)
#                 saved_frame_count += 1
# 
#         frame_count += 1
# 
#     video_capture.release()
# 
# # Function to process all videos in a directory
# def process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30):
#     for root, dirs, files in os.walk(dataset_dir):
#         for file in files:
#             if file.endswith('.mp4'):
#                 video_path = os.path.join(root, file)
# 
#                 # Create an output directory for the video
#                 output_dir = os.path.join(output_base_dir, os.path.splitext(file)[0])
#                 os.makedirs(output_dir, exist_ok=True)
# 
#                 # Process the video
#                 print(f"Processing video: {video_path}")
#                 process_video(video_path, output_dir, frame_skip)
# 
# # Replace with the path to your downloaded videos
# dataset_dir = "test_videos"
# # Replace with the directory where you want to save ELA frames
# output_base_dir = "ela_results"
# 
# # Process all videos in the dataset
# process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30)


Processing video: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detected.
Max difference: 0
No difference detecte

In [39]:
# import cv2
# from PIL import Image, ImageChops, ImageEnhance
# import os
# 
# # Function to resize the image according to CNN's input layer
# def resize_image(image, target_size=(224, 224)):
#     return image.resize(target_size, Image.LANCZOS)
# 
# # Function to save and reload the image to perform ELA
# def perform_ela(image, scale=10):
#     # Save the image at a lower quality
#     temp_filename = "temp_ela.jpg"
#     image.save(temp_filename, "JPEG", quality=90)
# 
#     # Reload the saved image
#     compressed_image = Image.open(temp_filename)
# 
#     # Find the difference between the original and compressed images
#     ela_image = ImageChops.difference(image, compressed_image)
# 
#     # Enhance the differences for visualization
#     extrema = ela_image.getextrema()
#     max_diff = max([ex[1] for ex in extrema])
#     scale = scale / max_diff if max_diff > 0 else 1
# 
#     ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
# 
#     return ela_image
# 
# # Function to process videos from the dataset
# def process_video(video_path, output_dir, frame_skip=30):
#     video_capture = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frame_count = 0
# 
#     while True:
#         ret, frame = video_capture.read()
#         if not ret:
#             break
# 
#         # Skip frames to reduce processing load
#         if frame_count % frame_skip == 0:
#             # Convert frame to PIL image
#             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# 
#             # Resize the image
#             resized_image = resize_image(pil_frame)
# 
#             # Perform ELA
#             ela_image = perform_ela(resized_image)
# 
#             # Save the ELA image
#             ela_filename = os.path.join(output_dir, f"ela_frame_{saved_frame_count:04d}.png")
#             ela_image.save(ela_filename)
# 
#             saved_frame_count += 1
# 
#         frame_count += 1
# 
#     video_capture.release()
# 
# # Function to process all videos in a directory
# def process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30):
#     for root, dirs, files in os.walk(dataset_dir):
#         for file in files:
#             if file.endswith('.mp4'):
#                 video_path = os.path.join(root, file)
# 
#                 # Create an output directory for the video
#                 output_dir = os.path.join(output_base_dir, os.path.splitext(file)[0])
#                 os.makedirs(output_dir, exist_ok=True)
# 
#                 # Process the video
#                 print(f"Processing video: {video_path}")
#                 process_video(video_path, output_dir, frame_skip)
# 
# # Replace with the path to your downloaded videos
# dataset_dir = "test_videos"
# # Replace with the directory where you want to save ELA frames
# output_base_dir = "ela_results"
# 
# # Process all videos in the dataset
# process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30)


Processing video: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Processing video: test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4
Processing video: test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4


In [41]:
# import cv2
# from PIL import Image, ImageChops, ImageEnhance
# import os
# 
# # Function to resize the image according to CNN's input layer
# def resize_image(image, target_size=(224, 224)):
#     return image.resize(target_size, Image.LANCZOS)
# 
# # Function to save and reload the image to perform ELA
# def perform_ela(image, scale=10):
#     # Save the image at a lower quality
#     temp_filename = "temp_ela.jpg"
#     image.save(temp_filename, "JPEG", quality=90)
# 
#     # Reload the saved image
#     compressed_image = Image.open(temp_filename)
# 
#     # Find the difference between the original and compressed images
#     ela_image = ImageChops.difference(image, compressed_image)
# 
#     # Enhance the differences for visualization
#     extrema = ela_image.getextrema()
#     max_diff = max([ex[1] for ex in extrema])
#     scale = scale / max_diff if max_diff > 0 else 1
# 
#     # Increase contrast to make differences more visible
#     ela_image = ImageEnhance.Contrast(ela_image).enhance(scale)
#     # Convert to grayscale for better visualization
#     ela_image = ela_image.convert("L")
# 
#     return ela_image
# 
# # Function to process videos from the dataset
# def process_video(video_path, output_dir, frame_skip=30):
#     video_capture = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frame_count = 0
# 
#     while True:
#         ret, frame = video_capture.read()
#         if not ret:
#             break
# 
#         # Skip frames to reduce processing load
#         if frame_count % frame_skip == 0:
#             # Convert frame to PIL image
#             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# 
#             # Resize the image
#             resized_image = resize_image(pil_frame)
# 
#             # Perform ELA
#             ela_image = perform_ela(resized_image)
# 
#             # Save the ELA image
#             ela_filename = os.path.join(output_dir, f"ela_frame_{saved_frame_count:04d}.png")
#             ela_image.save(ela_filename)
# 
#             saved_frame_count += 1
# 
#         frame_count += 1
# 
#     video_capture.release()
# 
# # Function to process all videos in a directory
# def process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30):
#     for root, dirs, files in os.walk(dataset_dir):
#         for file in files:
#             if file.endswith('.mp4'):
#                 video_path = os.path.join(root, file)
# 
#                 # Create an output directory for the video
#                 output_dir = os.path.join(output_base_dir, os.path.splitext(file)[0])
#                 os.makedirs(output_dir, exist_ok=True)
# 
#                 # Process the video
#                 print(f"Processing video: {video_path}")
#                 process_video(video_path, output_dir, frame_skip)
# 
# # Replace with the path to your downloaded videos
# dataset_dir = "test_videos"
# # Replace with the directory where you want to save ELA frames
# output_base_dir = "ela_results"
# 
# # Process all videos in the dataset
# process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30)


Processing video: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Processing video: test_videos/Deepfake Example Presented by Senator Richard Blumenthal.mp4
Processing video: test_videos/Supporting Local Retailers This Diwali ｜ Not Just A Cadbury Ad Campaign Video.mp4


In [42]:
# import cv2
# from PIL import Image, ImageChops, ImageEnhance
# import os
# 
# # Function to resize the image according to CNN's input layer
# def resize_image(image, target_size=(224, 224)):
#     return image.resize(target_size, Image.LANCZOS)
# 
# # Function to save and reload the image to perform ELA
# def perform_ela(image, scale=10):
#     # Save the image in a lossless format to avoid compression artifacts
#     temp_filename = "temp_ela.png"
#     image.save(temp_filename, "PNG")
# 
#     # Reload the saved image
#     compressed_image = Image.open(temp_filename)
# 
#     # Find the difference between the original and compressed images
#     ela_image = ImageChops.difference(image, compressed_image)
# 
#     # Debugging information
#     extrema = ela_image.getextrema()
#     max_diff = max([ex[1] for ex in extrema])
#     print(f"Max difference: {max_diff}")
# 
#     # Adjust scale for better visualization
#     scale = scale / max_diff if max_diff > 0 else 1
#     print(f"Scaling factor: {scale}")
# 
#     # Enhance contrast and scale the image
#     ela_image = ImageEnhance.Contrast(ela_image).enhance(scale)
# 
#     # Convert to grayscale for better visualization
#     ela_image = ela_image.convert("L")
# 
#     return ela_image
# 
# # Function to process videos from the dataset
# def process_video(video_path, output_dir, frame_skip=30):
#     video_capture = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frame_count = 0
# 
#     while True:
#         ret, frame = video_capture.read()
#         if not ret:
#             break
# 
#         # Skip frames to reduce processing load
#         if frame_count % frame_skip == 0:
#             # Convert frame to PIL image
#             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# 
#             # Resize the image
#             resized_image = resize_image(pil_frame)
# 
#             # Perform ELA
#             ela_image = perform_ela(resized_image)
# 
#             # Save the ELA image
#             ela_filename = os.path.join(output_dir, f"ela_frame_{saved_frame_count:04d}.png")
#             ela_image.save(ela_filename)
# 
#             saved_frame_count += 1
# 
#         frame_count += 1
# 
#     video_capture.release()
# 
# # Function to process all videos in a directory
# def process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30):
#     for root, dirs, files in os.walk(dataset_dir):
#         for file in files:
#             if file.endswith('.mp4'):
#                 video_path = os.path.join(root, file)
# 
#                 # Create an output directory for the video
#                 output_dir = os.path.join(output_base_dir, os.path.splitext(file)[0])
#                 os.makedirs(output_dir, exist_ok=True)
# 
#                 # Process the video
#                 print(f"Processing video: {video_path}")
#                 process_video(video_path, output_dir, frame_skip)
# 
# # Replace with the path to your downloaded videos
# dataset_dir = "test_videos"
# # Replace with the directory where you want to save ELA frames
# output_base_dir = "ela_results"
# 
# # Process all videos in the dataset
# process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30)


Processing video: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Sca

In [43]:
# import cv2
# from PIL import Image, ImageChops, ImageEnhance
# import os
# 
# # Function to resize the image according to CNN's input layer
# def resize_image(image, target_size=(224, 224)):
#     return image.resize(target_size, Image.LANCZOS)
# 
# # Function to save and reload the image to perform ELA
# def perform_ela(image, scale=10):
#     # Save the image in a lossless format to avoid compression artifacts
#     temp_filename = "temp_ela.png"
#     image.save(temp_filename, "PNG")
# 
#     # Reload the saved image
#     compressed_image = Image.open(temp_filename)
# 
#     # Find the difference between the original and compressed images
#     ela_image = ImageChops.difference(image, compressed_image)
# 
#     # Debugging: Save the difference image
#     ela_debug_filename = "ela_debug.png"
#     ela_image.save(ela_debug_filename)
# 
#     # Debugging information
#     extrema = ela_image.getextrema()
#     max_diff = max([ex[1] for ex in extrema])
#     print(f"Max difference: {max_diff}")
# 
#     # Adjust scale for better visualization
#     scale = scale / max_diff if max_diff > 0 else 1
#     print(f"Scaling factor: {scale}")
# 
#     # Enhance contrast and scale the image
#     ela_image = ImageEnhance.Contrast(ela_image).enhance(scale)
# 
#     # Convert to grayscale for better visualization
#     ela_image = ela_image.convert("L")
# 
#     return ela_image
# 
# # Function to process videos from the dataset
# def process_video(video_path, output_dir, frame_skip=30):
#     video_capture = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frame_count = 0
# 
#     while True:
#         ret, frame = video_capture.read()
#         if not ret:
#             break
# 
#         # Skip frames to reduce processing load
#         if frame_count % frame_skip == 0:
#             # Convert frame to PIL image
#             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# 
#             # Resize the image
#             resized_image = resize_image(pil_frame)
# 
#             # Perform ELA
#             ela_image = perform_ela(resized_image)
# 
#             # Save the ELA image
#             ela_filename = os.path.join(output_dir, f"ela_frame_{saved_frame_count:04d}.png")
#             ela_image.save(ela_filename)
# 
#             saved_frame_count += 1
# 
#         frame_count += 1
# 
#     video_capture.release()
# 
# # Function to process all videos in a directory
# def process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30):
#     for root, dirs, files in os.walk(dataset_dir):
#         for file in files:
#             if file.endswith('.mp4'):
#                 video_path = os.path.join(root, file)
# 
#                 # Create an output directory for the video
#                 output_dir = os.path.join(output_base_dir, os.path.splitext(file)[0])
#                 os.makedirs(output_dir, exist_ok=True)
# 
#                 # Process the video
#                 print(f"Processing video: {video_path}")
#                 process_video(video_path, output_dir, frame_skip)
# 
# # Replace with the path to your downloaded videos
# dataset_dir = "test_videos"
# # Replace with the directory where you want to save ELA frames
# output_base_dir = "ela_results"
# 
# # Process all videos in the dataset
# process_dataset_videos(dataset_dir, output_base_dir, frame_skip=30)


Processing video: test_videos/Deepfake Scam Example Featuring Martin Lewis.mp4
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Scaling factor: 1
Max difference: 0
Sca