# Import drive

In [8]:
from google.colab import drive
drive.mount('/content/drive')
BASE = "/content/drive/MyDrive/ffpp_data"


Mounted at /content/drive


# Environnement

In [9]:
import os, sys, textwrap
BASE = globals().get("BASE", "/content/ffpp_data")
os.makedirs(BASE, exist_ok=True)
!python --version
!pip -q install tqdm


Python 3.12.12


# Script de téléchargement

In [10]:
# (1) Dépendance utile pour la barre de progression
!pip -q install tqdm

# (2) Écrire le script dans /content/download_ffpp.py
script = """#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Downloads FaceForensics++ and Deep Fake Detection public data release
# Example usage:
#   see -h or https://github.com/ondyari/FaceForensics

import argparse
import os
import urllib
import urllib.request
import tempfile
import time
import sys
import json
import random
from tqdm import tqdm
from os.path import join

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics v2 public data release.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('output_path', type=str, help='Output directory.')
    parser.add_argument('-d', '--dataset', type=str, default='all',
                        choices=list(DATASETS.keys()) + ['all'],
                        help='Which dataset to download.')
    parser.add_argument('-c', '--compression', type=str, default='raw',
                        choices=COMPRESSION,
                        help='Compression (raw/c23/c40).')
    parser.add_argument('-t', '--type', type=str, default='videos',
                        choices=TYPE,
                        help='File type to download (videos/masks/models).')
    parser.add_argument('-n', '--num_videos', type=int, default=None,
                        help='Limit to first N videos.')
    parser.add_argument('--server', type=str, default='EU',
                        choices=SERVERS,
                        help='Mirror server.')
    args = parser.parse_args()

    # URLs
    server = args.server
    if server == 'EU':
        server_url = 'http://canis.vc.in.tum.de:8100/'
    elif server == 'EU2':
        server_url = 'http://kaldir.vc.in.tum.de/faceforensics/'
    elif server == 'CA':
        server_url = 'http://falas.cmpt.sfu.ca:8100/'
    else:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))
    args.tos_url = server_url + 'webpage/FaceForensics_TOS.pdf'
    args.base_url = server_url + 'v3/'
    args.deepfakes_model_url = args.base_url + 'manipulated_sequences/Deepfakes/models/'
    return args

def download_files(filenames, base_url, output_path, report_progress=True):
    os.makedirs(output_path, exist_ok=True)
    itr = tqdm(filenames) if report_progress else filenames
    for filename in itr:
        download_file(base_url + filename, join(output_path, filename))

def reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration)) if duration > 0 else 0
    percent = int(count * block_size * 100 / total_size) if total_size else 0
    sys.stdout.write("\\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed"
                     % (percent, progress_size // (1024*1024), speed, duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    os.makedirs(out_dir, exist_ok=True)
    if not os.path.isfile(out_file):
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        os.close(fh)
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp, reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)

def main(args):
    print('By pressing Enter you confirm that you have agreed to the FaceForensics terms of use at:')
    print(args.tos_url)
    print('***')
    print('Press Enter to continue, or CTRL-C to exit.')
    _ = input('')

    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]

        if 'original_youtube_videos' in dataset:
            print('Downloading original youtube videos.')
            suffix = '' if 'info' not in dataset_path else 'info'
            download_file(args.base_url + '/' + dataset_path,
                          out_file=join(output_path, 'downloaded_videos%s.zip' % suffix),
                          report_progress=True)
            return

        print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

        # Build file list
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = json.loads(urllib.request.urlopen(args.base_url + '/' + DEEPFEAKES_DETECTION_URL).read().decode('utf-8'))
            filelist = filepaths['actors'] if 'actors' in dataset_path else filepaths['DeepFakesDetection']
        else:
            file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' + FILELIST_URL).read().decode('utf-8'))
            if 'original' in dataset_path:
                filelist = []
                for pair in file_pairs:
                    filelist += pair
            else:
                filelist = []
                for pair in file_pairs:
                    filelist.append('_'.join(pair))
                    if c_type != 'models':
                        filelist.append('_'.join(pair[::-1]))

        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(dataset_path, c_compression, c_type)
        dataset_mask_url = args.base_url + '{}/{}/videos/'.format(dataset_path, 'masks')

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path, c_compression, c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [f + '.mp4' for f in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            if 'original' in dataset:
                print('Only videos available for original data. Skipping original masks.')
                continue
            if 'FaceShifter' in dataset:
                print('Masks not available for FaceShifter. Aborting.')
                return
            dataset_output_path = join(output_path, dataset_path, c_type, 'videos')
            print('Output path: {}'.format(dataset_output_path))
            filelist = [f + '.mp4' for f in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)
        else:
            if dataset != 'Deepfakes' and c_type == 'models':
                print('Models only available for Deepfakes. Aborting.')
                return
            dataset_output_path = join(output_path, dataset_path, c_type)
            print('Output path: {}'.format(dataset_output_path))
            for folder in tqdm(filelist):
                folder_filelist = DEEPFAKES_MODEL_NAMES
                folder_base_url = args.deepfakes_model_url + folder + '/'
                folder_dataset_output_path = join(dataset_output_path, folder)
                download_files(folder_filelist, folder_base_url, folder_dataset_output_path, report_progress=False)

if __name__ == '__main__':
    args = parse_args()
    main(args)
"""

with open("/content/download_ffpp.py", "w", encoding="utf-8") as f:
    f.write(script)

!chmod +x /content/download_ffpp.py


3 vidéos originales

In [12]:
!python /content/download_ffpp.py "$BASE" -d original -c c23 -t videos -n 3 --server EU2


By pressing Enter you confirm that you have agreed to the FaceForensics terms of use at:
http://kaldir.vc.in.tum.de/faceforensics/webpage/FaceForensics_TOS.pdf
***
Press Enter to continue, or CTRL-C to exit.

Downloading videos of dataset "original_sequences/youtube"
Downloading the first 3 videos
Output path: /content/drive/MyDrive/ffpp_data/original_sequences/youtube/c23/videos
100% 3/3 [00:03<00:00,  1.14s/it]


2 vidéos actors

In [7]:
!python /content/download_ffpp.py "$BASE" -d DeepFakeDetection_original -c c23 -t videos -n 2


# Vérifier ou les fichiers ont été écrit

In [13]:
import subprocess, shlex, os
for p in [
  f"{BASE}/original_sequences/youtube/c23/videos",
  f"{BASE}/manipulated_sequences/FaceSwap/c23/videos",
  f"{BASE}/original_sequences/actors/c23/videos",
]:
    if os.path.isdir(p):
        print("==>", p)
        print(subprocess.getoutput(shlex.split(f"ls -lh {p} | head -n 10")))


==> /content/drive/MyDrive/ffpp_data/original_sequences/youtube/c23/videos
download_ffpp.py
drive
ffpp_data
sample_data
