In [1]:
!pip install -r requirements.txt

Collecting absl-py==0.4.0
  Downloading absl-py-0.4.0.tar.gz (88 kB)
     |████████████████████████████████| 88 kB 4.1 MB/s             
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting astor==0.7.1
  Downloading astor-0.7.1-py2.py3-none-any.whl (27 kB)
Collecting certifi==2018.11.29
  Downloading certifi-2018.11.29-py2.py3-none-any.whl (154 kB)
     |████████████████████████████████| 154 kB 22.7 MB/s            
[?25hCollecting cffi==1.12.1
  Downloading cffi-1.12.1-cp36-cp36m-manylinux1_x86_64.whl (428 kB)
     |████████████████████████████████| 428 kB 59.8 MB/s            
[?25hCollecting cmake==3.12.0
  Downloading cmake-3.12.0-cp36-cp36m-manylinux1_x86_64.whl (17.7 MB)
     |████████████████████████████████| 17.7 MB 99.2 MB/s            
[?25hCollecting dlib==19.15.0
  Downloading dlib-19.15.0.tar.gz (3.3 MB)
     |████████████████████████████████| 3.3 MB 86.9 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting face-recogn

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



     |████████████████████████████████| 582.5 MB 16 kB/s               
[?25hCollecting torchvision==0.2.1
  Downloading torchvision-0.2.1-py2.py3-none-any.whl (54 kB)
     |████████████████████████████████| 54 kB 4.1 MB/s             
[?25hCollecting tqdm==4.25.0
  Downloading tqdm-4.25.0-py2.py3-none-any.whl (43 kB)
     |████████████████████████████████| 43 kB 384 kB/s             
[?25hCollecting torchgeometry==0.1.2
  Downloading torchgeometry-0.1.2-py2.py3-none-any.whl (42 kB)
     |████████████████████████████████| 42 kB 112 kB/s              
Collecting keras-preprocessing==1.0.1
  Downloading Keras_Preprocessing-1.0.1-py2.py3-none-any.whl (26 kB)
Collecting keras-applications==1.0.2
  Downloading Keras_Applications-1.0.2-py2.py3-none-any.whl (43 kB)
     |████████████████████████████████| 43 kB 139 kB/s              
Building wheels for collected packages: absl-py, dlib, face-recognition-models, ffmpy, gast, munch, nvidia-ml-py3, pathlib, pretrainedmodels, pycparser, scandi

In [None]:
!pip list

In [4]:
"""
Create adversarial videos that can fool xceptionnet.

Usage:
python attack.py
    -i <folder with video files or path to video file>
    -m <path to model file>
    -o <path to output folder, will write one or multiple output videos there>

built upon the code by Andreas Rössler for detecting deep fakes.
"""

import sys, os
import argparse
from os.path import join
import cv2
import dlib
import torch
import torch.nn as nn
from PIL import Image as pil_image
from tqdm import tqdm

from network.models import model_selection
from dataset.transform import xception_default_data_transforms, mesonet_default_data_transforms
from torch import autograd
import numpy
from torchvision import transforms
import attack_algos
import json

import warnings
warnings.filterwarnings("ignore")


In [3]:
def get_boundingbox(face, width, height, scale=1.3, minsize=None):
    """
    Expects a dlib face to generate a quadratic bounding box.
    :param face: dlib face class
    :param width: frame width
    :param height: frame height
    :param scale: bounding box size multiplier to get a bigger face region
    :param minsize: set minimum bounding box size
    :return: x, y, bounding_box_size in opencv form
    """
    x1 = face.left()
    y1 = face.top()
    x2 = face.right()
    y2 = face.bottom()
    size_bb = int(max(x2 - x1, y2 - y1) * scale)
    if minsize:
        if size_bb < minsize:
            size_bb = minsize
    center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2

    # Check for out of bounds, x-y top left corner
    x1 = max(int(center_x - size_bb // 2), 0)
    y1 = max(int(center_y - size_bb // 2), 0)
    # Check for too big bb size for given x, y
    size_bb = min(width - x1, size_bb)
    size_bb = min(height - y1, size_bb)

    return x1, y1, size_bb

In [4]:
# Preprocessamento

def preprocess_image(image, model_type, cuda=True, legacy = False):
    """
    Preprocesses the image such that it can be fed into our network.
    During this process we envoke PIL to cast it into a PIL image.

    :param image: numpy image in opencv form (i.e., BGR and of shape
    :return: pytorch tensor of shape [1, 3, image_size, image_size], not
    necessarily casted to cuda
    """
    # Revert from BGR
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Preprocess using the preprocessing function used during training and
    # casting it to PIL image
    if not legacy:
        # only conver to tensor here, 
        # other transforms -> resize, normalize differentiable done in predict_from_model func
        # same for meso, xception
        preprocess = xception_default_data_transforms['to_tensor']
    else:
        if model_type == "xception":
            preprocess = xception_default_data_transforms['test']
        elif model_type == "meso":
            preprocess = mesonet_default_data_transforms['test']

    preprocessed_image = preprocess(pil_image.fromarray(image))
    
    # Add first dimension as the network expects a batch
    preprocessed_image = preprocessed_image.unsqueeze(0)
    if cuda:
        preprocessed_image = preprocessed_image.cuda()

    preprocessed_image.requires_grad = True
    return preprocessed_image


In [5]:
def un_preprocess_image(image, size):
    """
    Tensor to PIL image and RGB to BGR
    """
    
    image.detach()
    new_image = image.squeeze(0)
    new_image = new_image.detach().cpu()

    undo_transform = transforms.Compose([
        transforms.ToPILImage(),
    ])

    new_image = undo_transform(new_image)
    new_image = numpy.array(new_image)

    new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)

    return new_image
    


In [6]:
def predict_with_model_legacy(image, model, model_type, post_function=nn.Softmax(dim=1),
                       cuda=True):
    """
    Predicts the label of an input image. Preprocesses the input image and
    casts it to cuda if required

    :param image: numpy image
    :param model: torch model with linear layer at the end
    :param post_function: e.g., softmax
    :param cuda: enables cuda, must be the same parameter as the model
    :return: prediction (1 = fake, 0 = real)
    """
    # Preprocess
    preprocessed_image = preprocess_image(image, model_type, cuda, legacy = True)

    # Model prediction
    output = model(preprocessed_image)
    output = post_function(output)

    # Cast to desired
    _, prediction = torch.max(output, 1)    # argmax
    prediction = float(prediction.cpu().numpy())

    return int(prediction), output


In [5]:
def get_boundingbox(face, width, height, scale=1.3, minsize=None):
    """
    Expects a dlib face to generate a quadratic bounding box.
    :param face: dlib face class
    :param width: frame width
    :param height: frame height
    :param scale: bounding box size multiplier to get a bigger face region
    :param minsize: set minimum bounding box size
    :return: x, y, bounding_box_size in opencv form
    """
    x1 = face.left()
    y1 = face.top()
    x2 = face.right()
    y2 = face.bottom()
    size_bb = int(max(x2 - x1, y2 - y1) * scale)
    if minsize:
        if size_bb < minsize:
            size_bb = minsize
    center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2

    # Check for out of bounds, x-y top left corner
    x1 = max(int(center_x - size_bb // 2), 0)
    y1 = max(int(center_y - size_bb // 2), 0)
    # Check for too big bb size for given x, y
    size_bb = min(width - x1, size_bb)
    size_bb = min(height - y1, size_bb)

    return x1, y1, size_bb
# Preprocessamento

def preprocess_image(image, model_type, cuda=True, legacy = False):
    """
    Preprocesses the image such that it can be fed into our network.
    During this process we envoke PIL to cast it into a PIL image.

    :param image: numpy image in opencv form (i.e., BGR and of shape
    :return: pytorch tensor of shape [1, 3, image_size, image_size], not
    necessarily casted to cuda
    """
    # Revert from BGR
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Preprocess using the preprocessing function used during training and
    # casting it to PIL image
    if not legacy:
        # only conver to tensor here, 
        # other transforms -> resize, normalize differentiable done in predict_from_model func
        # same for meso, xception
        preprocess = xception_default_data_transforms['to_tensor']
    else:
        if model_type == "xception":
            preprocess = xception_default_data_transforms['test']
        elif model_type == "meso":
            preprocess = mesonet_default_data_transforms['test']

    preprocessed_image = preprocess(pil_image.fromarray(image))
    
    # Add first dimension as the network expects a batch
    preprocessed_image = preprocessed_image.unsqueeze(0)
    if cuda:
        preprocessed_image = preprocessed_image.cuda()

    preprocessed_image.requires_grad = True
    return preprocessed_image

def un_preprocess_image(image, size):
    """
    Tensor to PIL image and RGB to BGR
    """
    
    image.detach()
    new_image = image.squeeze(0)
    new_image = new_image.detach().cpu()

    undo_transform = transforms.Compose([
        transforms.ToPILImage(),
    ])

    new_image = undo_transform(new_image)
    new_image = numpy.array(new_image)

    new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)

    return new_image
    
def predict_with_model_legacy(image, model, model_type, post_function=nn.Softmax(dim=1),
                       cuda=True):
    """
    Predicts the label of an input image. Preprocesses the input image and
    casts it to cuda if required

    :param image: numpy image
    :param model: torch model with linear layer at the end
    :param post_function: e.g., softmax
    :param cuda: enables cuda, must be the same parameter as the model
    :return: prediction (1 = fake, 0 = real)
    """
    # Preprocess
    preprocessed_image = preprocess_image(image, model_type, cuda, legacy = True)

    # Model prediction
    output = model(preprocessed_image)
    output = post_function(output)

    # Cast to desired
    _, prediction = torch.max(output, 1)    # argmax
    prediction = float(prediction.cpu().numpy())

    return int(prediction), output

def create_adversarial_video(video_path, model_path, model_type, output_path,
                            start_frame=0, end_frame=None, attack="iterative_fgsm", 
                            compress = True, cuda=True, showlabel = True):
    """
    Reads a video and evaluates a subset of frames with the a detection network
    that takes in a full frame. Outputs are only given if a face is present
    and the face is highlighted using dlib.
    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image)
    :param output_path: path where the output video is stored
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate
    :param cuda: enable cuda
    :return:
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)

    video_fn = video_path.split('/')[-1].split('.')[0]+'.avi'
    os.makedirs(output_path, exist_ok=True)

    if compress:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    else:
        fourcc = cv2.VideoWriter_fourcc(*'HFYU') # Chnaged to HFYU because it is lossless

    fps = reader.get(cv2.CAP_PROP_FPS)
    num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None

    # Face detector
    face_detector = dlib.get_frontal_face_detector()

    # Load model
    if model_path is not None:
        if not cuda:
            model = torch.load(model_path, map_location = "cpu")
        else:
            model = torch.load(model_path)
        print('Model found in {}'.format(model_path))
    else:
        print('No model found, initializing random model.')
    if cuda:
        print("Converting mode to cuda")
        model = model.cuda()
        for param in model.parameters():
            param.requires_grad = True
        print("Converted to cuda")

    # raise Exception()
    # Text variables
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 2
    font_scale = 1

    # Frame numbers and length of output video
    frame_num = 0
    print(start_frame,num_frames)
    assert start_frame < num_frames - 1
    end_frame = end_frame if end_frame else num_frames
    pbar = tqdm(total=end_frame-start_frame)

    metrics = {
        'total_fake_frames' : 0,
        'total_real_frames' : 0,
        'total_frames' : 0,
        'percent_fake_frames' : 0,
        'probs_list' : [],
        'attack_meta_data' : [],
    }

    while reader.isOpened():
        _, image = reader.read()
        if image is None:
            break
        frame_num += 1

        if frame_num < start_frame:
            continue
        pbar.update(1)

        # Image size
        height, width = image.shape[:2]

        # Init output writer
        if writer is None:
            writer = cv2.VideoWriter(join(output_path, video_fn), fourcc, fps,
                                     (height, width)[::-1])

            # writer = cv2.VideoWriter(join(output_path, video_fn), 0, 1,
            #                          (height, width)[::-1])

        # 2. Detect with dlib
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_detector(gray, 1)
        if len(faces):
            # For now only take biggest face
            face = faces[0]

            # --- Prediction ---------------------------------------------------
            # Face crop with dlib and bounding box scale enlargement
            x, y, size = get_boundingbox(face, width, height)
            cropped_face = image[y:y+size, x:x+size]

            
            processed_image = preprocess_image(cropped_face, model_type, cuda = cuda)
            
            # Attack happening here

            # white-box attacks
            if attack == "iterative_fgsm":
                perturbed_image, attack_meta_data = attack_algos.iterative_fgsm(processed_image, model, model_type, cuda)
            elif attack == "robust":
                perturbed_image, attack_meta_data = attack_algos.robust_fgsm(processed_image, model, model_type, cuda)
            elif attack == "carlini_wagner":
                perturbed_image, attack_meta_data = attack_algos.carlini_wagner_attack(processed_image, model_type, model, cuda)

            # black-box attacks
            elif attack == "black_box":
                perturbed_image, attack_meta_data = attack_algos.black_box_attack(processed_image, model, model_type, 
                    cuda, transform_set={}, desired_acc = 0.999999)
            elif attack == "black_box_robust":
                perturbed_image, attack_meta_data = attack_algos.black_box_attack(processed_image, model, 
                    model_type, cuda, transform_set = {"gauss_blur", "translation", "resize"})
            
            # Undo the processing of xceptionnet, mesonet
            unpreprocessed_image = un_preprocess_image(perturbed_image, size)
            image[y:y+size, x:x+size] = unpreprocessed_image
            

            cropped_face = image[y:y+size, x:x+size]
            processed_image = preprocess_image(cropped_face, model_type, cuda = cuda)
            prediction, output, logits = attack_algos.predict_with_model(processed_image, model, model_type, cuda=cuda)

            print (">>>>Prediction for frame no. {}: {}".format(frame_num ,output))

            prediction, output = predict_with_model_legacy(cropped_face, model, model_type, cuda=cuda)

            print (">>>>Prediction LEGACY for frame no. {}: {}".format(frame_num ,output))

            label = 'fake' if prediction == 1 else 'real'
            if label == 'fake':
                metrics['total_fake_frames'] += 1.
            else:
                metrics['total_real_frames'] += 1.

            metrics['total_frames'] += 1.
            metrics['probs_list'].append(output[0].detach().cpu().numpy().tolist())
            metrics['attack_meta_data'].append(attack_meta_data)

            if showlabel:
                # Text and bb
                # print a bounding box in the generated video
                x = face.left()
                y = face.top()
                w = face.right() - x
                h = face.bottom() - y
                label = 'fake' if prediction == 1 else 'real'
                color = (0, 255, 0) if prediction == 0 else (0, 0, 255)
                output_list = ['{0:.2f}'.format(float(x)) for x in
                               output.detach().cpu().numpy()[0]]

                cv2.putText(image, str(output_list)+'=>'+label, (x, y+h+30),
                            font_face, font_scale,
                            color, thickness, 2)
                # draw box over face
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

        if frame_num >= end_frame:
            break

        writer.write(image)
    pbar.close()

    metrics['percent_fake_frames'] = metrics['total_fake_frames']/metrics['total_frames']

    with open(join(output_path, video_fn.replace(".avi", "_metrics_attack.json")), "w") as f:
        f.write(json.dumps(metrics))
    if writer is not None:
        writer.release()
        print('Finished! Output saved under {}'.format(output_path))
    else:
        print('Input video file was empty')


# Disable
def blockPrint():
    sys.stdout = open(os.devnull, 'w')

# Restore
def enablePrint():
    sys.stdout = sys.__stdout__

if __name__ == '__main__':
#     p = argparse.ArgumentParser(
#         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
#     p.add_argument('--video_path', '-i', type=str)
#     p.add_argument('--model_path', '-mi', type=str, default=None)
#     p.add_argument('--model_type', '-mt', type=str, default="xception")
#     p.add_argument('--output_path', '-o', type=str,
#                    default='.')
#     p.add_argument('--start_frame', type=int, default=0)
#     p.add_argument('--end_frame', type=int, default=None)
#     p.add_argument('--attack', '-a', type=str, default="iterative_fgsm")
#     p.add_argument('--compress', action='store_true')
#     p.add_argument('--cuda', action='store_true')
#     p.add_argument('--showlabel', action='store_true') # add face labels in the generated video

#     args = p.parse_args()

#     video_path = args.video_path

#     video_path = '/root/deepfake-framework/1-AdversarialDeepFakes/videos/source/'

    video_path = '/home/ec2-user/SageMaker/deepfake-framework/Dataset/25/manipulated_sequences/Deepfakes/raw/videos'
    model_path = '/home/ec2-user/SageMaker/deepfake-framework/1-AdversarialDeepFakes/all_raw.p'
    model_type = "xception"
    output_path = '/home/ec2-user/SageMaker/deepfake-framework/Dataset/25/manipulated_sequences/Deepfakes/raw/2-Adv_attacked_videos'
    
    start_frame = 0
    end_frame = None
    
#     compress = 'store_true'
    attack = 'iterative_fgsm' 
    cuda = 'store_true'
#     showlabel = 'store_true'
    
    if video_path.endswith('.mp4') or video_path.endswith('.avi'):
        create_adversarial_video(video_path, model_path, model_type, output_path,
                            start_frame=0, end_frame=None, attack="iterative_fgsm", 
                            compress = False, cuda=True, showlabel = False)
    else:

        videos = os.listdir(video_path)
        pbar_global = tqdm(total=len(videos))
        for video in videos:
            video_path = join(video_path, video)
            print('Video:', video_path)
            blockPrint()
#             create_adversarial_video(**vars(args))
            create_adversarial_video(video_path, model_path, model_type, output_path,
                            start_frame=0, end_frame=None, attack="iterative_fgsm", 
                            compress = True, cuda=True, showlabel = True)
            enablePrint()
            pbar_global.update(1)
        pbar_global.close()

  0%|          | 0/25 [00:00<?, ?it/s]

Video: /home/ec2-user/SageMaker/deepfake-framework/Dataset/25/manipulated_sequences/Deepfakes/raw/videos/183_253.mp4



  0%|          | 0/390 [00:00<?, ?it/s][A
  0%|          | 1/390 [00:00<01:02,  6.20it/s][A
  1%|          | 2/390 [00:00<01:43,  3.73it/s][A
  1%|          | 3/390 [00:01<02:06,  3.05it/s][A
  1%|          | 4/390 [00:01<02:20,  2.75it/s][A
  1%|▏         | 5/390 [00:02<02:30,  2.56it/s][A
  2%|▏         | 6/390 [00:02<02:35,  2.46it/s][A
  2%|▏         | 7/390 [00:02<02:40,  2.39it/s][A
  2%|▏         | 8/390 [00:03<02:42,  2.35it/s][A
  2%|▏         | 9/390 [00:03<02:44,  2.32it/s][A
  3%|▎         | 10/390 [00:04<02:45,  2.30it/s][A
  3%|▎         | 11/390 [00:04<02:46,  2.28it/s][A
  3%|▎         | 12/390 [00:05<02:45,  2.28it/s][A
  3%|▎         | 13/390 [00:05<02:45,  2.27it/s][A
  4%|▎         | 14/390 [00:06<02:45,  2.28it/s][A
  4%|▍         | 15/390 [00:06<02:48,  2.22it/s][A
  4%|▍         | 16/390 [00:07<03:00,  2.07it/s][A
  4%|▍         | 17/390 [00:07<02:55,  2.12it/s][A
  5%|▍         | 18/390 [00:08<02:59,  2.07it/s][A
  5%|▍         | 19/390 [00:0

AssertionError: 

In [None]:
os.listdir()

In [12]:
!cd videos/source

In [16]:
!ls -al 

total 81788
drwxr-xr-x 7 root root     6144 Feb 21 22:32 .
drwxr-xr-x 9 root root     6144 Feb 21 19:44 ..
drwxr-xr-x 2 root root     6144 Feb 21 19:30 .ipynb_checkpoints
-rw-r--r-- 1 root root    36033 Feb 21 22:32 Attack.ipynb
-rw-r--r-- 1 root root     1142 Feb 17 22:30 LICENSE-FaceForensics
-rw-r--r-- 1 root root     7437 Feb 17 22:30 README.md
drwxr-xr-x 2 root root     6144 Feb 17 22:30 __pycache__
-rw-r--r-- 1 root root     5818 Feb 17 22:30 aggregate_stats.py
-rw-r--r-- 1 root root 83555882 Feb 21 22:07 all_raw.p
-rw-r--r-- 1 root root     4861 Feb 17 23:23 attack-env.txt
-rw-r--r-- 1 root root    13087 Feb 17 22:30 attack.py
-rw-r--r-- 1 root root     4861 Feb 18 00:12 attack.yml
-rw-r--r-- 1 root root    13903 Feb 17 22:30 attack_algos.py
-rw-r--r-- 1 root root     4829 Feb 17 22:30 convert_to_mjpeg.py
-rw-r--r-- 1 root root     1959 Feb 17 22:30 create_test_data.py
drwxr-xr-x 3 root root     6144 Feb 17 22:30 dataset
-rw-r--r-- 1 root root    10011 Feb 17 22:30 detect_from_v

In [13]:
!nvidia-smi

Mon Feb 21 22:18:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   31C    P0    27W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [24]:
!ls

In [25]:
!df -H