# Implementation of EfficientNetB5 for the APTOS 2019 competition with Keras

## Dependencies <a id="1"></a>

Special thanks to [qubvel](https://github.com/qubvel/efficientnet) for sharing an amazing wrapper to get the EfficientNet architecture in one line of code!

In [75]:
import os
import sys
# Repository source: https://github.com/qubvel/efficientnet
import tensorflow 
from tensorflow.python.keras.applications.efficientnet import EfficientNetB3
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
from sklearn.model_selection import train_test_split

import glob
import re
from tensorflow.keras.backend import argmax
import numpy as np

In [63]:
from tensorflow.keras.models import Sequential


## Metric (Quadratic Weighted Kappa) <a id="3"></a>

Batch Normalization becomes unstable with small batch sizes (<16) and that is why we use [Group Normalization ](https://arxiv.org/pdf/1803.08494.pdf) layers instead. Big thanks to [Somshubra Majumdar](https://github.com/titu1994) for building an implementation of Group Normalization for Keras.

Keras makes it incredibly easy to replace layers. Just loop through the layers and replace each Batch Normalization layer with a Group Normalization layer.

In [84]:
import os
import sys
import math as m

# Repository source: https://github.com/qubvel/efficientnet
import tensorflow 
from tensorflow.python.keras.applications.efficientnet import EfficientNetB3
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
def get_logits(labels, y_pred):
    s = 64.
    cos_t = y_pred
    sin_m = tf.math.sin(0.5)
    cos_m = tf.math.cos(0.5)
    cos_t2 = tf.square(cos_t, name='cos_2')
    sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
    sin_t = tf.sqrt(sin_t2, name='sin_t')
    cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
    threshold = tf.math.cos(tf.constant(m.pi) - 0.5)
    cond_v = cos_t - threshold
    cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)
    mm = sin_m * 0.5
    keep_val = s*(cos_t - mm)
    cos_mt_temp = tf.where(cond, cos_mt, keep_val)
    mask = tf.one_hot(labels, depth=2, name='one_hot_mask')
    inv_mask = tf.subtract(1., mask, name='inverse_mask')
    s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')
    output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_logits')
    return output
def loss(y_true, y_pred):
    labels = argmax(y_true, axis=-1)
    logits = get_logits(labels, y_pred)
    #tf.keras.losses.SparseCategoricalCrossentropy()
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return loss

In [65]:
effnet = EfficientNetB3(weights = "imagenet", include_top = False, input_shape=(299,299,3), classes = 2)


In [66]:
effnet.compile(loss=loss,
                      optimizer=Adam(lr=0.0005), 
                  metrics=['acc'])
print(effnet.summary())

____________
block6b_project_bn (BatchNormal (None, 10, 10, 232)  928         block6b_project_conv[0][0]       
__________________________________________________________________________________________________
block6b_drop (Dropout)          (None, 10, 10, 232)  0           block6b_project_bn[0][0]         
__________________________________________________________________________________________________
block6b_add (Add)               (None, 10, 10, 232)  0           block6b_drop[0][0]               
                                                                 block6a_project_bn[0][0]         
__________________________________________________________________________________________________
block6c_expand_conv (Conv2D)    (None, 10, 10, 1392) 322944      block6b_add[0][0]                
__________________________________________________________________________________________________
block6c_expand_bn (BatchNormali (None, 10, 10, 1392) 5568        block6c_expand_conv[0][0]      

In [24]:
DATASET_PATHS = {
    'youtube': 'original_sequences/youtube/c23/',
    'actors': 'original_sequences/actors/c23/',
    'Deepfakes': 'manipulated_sequences/Deepfakes/c23/',
    'Face2Face': 'manipulated_sequences/Face2Face/c23/',
    'FaceShifter': 'manipulated_sequences/FaceShifter/c23/',
    'FaceSwap': 'manipulated_sequences/FaceSwap/c23/',
    'deepfakedetection': 'manipulated_sequences/DeepFakeDetection/c23/'
}
data_path = "/Users/asmaaaly/Minerva/Capstone/Minerva_Capstone/Capstone/data/"
output_path = "/Users/asmaaaly/Minerva/Capstone/Minerva_Capstone/Capstone/data/"



In [25]:
data = pd.read_csv("/Users/asmaaaly/Minerva/Minerva_Capstone/1.Preprocess Data/data_csv.csv")

In [26]:
def read_images(path):
    """
    read_images iterates through the preprocessed
    images and adds the labels for each image
    based on the directory of the image
    :param path (str): path for the preprocessed
    image
    :param dataset (str): dataset name
    :return: a Pandas dataframe with 
    image label and .
    """ 
    data = []
    filenames = glob.glob(Processed_datasets[path])
    for image in filenames:
        frame_name = image.split("/")[-1]
        dataset = frame_name.split("_")[0]
        if dataset == "Face2Face":
            video_name = re.findall(r'\d+',image)[1]
        else: 
            video_name = re.findall(r'\d+',image)[0]
        if path == 'Real':
            label = 0
            data.append([video_name, label,frame_name,dataset])
        else: 
            label = 1
            data.append([video_name, label,frame_name,dataset])
    return data

In [27]:
"""Processed_datasets = {
    'Real': '/Volumes/MY PASSPORT/Base_directory/Real/*.jpg',
    'Fake': '/Volumes/MY PASSPORT/Base_directory/Fake/*.jpg'
}"""

"Processed_datasets = {\n    'Real': '/Volumes/MY PASSPORT/Base_directory/Real/*.jpg',\n    'Fake': '/Volumes/MY PASSPORT/Base_directory/Fake/*.jpg'\n}"

In [96]:
training_dataset = {
    'Real': '/Volumes/MY PASSPORT/Base_directory/Real/',
    'Fake': '/Volumes/MY PASSPORT/Base_directory/Fake/'
}


In [29]:
"""data_real = pd.DataFrame(read_images('Real'),columns=["Video_ID","Label","Frame_ID","Dataset"])
data_fake = pd.DataFrame(read_images('Fake'),columns=["Video_ID","Label","Frame_ID","Dataset"])
#Combine both datframes
frames = [data_real, data_fake]
df_data = pd.concat(frames)"""

'data_real = pd.DataFrame(read_images(\'Real\'),columns=["Video_ID","Label","Frame_ID","Dataset"])\ndata_fake = pd.DataFrame(read_images(\'Fake\'),columns=["Video_ID","Label","Frame_ID","Dataset"])\n#Combine both datframes\nframes = [data_real, data_fake]\ndf_data = pd.concat(frames)'

In [20]:
#this is the df that has all of the images, will be used for mapping
df = pd.read_csv("/Users/asmaaaly/Minerva/Minerva_Capstone/1.Preprocess Data/all_data_pd.csv",index_col=0)

In [21]:
df_data = df.groupby('Video_ID')[["Frame_ID"]].agg("count").reset_index()
df_data

Unnamed: 0,Video_ID,Frame_ID
0,000Deepfakes,101
1,000youtube,101
2,001Deepfakes,101
3,001Face2Face,101
4,001FaceShifter,101
...,...,...
3404,998FaceSwap,101
3405,998youtube,102
3406,999Deepfakes,101
3407,999FaceSwap,101


In [31]:
labels = df[["Video_ID","Label"]]

In [32]:
merged_df_video_only = df_data.merge(labels, how = 'inner', on = ['Video_ID']).drop_duplicates(subset=['Video_ID'])


In [33]:
merged_df_video_only = merged_df_video_only[~(merged_df_video_only['Frame_ID'] <= 100)] 

In [34]:
merged_df_video_only

Unnamed: 0,Video_ID,Frame_ID,Label
0,000Deepfakes,101,1
101,000youtube,101,0
202,001Deepfakes,101,1
303,001Face2Face,101,1
404,001FaceShifter,101,1
...,...,...,...
323286,998FaceSwap,101,1
323387,998youtube,102,0
323489,999Deepfakes,101,1
323590,999FaceSwap,101,1


In [36]:
"""df_0 = merged_df_video_only[merged_df_video_only['Label'] == 0].sample(1166, random_state = 101)
df_1 = merged_df_video_only[merged_df_video_only['Label'] == 1].sample(1166, random_state = 101)"""

"df_0 = merged_df_video_only[merged_df_video_only['Label'] == 0].sample(1166, random_state = 101)\ndf_1 = merged_df_video_only[merged_df_video_only['Label'] == 1].sample(1166, random_state = 101)"

In [37]:
""""df_data = pd.concat([df_0, df_1], axis=0).reset_index(drop=True)
# shuffle
#print to see if the data is balanced 
df_data['Label'].value_counts()""""

SyntaxError: EOL while scanning string literal (<ipython-input-37-2a1dd404ed8a>, line 4)

In [54]:
""""df_data = pd.concat([df_0, df_1], axis=0).reset_index(drop=True)
# shuffle
#print to see if the data is balanced 
df_data['Label'].value_counts()""""

"""y = df_data['Label']
df_train, df_test = train_test_split(df_data, test_size=0.20, random_state=101, stratify=y)
"""
"""y_train = df_train['Label']
df_train, df_valid = train_test_split(df_train, test_size=0.25, random_state=101,stratify=y_train)"""
"""df_train["Split Type"] = np.asarray(['Train'] * len(df_train))
df_valid["Split Type"] = np.asarray(['Valid'] * len(df_valid))
df_test["Split Type"] = np.asarray(['Test'] * len(df_test))
Processed_frames = [df_train, df_valid,df_test]
df_data_used = pd.concat(Processed_frames)"""


In [57]:
"""df_data_used.columns = ['Video_ID', 'Number_frames','Label',"Split_type"]
df_data_used"""

Unnamed: 0,Video_ID,Number_frames,Label,Split_type
746,597youtube,101,0,Train
336,569youtube,102,0,Train
2209,267FaceSwap,101,1,Train
176,350youtube,101,0,Train
1024,978youtube,101,0,Train
...,...,...,...,...
2116,103FaceSwap,101,1,Test
1594,200Deepfakes,101,1,Test
960,04__talking_against_wall_actors,101,0,Test
1929,107FaceShifter,101,1,Test


In [58]:
"""#save the datat preprocessed as csv file
data_with_nframes = df_data_used.merge(df, how = 'inner', on = ['Video_ID'])

data_full =data_with_nframes"""

In [29]:
#data_full.to_csv("data_full.csv",index=False)

In [93]:
data_full = pd.read_csv("data_full.csv")

In [59]:
#data_full = data_full.drop("Label_y",axis=1)


In [60]:
"""data_full.columns = ['Video_ID', 'Number_frames','Label',"Split_type","Frame_ID","Dataset"]"""

In [61]:
"""data_with_nframes.to_csv("data_with_nframes.csv",header=False)"""

In [71]:
data_with_nframes = pd.read_csv("/Users/asmaaaly/Minerva/Minerva_Capstone/2.Models/data_with_nframes.csv")

In [70]:
from keras.preprocessing.image import img_to_array, load_img
import numpy as np

def process_image(image, target_shape):
    """Given an image, process it and return the array."""
    # Load the image.
    h, w, _ = target_shape
    print(image)
    image = load_img(image, target_size=(h, w))

    # Turn it into numpy, normalize and return.
    img_arr = img_to_array(image)
    x = (img_arr / 255.).astype(np.float32)

    return x


In [68]:
"""
Class for managing our data.
"""
import csv
import numpy as np
import random
import glob
import os.path
import sys
import operator
import threading
from tensorflow.keras.models import Model, load_model


from keras.utils import to_categorical

class threadsafe_iterator:
    def __init__(self, iterator):
        self.iterator = iterator
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        with self.lock:
            return next(self.iterator)

def threadsafe_generator(func):
    """Decorator"""
    def gen(*a, **kw):
        return threadsafe_iterator(func(*a, **kw))
    return gen

class DataSet():

    def __init__(self, seq_length=100, class_limit=None, image_shape=(224, 224, 3)):
        """Constructor.
        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.sequence_path = os.path.join('data', 'sequences')
        self.max_frames = 300  # max number of frames a video can have for us to use it

        # Get the data.
        self.data = self.get_data()

        # Get the classes.
        self.classes = self.get_classes()

        # Now do some minor data cleaning.
        #self.data = self.clean_data()

        self.image_shape = image_shape

    @staticmethod
    def get_data():
        """Load our data from file."""
        with open(os.path.join('/Users/asmaaaly/Minerva/Minerva_Capstone/1.Preprocess Data', 'only_videos_with_labels.csv'), 'r') as fin:
            reader = csv.reader(fin)
            data = list(reader)

        return data

    def clean_data(self):
        """Limit samples to greater than the sequence length and fewer
        than N frames. Also limit it to classes we want to use."""
        data_clean = []
        for item in self.data:
            if int(item[1]) >= self.seq_length and int(item[1]) <= self.max_frames \
                    and item[2] in self.classes:
                data_clean.append(item)

        return data_clean

    def get_classes(self):
        """Extract the classes from our data. If we want to limit them,
        only return the classes we need."""
        classes = []
        for item in self.data:
            if item[3] not in classes:
                classes.append(item[3])

        # Sort them.
        classes = sorted(classes)

        # Return.
        if self.class_limit is not None:
            return classes[:self.class_limit]
        else:
            return classes

    def get_class_one_hot(self, class_str):
        """Given a class as a string, return its number in the classes
        list. This lets us encode and one-hot it for training."""
        # Encode it first.
        #label_encoded = self.classes.index(class_str)

        # Now one-hot it.
        #label_hot = to_categorical(class_str, len(self.classes))
        label_hot = float(class_str)
        #assert len(label_hot) == len(self.classes)

        return label_hot

    def split_train_test(self):
        """Split the data into train and test groups."""
        train = []
        test = []
        valid = []
        for item in self.data:
            if item[4] == 'Train':
                train.append(item)
            elif item[4] == 'Valid':
                valid.append(item)
            else:
                test.append(item)
        return train, valid, test

    def get_all_sequences_in_memory(self, train_test, data_type):
        """
        This is a mirror of our generator, but attempts to load everything into
        memory so we can train way faster.
        """
        # Get the right dataset.
        train, valid, test = self.split_train_test()
        if train_test == 'train':
            data = train 
        elif train_test == 'valid':
            data = valid
        else:
            data = test 

        print("Loading %d samples into memory for %sing." % (len(data), train_test))

        X, y = [], []
        for row in data:
            if data_type == 'images':
                frames = self.get_frames_for_sample(row)
                frames = self.rescale_list(frames, self.seq_length)

                # Build the image sequence
                sequence = self.build_image_sequence(frames)

            else:
                sequence = self.get_extracted_sequence(data_type, row)

                if sequence is None:
                    print("Can't find sequence. Did you generate them?")
                    raise

            X.append(sequence)
            y.append(row[3])

        return np.array(X), np.array(y)

    @threadsafe_generator
    def frame_generator(self, batch_size, train_test, data_type):
        """Return a generator that we can use to train on. There are
        a couple different things we can return:

        data_type: 'features', 'images'
        """
        # Get the right dataset for the generator.
        train, valid, test = self.split_train_test()
        if train_test == 'train':
            data = train
        elif train_test == 'valid':
            data = valid
        else:
            data = test
        print("Creating %s generator with %d samples." % (train_test, len(data)))

        while 1:
            X, y = [], []

            # Generate batch_size samples.
            for _ in range(batch_size):
                # Reset to be safe.
                sequence = None
                # Get a random sample.
                sample = random.choice(data)

                # Check to see if we've already saved this sequence.
                if data_type is "images":
                    # Get and resample frames.
                    frames = self.get_frames_for_sample(sample)
                    frames = self.rescale_list(frames, self.seq_length)

                    # Build the image sequence
                    sequence = self.build_image_sequence(frames)
                else:
                    # Get the sequence from disk.
                    sequence = self.get_extracted_sequence(data_type, sample)

                    if sequence is None:
                        raise ValueError("Can't find sequence. Did you generate them?")

                X.append(sequence)
                y.append(self.get_class_one_hot(sample[3]))
            yield np.array(X), np.array(y)
    def build_image_sequence(self, frames):
        """Given a set of frames (filenames), build our sequence."""
        return [process_image(x, self.image_shape) for x in frames]

    def get_extracted_sequence(self, data_type, sample):
        """Get the saved extracted features."""
        filename = sample[1]
        path = os.path.join(data_path, "sequences",filename + '-' + str(self.seq_length) + \
            '-' + data_type + '.npy')
        if os.path.isfile(path):
            return np.load(path)
        else:
            return None

    def get_frames_by_filename(self, filename, data_type):
        """Given a filename for one of our samples, return the data
        the model needs to make predictions."""
        # First, find the sample row.
        sample = None
        for row in self.data:
            if row[1] == filename:
                sample = row
                break
        if sample is None:
            raise ValueError("Couldn't find sample: %s" % filename)

        if data_type == "images":
            # Get and resample frames.
            frames = self.get_frames_for_sample(sample)
            frames = self.rescale_list(frames, self.seq_length)
            # Build the image sequence
            sequence = self.build_image_sequence(frames)
        else:
            # Get the sequence from disk.
            sequence = self.get_extracted_sequence(data_type, sample)

            if sequence is None:
                raise ValueError("Can't find sequence. Did you generate them?")

        return sequence

    @staticmethod
    def get_frames_for_sample(sample):
        sample = sample[1]
        sources = []
        images = data_full.loc[data_full['Video_ID'] == sample]['Frame_ID']
        target = data_full.loc[data_full['Video_ID'] == sample]['Label']
        if target.any() == 0:
            for i in images:
                src = os.path.join(training_dataset['Real'], i)
                sources.append(src)
        else: 
            for i in images:
                src = os.path.join(training_dataset['Fake'], i)
                sources.append(src)
        return sources

    @staticmethod
    def get_filename_from_image(filename):
        parts = filename.split(os.path.sep)
        return parts[-1].replace('.jpg', '')

    @staticmethod
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/samples list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the origina list."""
        assert len(input_list) >= size

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Build our new output.
        output = [input_list[i] for i in range(0, len(input_list), skip)]

        # Cut off the last one if needed.
        return output[:size]

    def print_class_from_prediction(self, predictions, nb_to_return=5):
        """Given a prediction, print the top classes."""
        # Get the prediction for each label.
        label_predictions = {}
        for i, label in enumerate(self.classes):
            label_predictions[label] = predictions[i]

        # Now sort them.
        sorted_lps = sorted(
            label_predictions.items(),
            key=operator.itemgetter(1),
            reverse=True
        )

        # And return the top N.
        for i, class_prediction in enumerate(sorted_lps):
            if i > nb_to_return - 1 or class_prediction[1] == 0.0:
                break
            print("%s: %.2f" % (class_prediction[0], class_prediction[1]))


Using TensorFlow backend.


In [69]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
#from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input
import numpy as np

In [24]:
effnet.layers[-12]

<tensorflow.python.keras.layers.pooling.GlobalAveragePooling2D at 0x7fe581a9ac10>

In [118]:
from tensorflow.keras.preprocessing import image

class Extractor():
    def __init__(self, weights="None"):
        """Either load pretrained from imagenet, or load our saved
        weights from our own training."""

        self.weights = weights  # so we can check elsewhere which model

        if weights is None:
            # We'll extract features at the final pool layer.
            base_model = effnet

            # We'll extract features at the final pool layer.
            self.extractor_model = Model(
                inputs=base_model.input,
                outputs=base_model.layers[-12].output
            )
        elif self.weights == "effec":
            # Load the model first.
            self.extractor_model = model_2

            # Then remove the top so we get features not predictions.
            # From: https://github.com/fchollet/keras/issues/2371
            self.extractor_model.layers.pop()
            self.extractor_model.layers.pop()  # two pops to get to pool layer
            self.extractor_model.outputs = [self.extractor_model.layers[-1].output]
            self.extractor_model.output_layers = [self.extractor_model.layers[-1]]
            #self.extractor_model.layers[-1].outbound_nodes = []

    def extract(self, image_path):
        img = image.load_img(image_path, target_size=(299, 299))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # Get the prediction.
        features = self.extractor_model.predict(x)
        if self.weights is None:
            # For imagenet/default network:
            features = features[0]
        else:
            # For loaded network:
            features = features[0]

        return features


In [119]:
import numpy as np
import os.path
from tqdm import tqdm
import os

# Set defaults.
seq_length = 100
class_limit = 1  
# Get the dataset.
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# get the model.
extractor_model = Extractor(weights ="effec")
data_path = "/Volumes/MY PASSPORT/Base_directory/"
# Loop through data.
pbar = tqdm(total=len(data.data))
for i in range (1, len(data.data)):
    video = data.data[i]
    # Get the path to the sequence for this video.
    path = os.path.join(data_path, 'sequences', video[1] + '-' + str(seq_length) + \
        '-features')  # numpy will auto-append .npy
    # Check if we already have it.

    if os.path.isfile(path + '.npy'):
        pbar.update(1)
        continue
    else:
        os.makedirs(path,exist_ok=True)

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)
    # Now downsample to just the ones we need.
    frames = data.rescale_list(frames, seq_length)

    # Now loop through and extract features to build the sequence.
    sequence = []
    for img in frames:
        features = extractor_model.extract(img)
        sequence.append(features)

    # Save the sequence.
    np.save(path, sequence)

    pbar.update(1)

pbar.close()


  0%|          | 2/2333 [21:35<419:15:42, 647.51s/it]
 74%|███████▍  | 1728/2333 [8:12:05<3:28:07, 20.64s/it]

FileNotFoundError: [Errno 2] No such file or directory: '/Volumes/MY PASSPORT/Base_directory/Real/actors_08__talking_against_wall_1011.jpg'

In [106]:
model_2 = load_model("/Users/asmaaaly/Minerva/Minerva_Capstone/2.Models/Weights/results/EfficientNetB3",compile=False)

a function (__inference_model_layer_call_and_return_conditional_losses_90612) with ops with custom gradients. Will likely fail if a gradient is requested.
  0%|          | 0/2333 [06:46<?, ?it/s]
  0%|          | 0/2333 [06:00<?, ?it/s]
  0%|          | 0/2333 [05:43<?, ?it/s]


In [107]:
model_2.compile(loss=loss,
                      optimizer=Adam(lr=0.0001), 
                  metrics=['acc'])

In [112]:
model_2.outputs = [model_2.layers[-1].output]

In [114]:
model_2.layers[-1]

<tensorflow.python.keras.layers.core.Dense at 0x7f8071042130>

In [117]:
model_2.layers[-1].outbound_nodes 

[]

In [76]:
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Dense, Flatten, Dropout, ZeroPadding3D
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import (Conv2D, MaxPooling3D, Conv3D,
    MaxPooling2D)
from collections import deque
import sys

In [100]:
"""
A collection of models we'll use to attempt to classify videos.
"""
from tensorflow.keras.layers import Dense, Flatten, Dropout, ZeroPadding3D
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import (Conv2D, MaxPooling3D, Conv3D,
    MaxPooling2D)
from collections import deque
import sys

class ResearchModels():
    def __init__(self, nb_classes, model, seq_length,
                 saved_model=None, features_length=2304):
        """
        `model` = one of:
            lstm
            lrcn
            mlp
            conv_3d
            c3d
        `nb_classes` = the number of classes to predict
        `seq_length` = the length of our video sequences
        `saved_model` = the path to a saved Keras model to load
        """

        # Set defaults.
        self.seq_length = seq_length
        self.load_model = load_model
        self.saved_model = saved_model
        self.nb_classes = nb_classes
        self.feature_queue = deque()

        # Set the metrics. Only use top k if there's a need.
        metrics = ['accuracy']
        if self.nb_classes >= 10:
            metrics.append('top_k_categorical_accuracy')

        # Get the appropriate model.
        if self.saved_model is not None:
            print("Loading model %s" % self.saved_model)
            self.model = load_model(self.saved_model)
        elif model == 'gru':
            print("Loading GRU model.")
            self.input_shape = (seq_length, features_length)
            self.model = self.gru()
        elif model == 'lrcn':
            print("Loading CNN-LSTM model.")
            self.input_shape = (seq_length, 80, 80, 3)
            self.model = self.lrcn()
        elif model == 'mlp':
            print("Loading simple MLP.")
            self.input_shape = (seq_length, features_length)
            self.model = self.mlp()
        elif model == 'conv_3d':
            print("Loading Conv3D")
            self.input_shape = (seq_length, 80, 80, 3)
            self.model = self.conv_3d()
        elif model == 'c3d':
            print("Loading C3D")
            self.input_shape = (seq_length, 80, 80, 3)
            self.model = self.c3d()
        else:
            print("Unknown network.")
            sys.exit()

        # Now compile the network.
        optimizer = Adam(lr=0.001)
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                           metrics=metrics)

        print(self.model.summary())

    def gru(self):
        """Build a simple LSTM network. We pass the extracted features from
        our CNN to this model predomenently.
        # Model.
        """
        model = Sequential()
        model.add(GRU(2304, return_sequences=False,
                       input_shape=self.input_shape,
                       dropout=0.1))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.1))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model
        """
        # Create our convnet with (112, 112, 3) input shape        
        # then create our final model
        model = Sequential()
        # add the convnet with (5, 112, 112, 3) shape
        # here, you can also use GRU or LSTM
        model.add(GRU(64))
        # and finally, we make a decision network
        model.add(Dense(2304, activation='relu'))
        model.add(Dropout(.5))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.nb_classes, activation='softmax'))
        return model
        """

    def lrcn(self):
        """Build a CNN into RNN.
        Starting version from:
            https://github.com/udacity/self-driving-car/blob/master/
                steering-models/community-models/chauffeur/models.py

        Heavily influenced by VGG-16:
            https://arxiv.org/abs/1409.1556

        Also known as an LRCN:
            https://arxiv.org/pdf/1411.4389.pdf
        """
        def add_default_block(model, kernel_filters, init, reg_lambda):

            # conv
            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
                                             kernel_initializer=init, kernel_regularizer=L2_reg(l=reg_lambda))))
            model.add(TimeDistributed(BatchNormalization()))
            model.add(TimeDistributed(Activation('relu')))
            # conv
            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
                                             kernel_initializer=init, kernel_regularizer=L2_reg(l=reg_lambda))))
            model.add(TimeDistributed(BatchNormalization()))
            model.add(TimeDistributed(Activation('relu')))
            # max pool
            model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

            return model

        initialiser = 'glorot_uniform'
        reg_lambda  = 0.001

        model = Sequential()

        # first (non-default) block
        model.add(TimeDistributed(Conv2D(32, (7, 7), strides=(2, 2), padding='same',
                                         kernel_initializer=initialiser, kernel_regularizer=L2_reg(l=reg_lambda)),
                                  input_shape=self.input_shape))
        model.add(TimeDistributed(BatchNormalization()))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(Conv2D(32, (3,3), kernel_initializer=initialiser, kernel_regularizer=L2_reg(l=reg_lambda))))
        model.add(TimeDistributed(BatchNormalization()))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        # 2nd-5th (default) blocks
        model = add_default_block(model, 64,  init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 128, init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 256, init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 512, init=initialiser, reg_lambda=reg_lambda)

        # LSTM output head
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(256, return_sequences=False, dropout=0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def mlp(self):
        """Build a simple MLP. It uses extracted features as the input
        because of the otherwise too-high dimensionality."""
        # Model.
        model = Sequential()
        model.add(Flatten(input_shape=self.input_shape))
        model.add(Dense(512))
        model.add(Dropout(0.1))
        model.add(Dense(512))
        model.add(Dropout(0.1))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def conv_3d(self):
        """
        Build a 3D convolutional network, based loosely on C3D.
            https://arxiv.org/pdf/1412.0767.pdf
        """
        # Model.
        model = Sequential()
        model.add(Conv3D(
            32, (3,3,3), activation='relu', input_shape=self.input_shape
        ))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(64, (3,3,3), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(128, (3,3,3), activation='relu'))
        model.add(Conv3D(128, (3,3,3), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(256, (2,2,2), activation='relu'))
        model.add(Conv3D(256, (2,2,2), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))

        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def c3d(self):
        """
        Build a 3D convolutional network, aka C3D.
            https://arxiv.org/pdf/1412.0767.pdf

        With thanks:
            https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2
        """
        model = Sequential()
        # 1st layer group
        model.add(Conv3D(64, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv1',
                         subsample=(1, 1, 1),
                         input_shape=self.input_shape))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                               border_mode='valid', name='pool1'))
        # 2nd layer group
        model.add(Conv3D(128, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv2',
                         subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool2'))
        # 3rd layer group
        model.add(Conv3D(256, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv3a',
                         subsample=(1, 1, 1)))
        model.add(Conv3D(256, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv3b',
                         subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool3'))
        # 4th layer group
        model.add(Conv3D(512, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv4a',
                         subsample=(1, 1, 1)))
        model.add(Conv3D(512, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv4b',
                         subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool4'))

        # 5th layer group
        model.add(Conv3D(512, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv5a',
                         subsample=(1, 1, 1)))
        model.add(Conv3D(512, 3, 3, 3, activation='relu',
                         border_mode='same', name='conv5b',
                         subsample=(1, 1, 1)))
        model.add(ZeroPadding3D(padding=(0, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool5'))
        model.add(Flatten())

        # FC layers group
        model.add(Dense(4096, activation='relu', name='fc6'))
        model.add(Dropout(0.5))
        model.add(Dense(4096, activation='relu', name='fc7'))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model


In [103]:
"""
Train our RNN on extracted features or images.
"""
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger
import time
import os.path

def train(data_type, seq_length, model, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    mode_version = "GRU"
    checkpointer = ModelCheckpoint(f'weights-{mode_version}.h5', monitor='accuracy',
    save_best_only=True, save_weights_only=True, verbose=1)

    # Helper: TensorBoard
    #tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = 1398 // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'valid', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer],
            validation_data=val_generator,
            validation_steps=46,
            workers=4)

model = 'gru'
saved_model = None  # None or weights file
class_limit = 2  # int, can be 1-101 or None
seq_length = 100
load_to_memory = False  # pre-load the sequences into memory
batch_size = 10
nb_epoch = 30

# Chose images or features and image shape based on network.
if model in ['conv_3d', 'c3d', 'lrcn']:
    data_type = 'images'
    image_shape = (80, 80, 3)
elif model in ['gru', 'mlp']:
    data_type = 'features'
    image_shape = None
else:
    raise ValueError("Invalid model. See train.py for options.")

train(data_type, seq_length, model, saved_model=saved_model,
        class_limit=class_limit, image_shape=image_shape,
        load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch)


Loading GRU model.
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_10 (GRU)                 (None, 2304)              31864320  
_________________________________________________________________
dense_19 (Dense)             (None, 512)               1180160   
_________________________________________________________________
dropout_9 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 2)                 1026      
Total params: 33,045,506
Trainable params: 33,045,506
Non-trainable params: 0
_________________________________________________________________
None
Creating train generator with 1398 samples.
Epoch 1/30

Epoch 00001: accuracy improved from -inf to 0.52590, saving model to weights-GRU.h5
Epoch 2/30

Epoch 00002: accuracy improved from 0.52590 to 0.53813, saving 

KeyboardInterrupt: 

In [None]:
model = Sequential()
# add the convnet with (5, 112, 112, 3) shape
# here, you can also use GRU or LSTM
model.add(GRU(2048, return_sequences=False,
                       input_shape=(40,2048),
                       dropout=0.3))# and finally, we make a decision network
model.add(Dropout(.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.summary()
        

In [125]:
import pandas as pd
data = pd.read_csv("/Users/asmaaaly/Minerva/Minerva_Capstone/1.Preprocess Data/only_videos_with_labels.csv")

In [126]:
train_pd = data[data["Split_type"]=="Train"]

In [127]:
validation_pd = data[data["Split_type"]=="Valid"]

In [128]:
test_pd = data[data["Split_type"]=="Test"]

In [158]:
len(validation_pd)

467

In [165]:
467//10

46

In [91]:
test = np.load("/Volumes/MY PASSPORT/Base_directory/sequences/415Deepfakes-100-features.npy")

In [92]:
np.shape(test)

(100, 2304)

In [102]:
len(data.classes)

2

In [None]:
def gru(self):
    """Build a simple LSTM network. We pass the extracted features from
    our CNN to this model predomenently.
    # Model.
    """
    model = Sequential()
    model.add(GRU(2304, return_sequences=False,
                    input_shape=self.input_shape,
                    dropout=0.1))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(self.nb_classes, activation='softmax'))

    return model

In [56]:
import pandas as pd
data_full = pd.read_csv("/Users/asmaaaly/Minerva/Minerva_Capstone/2.Models/merged_file_all_data.csv", index_col=0)
only_dfdc = data_full[data_full.Dataset.isin(['actors', 'deepfakedetection'])]
only_unique = only_dfdc.copy()
only_unique = only_unique.drop_duplicates(subset=['Video_ID'])


In [58]:
only_unique = only_unique.drop("Split Type", axis =1 )

In [59]:
from sklearn.model_selection import train_test_split


In [60]:
only_unique["Label"].value_counts()

0.0    359
1.0    158
Name: Label, dtype: int64

In [61]:
df_0 = only_unique[only_unique['Label'] == 0].sample(158, random_state = 101)
df_1 = only_unique[only_unique['Label'] == 1].sample(158, random_state = 101)

In [62]:
import numpy as np
df_data = pd.concat([df_0, df_1], axis=0).reset_index(drop=True)
# shuffle
#print to see if the data is balanced 
df_data['Label'].value_counts()
y = df_data['Label']
df_train, df_test = train_test_split(df_data, test_size=0.20, random_state=101, stratify=y)

y_train = df_train['Label']
df_train, df_valid = train_test_split(df_train, test_size=0.25, random_state=101,stratify=y_train)
df_train["Split Type"] = np.asarray(['Train'] * len(df_train))
df_valid["Split Type"] = np.asarray(['Valid'] * len(df_valid))
df_test["Split Type"] = np.asarray(['Test'] * len(df_test))
Processed_frames = [df_train, df_valid,df_test]
df_data_used = pd.concat(Processed_frames)


In [66]:
df_data_used = df_data_used[~(df_data_used['Frame_ID_x'] <= 100)] 

In [67]:
df_data_used.to_csv("df_data_used.csv", header = False)

In [65]:
df_data = pd.concat([df_0, df_1], axis=0).reset_index(drop=True)
# shuffle
#print to see if the data is balanced 
df_data['Label'].value_counts()

0.0    158
1.0    158
Name: Label, dtype: int64