# Notes



*   There are 300 frames in a 10 second video. 
    * Instead of getting each frames, we can average every _n_ frames.
*   The background in each video is non-relevant.
    * Need to add a face/body detection layer and crop the frames accrodingly.
* Need to capture movements between frames. Could use RNN, or feeding the difference between two consecutive frames
* Add the attention maps (Stehuawer paper) inside XceptionNet blocks.
* Frame sizes are `(1080, 1920)` and `(1920, 1080)`. Rotate the latter ones to make every frame into the same shape.



# Environment Setup

In [1]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [2]:
import tensorflow as tf
print(tf.__version__)

2.1.0


In [3]:
!ls drive/My\ Drive/DeepFake

sample_submission.csv  train_sample_videos
test_videos	       xception_weights_tf_dim_ordering_tf_kernels.h5


# utils

In [0]:
# play a video
from IPython.display import HTML
from base64 import b64encode
mp4 = open('/content/drive/My Drive/DeepFake/train_sample_videos/aapnvogymq.mp4','rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [0]:
# getting the number of frames in a video file
a = cv.VideoCapture('/content/drive/My Drive/DeepFake/train_sample_videos/cdaxixbosp.mp4')
a.get(cv.CAP_PROP_FRAME_COUNT)

# XceptionNet

In [0]:
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Input, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
#from tensorflow.keras.applications.imagenet_utils import _obtain_input_shape
from tensorflow.keras.utils import get_file

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
# from tensorflow.keras.utils import np_utils
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.optimizers import SGD
import os
import h5py
import matplotlib
import matplotlib.pyplot as plt
import random
#import cv2
from sklearn.preprocessing import LabelEncoder
from skimage import io, transform
from sklearn.metrics import accuracy_score
from scipy import misc
import numpy as np

In [0]:
# define the path for pretrained model
WEIGHTS_PATH = '/content/drive/My Drive/DeepFake/xception_weights_tf_dim_ordering_tf_kernels.h5'

In [0]:
#WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5'

def Xception(nb_classes):

    # Determine proper input shape
#     input_shape = _obtain_input_shape(None, default_size=299, min_size=71, data_format='channels_last', include_top=False)

#     img_input = Input(shape=input_shape)
    img_input = Input(shape=(227,227,3))

    # Block 1
    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False)(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(64, (3, 3), use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    residual = Conv2D(128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    # Block 2
    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    # Block 2 Pool
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    residual = Conv2D(256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    # Block 3
    x = Activation('relu')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    # Block 3 Pool
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    residual = Conv2D(728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    # Block 4
    x = Activation('relu')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    # Block 5 - 12
    for i in range(8):
        residual = x

        x = Activation('relu')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        x = layers.add([x, residual])

    residual = Conv2D(1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    # Block 13
    x = Activation('relu')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    # Block 13 Pool
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    # Block 14
    x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Block 14 part 2
    x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Fully Connected Layer
    x = GlobalAveragePooling2D()(x)
    x = Dense(1000, activation='relu')(x)
    x = Dense(nb_classes, activation='softmax')(x)

    inputs = img_input

    # Create model
    model = Model(inputs, x, name='xception')

    # Download and cache the Xception weights file
    #weights_path = get_file('xception_weights.h5', WEIGHTS_PATH, cache_subdir='models')

    # load weights
    #model.load_weights(weights_path)

    return model


# """
#     Instantiate the model by using the following line of code

#     model = Xception()

# """

# Simple CNN

In [0]:
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from functools import partial

def simple_cnn(nb_classes):
    DefaultConv2D = partial(Conv2D,
                            kernel_size=3, activation='relu', padding="SAME")
    
    model = keras.models.Sequential([
        DefaultConv2D(filters=64, kernel_size=7, input_shape=[512, 512, 3]),
        MaxPooling2D(pool_size=2),
        DefaultConv2D(filters=128),
        DefaultConv2D(filters=128),
        MaxPooling2D(pool_size=2),
        DefaultConv2D(filters=256),
        DefaultConv2D(filters=256),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(units=128, activation='relu'),
        Dropout(0.5),
        Dense(units=64, activation='relu'),
        Dropout(0.5),
        Dense(units=nb_classes, activation='softmax'),
    ])

    return model

# DataLoader

In [0]:
import numpy as np
import pandas as pd
import os
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
%matplotlib inline 
import cv2 as cv

In [5]:
DATA_FOLDER = '/content/drive/My Drive/DeepFake'
TRAIN_SAMPLE_FOLDER = 'train_sample_videos'
TEST_FOLDER = 'test_videos'

print(f"Train samples: {len(os.listdir(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER)))}")
print(f"Test samples: {len(os.listdir(os.path.join(DATA_FOLDER, TEST_FOLDER)))}")

Train samples: 401
Test samples: 400


## Meta data

In [6]:
def get_meta_from_json(path):
    df = pd.read_json(os.path.join(DATA_FOLDER, path, json_file))
    df = df.T
    return df

train_list = list(os.listdir(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER)))
test_list = list(os.listdir(os.path.join(DATA_FOLDER, TEST_FOLDER)))
json_file = [file for file in train_list if  file.endswith('json')][0]
meta_train_df = get_meta_from_json(TRAIN_SAMPLE_FOLDER)
meta_train_df.head()

Unnamed: 0,label,split,original
aagfhgtpmv.mp4,FAKE,train,vudstovrck.mp4
aapnvogymq.mp4,FAKE,train,jdubbvfswz.mp4
abarnvbtwb.mp4,REAL,train,
abofeumbvv.mp4,FAKE,train,atvmxvwyns.mp4
abqwwspghj.mp4,FAKE,train,qzimuostzz.mp4


In [22]:
meta_train_df["original"].value_counts()

meawmsgiti.mp4    6
atvmxvwyns.mp4    6
kgbkktcjxf.mp4    5
qeumxirsme.mp4    5
xngpzquyhs.mp4    4
                 ..
nlerwupaqr.mp4    1
qokxxuayqn.mp4    1
rfzzrftgco.mp4    1
bejhvclboh.mp4    1
ixuouyigxa.mp4    1
Name: original, Length: 209, dtype: int64

In [25]:
meta_train_df.loc[meta_train_df.index == "aagfhgtpmv.mp4", "label"].values[0]

'FAKE'

# Train/Test Data

In [44]:
X_train_full, y_train_full = [], []
for root, dirs, files in os.walk(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER)):
    for name in files:
        if name != 'metadata.json':
            print(f"filename: {name}")
            print(os.path.join(root, name))
            path = os.path.join(root, name)
            capture_image = cv.VideoCapture(path) 
            ret, frame = capture_image.read()
            if ret:
                image = np.array(tf.image.resize(frame, [512, 512]))
                X_train_full.append(image)
                y_train_full.append(meta_train_df.loc[meta_train_df.index == name, "label"].values[0])
                print(f"image size: {image.shape}")
                print("="*50)

filename: cdaxixbosp.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/cdaxixbosp.mp4
image size: (512, 512, 3)
filename: btiysiskpf.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/btiysiskpf.mp4
image size: (512, 512, 3)
filename: clihsshdkq.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/clihsshdkq.mp4
image size: (512, 512, 3)
filename: alvgwypubw.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/alvgwypubw.mp4
image size: (512, 512, 3)
filename: eqvuznuwsa.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/eqvuznuwsa.mp4
image size: (512, 512, 3)
filename: eudeqjhdfd.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/eudeqjhdfd.mp4
image size: (512, 512, 3)
filename: eeyhxisdfh.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/eeyhxisdfh.mp4
image size: (512, 512, 3)
filename: cizlkenljw.mp4
/content/drive/My Drive/DeepFake/train_sample_videos/cizlkenljw.mp4
image size: (512, 512, 3)
filename: bndybcqhfr.mp4
/content/drive/My Drive

In [26]:
print(f"X_train size: {len(X_train_full)}, y_train size: {len(y_train_full)}")

X_train size: 400, y_train size: 400


In [0]:
# X_train_full = np.array(X_train_full)
# y_train_full = np.array(y_train_full)

split = 0.7
cutoff = int(split * len(X_train_full))

X_train, X_valid = X_train_full[:-cutoff], X_train_full[-cutoff:]
y_train, y_valid = y_train_full[:-cutoff], y_train_full[-cutoff:]

In [0]:
# X_train_full = np.array(X_train_full)
# y_train_full = np.array(y_train_full)

# split = 0.7
# cutoff = int(split * len(X_train))

# X_train, X_valid = X_train_full[:-cutoff], X_train_full[-cutoff:]
# y_train, y_valid = y_train_full[:-cutoff], y_train_full[-cutoff:]

# X_mean = X_train.mean(axis=0, keepdims=True)
# X_std = X_train.std(axis=0, keepdims=True) + 1e-7
# X_train = (X_train - X_mean) / X_std
# X_valid = (X_valid - X_mean) / X_std
# # X_test = (X_test - X_mean) / X_std

# X_train = X_train[..., np.newaxis]
# X_valid = X_valid[..., np.newaxis]
# # X_test = X_test[..., np.newaxis]

# Training

In [0]:
model = simple_cnn(2)

In [0]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [41]:
history = model.fit(X_train, y_train, epochs=10, validation_data=[X_valid, y_valid])
score = model.evaluate(X_test, y_test)

ValueError: ignored