In [None]:
# Install dependencies (uncomment the lines below to run them)
# ! pip install --upgrade pip
# !pip install numpy --upgrade
# ! pip install pandas --upgrade
# ! pip install boto3 --upgrade
# ! pip install requests --upgrade
# ! pip install scikit-learn --upgrade
# ! pip install tensorflow --upgrade
# ! pip install keras --upgrade
# ! pip install scikit-video --upgrade
# ! pip install scikit-image --upgrade
# !pip install sagemaker --upgrade
# ! pip install opencv-python --upgrade
# ! pip install MTCNN

In [None]:
import pandas as pd
import numpy as np
import boto3
import cv2 as cv
import os
# import time
import random 
import json
from joblib import dump, load
import math
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import *# Dense, Flatten, Conv2D, Dropout, Activation, BatchNormalization, MaxPooling2D
from keras.utils import to_categorical
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import load_model
from mtcnn.mtcnn import MTCNN
face_detector = MTCNN()
#sensitive variables in config.py file that is on .gitignore
from config import key_, secret_, s3_bucket, kaggle_cookie

from functions_for_testing import *

In [None]:
import matplotlib.pyplot as plt


In [None]:
# meta.json maps each video name to its metadata; the 'label' entry is read
# later to tell REAL videos from FAKE ones.
with open('meta.json') as meta_file:
    meta = json.load(meta_file)

# Names of the videos listed in video_information.csv (the videos available in the bucket).
video_df = pd.read_csv('video_information.csv')
video_list = video_df['video_names'].tolist()

In [None]:
def _resize_to_max_edge(img, new_max_size):
    '''Resize img so that its longer edge equals new_max_size, keeping the aspect ratio.'''
    original_height, original_width = img.shape[0], img.shape[1]
    img_size_ratio = original_height / original_width
    if original_height > original_width:
        new_height = new_max_size
        new_width = new_height / img_size_ratio
    else:
        new_width = new_max_size
        new_height = new_width * img_size_ratio
    # int() truncates, so the aspect ratio is only approximately preserved
    new_dim = (int(new_width), int(new_height))
    return cv.resize(img, new_dim, interpolation=cv.INTER_AREA)


def _padded_face_bounds(box, image_height, image_width, padding, face_dim):
    '''
    Turn a detector box [x, y, width, height] into integer crop bounds
    (start_x, end_x, start_y, end_y) that include the padding, stay inside the
    image, and have (where the image allows it) the height/width ratio of face_dim.
    '''
    x, y, width, height = box[0], box[1], box[2], box[3]
    # horizontal padding is applied on both sides, then clamped to the image
    start_x = max(0, x - (padding[2] * width))
    end_x = min(image_width, x + ((1 + padding[2]) * width))
    # NOTE(review): padding[1] is applied to the edge with the smaller y and
    # padding[0] to the edge with the larger y. If the detector's y axis grows
    # downward (usual image convention) that is the reverse of the documented
    # (top, bottom, horizontal) order. Kept as-is for backward compatibility;
    # it makes no difference for the symmetric default.
    start_y = max(0, y - (padding[1] * height))
    end_y = y + ((1 + padding[0]) * height)

    # keep the horizontal size and adapt the vertical size to the target ratio
    face_ratio = round(face_dim[1] / face_dim[0], 2)
    x_size = end_x - start_x
    y_size = end_y - start_y
    y_size_change = (x_size * face_ratio) - y_size
    # split the correction evenly between both edges so the resulting height
    # is exactly x_size * face_ratio
    start_y_ = start_y - (y_size_change / 2)
    end_y_ = end_y + (y_size_change / 2)
    # if the crop leaves the image on one side, shift it back towards the other
    if start_y_ < 0:
        end_y_ = min(end_y_ - start_y_, image_height)
        start_y_ = 0
    elif end_y_ > image_height:
        start_y_ = max(0, start_y_ - (end_y_ - image_height))
        end_y_ = image_height
    return int(start_x), int(end_x), int(start_y_), int(end_y_)


def get_faces_from_video(video_link, 
                         skipped_frames=15, 
                         new_max_size=750, 
                         face_confidence = 0.9,
                         padding=(.15, 0.15, 0.15), #(.1, 0.05, 0.05) 
                         face_dim = (146, 225)):
    '''
    takes a link to a video as input, and returns an array of the faces found in the first frame
    (after the skipped ones) that contains at least one sufficiently confident face
    inputs:
    
    video_link: link to (or path of) the video to read
    skipped_frames: number of frames to skip before looking for faces
    
    new_max_size: length, in pixels, that the longer edge of the frame is resized to before looking for
    faces (the aspect ratio of the frame is kept)
    
    face_confidence: minimum confidence (0-1) reported by the MTCNN detector for a detection to be 
    treated as a face
    
    padding: tuple of fractions of the detected face size that are added around the detected box to 
    make sure the entire face is captured
    -- the tuple is documented as (top, bottom, horizontal); see the note in _padded_face_bounds about 
    which vertical edge each of the first two values is applied to
    the horizontal value moves the left and right edges of the box outwards by that fraction of the box width
    
    face_dim: (width, height) every face crop is resized to
    
    returns:
    an array with one entry per face, each entry being a face_dim-sized image
    
    raises:
    RuntimeError if the video cannot be opened, or if it ends before any face is found
    '''
    video = cv.VideoCapture(video_link)
    try:
        if not video.isOpened():
            raise RuntimeError('could not open video: {}'.format(video_link))
        #skip appropriate number of frames based on skipped_frames input
        for _ in range(int(skipped_frames)):
            video.grab()
        while True:
            grabbed = video.grab()
            retrieved, frame = video.retrieve() if grabbed else (False, None)
            if not retrieved or frame is None:
                #end of the stream: stop instead of crashing on a None frame or looping forever
                raise RuntimeError('no face with confidence above {} found in video: {}'.format(
                    face_confidence, video_link))
            #the frame is converted from BGR to RGB before it is handed to the face detector
            img = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            resized_image = _resize_to_max_edge(img, new_max_size)
            image_height, image_width = resized_image.shape[0], resized_image.shape[1]
            faces = []
            for detection in face_detector.detect_faces(resized_image):
                #only keep detections that are confident enough to be treated as a face
                if detection['confidence'] > face_confidence:
                    #the 'box' of the face is a list of pixel values as: '[x, y, width, height]'
                    start_x, end_x, start_y, end_y = _padded_face_bounds(
                        detection['box'], image_height, image_width, padding, face_dim)
                    face_image = resized_image[start_y:end_y, start_x:end_x]
                    if face_image.size == 0:
                        #degenerate box (e.g. entirely outside the frame); cv.resize would fail on it
                        continue
                    faces.append(cv.resize(face_image, face_dim, interpolation=cv.INTER_AREA))
            if faces:
                #convert faces list to array
                return np.array(faces)
    finally:
        #always free the capture, whether a face was found or an error was raised
        video.release()
                
        

In [None]:
def get_xy_values(video, video_dictionary=meta):
    '''
    builds the network inputs and one-hot labels for a single video

    inputs:
    video: video name, either as str or as bytes (names arrive as bytes when this
    function is driven through tf.data.Dataset.from_generator)

    video_dictionary: dictionary keyed by video name whose entries hold a 'label';
    the label 'FAKE' marks a fake video, any other label is treated as real

    returns:
    x_values: the faces returned by get_faces_from_video for this video
    y_values_: one one-hot row (2 classes) per face; class 0 = fake, class 1 = real
    '''
    # bytes have .decode, plain strings do not
    try:
        video_ = video.decode("utf-8")
    except AttributeError:
        video_ = video

    x_values = get_faces_from_video(get_video_link(video_))

    # every face taken from the same video shares the label of that video
    y_value = 0 if video_dictionary[video_]['label'] == 'FAKE' else 1
    y_values_ = to_categorical([y_value] * len(x_values), num_classes=2)
    return x_values, y_values_

In [None]:
class Generator(Sequence): 
    '''
    Keras Sequence that serves batches of (faces, one-hot labels) for a list of videos.

    Each batch covers batch_size videos; the faces and labels of all videos in the
    batch are stacked along the first axis. The order in which videos are visited is
    reshuffled at the end of every epoch.

    video_list: names of the videos to serve
    y_set: stored on the instance as self.y but not read by this class; the labels are
    looked up by get_xy_values
    batch_size: number of videos per batch
    '''
    def __init__(self, video_list, y_set=None, batch_size=1):
        super().__init__()
        #convert the video_list to an array
        self.x, self.y = np.array(video_list), y_set
        self.batch_size = batch_size
        #positions into self.x, shuffled in on_epoch_end
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        #number of full batches; a trailing partial batch is dropped
        return math.floor(self.x.shape[0] / self.batch_size)

    def __getitem__(self, idx):
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        if len(inds) == 0:
            raise IndexError('batch index {} is out of range'.format(idx))
        #read from this instance's own (shuffled) videos, not from a module-level list;
        #str() turns the numpy string back into a plain python string
        batches = [get_xy_values(str(self.x[i])) for i in inds]
        if len(batches) == 1:
            return batches[0]
        batch_x = np.concatenate([x for x, _ in batches])
        batch_y = np.concatenate([y for _, y in batches])
        return batch_x, batch_y

    def on_epoch_end(self):
        np.random.shuffle(self.indices)

In [None]:
def generator(video_list):
    '''
    a python generator that endlessly yields x and y values to be passed into a neural network
    
    Inputs:
    video_list: a sequence of video names (str, or bytes when called through
    tf.data.Dataset.from_generator)
    
    yields:
    the (x, y) tuple returned by get_xy_values for one video at a time. The videos are
    visited in a random order, and reshuffled every time the whole list has been consumed.
    If video_list is empty the generator finishes without yielding anything.
    '''
    #work on a copy so that the caller's list is not reordered as a side effect
    video_list_ = list(video_list)
    if len(video_list_) == 0:
        return
    while True:
        random.shuffle(video_list_)
        for video in video_list_:
            yield get_xy_values(video)


In [None]:
# Split the video names by label: 'REAL' versus everything else.
real_videos = [name for name, info in meta.items() if info['label'] == 'REAL']
fake_videos = [name for name, info in meta.items() if info['label'] != 'REAL']

# Build a balanced set by pairing each real video with one fake video.
# zip stops at the shorter list, so surplus videos of either class are left out
# (indexing fake_videos by position would fail if there were fewer fakes than reals).
video_set = []
for real_video, fake_video in zip(real_videos, fake_videos):
    video_set.extend((real_video, fake_video))

#split videos into test and training 
train_videos, test_videos = train_test_split(video_set, test_size=.003, random_state=3)

In [None]:
# Number of videos in each split; used to derive the steps per epoch when fitting.
training_len, testing_len = len(train_videos), len(test_videos)

# Sequence-based loaders for both splits (batch size left at its default of 1).
train_generator = Generator(video_list=train_videos, y_set=train_videos)
test_generator = Generator(video_list=test_videos, y_set=test_videos)

In [None]:
def _dataset_from_videos(videos):
    '''
    wraps generator(videos) in a tf.data.Dataset

    each element is a pair of
    - uint8 faces with shape [None, 225, 146, 3]
    - float32 one-hot labels with shape [None, 2]
    '''
    return tf.data.Dataset.from_generator(
        generator,
        args=[videos],
        output_types=(tf.uint8, tf.float32),
        output_shapes=([None, 225, 146, 3], [None, 2]),
    )


train_generator_ = _dataset_from_videos(train_videos)
test_generator_ = _dataset_from_videos(test_videos)



In [None]:
# Convolutional classifier for 225x146 RGB face crops with a 2-way softmax output.
# NOTE(review): the hidden Dense layers are created without an activation argument;
# confirm that leaving them linear is intended.
model = Sequential([
    # convolution / pooling stack
    Conv2D(16, (5, 5), padding="same", activation='relu', input_shape=(225, 146, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(32, (6, 6), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.05),
    Conv2D(64, (8, 8), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(16, (4, 4), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.15),
    Conv2D(32, (4, 4), activation='relu'),
    MaxPooling2D((2, 2)),
    # the feature maps must be flattened before the dense layers
    Flatten(),
    Dense(64),
    Dense(16),
    Dropout(0.2),
    Dense(128),
    Dense(32),
    Dropout(0.35),
    # output layer: one probability per class
    Dense(2, activation='softmax'),
])

In [None]:
# Adam optimizer, mean squared error between the softmax output and the
# one-hot labels as the loss, accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['accuracy'],
)

In [None]:
# Train on the tf.data pipelines. Both datasets are endless, so the number of
# steps per epoch is given explicitly: one step per video of the split.
# NOTE(review): Keras documents `workers` / `use_multiprocessing` as applying to
# generator or Sequence inputs; x is a tf.data.Dataset here -- confirm they have any effect.
batch_size = 1
num_epochs = 2
model.fit(
    x=train_generator_,
    epochs=num_epochs,
    steps_per_epoch=training_len // batch_size,
    validation_data=test_generator_,
    validation_steps=testing_len // batch_size,
    workers=8,
    use_multiprocessing=True,
)

In [None]:
# Same training call as the multiprocessing cell, but with use_multiprocessing
# switched off. Calling fit again on the same `model` continues from its current
# weights, so running both cells trains for num_epochs epochs twice.
batch_size = 1
num_epochs = 2
model.fit(
    x=train_generator_,
    epochs=num_epochs,
    steps_per_epoch=training_len // batch_size,
    validation_data=test_generator_,
    validation_steps=testing_len // batch_size,
    workers=8,
    use_multiprocessing=False,
)

In [None]:
# Spot check on a video commented as fake: predict on its faces and show, for
# the first face, the output for class 1 (the class get_xy_values assigns to
# every label other than 'FAKE').
x, _ = get_xy_values('xpzfhhwkwb.mp4')
res_fake = model.predict(x)
res_fake[0, 1]

In [None]:
# Spot check on a video commented as real: predict on its faces and show, for
# the first face, the output for class 1 (the class get_xy_values assigns to
# every label other than 'FAKE').
x, _ = get_xy_values('xmkwsnuzyq.mp4')
res_real = model.predict(x)
res_real[0, 1]