In [None]:
# install dependencies (uncomment the lines below to run them)
# ! pip install --upgrade pip
# !pip install numpy --upgrade
# ! pip install pandas --upgrade
# ! pip install boto3 --upgrade
# ! pip install requests --upgrade
# ! pip install scikit-learn --upgrade
# ! pip install tensorflow --upgrade
# ! pip install keras --upgrade
# ! pip install scikit-video --upgrade
# ! pip install scikit-image --upgrade
# !pip install sagemaker --upgrade
# ! pip install opencv-python --upgrade

In [1]:
import pandas as pd
import numpy as np
import boto3
import cv2 as cv
import os
# import time
import random 
import json
from joblib import dump, load
import math
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout, Activation
from keras.utils import to_categorical
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import load_model
from mtcnn.mtcnn import MTCNN
face_detector = MTCNN()
#sensitive variables in config.py file that is on .gitignore
from config import key_, secret_, s3_bucket, kaggle_cookie

from functions_for_testing import get_video_link

Using TensorFlow backend.


In [None]:
import matplotlib.pyplot as plt


In [7]:
# per-video metadata; every entry is looked up by video name and carries a 'label'
with open('meta.json') as meta_file:
    meta = json.load(meta_file)

# names of the videos that exist in my bucket, as recorded in the local csv
video_df = pd.read_csv('video_information.csv')
video_list = video_df['video_names'].tolist()

In [None]:
def grab_frame(video_link, skipped_frames=5):
    '''
    function that takes a link to a video, and returns the frame after 'skipped_frames' input variable
    temporary function to prevent large amount of bucket queries -- combine with resize and detect image function later
    inputs:
    video_link: path or url of the video; passed straight to cv.VideoCapture
    skipped_frames: number of frames to skip before the frame that is returned
    returns:
    the frame produced by VideoCapture.retrieve(), or None when the video cannot be opened
    or ends before the requested frame is reached
    '''
    video = cv.VideoCapture(video_link)
    try:
        if not video.isOpened():
            return None
        # grab() advances the stream without decoding the frame, which keeps skipping cheap;
        # stop as soon as the stream runs out instead of retrieving a frame that does not exist
        for _ in range(int(skipped_frames) + 1):
            if not video.grab():
                return None
        grabbed, frame = video.retrieve()
        return frame if grabbed else None
    finally:
        # always hand the capture back, even if one of the calls above raises
        video.release()
# look into improving this - 701 ms when loading from bucket, 50 ms when loading from file, 5 skipped frames

In [None]:
def resize_and_detect_face(frame, new_max_size=750, padding=(.1, 0.05, 0.05),
                           min_confidence=0.9, face_ratio=1.54, face_size=(146, 225)):
    '''
    resize a frame, detect the faces in it, and return every confident face as a fixed-size crop
    temporary function -- combine with grab frame later
    -- want to reduce number of bucket queries--
    inputs:
    frame: a single frame or an image (None is accepted and gives an empty list)
    new_max_size: the maximum size of the longer of the width/height the frame will be resized to prior
    to looking for faces
    padding: tuple of percentages; will be added to the size of the face to ensure the entire face is captured
    -- the tuple is (top, bottom, horizontal)
    as implemented, padding[1] times the face height is subtracted from the first row of the box,
    padding[0] times the face height is added after the last row of the box, and
    padding[2] times the face width is added on both the left and the right
    min_confidence: detections with a confidence at or below this value are ignored
    face_ratio: height / width ratio every crop is brought to before the final resize
    face_size: (width, height) in pixels of each returned face; should agree with face_ratio
    returns:
    a list of arrays
    each array is a cropped face resized to face_size (146 by 225 pixels by default)
    '''
    #a failed frame grab gives None; report 'no faces' instead of crashing inside OpenCV
    if frame is None:
        return []
    #convert the frame to color
    #unsure if this step is necessary, however cvtColor takes very little time (~200 µs )
    img = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    #get original shape of frame
    original_height, original_width = frame.shape[0], frame.shape[1]
    #get aspect ratio -- want to maintain this
    img_size_ratio = original_height / original_width
    #the longer edge becomes new_max_size, the shorter edge follows from the aspect ratio
    if original_height > original_width:
        new_height = new_max_size
        new_width = new_height / img_size_ratio
    else:
        new_width = new_max_size
        new_height = new_width * img_size_ratio
    #new dimensions -- the aspect ratio will not match exactly due to rounding, but will be close
    new_dim = (int(new_width), int(new_height))
    resized_image = cv.resize(img, new_dim, interpolation=cv.INTER_AREA)
    image_height = resized_image.shape[0]
    faces = []
    for detection in face_detector.detect_faces(resized_image):
        #only review faces that are confident enough of being a face
        if detection['confidence'] <= min_confidence:
            continue
        #the 'box' of the face is a list of pixel values as: '[x, y, width, height]'
        x, y, width, height = detection['box'][:4]
        #left / right edges, widened by the horizontal padding; a negative start would
        #wrap around when slicing, so it is clamped to the 0 index of the frame array
        start_x = max(0, x - (padding[2] * width))
        end_x = x + ((1 + padding[2]) * width)
        #NOTE(review): the first row is moved by padding[1] and the last row by padding[0];
        #confirm this matches the intended (top, bottom, horizontal) order
        start_y = max(0, y - (padding[1] * height))
        end_y = y + ((1 + padding[0]) * height)
        #calculate the number of pixels the face is on each axis
        x_size = end_x - start_x
        y_size = end_y - start_y
        #how much the y_size needs to change to reach x_size * face_ratio
        y_size_change = (x_size * face_ratio) - y_size
        #split the change between both edges so the crop height becomes x_size * face_ratio;
        #applying the full change to each edge would overshoot and distort the final resize
        start_y_ = start_y - (y_size_change / 2)
        end_y_ = end_y + (y_size_change / 2)
        #if the crop sticks out of the image, slide it back in while keeping its height where possible
        if start_y_ < 0:
            end_y_ = min(end_y_ - start_y_, image_height)
            start_y_ = 0
        elif end_y_ > image_height:
            start_y_ = max(0, start_y_ - (end_y_ - image_height))
            end_y_ = image_height
        start_x, end_x, start_y_, end_y_ = int(start_x), int(end_x), int(start_y_), int(end_y_)
        face_image = resized_image[start_y_:end_y_, start_x:end_x]
        #a degenerate box leaves nothing to resize; skip it rather than let cv.resize raise
        if face_image.size == 0:
            continue
        faces.append(cv.resize(face_image, tuple(face_size), interpolation=cv.INTER_AREA))
    return faces #this will eventually need to become an array

In [15]:
def get_y_values(video, x_length, video_dictionary=meta):
    '''
    build the target rows for all of the samples that were taken from one video
    inputs:
    video: video name (used as the key into video_dictionary)
    x_length: length of x input (one y row is produced per x sample)
    video_dictionary: dictionary whose entries hold a 'label' that says if a video is real or fake
    returns:
    the to_categorical encoding (2 classes) of the repeated label, ready to pass into a neural network
    '''
    #videos labelled 'FAKE' are class 0, every other label is class 1
    is_fake = video_dictionary[video]['label'] == 'FAKE'
    label = 0 if is_fake else 1
    #repeat the label once for every x sample
    labels = [label for _ in np.arange(0, x_length)]
    #encode as two-class categorical rows so the result can be passed into my model
    return to_categorical(labels, num_classes=2)

In [16]:
# name of a single video file kept around for manual checks
# NOTE(review): presumably one of the names in video_list / meta -- confirm
test_video = 'xmkwsnuzyq.mp4'

In [None]:
#todo -- build pipeline to pass data to model

In [None]:
#starting network: a single 3x3 convolution feeding straight into the classifier
model = Sequential([
    #input is 225 x 146 with 3 channels
    Conv2D(8, (3, 3), padding="same", activation='elu', input_shape=(225, 146, 3)),
    #add more layers
    #must flatten before the output layer
    Flatten(),
    #output layer -- two units with softmax
    Dense(2, activation='softmax'),
])

In [None]:
#the output layer is a two-way softmax and the targets are two-class categorical rows,
#so categorical cross-entropy is the matching loss (mean squared error trains poorly on probabilities)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
#TODO: train the model once the data pipeline exists.
#the call stays commented out until then -- 'model.fit(x=, ...)' with no value is a
#SyntaxError and stops the notebook from running top to bottom
# model.fit(x=...,
#          #params
#          )