## Import Statements

In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import face_recognition
from PIL import Image
import cv2 as cv
import os
import random

## Helper Functions

In [2]:
# Read the first frame of a video as an RGB image
def read_image(video_filename,DIR):
  """Return the first frame of a video file as an RGB image.

  Args:
    video_filename: Name of the video file inside ``DIR``.
    DIR: Directory holding the video, with or without a trailing separator.

  Returns:
    The first frame returned by ``cv.VideoCapture.read``, converted with
    ``cv.COLOR_BGR2RGB``.

  Raises:
    IOError: If no frame could be read from the file.
  """
  # os.path.join tolerates a missing trailing separator, unlike plain concatenation.
  video_file = os.path.join(DIR, video_filename)
  cap = cv.VideoCapture(video_file)
  try:
    success, image = cap.read()
  finally:
    # Always free the capture handle, even if reading raises.
    cap.release()
  if not success or image is None:
    # Fail with the offending path instead of an opaque cvtColor error on None.
    raise IOError(f"Could not read a frame from video file: {video_file}")
  return cv.cvtColor(image, cv.COLOR_BGR2RGB)

In [3]:
def face_location_and_coordinates(image):
  """Locate a single face in an image and collect its landmark coordinates.

  Args:
    image: Image passed to ``face_recognition``.

  Returns:
    A tuple ``(face_locations, face_landmarks, full_feature_list)``.
    When exactly one face is detected these are the detected location list,
    the list of landmark dictionaries, and a flat list holding every landmark
    point of that face (the value of most interest for graph generation).
    When zero or several faces are detected, each element is the sentinel
    ``[(0,0,0,0)]``.
  """
  face_locations = face_recognition.face_locations(image)
  if len(face_locations) != 1:
    # Sentinel triple signalling "no usable single face".
    return [(0,0,0,0)], [(0,0,0,0)],[(0,0,0,0)]
  # Hand the box found above to the landmark step so detection is not run a
  # second time and the landmarks belong to that same detection.
  face_landmarks = face_recognition.face_landmarks(image, face_locations=face_locations)
  full_feature_list = []
  for points in face_landmarks[0].values():
    full_feature_list.extend(points)
  return face_locations, face_landmarks, full_feature_list

In [4]:
def graph_face(image):
  """Build a fully connected landmark graph for the single face in an image.

  Every landmark point becomes a node with three attributes: 'x' and 'y'
  (the point's coordinates as floats) and 'feature' (integer tag of the facial
  feature the point belongs to). In the returned graph every node is connected
  to every node, itself included.

  Args:
    image: Image forwarded to ``face_location_and_coordinates``.

  Returns:
    A ``networkx.Graph``. It is empty when ``face_location_and_coordinates``
    reports its ``[(0, 0, 0, 0)]`` sentinel, so callers can skip it.

  Raises:
    KeyError: If the landmark dictionary lacks one of the nine expected features.
  """
  locations, landmarks_list, _ = face_location_and_coordinates(image)
  if locations == [(0, 0, 0, 0)]:
    return nx.Graph()

  # Position in this tuple is the feature tag (chin=0 ... bottom_lip=8). Tagging
  # from an explicit order keeps tags stable even if the landmark dictionary's
  # key order ever differs.
  feature_order = ('chin', 'left_eyebrow', 'right_eyebrow', 'nose_bridge',
                   'nose_tip', 'left_eye', 'right_eye', 'top_lip', 'bottom_lip')
  landmarks = landmarks_list[0]

  # One small graph per facial feature, nodes annotated with position and tag.
  graph_list = []
  for feature_tag, feature in enumerate(feature_order):
    points = landmarks[feature]
    feature_graph = nx.complete_graph(n=len(points))
    for current_node, point in enumerate(points):
      node_attributes = feature_graph.nodes[current_node]
      node_attributes['x'] = float(point[0])
      node_attributes['y'] = float(point[1])
      node_attributes['feature'] = int(feature_tag)
    graph_list.append(feature_graph)

  # Merge the feature graphs (nodes are renumbered consecutively), then connect
  # every pair of nodes. The graph is undirected, so each pair is added once;
  # dest starts at source so the self-loops of the all-pairs loop are kept.
  new_graph = nx.disjoint_union_all(graph_list)
  node_count = new_graph.number_of_nodes()
  new_graph.add_edges_from(
    (node_source, node_dest)
    for node_source in range(node_count)
    for node_dest in range(node_source, node_count))
  return new_graph

In [5]:
def process_and_graph(video_filename, DIR, video_df):
  """Turn one video into a (graph, label) pair.

  The image returned by read_image is converted with graph_face. The numeric
  label is 1 when the video's 'label' entry in video_df equals 'FAKE',
  otherwise 0.
  """
  frame = read_image(video_filename=video_filename, DIR=DIR)
  is_fake = video_df.loc[video_filename, 'label'] == 'FAKE'
  graph_label = 1 if is_fake else 0
  return graph_face(frame), graph_label

In [6]:
def add_graphs(DIR, label):
  """Graph every video file in a directory and add the results to the shared collection.

  The directory is expected to hold only 'original' or only 'modified' videos,
  since a single label is applied to all of them. Graphs and labels are appended
  to the module-level ``full_graph_list`` / ``full_graph_labels``, which must
  already exist. Shuffling of the two classes happens later.

  Args:
    DIR: Directory containing the videos.
    label: Label stored for every video; 'FAKE' maps to 1 downstream, anything else to 0.

  Returns:
    The shared ``(full_graph_list, full_graph_labels)`` lists, after appending.
  """
  # Touch the shared collections first so a missing definition fails right away
  # rather than after the first (slow) video has been processed.
  graphs, labels = full_graph_list, full_graph_labels

  # os.listdir yields entries in arbitrary order; sorting keeps the collection
  # order reproducible from run to run.
  video_list = sorted(name for name in os.listdir(DIR)
                      if os.path.isfile(os.path.join(DIR, name)))
  video_df = pd.DataFrame(index=video_list)
  video_df['label'] = label

  for video in video_list:
    graph, graph_label = process_and_graph(video, DIR, video_df=video_df)
    # An empty graph means no usable face was found; leave it out.
    if len(graph) > 0:
      graphs.append(graph)
      labels.append(graph_label)

  return graphs, labels

In [20]:
def write_text(text_filename, full_graph_list, full_graph_labels, number_of_graphs):
  """Write a graph collection to a text file.

  See https://github.com/muhanzhang/pytorch_DGCNN/tree/master/data for the structure.
  Remember to change the file name after shuffling or you will overwrite data.

  File layout: the first line is ``number_of_graphs``; each graph then contributes
  a ``<node count> <label>`` line followed by one line per node holding the node's
  feature tag, its neighbour count, the neighbour ids, x, y and the feature tag again.
  A node is never listed as its own neighbour.

  Args:
    text_filename: Path of the file to create (an existing file is overwritten).
    full_graph_list: Graphs whose nodes carry 'feature', 'x' and 'y' attributes.
    full_graph_labels: Labels, indexed in parallel with ``full_graph_list``.
    number_of_graphs: Value written on the first line.

  Raises:
    IndexError: If ``full_graph_labels`` is shorter than ``full_graph_list``.
  """
  # The context manager closes the file even if a write fails part-way.
  with open(text_filename,"w") as file:
    file.write(f'{number_of_graphs}\n')
    for graph_index, graph in enumerate(full_graph_list):
      graph_label = full_graph_labels[graph_index]
      file.write(f'{graph.number_of_nodes()} {graph_label}\n')
      for node in graph.nodes():
        attrs = graph.nodes[node]
        # Skip self-loops while listing neighbours; the caller's graph is left untouched.
        neighb_list = [neighbor for neighbor in graph.neighbors(node) if neighbor != node]
        # NOTE(review): the ", " after y and the repeated feature tag are written exactly
        # as before; confirm the loader that consumes this file accepts that token.
        file.write(f"{attrs['feature']} {len(neighb_list)} {' '.join(map(str, neighb_list))} {attrs['x']} {attrs['y']}, {attrs['feature']} \n")

## Specify data, and create graphs

### IMPORTANT

In [13]:
# Shared graph collection: run this cell exactly ONCE.
# add_graphs() appends to these two lists on every directory pass, and building
# graphs is slow at this scale, so re-running this cell throws that work away.
# The final text output is generated from these lists.
full_graph_list, full_graph_labels = [], []

In [14]:
#Now specify the locations of all directories and labels
%cd /home/jupyter/
# Original for reference
# full_graph_list,full_graph_labels = add_graphs(DIR='/content/drive/My Drive/deepfake/data_test/dataset_1/fake/', label = 'FAKE')
# full_graph_list,full_graph_labels = add_graphs(DIR='/content/drive/My Drive/deepfake/data_test/dataset_1/real/', label = 'REAL')

full_graph_list,full_graph_labels = add_graphs(DIR='data/original_sequences/actors/c23/videos/', label = 'REAL')
print("done with original 1")
full_graph_list,full_graph_labels = add_graphs(DIR='data/original_sequences/youtube/c23/videos/', label = 'REAL')
print("done with original 2")



# Get a final count to verify data additions
number_of_graphs = len(full_graph_list)

/home/jupyter
done with original 1
done with original 2


In [16]:
# DeepFakeDetection manipulated videos, labelled FAKE
deepfakedetection_dir = 'data/manipulated_sequences/DeepFakeDetection/c23/videos/'
full_graph_list, full_graph_labels = add_graphs(DIR=deepfakedetection_dir, label='FAKE')
print("done with manipulated")

done with manipulated


In [32]:
# full_graph_list = list(full_graph_list)
# full_graph_labels = list(full_graph_labels)
# Deepfakes manipulated videos, labelled FAKE
deepfakes_dir = 'data/manipulated_sequences/Deepfakes/c23/videos/'
full_graph_list, full_graph_labels = add_graphs(DIR=deepfakes_dir, label='FAKE')
print("done with manipulated Deepfakes")


done with manipulated Deepfakes


In [35]:
# Face2Face manipulated videos, labelled FAKE
face2face_dir = 'data/manipulated_sequences/Face2Face/c23/videos/'
full_graph_list, full_graph_labels = add_graphs(DIR=face2face_dir, label='FAKE')
print("done with manipulated Face2Face")

done with manipulated Face2Face


In [37]:
# FaceSwap manipulated videos, labelled FAKE
faceswap_dir = 'data/manipulated_sequences/FaceSwap/c23/videos/'
full_graph_list, full_graph_labels = add_graphs(DIR=faceswap_dir, label='FAKE')
print("done with manipulated FaceSwap")

done with manipulated FaceSwap


In [39]:
# NeuralTextures manipulated videos, labelled FAKE
neuraltextures_dir = 'data/manipulated_sequences/NeuralTextures/c23/videos/'
full_graph_list, full_graph_labels = add_graphs(DIR=neuraltextures_dir, label='FAKE')
print("done with manipulated NeuralTextures")

done with manipulated NeuralTextures


In [40]:
# Check the collection before writing to text: every graph needs exactly one label
# full_graph_list
# full_graph_labels
number_of_graphs = len(full_graph_list)
assert len(full_graph_labels) == number_of_graphs, "graphs and labels are out of sync"
number_of_graphs

7248

In [41]:
# DeepFakeDetection has 1158 Original clips, and 2309 manipulated videos. Running total: 3467
# Deepfakes has 946 manipulated videos. Running total 4413 
# Face2Face has 942 manipulated videos. Running total 5355
# FaceSwap  has 948 manipulated videos. Running total 6303
# Neural T  has 945 manipulated videos. Running total 7248


list

## Write Text File

In [43]:
# Persist the collection in its original, unshuffled order before any shuffling
write_text(
    text_filename='FF.txt',
    full_graph_list=full_graph_list,
    full_graph_labels=full_graph_labels,
    number_of_graphs=number_of_graphs,
)

In [44]:
# At this time, train/test splits are slightly cumbersome for this application in dgcnn:
# the choices are to specify indices or to take the last N graphs in the text file.
# So we write several shuffled copies, each giving a different train/test split.

SHUFFLE_SEED = 0  # fixed so the shuffled files can be regenerated exactly
rng = random.Random(SHUFFLE_SEED)

# Shuffle an index permutation rather than the collections themselves, so that
# full_graph_list and full_graph_labels stay lists in their original order.
# Rebinding them to the tuples produced by zip(*...) would break later
# add_graphs calls, and unpacking fails outright on an empty collection.
shuffle_order = list(range(len(full_graph_list)))

# Now let's shuffle and save 10 times over
for i in range(10):
  rng.shuffle(shuffle_order)
  shuffled_graphs = [full_graph_list[index] for index in shuffle_order]
  shuffled_labels = [full_graph_labels[index] for index in shuffle_order]
  write_text(text_filename=f'FF_{i}.txt', full_graph_list=shuffled_graphs,
             full_graph_labels=shuffled_labels, number_of_graphs=number_of_graphs)
  