In [1]:
#import imageio
#imageio.plugins.ffmpeg.download()
import sys
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import face_recognition
import moviepy.editor as mpy
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
sample_width = 224
sample_height = 224
n_frames_from_video = 5

# Sample Code Run for one video

In [3]:
video_id = "_uNup91ZYw0.002.mp4"
video = cv2.VideoCapture(video_id)

In [4]:
def save_images_from_videos(video, n_random_frame):
    count = 0
    file_iter = 0
    n_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    random_frame = np.random.randint(1, n_frames, size = n_random_frame * 3)
                      
    while(video.isOpened()):
        ret, frame = video.read()
        count += 1
        if ret == True and file_iter < 5:
            if count in random_frame:
                
                # flip the frame for randomness
                random_flip = np.random.randint(-1, 2)
                frame = cv2.flip(frame, random_flip)

                recface = face_recognition.face_locations(frame)
                
                if len(recface) > 0:
                    top, right, bottom, left = recface[0]
                    
                    width = right - left
                    height = bottom - top
                    center_X = (left + right) / 2
                    center_Y = (top + bottom) / 2

                    # to choose a window of 224 * 224
                    top = int(center_Y - sample_height / 2)
                    left = int(center_X - sample_width / 2)
                    height = sample_height
                    width = sample_width
                    
                    face_image = frame[top:(top + height), left:(left + width)]

                    # write the flipped frame
                    cv2.imwrite("frame%d.jpg" % file_iter, face_image)
                    #cv2.imshow('frame',face_image)
                    file_iter += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
            
    if file_iter == n_random_frame:
        return True
    else:
        return False

In [5]:
#save_images_from_videos(video, 5)

In [6]:
video.release()

# Create the path Variables

In [7]:
training_video_path = "Data/Videos/Training/"
training_input_labels_path = "Data/training_gt.csv"
training_image_path = "Data/Images/Training/"
training_output_labels_path = "Data/training_final.csv"

test_video_path = "Data/Videos/Test/"
tset_input_labels_path = "Data/validation_gt.csv"
test_image_path = "Data/Images/Test/"
test_output_labels_path = "Data/test_final.csv"

validation_video_path = "Data/Videos/Validation/"
validation_input_labels_path = "Data/validation_gt.csv"
validation_image_path = "Data/Images/Validation/"
validation_output_labels_path = "Data/validation_final.csv"

In [8]:
if not os.path.exists("Data/Images"):
    os.mkdir("Data/Images")

# Training: Read the input label file

In [9]:
training_input_labels_data = pd.read_csv(training_input_labels_path)
training_input_labels_columns = list(training_input_labels_data.columns.values)
training_input_labels_columns.append("ImageName")

In [10]:
training_input_labels_data.head()

Unnamed: 0,VideoName,ValueExtraversion,ValueAgreeableness,ValueConscientiousness,ValueNeurotisicm,ValueOpenness
0,GQczMGrVgbc.001.mp4,0.570093,0.703297,0.640777,0.666667,0.544444
1,-utrsarZeIY.004.mp4,0.523364,0.516484,0.436893,0.333333,0.411111
2,3HA2W1s4oP8.001.mp4,0.401869,0.538462,0.427184,0.510417,0.388889
3,hM96SfN5_F4.004.mp4,0.485981,0.637363,0.359223,0.458333,0.566667
4,SgzOYog1pH4.003.mp4,0.53271,0.527473,0.650485,0.458333,0.477778


# Training: Create a List for the output label file

In [11]:
training_output_labels_data = []

In [12]:
training_output_labels_data

[]

# Training: Read the contents of dataset

In [13]:
training_video_filenames = os.listdir(training_video_path)

# Training: Create dataset of images from each video

In [14]:
def save_frames_from_videos(video, n_random_frames, target_dir, target_filename):
    
    np.random.seed(100)
    
    count = 0
    file_iter = 0
    n_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    random_frame = np.random.randint(1, n_frames, size = n_random_frames * 2)
    random_frame = np.sort(random_frame)
    random_frame = np.unique(random_frame)
                      
    while(video.isOpened()):
        ret, frame = video.read()
        count += 1
        if ret == True and file_iter < n_random_frames:
            if count in random_frame:
                
                recface = face_recognition.face_locations(frame)
                
                if len(recface) > 0:
                    top, right, bottom, left = recface[0]
                    
                    width = right - left
                    height = bottom - top
                    center_X = (left + right) / 2
                    center_Y = (top + bottom) / 2

                    # to choose a window of 224 * 224
                    #top = int(center_Y - sample_height / 2)
                    #left = int(center_X - sample_width / 2)
                    #height = sample_height
                    #width = sample_width
                    
                    if (top - 20) > 0:
                        if (top - 40) > 0:
                            top = top - 40
                            height = height + 80
                        else:
                            top = top - 20
                            height = height + 40
                        
                    if (left - 20) > 0:
                        if (left - 40) > 0:
                            left = left - 40
                            width = width + 80
                        else:
                            left = left - 20
                            width = width + 40
                    
                    if(top >=0 and left >= 0):
                        face_image = frame[top:(top + height), left:(left + width)]

                        # write the flipped frame
                        if not os.path.exists(target_dir):
                            os.mkdir(target_dir)
                        face_image = cv2.resize(face_image, (sample_width, sample_height))
                        
                        # flip the frame for randomness
                        #random_flip = np.random.randint(-1, 2)
                        #face_image = cv2.flip(face_image, random_flip)

                        cv2.imwrite(target_dir + target_filename + "_%d.jpg" % (file_iter+1), face_image)
                        #cv2.imshow('frame',face_image)
                        file_iter += 1
                    else:
                        print("Dimension less than zero for video: ", target_filename)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    if file_iter == n_random_frames:
        return True
    else:
        return False

In [15]:
def getOutputLabels(video_filenames, input_labels_data, video_path, image_path, n_frames, returnLabels):
    output_labels_data = []
    for curr_video_id in video_filenames:
        # for train and validation:
        if returnLabels:
            if curr_video_id in input_labels_data["VideoName"].values:
                curr_video_data = input_labels_data[input_labels_data["VideoName"] == curr_video_id]
                curr_videoName = curr_video_data["VideoName"].values[0]
                curr_E = curr_video_data["ValueExtraversion"].values[0]
                curr_A = curr_video_data["ValueAgreeableness"].values[0]
                curr_C = curr_video_data["ValueConscientiousness"].values[0]
                curr_N = curr_video_data["ValueNeurotisicm"].values[0]
                curr_O = curr_video_data["ValueOpenness"].values[0]
                curr_frameName = curr_videoName[:-4]        

                curr_video = cv2.VideoCapture(video_path + curr_videoName)

                if (save_frames_from_videos(curr_video, n_frames, image_path, curr_frameName) == True):
                    for i in range(0, n_frames):
                        curr_ImageName = curr_frameName + "_" + str(i+1)+".jpg"
                        output_labels_data.append([curr_videoName, curr_E, curr_A, curr_C, curr_N, curr_O, curr_ImageName])
                else:
                    print("Failed: ", curr_videoName)

            else:
                print("Video details not found in the input label file.")
                curr_video.release()
        # for test:
        else:
            curr_frameName = curr_video_id[:-4]
            curr_video = cv2.VideoCapture(video_path + curr_video_id)
            save_frames_from_videos(curr_video, n_frames, image_path, curr_frameName)
            
    return output_labels_data

In [16]:
n_frames_from_video = 5

training_output_labels_data = getOutputLabels(training_video_filenames, training_input_labels_data, 
                                                training_video_path, training_image_path,
                                                n_frames_from_video, True)

Failed:  36NZ0sHWQFg.001.mp4
Failed:  8eeZjC_bmtc.005.mp4
Failed:  8mZZlnbmOYE.000.mp4
Failed:  8mZZlnbmOYE.001.mp4
Failed:  8mZZlnbmOYE.003.mp4
Failed:  A0braVJH3Pw.000.mp4
Failed:  aaDlp62qn60.002.mp4
Failed:  Fe9_SVPd_5I.005.mp4
Failed:  IwfA-squ7Oo.002.mp4
Failed:  JIR4aPcyrn8.001.mp4
Failed:  lNaZ4aJaiBU.000.mp4
Failed:  NDBCrVvp0Vg.000.mp4
Failed:  syTTeox8Yaw.003.mp4
Failed:  TWKKCoT4FTc.001.mp4


# Training: Create a DataFrame of the output labels

In [17]:
training_output_labels_data = pd.DataFrame(training_output_labels_data, columns=training_input_labels_columns)

In [18]:
training_output_labels_data.head()

Unnamed: 0,VideoName,ValueExtraversion,ValueAgreeableness,ValueConscientiousness,ValueNeurotisicm,ValueOpenness,ImageName
0,-2qsCrkXdWs.001.mp4,0.476636,0.593407,0.572816,0.604167,0.611111,-2qsCrkXdWs.001_1.jpg
1,-2qsCrkXdWs.001.mp4,0.476636,0.593407,0.572816,0.604167,0.611111,-2qsCrkXdWs.001_2.jpg
2,-2qsCrkXdWs.001.mp4,0.476636,0.593407,0.572816,0.604167,0.611111,-2qsCrkXdWs.001_3.jpg
3,-2qsCrkXdWs.001.mp4,0.476636,0.593407,0.572816,0.604167,0.611111,-2qsCrkXdWs.001_4.jpg
4,-2qsCrkXdWs.001.mp4,0.476636,0.593407,0.572816,0.604167,0.611111,-2qsCrkXdWs.001_5.jpg


# Training: Export the output labels dataframe to a CSV

In [19]:
training_output_labels_data.to_csv(training_output_labels_path, index=False)

# Validation: Read the input label file

In [20]:
validation_input_labels_data = pd.read_csv(validation_input_labels_path)
validation_input_labels_columns = list(validation_input_labels_data.columns.values)
validation_input_labels_columns.append("ImageName")

In [21]:
validation_input_labels_data.head()

Unnamed: 0,VideoName,ValueExtraversion,ValueAgreeableness,ValueConscientiousness,ValueNeurotisicm,ValueOpenness
0,DrlC4bEYcmw.001.mp4,0.691589,0.615385,0.524272,0.635417,0.566667
1,o7rFDFvW300.000.mp4,0.401869,0.43956,0.320388,0.40625,0.555556
2,Wx_oe0SxD9w.004.mp4,0.485981,0.67033,0.601942,0.625,0.577778
3,d4cPiUXpGbc.004.mp4,0.523364,0.593407,0.543689,0.520833,0.566667
4,yp-tfq1NxBk.005.mp4,0.53271,0.593407,0.524272,0.604167,0.666667


# Validation: Create a List for the output label file

In [22]:
validation_output_labels_data = []

# Validation: Read the contents of dataset

In [23]:
validation_video_filenames = os.listdir(validation_video_path)

# Validation: Create dataset of images from each video

In [24]:
n_frames_from_video = 5

validation_output_labels_data = getOutputLabels(validation_video_filenames, validation_input_labels_data, 
                                                validation_video_path, validation_image_path,
                                                n_frames_from_video, True)

Failed:  0axZSeaUbfs.003.mp4
Failed:  2TXrDZgbDHE.002.mp4
Failed:  6M8OQNo64Tc.000.mp4
Failed:  CFK8ib0aWe8.004.mp4
Failed:  fKrX-KXgXYM.001.mp4
Failed:  LRczShwIVbM.002.mp4
Failed:  Me22JENkhJA.001.mp4
Failed:  p-OcwNFQB0U.003.mp4
Failed:  YdxS3f4HXaA.001.mp4
Failed:  YdxS3f4HXaA.002.mp4


# Validation: Create a DataFrame of the output labels

In [25]:
validation_output_labels_data = pd.DataFrame(validation_output_labels_data, columns=validation_input_labels_columns)

In [26]:
validation_output_labels_data.head()

Unnamed: 0,VideoName,ValueExtraversion,ValueAgreeableness,ValueConscientiousness,ValueNeurotisicm,ValueOpenness,ImageName
0,-6otZ7M-Mro.003.mp4,0.71028,0.681319,0.728155,0.552083,0.666667,-6otZ7M-Mro.003_1.jpg
1,-6otZ7M-Mro.003.mp4,0.71028,0.681319,0.728155,0.552083,0.666667,-6otZ7M-Mro.003_2.jpg
2,-6otZ7M-Mro.003.mp4,0.71028,0.681319,0.728155,0.552083,0.666667,-6otZ7M-Mro.003_3.jpg
3,-6otZ7M-Mro.003.mp4,0.71028,0.681319,0.728155,0.552083,0.666667,-6otZ7M-Mro.003_4.jpg
4,-6otZ7M-Mro.003.mp4,0.71028,0.681319,0.728155,0.552083,0.666667,-6otZ7M-Mro.003_5.jpg


# Validation: Export the output labels dataframe to a CSV

In [27]:
validation_output_labels_data.to_csv(validation_output_labels_path, index=False)

# Test: Create a List for the output label file

In [28]:
test_output_labels_data = []

# Test: Read the contents of dataset

In [29]:
test_video_filenames = os.listdir(test_video_path)

# Test: Create dataset of images from each video

In [30]:
n_frames_from_video = 5

test_output_labels_data = getOutputLabels(test_video_filenames, validation_input_labels_data, 
                                                test_video_path, test_image_path,
                                                n_frames_from_video, True)

# Test: Create a DataFrame of the output labels

In [31]:
test_output_labels_data = pd.DataFrame(test_output_labels_data, columns=validation_input_labels_columns)

# Test: Export the output labels dataframe to a CSV

In [32]:
test_output_labels_data.to_csv(test_output_labels_path, index=False)