In [1]:
#Script that imports the videos found in a folder, extracts their frames (30 frames per second),
#and processes them: the landmarks are annotated and the (x,y) position of each landmark is saved to a csv file,
#together with a label identifying the frame in question
#Written by Hugo Padovani

#Importação de módulos
import os
import glob
import cv2
from skimage import io
import sys
import cv2
import dlib
import numpy as np
import pandas as pd
import re

# --- Paths ---------------------------------------------------------------
# Video folder: ATTENTION, this is the only thing that should need changing in the script.
# The trailing '/' matters because other code in this notebook concatenates it with file names.
video_path = '/home/hugo/Documents/TFC/Videos/teste/'
# Landmark predictor model trained with dlib
PREDICTOR_PATH = '/home/hugo/Documents/TFC/Python/shape_predictor_68_face_landmarks.dat'
# Classifier trained with OpenCV (frontal-face Haar cascade, judging by the file name)
cascade_path = '/home/hugo/Documents/TFC/Python/haarcascade_frontalface_default.xml'

# --- Detection objects ---------------------------------------------------
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# NOTE(review): `detector` is not used by any function visible in this notebook
detector = dlib.get_frontal_face_detector()
cascade = cv2.CascadeClassifier(cascade_path)

# dlib window in which each frame is displayed while it is processed
win = dlib.image_window()

def get_landmarks(im):
    """Compute the facial landmarks of the face detected in ``im``.

    The face is located with the module-level Haar ``cascade`` and the
    landmarks are computed by the module-level dlib ``predictor``.

    Parameters
    ----------
    im : image array
        3-channel image; it is converted to grey with ``cv2.COLOR_BGR2GRAY``
        for the face detection and passed unchanged to the predictor.

    Returns
    -------
    numpy.matrix or int
        A matrix with one ``[x, y]`` row per landmark, or the integer ``1``
        when no face is detected (sentinel value that callers test for).
    """
    # NOTE(review): processing_images loads frames with skimage, which yields RGB
    # order, while this conversion assumes BGR - only the channel weights of the
    # grey image are affected; confirm whether that matters for detection.
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    # Run the cascade once and reuse the result for the log line; the detection
    # used to be executed twice per frame just to print it.
    faces = cascade.detectMultiScale(gray, 1.3, 5)
    print(faces)

    if len(faces) == 0:
        return 1

    # When several faces are detected, the last one is used (same as before).
    x, y, w, h = faces[-1]
    rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))
    return np.matrix([[p.x, p.y] for p in predictor(im, rect).parts()])


def annotate_landmarks(im, landmarks):
    """Draw every landmark (its index and a small circle) on a copy of ``im``.

    Parameters
    ----------
    im : image array
        Image to annotate; it is copied, the original is not modified.
    landmarks : array-like of shape (N, 2)
        One ``(x, y)`` position per landmark. A ``numpy.matrix`` (as returned
        by ``get_landmarks``), a plain array or a list of pairs all work.

    Returns
    -------
    image array
        The annotated copy of ``im``.
    """
    im = im.copy()
    # Going through an (N, 2) array makes plain arrays/lists work as well as
    # np.matrix, and the explicit int() hands OpenCV plain Python integers
    # instead of NumPy scalars for the point coordinates.
    points = np.asarray(landmarks).reshape(-1, 2)
    for idx, (x, y) in enumerate(points):
        pos = (int(x), int(y))
        cv2.putText(im, str(idx), pos,
                    fontFace=cv2.FONT_HERSHEY_SCRIPT_SIMPLEX,
                    fontScale=0.4,
                    color=(0, 0, 255))
        cv2.circle(im, pos, 3, color=(0, 255, 255))
    return im

def video_to_frame(video_path, filename):
    """Extract the frames of a video into ``<video name>/FramesSemLandmarks``.

    Runs ``ffmpeg -i <video> <frames folder>/image%d.jpg``. No frame-rate
    option is passed, so the extraction rate is whatever ffmpeg chooses for
    the input.

    Parameters
    ----------
    video_path : str
        Folder that contains the video and its output folder.
    filename : str
        File name of the video inside ``video_path``. The output folder name
        is the part of ``filename`` before its first ``.``.

    Returns
    -------
    None
        A message is printed when ffmpeg cannot be started or exits with a
        non-zero status; no exception is raised in those cases.
    """
    import subprocess  # local import: the notebook's import cell does not provide it

    # Same naming rule the rest of the notebook uses for the per-video folder.
    video_name = filename.split(".")[0]
    source = os.path.join(video_path, filename)
    pattern = os.path.join(video_path, video_name, "FramesSemLandmarks", "image%d.jpg")

    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact instead of letting the shell split or interpret them.
    try:
        status = subprocess.call(["ffmpeg", "-i", source, pattern])
    except OSError as error:
        print("Could not run ffmpeg for {}: {}".format(source, error))
        return
    if status != 0:
        print("ffmpeg exited with status {} for {}".format(status, source))


def _frame_sort_key(path):
    """Sort key that orders frame files by the last number in their file name."""
    digits = re.findall(r'\d+', os.path.basename(path))
    return (int(digits[-1]) if digits else -1, path)


def processing_images(face_folder_path):
    """Annotate the landmarks of every extracted frame and export them as CSV.

    ``face_folder_path`` is expected to be ``<video folder>/FramesSemLandmarks``
    (the layout created by the main loop). All outputs are written next to it,
    inside ``<video folder>``:

    * ``FramesComLandmarks/frame<N>.jpg`` - annotated copy of frame N;
    * ``landmarks.csv`` - one row per frame with a detected face: the frame
      label followed by the flattened landmark coordinates (x0, y0, x1, y1, ...);
    * ``<video folder name>.csv`` - the same rows plus the row mean, the row
      variance and a final column holding the first number found in the video
      folder name.

    Frames are visited in the numeric order of their file names; the frame
    label is the 1-based position in that order. Frames for which
    ``get_landmarks`` returns its "no face" sentinel are skipped but still
    consume a label.

    Parameters
    ----------
    face_folder_path : str
        Folder containing the ``*.jpg`` frames of one video.

    Raises
    ------
    IndexError
        If the video folder name contains no digits (raised before any frame
        is processed).
    """
    frames_dir = os.path.normpath(face_folder_path)
    video_dir = os.path.dirname(frames_dir)
    video_name = os.path.basename(video_dir)
    # Derived from the argument instead of the `filename` / `video_path`
    # globals, so the function no longer depends on the main loop's state.
    label = re.findall(r'\d+', video_name)[0]
    landmarks_csv = os.path.join(video_dir, 'landmarks.csv')
    annotated_dir = os.path.join(video_dir, 'FramesComLandmarks')
    if not os.path.isdir(annotated_dir):
        os.makedirs(annotated_dir)  # cv2.imwrite does not create folders

    # Numeric order of the file names: modification times are not a reliable
    # order for files that were all written within the same instant.
    frame_files = sorted(glob.glob(os.path.join(frames_dir, "*.jpg")), key=_frame_sort_key)

    rows_written = 0
    # Opened once and truncated, so running again does not append duplicates.
    with open(landmarks_csv, 'wb') as arquivo:
        for count, f in enumerate(frame_files, start=1):
            print("Processing file: {}".format(f))
            img = io.imread(f)
            win.clear_overlay()
            win.set_image(img)
            landmarks = get_landmarks(img)
            if np.isscalar(landmarks):  # sentinel: no face detected in this frame
                continue
            imagem = annotate_landmarks(img, landmarks)
            # Swap the channel order before handing the image to cv2.imwrite.
            imagem = cv2.cvtColor(imagem, cv2.COLOR_BGR2RGB)
            # Explicit 1 x (1 + 2N) row so savetxt always writes a single line.
            row = np.hstack(([count], np.asarray(landmarks).ravel())).reshape(1, -1)
            np.savetxt(arquivo, row, fmt='%i', delimiter=',')
            cv2.imwrite(os.path.join(annotated_dir, 'frame%d.jpg' % count), imagem)  # save frame as JPEG file
            rows_written += 1

    if rows_written == 0:
        print("No face detected in any frame of {}".format(frames_dir))
        return

    # pd.DataFrame.from_csv no longer exists in current pandas; read_csv with
    # index_col=0 reads the frame label as the index in the same way.
    df = pd.read_csv(landmarks_csv, header=None, index_col=0)
    df_mean = df.mean(axis=1).rename('mean')
    df_var = df.var(axis=1).rename('var')
    df = pd.concat([df, df_mean, df_var], axis=1)
    df['last'] = label
    df.to_csv(os.path.join(video_dir, video_name + '.csv'), header=False)
    
        
        
#Main loop: goes through the videos found in video_path, creates the folders that store the frames,
#calls the frame-extraction function and then the image-processing function.
#The videos are looked up inside video_path (not in the current working directory), and
#`filename` is the bare file name because the helper functions combine it with video_path themselves.
#os.makedirs raises if a folder already exists, so a video that already has an output folder
#stops the run instead of being mixed with earlier results.
for filename in sorted(os.path.basename(p) for p in glob.glob(os.path.join(video_path, '*.mp4'))):
    output_dir = os.path.join(video_path, filename.split(".")[0])
    frames_dir = os.path.join(output_dir, 'FramesSemLandmarks')
    os.makedirs(frames_dir)  # also creates output_dir itself
    os.makedirs(os.path.join(output_dir, 'FramesComLandmarks'))
    video_to_frame(video_path, filename)
    processing_images(frames_dir)

Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image1.jpg
[[773 274 491 491]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image2.jpg
[[781 283 478 478]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image3.jpg
[[773 280 482 482]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image4.jpg
[[777 278 483 483]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image5.jpg
[[774 278 485 485]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image6.jpg
[[765 275 502 502]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image7.jpg
[[769 275 492 492]]
Processing file: /home/hugo/Documents/TFC/Videos/teste/Em5_Fala1_DanielTonsig/FramesSemLandmarks/image8.

In [142]:
# Load the per-frame landmark rows of one video (first column = frame label, used as the index)
# and append the row-wise mean and variance, like the end of processing_images does.
# pd.DataFrame.from_csv no longer exists in current pandas; read_csv with index_col=0 replaces it.
df = pd.read_csv('/home/hugo/Documents/TFC/teste/Em6_Fala2_Neutra_CarolinaHolly/landmarks.csv',
                 header=None, index_col=0)
df_mean = df.mean(axis=1)
df_var = df.var(axis=1)
df = pd.concat([df, df_mean, df_var], axis=1)
# Show floats with two decimals (this changes the display option for the whole session).
pd.options.display.float_format = '{:,.2f}'.format
df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,129,130,131,132,133,134,135,136,0,1
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,827,583,832,633,841,681,849,729,863,774,...,1061,772,1024,769,1007,770,988,768,849.88,35858.58
2,828,583,833,633,841,680,849,728,862,773,...,1060,772,1024,769,1007,770,989,768,849.88,35858.02
3,828,583,833,633,841,681,848,728,861,773,...,1060,772,1024,769,1006,771,988,768,849.71,35699.7
4,828,586,833,636,842,684,849,731,863,776,...,1061,772,1023,770,1007,771,989,768,850.34,35794.55
5,828,587,833,636,841,683,848,729,861,773,...,1060,772,1023,770,1007,771,989,768,850.18,35733.85


In [103]:
# NOTE(review): `file` is not defined anywhere in the visible notebook; this cell
# relies on a variable left over from an earlier session - define it before running.
# Build the matrix and drop its last column in one step.
X = np.matrix(file)[:, :-1]
# Row-wise statistics (axis=1 gives one value per row).
X_mean = X.mean(axis=1)
X_var = X.var(axis=1)
X_mean
# Disabled follow-up from the original cell: concatenate X, X_mean and X_var column-wise,
# wrap the result in a DataFrame and save it to 'teste3.csv' without a header.

matrix([[ 849.875     ],
        [ 849.71323529],
        [ 850.33823529],
        [ 850.18382353],
        [ 850.19117647],
        [ 850.66176471],
        [ 850.80882353],
        [ 850.26470588],
        [ 850.125     ],
        [ 850.07352941],
        [ 850.44852941],
        [ 850.11029412],
        [ 850.36029412],
        [ 850.17647059],
        [ 849.21323529],
        [ 847.90441176],
        [ 846.97794118],
        [ 845.63970588],
        [ 844.95588235],
        [ 844.61764706],
        [ 844.48529412],
        [ 843.82352941],
        [ 843.75      ],
        [ 843.18382353],
        [ 842.86764706],
        [ 842.61764706],
        [ 843.02941176],
        [ 843.10294118],
        [ 843.02941176],
        [ 842.94117647],
        [ 842.63970588],
        [ 843.32352941],
        [ 843.59558824],
        [ 844.76470588],
        [ 844.83088235],
        [ 845.97058824],
        [ 845.875     ],
        [ 846.32352941],
        [ 846.07352941],
        [ 845.79411765],


In [6]:
import re

# Quick check of the rule used to label a video: the first run of digits in its name.
teste = 'Em22_Fala1'
re.search(r'\d+', teste).group()

'22'