In [1]:
# Basic constants

# Root of the project checkout; all directories below are located under it.
_PROJECT_ROOT = '/home/ubuntu/visual_search_assistant'

# Default directory process_video writes annotated videos into.
OUTPUT_DIR = _PROJECT_ROOT + '/M3/results'
# Directory listed to build the library of known-face embeddings.
FACE_LIB_DIR = _PROJECT_ROOT + '/M3/library/'
# Directory the test videos are downloaded into.
DATA_DIR = _PROJECT_ROOT + '/data'

# process_video skips every frame whose 1-based index is not a multiple of this.
SAMPLE_FRAME_FREQ = 2
# process_video prints a progress line once per this many frames read.
LOG_FREQ = 50

# NOTE(review): process_video assigns its own local batch_size, so this
# module-level value is not read by any code visible in this notebook.
batch_size = 128

# Placeholders; not read by any code visible in this notebook.
input_video = None
output_video = None


## Download Test Videos

Functions to download a YouTube video from a link and to clip a segment out of it.

In [79]:
from pytube import YouTube 
from moviepy.editor import *

def download_video(link,output_path,output_title=None):
    """
    Download the first stream listed for a YouTube video.

    link: URL of the YouTube video
    output_path: directory the file is downloaded into
    output_title: optional file name, forwarded to pytube as `filename`

    Returns the title of the downloaded stream.
    """
    stream = YouTube(link).streams.first()
    stream.download(filename=output_title, output_path=output_path)
    title = stream.title
    print('Download complete for %s' % title)
    return title


def clip_video(video_pth,output_pth,time):
    """
    Cut the [start, end] segment out of a video and write it to disk.

    video_pth: /path/to/input/video
    output_pth: /path/to/output/video; when None the clip is written next to
        the input as <name>_<start>_<end><extension>
    time: list of start and end time in seconds to clip video
    """
    # Local import so the function does not depend on `os` being imported by
    # another cell.
    import os

    start,end = time
    if output_pth is None:
        # Only the file name gets the suffix. A plain str.replace on the whole
        # path would also rewrite directory components that happen to contain
        # the same text as the file name.
        folder, file_name = os.path.split(video_pth)
        stem, ext = os.path.splitext(file_name)
        output_pth = os.path.join(folder, '%s_%d_%d%s' % (stem, start, end, ext))

    source = VideoFileClip(video_pth)
    try:
        source.subclip(start,end).write_videofile(output_pth)
    finally:
        # Release the underlying reader even if writing fails.
        source.close()
        

Specify the link and download the video.

In [72]:
# Imported here explicitly so this cell does not rely on `os` leaking in from
# a later cell or from a star import.
import os

# Alternative clip that the original cell assigned first and then overwrote
# (pairing of link and file name presumed from the assignment order - confirm
# before re-enabling):
#   download_link = 'https://www.youtube.com/watch?v=oHddkxW5IqU'
#   fname = 'radio_star_.mp4'
download_link = 'https://www.youtube.com/watch?v=X4uo7uKR61M'
fname = 'parks_and_rec_.mp4'

# Skip the download when the target file already exists in DATA_DIR.
output_pth = os.path.join(DATA_DIR,fname)
if not os.path.exists(output_pth):
    download_video(download_link,DATA_DIR,fname)

Download complete for Donna Wakes Up Everyday And Chooses Being Cool | Parks and Recreation


In [75]:
from IPython.core.display import Video
# Show the downloaded file in the notebook; the Video object is the cell's
# last expression, so it is rendered as the cell output.
downloaded_clip_pth = '/home/ubuntu/visual_search_assistant/data/Donna Wakes Up Everyday And Chooses Being Cool  Parks and Recreation.mp4'
Video(downloaded_clip_pth)

## Library of face embeddings

In [2]:
import os
import face_recognition

# Build the library of known faces: all_embeddings[i] is the embedding of the
# person named all_names[i].
# Sorted so the library order is reproducible (os.listdir order is arbitrary).
faces = sorted(os.listdir(FACE_LIB_DIR))
all_embeddings = []
all_names = []
for file in faces:
    face_pth = os.path.join(FACE_LIB_DIR,file)
    if not os.path.isfile(face_pth):
        # Directory entries (e.g. a checkpoint folder) cannot be loaded as images.
        continue
    face = face_recognition.load_image_file(face_pth)
    encodings = face_recognition.face_encodings(face)
    if len(encodings) == 0:
        # Indexing [0] would raise IndexError when no face is detected in the
        # image; report the file and leave it out of the library instead.
        print('No face found in %s, skipping' % file)
        continue
    # Only the first detected face of each image is used.
    all_embeddings.append(encodings[0])
    # The label is the file name up to its first dot.
    name = file.split('.')[0]
    all_names.append(name)
    

In [12]:
import os
import cv2
import face_recognition
import numpy as np


def _label_for_face(embedding, detection_threshold):
    """Return the name of the closest library face, or 'Unknown' if none is close enough."""
    if len(all_embeddings) == 0:
        # np.argmin raises on an empty array; with no library nothing can match.
        return 'Unknown'
    face_dist = face_recognition.face_distance(all_embeddings, embedding)
    best = int(np.argmin(face_dist))
    # face_distance is a distance (smaller means more similar), so a match is
    # accepted only when the distance does not exceed the threshold.
    if face_dist[best] <= detection_threshold:
        return all_names[best]
    return 'Unknown'


def _annotate_and_write(frames, out, detection_threshold):
    """Detect and label the faces in a batch of BGR frames, then write the frames to `out`."""
    # OpenCV decodes frames as BGR while face_recognition works on RGB images.
    # cvtColor returns a new array, so boxes are still drawn on the BGR frame.
    rgb_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]
    batch_face_locations = face_recognition.batch_face_locations(rgb_frames)
    font = cv2.FONT_HERSHEY_DUPLEX
    for frame, rgb_frame, face_locations in zip(frames, rgb_frames, batch_face_locations):
        embeddings = face_recognition.face_encodings(rgb_frame, face_locations)
        for embd, (top, right, bottom, left) in zip(embeddings, face_locations):
            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            # Draw a filled box below the face to hold the name label
            cv2.rectangle(frame, (left, bottom + 35), (right, bottom), (0, 0, 255), cv2.FILLED)
            name = _label_for_face(embd, detection_threshold)
            cv2.putText(frame, name, (left + 6, bottom + 6), font, 0.5, (255, 255, 255), 1)
        out.write(frame)


def process_video(input_pth,output_pth=None,use_gpu=True,recognition=False,detection_threshold=0.7):
    """
    Detect and label faces in a video and write the annotated frames to a new video.

    Only every SAMPLE_FRAME_FREQ-th frame is processed and written; the output
    frame rate is reduced accordingly so playback speed matches the source.

    input_pth: /path/to/input/video
    output_pth: /path/to/output/video; defaults to OUTPUT_DIR/<input file name>
    use_gpu: selects the detection batch size (16 when True, 1 when False)
    recognition: currently not used - faces are always matched against the
        library; kept so existing callers keep working
    detection_threshold: largest face distance still accepted as a match;
        faces further away from every library entry are labelled 'Unknown'

    Raises IOError when the input video cannot be opened.
    """
    if output_pth is None:
        output_pth = os.path.join(OUTPUT_DIR, os.path.basename(input_pth))
    out_dir = os.path.dirname(output_pth)
    if out_dir:
        # Make sure the target directory exists before the writer is created.
        os.makedirs(out_dir, exist_ok=True)

    batch_size = 16 if use_gpu else 1

    video_capture = cv2.VideoCapture(input_pth)
    out = None
    try:
        if not video_capture.isOpened():
            raise IOError('Could not open video %s' % input_pth)

        frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Frames are subsampled, so the output frame rate is scaled down by the
        # same factor; fall back to 30 fps when the source rate is unavailable.
        src_fps = video_capture.get(cv2.CAP_PROP_FPS)
        out_fps = src_fps / SAMPLE_FRAME_FREQ if src_fps and src_fps > 0 else 30.0

        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        out = cv2.VideoWriter(output_pth, fourcc, out_fps, (frame_width, frame_height))

        frame_count = 0
        frames = []
        print('='*20,'Start Face Detection and Recognition','='*20)
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            if not ret:
                break
            if frame_count % LOG_FREQ == LOG_FREQ - 1:
                print('Processed %d frames'% frame_count)

            frame_count += 1

            # skip frames
            if frame_count % SAMPLE_FRAME_FREQ > 0:
                continue

            frames.append(frame)
            if len(frames) == batch_size:
                _annotate_and_write(frames, out, detection_threshold)
                frames = []

        # Flush the last, partially filled batch so trailing frames are not lost.
        if frames:
            _annotate_and_write(frames, out, detection_threshold)
    finally:
        # Release the reader and the writer even when processing fails.
        video_capture.release()
        if out is not None:
            out.release()
    print('='*15,'Done Processing %s to %s' % (input_pth,output_pth),'='*15)
        

In [13]:
# Run the pipeline on a short test clip stored in DATA_DIR; the annotated
# result goes to process_video's default output location.
# The path is built from DATA_DIR instead of repeating the absolute path.
test_input_video = os.path.join(DATA_DIR,'radio_star_10_20.mp4')
process_video(test_input_video)

Processed 49 frames
Processed 99 frames
Processed 149 frames
Processed 199 frames
Processed 249 frames
Processed 299 frames


In [22]:
# NOTE(review): `res` is not defined in any cell visible in this notebook, so
# this cell fails on a fresh kernel. Presumably it held a list of face
# embeddings (the recorded output is (128,)) - define it here or drop the cell.
res[0].shape


(128,)