<a href="https://colab.research.google.com/github/joplaete/faceMLtools/blob/main/Face_detect_recog_extract.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python
!pip install pillow

!pip install face-recognition

!wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_alt2.xml

In [None]:
# make sure you got a gpu runtime!
# nvidia-smi reports the NVIDIA GPUs visible to this runtime, so its output
# shows whether one is attached.
# NOTE(review): in Colab the runtime type is presumably changed via
# Runtime > Change runtime type -- confirm against the current UI.
!nvidia-smi

In [None]:
#@title Connect your drive that hosts your files
# Mount Google Drive at /content/drive. The input/output paths configured in
# the later cells all live under /content/drive/MyDrive, so this cell must
# run before them.
from google.colab import drive
drive.mount('/content/drive')

In [4]:
#@title Imports and Supporting Function, run once
import os, time
import subprocess

import cv2
import face_recognition
import numpy as np
from PIL import Image

def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (rows, columns), i.e.
            (height, width), as read from ``image.shape[:2]``.
        width: Target width in pixels. When both ``width`` and ``height`` are
            given, ``width`` wins and ``height`` is ignored.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself (not a copy) when neither
        ``width`` nor ``height`` is given.
    """
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image

    if width is None:
        # Scale so the height matches; the width follows from the ratio.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale so the width matches; the height follows from the ratio.
        r = width / float(w)
        dim = (width, int(h * r))

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(image, dim, interpolation = inter)
    return resized

In [None]:
#@title Compute encodings for Face Recognition

# images of person to find (helps to align them first)
person_to_find_images = r"/content/drive/MyDrive/FACE_RECOG_EXTRACT/PERSON_TO_FIND/daniel_craig/aligned" # @param str
# encodings get saved and re-used if found in folder, unless force_recompute is on
force_recompute = True #@param {type:"boolean"}

# File extensions (compared case-insensitively) that count as reference images.
image_extensions = ('.jpg', '.jpeg')

# The cache file sits next to the reference images it was computed from.
encodings_path = os.path.join(person_to_find_images, 'encodings.npy')
if not force_recompute and os.path.exists(encodings_path):
    print('Found and using existing encoding: ', encodings_path)
    print('(delete this or enabled force_recompute to recompute when image set has changed)')
    encodings = np.load(encodings_path)
else:
    print('Build face encodings')
    encodings = []
    # os.listdir returns names in arbitrary order; sorting keeps the order of
    # the saved encodings reproducible between runs.
    for im in sorted(os.listdir(person_to_find_images)):
        # Match on the real extension so e.g. "x.JPG" is picked up and
        # non-image files that merely contain "jpg" in their name are not.
        if not im.lower().endswith(image_extensions):
            continue
        known_image = face_recognition.load_image_file(os.path.join(person_to_find_images, im))
        encoding = face_recognition.face_encodings(known_image)
        if len(encoding):
            print('-->> Adding: ', im)
            # Only the first encoding returned for the image is kept.
            encodings.append(encoding[0])
        else:
            print('-->> Skipping (no face found): ', im)

    # Only write the cache when it was actually rebuilt and is non-empty, so a
    # failed run cannot leave an empty cache that later runs silently re-use.
    if encodings:
        np.save(encodings_path, encodings)
    else:
        print('WARNING: no face encodings were built from', person_to_find_images)

In [None]:
#@title Find and Extract Recognized faces

mp4path = r"/content/drive/MyDrive/FACE_RECOG_EXTRACT/MEDIA_TO_SEARCH/Daniel Craig James Bond Monologue - SNL.mp4" # @param str

# OUT
out = r"/content/drive/MyDrive/FACE_RECOG_EXTRACT/OUT/daniel_craig__late_night_show" # @param str
out_prexix = r'out.' # @param str
# makedirs also creates missing parent folders and is a no-op if `out` exists.
os.makedirs(out, exist_ok=True)

# use 1 not 0 for start 
start_offset =  500# @param int
limit_frames =-1 # @param int # handy for testing, will only compute limit_frames
padding = 150 # @param float # padding for extraction
min_face_size =  60# @param float # skip w or h smaller than this
required_matches =  2# @param int
# Frames in which the cascade reports more faces than this are skipped.
max_faces_per_frame = 6

# Slice bounds must be integers, whatever type the form field produced.
pad = int(padding)

face_cascade = cv2.CascadeClassifier(r"/content/haarcascade_frontalface_alt2.xml")
if face_cascade.empty():
    raise IOError('Could not load /content/haarcascade_frontalface_alt2.xml (run the install cell first)')

vidcap = cv2.VideoCapture(mp4path)
if not vidcap.isOpened():
    raise IOError(f'Could not open video: {mp4path}')

# start_offset is 1-based while OpenCV frame positions are 0-based.
first_frame = max(start_offset - 1, 0)
vidcap.set(cv2.CAP_PROP_POS_FRAMES, first_frame)
print('cv2.CAP_PROP_POS_FRAMES:', vidcap.get(cv2.CAP_PROP_POS_FRAMES))

print('\n START PROCESSING', vidcap)
print(mp4path)
print(out)
print('\n\n')

frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)

# `count` is the 0-based index of the frame currently being processed. It is
# advanced right after every successful read, so no later `continue` can make
# it drift away from the real frame position (which would make output file
# names collide).
count = first_frame - 1
frames_done = 0
try:
    # A negative limit_frames (default -1) means "no limit".
    while limit_frames < 0 or frames_done < limit_frames:
        success, image = vidcap.read()
        if not success:
            print('-- FRAME READ FAILED, aborting.')
            break
        count += 1
        frames_done += 1

        # Some containers report a frame count of 0; avoid dividing by it.
        percent = round(count / frame_count * 100) if frame_count > 0 else 0
        print('\nProcess frame: ', count, '/', int(frame_count), '  --  ', percent, '%')

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces using opencv
        faces = face_cascade.detectMultiScale(gray, 1.1, 5)
        print('>> Found', len(faces),'face(s)')
        if len(faces) > max_faces_per_frame:
            continue

        saved_in_frame = 0
        for i, (x, y, w, h) in enumerate(faces):
            print('------ Face', i)
            if h < min_face_size or w < min_face_size:
                print(f'too small ({min_face_size})')
                continue

            x, y, w, h = int(x), int(y), int(w), int(h)
            # Clamp the padded box at the top/left border: a negative slice
            # start would wrap around to the other side of the array and give
            # an empty or wrong crop. NumPy already clamps slice ends that run
            # past the bottom/right border.
            top = max(y - pad, 0)
            left = max(x - pad, 0)
            face = image[top:y + h + pad, left:x + w + pad]

            # protect against dodgy images
            if face.shape[0]==0 or face.shape[1]==0:
                continue

            # The frame was decoded by OpenCV (BGR); convert the crop to RGB
            # before computing encodings.
            unknown_image = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            # NOTE(review): the PIL round-trip yields a fresh array copy --
            # presumably to hand face_recognition a plain buffer; confirm
            # whether it is still needed.
            unknown_image = np.array(Image.fromarray(unknown_image))
            unknown_encodings = face_recognition.face_encodings(unknown_image)
            if not len(unknown_encodings):
                print('No encodings found')
                continue

            # One result per reference encoding; require several of them to
            # agree before the crop is saved.
            results = face_recognition.compare_faces(encodings, unknown_encodings[0])
            print(results)
            match_count = results.count(True)
            print('Found', match_count, 'matche(s) !')
            if match_count >= required_matches:
                # The first match of a frame keeps the plain name; further
                # matches in the same frame get a numeric suffix instead of
                # overwriting it.
                suffix = f'_{saved_in_frame}' if saved_in_frame else ''
                cv2.imwrite(os.path.join(out, out_prexix + str(count).zfill(6) + suffix + '.jpg'), face)
                saved_in_frame += 1
finally:
    # Always free the decoder, also when the loop is interrupted or raises.
    vidcap.release()

In [None]:
#@title ZIP Result
# normpath drops a trailing slash so out_name is never empty.
zip_dir, out_name = os.path.split(os.path.normpath(out))
# The archive is written next to the output folder, named after it without spaces.
zip_path = os.path.join(zip_dir, out_name.replace(' ','')+'.zip')
print('Inflating ', out_name, ' ...')
# Pass the arguments as a list (no shell involved) so spaces or shell
# metacharacters in the Drive paths cannot split or alter the command.
# check=True raises CalledProcessError when zip exits non-zero instead of
# silently continuing with a missing or incomplete archive.
subprocess.run(['zip', '-r', zip_path, out], check=True)
print('Find in Drive =>', zip_path)


In [None]:
#@title The ZIP already lives in your Drive, will be faster to download it there. Alternatively you can download it here as well.
# Passes zip_path (set by the "ZIP Result" cell, which must run first) to
# Colab's files.download helper.
from google.colab import files
files.download(zip_path)