In [2]:
import math
from sklearn import neighbors
import os
import os.path
import pickle
from PIL import Image, ImageDraw
import face_recognition
from face_recognition.face_recognition_cli import image_files_in_folder


In [3]:
def train(train_dir, model_save_path=None, n_neighbors=None, knn_algo='ball_tree', verbose=False):
    """
    Trains a k-nearest neighbors classifier for face recognition.

    Every image that contains exactly one detected face contributes one
    encoding to the training set, labelled with the name of the
    sub-directory it was found in. Images with zero or several faces are
    skipped (and reported when ``verbose`` is true).

    :param train_dir: directory that contains a sub-directory for each known person, with its name.

     (View in source code to see train_dir example tree structure)

     Structure:
        <train_dir>/
        ├── <person1>/
        │   ├── <somename1>.jpeg
        │   ├── <somename2>.jpeg
        │   ├── ...
        ├── <person2>/
        │   ├── <somename1>.jpeg
        │   └── <somename2>.jpeg
        └── ...

    :param model_save_path: (optional) path to save model on disk (written with pickle)
    :param n_neighbors: (optional) number of neighbors to weigh in classification.
        When omitted, round(sqrt(number of usable training images)) is used.
    :param knn_algo: (optional) underlying data structure to support knn. Default is ball_tree
    :param verbose: verbosity of training
    :return: returns knn classifier that was trained on the given data.
    :raises ValueError: if no image in ``train_dir`` yielded a usable face encoding.
    """
    X = []
    y = []

    # os.listdir() gives no ordering guarantee; sorting makes the training set
    # (and therefore the fitted model) reproducible from run to run.
    for class_dir in sorted(os.listdir(train_dir)):
        person_dir = os.path.join(train_dir, class_dir)
        if not os.path.isdir(person_dir):
            # Stray files next to the person directories are not classes.
            continue

        # Loop through each training image for the current person
        for img_path in sorted(image_files_in_folder(person_dir)):
            image = face_recognition.load_image_file(img_path)
            face_bounding_boxes = face_recognition.face_locations(image)

            if len(face_bounding_boxes) != 1:
                # If there are no people (or too many people) in a training image, skip the image.
                if verbose:
                    print("Image {} not suitable for training: {}".format(img_path, "Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
                continue

            # Add face encoding for current image to the training set
            X.append(face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0])
            y.append(class_dir)

    # Fail early with an actionable message. Without this guard an empty
    # training set leads to n_neighbors == 0 and an unrelated-looking error
    # from the classifier further down.
    if not X:
        raise ValueError(
            "No usable training images found in {}: each person needs at least "
            "one image containing exactly one face".format(train_dir)
        )

    # Determine how many neighbors to use for weighting in the KNN classifier
    if n_neighbors is None:
        n_neighbors = int(round(math.sqrt(len(X))))
        if verbose:
            print("Chose n_neighbors automatically:", n_neighbors)

    # Create and train the KNN classifier
    knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, algorithm=knn_algo, weights='distance')
    knn_clf.fit(X, y)

    # Save the trained KNN classifier
    if model_save_path is not None:
        with open(model_save_path, 'wb') as f:
            pickle.dump(knn_clf, f)

    return knn_clf

In [None]:
import face_recognition
import cv2
import numpy as np

# STEP 1: Train the KNN classifier and save it to disk
# Once the model is trained and saved, you can skip this step next time.
print("Training KNN classifier...")
knn_clf = train("alx_examples/train", model_save_path="alx_examples/trained_knn_model.clf", n_neighbors=2)
print("Training complete!")

# STEP 2: Using the trained classifier, make predictions for unknown images

# Open the input movie file
input_video_path = "alx_examples/test/DSC_0273.mp4"
input_movie = cv2.VideoCapture(input_video_path)
if not input_movie.isOpened():
    # Without this check a missing/unreadable file only surfaces later as a
    # ZeroDivisionError in the FPS computation.
    raise IOError("Could not open input video: {}".format(input_video_path))

# Number of frames reported for the video
length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))

# Frame dimensions
height = int(input_movie.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(input_movie.get(cv2.CAP_PROP_FRAME_WIDTH))

# Duration of the video in seconds: seek to the end, read the timestamp,
# then rewind to the beginning so the frame loop starts at frame 0.
input_movie.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
seconds = input_movie.get(cv2.CAP_PROP_POS_MSEC) / 1000
input_movie.set(cv2.CAP_PROP_POS_AVI_RATIO, 0)

# Frames per second. The duration probe can report 0; fall back to the FPS
# property reported by the capture instead of dividing by zero.
if seconds > 0:
    fps = round(length / seconds, 2)
else:
    fps = round(input_movie.get(cv2.CAP_PROP_FPS), 2)
print(fps, width, height )

# Create an output movie file (make sure resolution/frame rate matches input video!)
output_video_path = 'alx_examples/result/DSC_0273_output.avi'
# Make sure the target directory exists before the writer is opened.
os.makedirs(os.path.dirname(output_video_path), exist_ok=True)
# FourCC is a 4-byte code used to specify the video codec
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# cv2.VideoWriter(output_file_name, FourCC code, number_of_frames_per_second(fps), frame size)
output_movie = cv2.VideoWriter(output_video_path, fourcc, float(fps), (int(width), int(height)))
if not output_movie.isOpened():
    input_movie.release()
    raise IOError("Could not open output video for writing: {}".format(output_video_path))


# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
frame_number = 0
distance_threshold = 0.6
# How many of the most recent entries of pre_locations (the current frame
# included) are searched when the current frame has no detection.
lookback_frames = 10
font = cv2.FONT_HERSHEY_DUPLEX

# Detected face_locations of every frame processed so far (one entry per frame)
pre_locations = []

try:
    while True:
        # Grab a single frame of video.
        # read() returns a bool 'ret' telling whether a frame was read, and
        # 'frame', the pixel array of that frame.
        ret, frame = input_movie.read()

        # Quit when the input video file ends
        if not ret:
            break
        # Count only frames that were actually read.
        frame_number += 1

        # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses).
        # cvtColor returns a new contiguous array, unlike the negative-stride view frame[:, :, ::-1].
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Find all the faces in the current frame of video.
        # face_locations: bounding boxes of the faces found in the image
        face_locations = face_recognition.face_locations(rgb_frame)
        # Remember the detections of every frame
        pre_locations.append(face_locations)

        # No face found in this frame: reuse the most recent non-empty detection
        # among the last `lookback_frames` entries. Only that tail is scanned,
        # not the whole history.
        if not face_locations:
            for location in reversed(pre_locations[-lookback_frames:]):
                if location:
                    face_locations = location
                    break

        if face_locations:
            # face_encodings: one 128-dimension encoding per face, computed for the known face_locations
            face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

            # Use the KNN model to find the best matches for the test face
            closest_distances = knn_clf.kneighbors(face_encodings, n_neighbors=1)
            are_matches = [closest_distances[0][i][0] <= distance_threshold for i in range(len(face_locations))]

            # Predict classes and remove classifications that aren't within the threshold
            predictions = [(pred, loc) if rec else ('Unknown', loc) for pred, loc, rec in zip(knn_clf.predict(face_encodings), face_locations, are_matches)]
        else:
            # Nothing to classify: skip the classifier instead of feeding it a dummy encoding.
            face_encodings = []
            predictions = []

        # Label the results
        # (top, right, bottom, left): edges of the bounding box
        for name, (top, right, bottom, left) in predictions:
            if not name:
                continue

            # Draw a box around the face
            # cv2.rectangle(image, corner, opposite_corner, color, thickness)
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

            # Draw a label with a name below the face
            cv2.rectangle(frame, (left, bottom + 25), (right, bottom), (0, 0, 255), cv2.FILLED)
            # Render the name inside the label
            # cv2.putText(image, text, bottom-left_corner_of_text, font_style, font_scale, color, thickness)
            cv2.putText(frame, name, (left + 6, bottom + 19), font, 0.5, (255, 255, 255), 1)

        # Write the resulting image to the output video file
        print("Writing frame {} / {}".format(frame_number, length))
        output_movie.write(frame)
finally:
    # All done (or failed part-way): release both video handles so the output
    # file is finalized, then close any windows that were created.
    input_movie.release()
    output_movie.release()
    cv2.destroyAllWindows()


Training KNN classifier...
Training complete!
59.94 1920 1080
Writing frame 1 / 581
Writing frame 2 / 581
Writing frame 3 / 581
Writing frame 4 / 581
Writing frame 5 / 581
Writing frame 6 / 581
Writing frame 7 / 581
Writing frame 8 / 581
Writing frame 9 / 581
Writing frame 10 / 581
Writing frame 11 / 581
Writing frame 12 / 581
Writing frame 13 / 581
Writing frame 14 / 581
Writing frame 15 / 581
Writing frame 16 / 581
Writing frame 17 / 581
Writing frame 18 / 581
Writing frame 19 / 581
Writing frame 20 / 581
Writing frame 21 / 581
Writing frame 22 / 581
Writing frame 23 / 581
Writing frame 24 / 581
Writing frame 25 / 581
Writing frame 26 / 581
Writing frame 27 / 581
Writing frame 28 / 581
Writing frame 29 / 581
Writing frame 30 / 581
Writing frame 31 / 581
Writing frame 32 / 581
Writing frame 33 / 581
Writing frame 34 / 581
Writing frame 35 / 581
Writing frame 36 / 581
Writing frame 37 / 581
Writing frame 38 / 581
Writing frame 39 / 581
Writing frame 40 / 581
Writing frame 41 / 581
Writ