# Project 6510

In [1]:
# !pip install cmake
# !pip install face_recognition
# !pip install numpy
# !pip install dlib
# !pip install torch
# !pip install torchvision
# !pip install 'git+https://github.com/facebookresearch/detectron2.git'

Code forked and tweaked from:
- https://github.com/ageitgey/face_recognition/blob/master/examples/facerec_from_webcam_faster.py

to extend, just add more people into the known_people folder

Detectron2 detection:
- https://gilberttanner.com/blog/detectron-2-object-detection-with-pytorch
- https://github.com/facebookresearch/detectron2

OpenCV human detection:
- https://thedatafrog.com/en/articles/human-detection-video/

Yolo object detection:
- https://pjreddie.com/darknet/yolo/
- https://medium.com/@luanaebio/detecting-people-with-yolo-and-opencv-5c1f9bc6a810

OpenCV eye detection:
- https://github.com/stepacool/Eye-Tracker/blob/No_GUI/track.py
- https://medium.com/@stepanfilonov/tracking-your-eyes-with-python-3952e66194a6

## Imports

### Basic imports

In [2]:
import cv2
import os
import torch
import numpy as np
import face_recognition
from glob import glob

### Detectron Imports

In [3]:
## Detectron2
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
## Unsafe workaround
# os.environ['KMP_DUPLICATE_LIB_OK']='True'

## Human Detection with Detectron 2

In [4]:
def initialize_detectron2():    
    # Create config
    cfg = get_cfg()
    cfg.merge_from_file("./detectron2-master/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
    cfg.MODEL.DEVICE = 'cpu'
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl"

    # Create predictor
    predictor = DefaultPredictor(cfg)
    return predictor, cfg

def make_detectron2_prediction(frame, predictor):
    frame = cv2.resize(frame, (480, 640))
    return predictor(frame)

def draw_detectron2_result(prediction, frame, cfg):
    person_instances = prediction['instances'][np.where(prediction['instances'].pred_classes == 0)]
    v = Visualizer(frame[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    return v.draw_instance_predictions(person_instances.to("cpu"))

def detectron_unleashed(frame, detectron_predictor, detectron_cfg, itr):
    if itr%2 == 0:
        detectron_prediction = make_detectron2_prediction(frame, detectron_predictor)
        detectron_viz = draw_detectron2_result(detectron_prediction, frame, detectron_cfg).get_image()[:, :, ::-1]
    else:
        detectron_viz = frame
    display_results_detectron(face_locations, face_names, frame, detectron_viz)
    itr += 1
    return itr

def display_results_detectron(face_locations, face_names, frame, detectron_viz):
    """
    Displaying results
    return: None
    """
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale back up face locations since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
    detectron_img = cv2.resize(detectron_viz, (frame.shape[1], frame.shape[0]))
    frame_final = cv2.bitwise_or(detectron_img, frame)
    cv2.imshow('Video', frame_final)

## Human detection with OpenCV

In [5]:
def initialize_human_classifier():
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    return hog

def make_human_prediction(frame, hog):
    boxes, weights = hog.detectMultiScale(frame, winStride=(12,12), scale = 1.03)
    return np.array([[x, y, x + w, y + h] for (x, y, w, h) in boxes])

def draw_detection_results(frame, boxes):
    for (xA, yA, xB, yB) in boxes:
        cv2.rectangle(frame, (xA, yA), (xB, yB),
                          (0, 255, 0), 2)
        cv2.putText(frame, f'Humaniod', (xA,yA), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    return frame

## Human Detection with YOLO

In [6]:
from time import time

def get_yolo_classes():
    classes = None
    with open('coco.names', 'r') as f:
        classes = [line.strip() for line in f.readlines()]
    return classes

def init_yolo():
    net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
    return net


def make_yolo_prediction(frame, net):
    net.setInput(cv2.dnn.blobFromImage(frame, 0.00392, (416,416), (0,0,0), True, crop=False))
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    outs = net.forward(output_layers)
    return outs


def extract_box(detection, Width, Height):
    center_x = int(detection[0] * Width)
    center_y = int(detection[1] * Height)
    w = int(detection[2] * Width)
    h = int(detection[3] * Height)
    x = center_x - w / 2
    y = center_y - h / 2
    return x, y, w, h


def list_to_dict(lst):
    res_dct = {str(i): lst[0][i] for i in range(0, len(lst[0]))}
    return res_dct


def get_bounding_box(classes, outs, frame):
    class_ids = []
    confidences = []
    boxes = []
    Width = frame.shape[1]
    Height = frame.shape[0]
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.1:
                x, y, w, h = extract_box(detection, Width, Height)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])
    return boxes, confidences, class_ids, class_id


def draw_yolo_result(frame, boxes, confidences, class_ids, class_id):
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)
    #check if is people detection
    for i in indices:
        i = i[0]
        box = boxes[i]
        if class_ids[i]==0:
            label = str(classes[class_id]) 
            cv2.rectangle(frame, (round(box[0]),round(box[1])), 
                          (round(box[0]+box[2]),round(box[1]+box[3])), (0, 255, 0), 2)
            cv2.putText(frame, label, (round(box[0])-10,round(box[1])-10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return frame

def yolo_main(frame, process_this_frame, yolo_net):
    if not process_this_frame:
        yolo_pred = make_yolo_prediction(frame, yolo_net)
        boxes, confidences, class_ids, class_id = get_bounding_box(classes, yolo_pred, frame)
        frame = draw_yolo_result(frame, boxes, confidences, class_ids, class_id)
    return frame

## Gaze Detection with OpenCV

In [7]:
import matplotlib.pyplot as plt

def initialize_eye_detector():
    detector_params = cv2.SimpleBlobDetector_Params()
    detector_params.filterByArea = True
    detector_params.maxArea = 1500
    detector = cv2.SimpleBlobDetector_create(detector_params)
    return cv2.CascadeClassifier('haarcascade_eye.xml'), detector

def crop_face(img, x, y, w, h):
    return img[y:y + h, x:x + w]


def detect_eyes(img, cascade):
    gray_frame = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    eyes = cascade.detectMultiScale(gray_frame, 1.03, 5)  # detect eyes
    width = np.size(img, 1)  # get face frame width
    height = np.size(img, 0)  # get face frame height
    left_eye = None
    right_eye = None
    for (x, y, w, h) in eyes:
        if y > height / 2:
            pass
        eyecenter = x + w / 2  # get the eye center
        if eyecenter < width * 0.5:
            left_eye = img[y:y + h, x:x + w]
        else:
            right_eye = img[y:y + h, x:x + w]
        
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    
    return left_eye, right_eye


def cut_eyebrows(img):
    height, width = img.shape[:2]
    eyebrow_h = int(height / 4)
    img = img[eyebrow_h:height, 0:width]  # cut eyebrows out (15 px)
    return img


def blob_process(img, threshold, detector):
    gray_frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(gray_frame, threshold, 255, cv2.THRESH_BINARY)
    img = cv2.erode(img, None, iterations=2)
    img = cv2.dilate(img, None, iterations=4)
    img = cv2.medianBlur(img, 5)
    keypoints = detector.detect(img)
    return keypoints


def nothing(x):
    pass


def eye_detection_main(frame, eye_cascade, face_locations, process_this_frame, blob_detect):
    if not process_this_frame:
        for (y, w, h, x) in face_locations:
            cropped_face = crop_face(frame, x*4, y*4, w*2, h) ## as done before in face recog display results
            if cropped_face is not None:
                eyes = detect_eyes(cropped_face, eye_cascade)
                threshold = r = cv2.getTrackbarPos('threshold', 'video')
                for eye in eyes:
                    if eye is not None:
                        eye = cut_eyebrows(eye)
                        keypoints = blob_process(eye, threshold, blob_detect)
                        eye = cv2.drawKeypoints(eye, keypoints, eye, (0, 0, 255),
                                                      cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
                        
    return frame

## Helper functions

In [8]:
def read_images(path):
    """
    Make an array of all the images
    return: list, int, 
    """
    list_of_files = [f for f in glob(path+'*.jpg')]
    number_files = len(list_of_files)
    names = list_of_files.copy()
    return list_of_files, number_files, names

def get_known_faces(number_files, list_of_files, names):
    """
    get known names and face encodings
    return: two lists
    """
    known_face_encodings = []
    known_face_names = []
    for i in range(number_files):
        globals()['image_{}'.format(i)] = face_recognition.load_image_file(list_of_files[i])
        globals()['image_encoding_{}'.format(i)] = face_recognition.face_encodings(globals()['image_{}'.format(i)])[0]
        known_face_encodings.append(globals()['image_encoding_{}'.format(i)])
        names[i] = names[i].replace("known_people/", "")  
        known_face_names.append(names[i])
    return known_face_encodings, known_face_names

def get_face_names(face_encodings, known_face_encodings, known_face_names):
    face_names = []
    for face_encoding in face_encodings:
        # See if the face is a match for the known face(s)
        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
        # If a match was found in known_face_encodings, just use the first one.
        # name = first_match(matches, known_face_names)
        name = known_face_match(known_face_encodings, known_face_names, face_encoding, matches)
        face_names.append(name)
    return face_names

def process_frame(process_this_frame, rgb_small_frame, face_names, face_locations):
    if process_this_frame:
        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        face_names = get_face_names(face_encodings, known_face_encodings, known_face_names)
    process_this_frame = not process_this_frame
    return face_names, face_locations, process_this_frame

def display_results(face_locations, face_names, frame):
    """
    Displaying results
    return: None
    """
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale back up face locations since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
    return frame
    
def first_match(matches, known_face_names):
    # If a match was found in known_face_encodings
    if True in matches:
        first_match_index = matches.index(True)
        name = known_face_names[first_match_index]
        return name
    return 'Unknown'
    
def known_face_match(known_face_encodings, known_face_names, face_encoding, matches):
    # use the known face with the smallest distance to the new face
    face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
    best_match_index = np.argmin(face_distances)
    if matches[best_match_index]:
        name = known_face_names[best_match_index]
        return name
    return 'Unknown'

def notify(title, text):
    os.system("""
              osascript -e 'display notification "{}" with title "{}"'
              """.format(text, title))
    
def notify_reset_timer(notification_timer, name):
    if(notification_timer <= 0 and name.lower() == "unknown"):
        notification_timer = 60
        notify("Shoulder Surfing Detected", "Quick behind you!")
    else:
        notification_timer-=1
    return notification_timer

## Main Code

In [9]:
# Get a reference to webcam #0 (the default one)
video_capture = cv2.VideoCapture(0)

# make array of sample pictures with encodings
dirname = os.path.dirname("__file__")
path = os.path.join(dirname, 'known_people/')

## Initializing OpenCV classifier
# hog = initialize_human_classifier()

# ## initializing detectron2
# detectron_predictor, detectron_cfg = initialize_detectron2()

## Initializing the yolo net
yolo_net = init_yolo()
eye_net, blob_detect = initialize_eye_detector()

## Reading images
list_of_files, number_files, names = read_images(path)

known_face_encodings, known_face_names = get_known_faces(number_files,
                                                         list_of_files,
                                                         names)

# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True
notification_timer = 0

itr = 0

classes = get_yolo_classes()

cv2.namedWindow('video')
cv2.createTrackbar('threshold', 'video', 0, 255, nothing)

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()

    # Resize frame of video to 1/4 size for faster face recognition processing
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    rgb_small_frame = small_frame[:, :, ::-1]
    # Only process every other frame of video to save time
    face_names, face_locations, process_this_frame = process_frame(process_this_frame, 
                                                                   rgb_small_frame, 
                                                                   face_names,
                                                                   face_locations)
    
    frame = display_results(face_locations, face_names, frame)

    frame = yolo_main(frame, process_this_frame, yolo_net)
    
    frame = eye_detection_main(frame, eye_net, face_locations, process_this_frame, blob_detect)
    cv2.imshow('video', frame)
    #check for unknown entiites and alert user every ~30 seconds ish
    if face_names !=[]:
        notification_timer = notify_reset_timer(notification_timer, face_names[-1])

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    
# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()

## TODO 

1. Gaze Detection (pose detection)
2. Consequence addition (notifications ignored)
3. Deranking - lose access
4. Threats - known attacks on opencv
5. lock the screen if main user is not there
6. Possible GUI
7. Build admin backend to keep track

## Reference Code

In [8]:
#code forked and tweaked from https://github.com/ageitgey/face_recognition/blob/master/examples/facerec_from_webcam_faster.py
#to extend, just add more people into the known_people folder

import face_recognition
import cv2
import numpy as np
import os
import glob

def notify(title, text):
    os.system("""
              osascript -e 'display notification "{}" with title "{}"'
              """.format(text, title))
    
# Get a reference to webcam #0 (the default one)
video_capture = cv2.VideoCapture(0)

#make array of sample pictures with encodings
known_face_encodings = []
known_face_names = []
dirname = os.path.dirname("__file__")
path = os.path.join(dirname, 'known_people/')

#make an array of all the saved jpg files' paths
list_of_files = [f for f in glob.glob(path+'*.jpg')]
#find number of known faces
number_files = len(list_of_files)

names = list_of_files.copy()

for i in range(number_files):
    globals()['image_{}'.format(i)] = face_recognition.load_image_file(list_of_files[i])
    globals()['image_encoding_{}'.format(i)] = face_recognition.face_encodings(globals()['image_{}'.format(i)])[0]
    known_face_encodings.append(globals()['image_encoding_{}'.format(i)])

    # Create array of known names
    names[i] = names[i].replace("known_people/", "")  
    known_face_names.append(names[i])

# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True
notification_timer = 0

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()

    # Resize frame of video to 1/4 size for faster face recognition processing
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    rgb_small_frame = small_frame[:, :, ::-1]

    # Only process every other frame of video to save time
    if process_this_frame:
        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        face_names = []
        for face_encoding in face_encodings:
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"
            # # If a match was found in known_face_encodings, just use the first one.
            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]

            # Or instead, use the known face with the smallest distance to the new face
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]

            face_names.append(name)

    process_this_frame = not process_this_frame


    # Display the results
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale back up face locations since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

        # Draw a label with a name below the face
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

    # Display the resulting image
    cv2.imshow('Video', frame)
    
    #check for unknown entiites and alert user every ~30 seconds ish
    if(notification_timer <= 0 and name == "Unknown"):
        notification_timer = 60
        notify("Shoulder Surfing Detected", "Quick behind you!")
    else:
        notification_timer-=1
        
    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()

SyntaxError: invalid syntax (<ipython-input-8-404eb4fe7f38>, line 16)