In [1]:
import os
import argparse
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
from torchvision import datasets, transforms
import time
import matplotlib.pyplot as plt
from torchvision.transforms.functional import crop, center_crop, rotate, InterpolationMode, pad, resize

try:
    import torchinfo
    import cv2
except ModuleNotFoundError:
    !pip install torchinfo
    !pip install opencv-python-headless
    import torchinfo
    import cv2
        

In [2]:
def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True

In [3]:
def remove_prefix(state_dict, prefix):
    ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}

In [4]:
def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
        print("Model loaded to GPU")
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model

In [5]:
def detection_model(network="resnet50"):
    if network == "mobile0.25":
        cfg = cfg_mnet
        trained_model = "./weights/mobilenet0.25_Final.pth"
    elif network == "resnet50":
        cfg = cfg_re50
        trained_model = "./weights/Resnet50_Final.pth"
    # net and model
    net = RetinaFace(cfg=cfg, phase = 'test')
    net = load_model(net, trained_model, False)
    net.eval()
    cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") # Defines the computation device (cuda:0 => GPU)
    net = net.to(device)
    
    return net, cfg, device

In [6]:
def face_select(dets, selec_thresh):
    previous_area = 0
    max_area = 0
    prev_coords = np.zeros_like(dets[0])
    coords = np.zeros_like(dets[0])

    for b in dets:
        #if b[4] < selec_thresh: # Excludes lower score detections indicating possible background faces
            #continue
        
        height = b[3]-b[1] #ymax-ymin
        width = b[2]-b[0] #xmax-xmin
    
        b = list(map(int, b))
        bbox_area = width*height
        #print(len(dets))
        
        if len(dets) == 1: # Only one face present in the picture
            max_area = bbox_area
            coords[:] = b
        else:
            if bbox_area > previous_area:
                previous_area = bbox_area
                prev_coords[:] = b
            else:
                max_area = previous_area
                coords [:] = prev_coords
    face = np.append(coords, max_area)

    return face

In [87]:
def crop_align(img, dets, selec_thresh, net, cfg, device, final_dir, save=False):
    '''
    b[0], b[1] is the top left corner of the bounding box
    b[2], b[3] is the lower right corner of the bounding box
    b[4] relates to the the score of the detection
    b[5], b[6] is the left eye
    b[7], b[8] is the right eye
    b[9], b[10] is the nose
    b[11], b[12] is the left of the mouth
    b[13], b[14] is the right of the mouth
    '''
    
    img_raw = cv2.imread("/test_cuda/Pytorch_Retinaface/test/4.jpg", cv2.IMREAD_COLOR)
    
    face_coords = face_select(dets, selec_thresh)
    face_coords = list(map(int, face_coords)) # Coordinates must be integers
    
    # -------------------- Rotation Stage ---------------------
    left_eye = (face_coords[5], face_coords[6]) # Components: (x, y)
    right_eye = (face_coords[7], face_coords[8])
    if left_eye[1] > right_eye[1]:               # Right eye is higher
        # Clock-wise rotation
        aux_point = (right_eye[0], left_eye[1])
        a = right_eye[0] - left_eye[0]
        b = right_eye[1] - aux_point[1]
        
        cv2.line(img_raw, left_eye, right_eye, (255, 0, 0), 2)
        cv2.line(img_raw, aux_point, right_eye, (255, 0, 0), 2)
        cv2.line(img_raw, left_eye, aux_point, (255, 0, 0), 2)
        
        cv2.circle(img_raw, left_eye, 4, (0, 0, 255), cv2.FILLED)
        cv2.circle(img_raw, right_eye, 4, (0, 0, 255), cv2.FILLED)
        cv2.circle(img_raw, aux_point, 4, (0, 255, 0), cv2.FILLED)
        
        #plt.imshow(cv2.cvtColor(img_raw[face_coords[1]:face_coords[3], face_coords[0]:face_coords[2]], cv2.COLOR_BGR2RGB)) 
        #cv2.imwrite("/test_cuda/2.jpg", img_raw[face_coords[1]:face_coords[3], face_coords[0]:face_coords[2]])
        try:
            theta = np.rad2deg(np.arctan(b/a)) # Angle of rotation in degrees
            print("Right eye is higher, therefore, a counter clock-wise rotation of {} degrees is applied".format(-theta))
            rotated_tensor = rotate(img.squeeze(), angle=theta, interpolation=InterpolationMode.BILINEAR, center=right_eye)
        except ZeroDivisionError:
            print("Already aligned")
            rotated_tensor = img.squeeze()

    else:                                        # Left eye is higher
        # Counter clock-wise rotation
        aux_point = (left_eye[0], right_eye[1])
        a = right_eye[0] - left_eye[0]
        b = left_eye[1] - aux_point[1]
        

        cv2.line(img_raw, left_eye, right_eye, (255, 0, 0), 2)
        cv2.line(img_raw, aux_point, right_eye, (255, 0, 0), 2)
        cv2.line(img_raw, left_eye, aux_point, (255, 0, 0), 2)
        
        cv2.circle(img_raw, left_eye, 4, (0, 0, 255), cv2.FILLED)
        cv2.circle(img_raw, right_eye, 4, (0, 0, 255), cv2.FILLED)
        cv2.circle(img_raw, aux_point, 4, (0, 255, 0), cv2.FILLED)
        
        #plt.imshow(cv2.cvtColor(img_raw[face_coords[1]:face_coords[3], face_coords[0]:face_coords[2]], cv2.COLOR_BGR2RGB))
        #cv2.imwrite("/test_cuda/2.jpg", img_raw[face_coords[1]:face_coords[3], face_coords[0]:face_coords[2]])
        try:
            theta = np.rad2deg(np.arctan(b/a))
            print("Left eye is higher, therefore, a clock-wise rotation of {} degrees is applied".format(-theta))
            rotated_tensor = rotate(img.squeeze(), angle=-theta, interpolation=InterpolationMode.BILINEAR, center=left_eye)
        except ZeroDivisionError:
            print("Already aligned")
            rotated_tensor = img.squeeze()
        
    #plt.imshow(rotated_tensor.squeeze().permute(1, 2, 0).cpu().numpy().astype(int))
    
    loc, conf, _ = net(rotated_tensor.unsqueeze(0))  # Forward pass that gives the results <--------------
    
    im_height = rotated_tensor.shape[1]
    im_width = rotated_tensor.shape[2]
    
    resize1 = 1
    new_scale = torch.Tensor([rotated_tensor.shape[2], rotated_tensor.shape[1], rotated_tensor.shape[2], rotated_tensor.shape[1]])
    new_scale = new_scale.to(device)
    
    new_priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    new_priors = new_priorbox.forward()
    new_priors = new_priors.to(device)
    new_prior_data = new_priors.data
    
    new_boxes = decode(loc.data.squeeze(0), new_prior_data, cfg['variance'])
    new_boxes = new_boxes * new_scale / resize1
    new_boxes = new_boxes.cpu().numpy() # Tensor is moved to CPU (numpy doesn't support GPU)
    new_scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    
    # Score's threshold
    confidence_threshold = 0.0004 # Default value
    inds = np.where(new_scores > confidence_threshold)[0]
    new_boxes = new_boxes[inds]
    new_scores = new_scores[inds]
    
    # keep top-K before NMS
    top_k = 500 # Default value
    order = new_scores.argsort()[::-1][:top_k] # Extracts the indexes relating to the top scores
    new_boxes = new_boxes[order] # Array [300, 4] where in each line are the coordinates
    new_scores = new_scores[order] # Array [1, 300]
    
    # do NMS
    nms_threshold = 0.0004 # Default value
    new_dets = np.hstack((new_boxes, new_scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(new_dets, nms_threshold)
    new_dets = new_dets[keep, :]
    
    # keep top-K faster NMS
    #keep_top_k = 500 # Default value
    #new_dets = new_dets[:keep_top_k, :]
    
    #rotated_bbox = new_dets[0]
    rotated_bbox = face_select(new_dets, selec_thresh)
    #print("rotated_bbox 1", rotated_bbox)
    rotated_bbox = list(map(int, rotated_bbox))
    #print("rotated_bbox 2", rotated_bbox)
    
    
    # -------------------- Cropping Stage ---------------------
    crop_height = rotated_bbox[3]-rotated_bbox[1] #ymax-ymin
    crop_width = rotated_bbox[2]-rotated_bbox[0] #xmax-xZmin
    crop_coordinates = (rotated_bbox[1], rotated_bbox[0], crop_height, crop_width)
    cropped_tensor = crop(rotated_tensor, *crop_coordinates)
    
    #plt.imshow(cropped_tensor.squeeze().permute(1, 2, 0).cpu().numpy().astype(int))
        
    image_array = cropped_tensor.permute(1,2,0).cpu().numpy()

    # Convert the numpy array to BGR format (required by OpenCV)
    cropped_image = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
    cv2.imwrite("/test_cuda/3.jpg", image_array)
    
    final_size = (160, 160)
    #resized_tensor = resize(padded_tensor, final_size)
    resized_tensor = resize(cropped_tensor, final_size)
        
    image_array = resized_tensor.permute(1,2,0).cpu().numpy()
        
    cv2.imwrite("/test_cuda/4.jpg", image_array)
        
    if save == True:
    
        final_size = (160, 160)
        #resized_tensor = resize(padded_tensor, final_size)
        resized_tensor = resize(cropped_tensor, final_size)
        
        image_array = resized_tensor.permute(1,2,0).cpu().numpy()
        
        # Convert the numpy array to BGR format (required by OpenCV)
        cropped_image = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
        
        cv2.imwrite(final_dir, cropped_image)
 
    return cropped_tensor

In [88]:
# https://github.com/biubug6/Pytorch_Retinaface/
def face_detection(net, cfg, device, img, final_dir, img_raw, save=False):
    torch.set_grad_enabled(False)
    
    resize1 = 1
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    
    #tic = time.time()
    loc, conf, landms = net(img)  # Forward pass that gives the results <--------------
    #print('Forward time: {:.4f}'.format(time.time() - tic))
        
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    
    boxes = boxes * scale / resize1
    boxes = boxes.cpu().numpy() # Tensor is moved to CPU (numpy doesn't support GPU)
    scores = conf.squeeze(0).data.cpu().numpy()[:,1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                            img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize1
    landms = landms.cpu().numpy()
    
    
    # Score's threshold
    confidence_threshold = 0.02 # Default value
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    
    # keep top-K before NMS
    top_k = 500 # Default value
    order = scores.argsort()[::-1][:top_k] # Extracts the indexes relating to the top scores
    boxes = boxes[order] # Array [300, 4] where in each line are the coordinates
    landms = landms[order] # Array [300, 10]
    scores = scores[order] # Array [1, 300]

    # do NMS
    nms_threshold = 0.04 # Default value
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]
    
    
    # keep top-K faster NMS
    keep_top_k = 750 # Default value
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    
    for b in dets:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 10)
        cx = b[0]
        cy = b[1] + 12
        #cv2.circle(img_raw, (0, 0), 10, (0, 255, 0), 4)
        #cv2.circle(img_raw, (b[0], b[1]), 1, (255, 0, 255), 4)
        #cv2.circle(img_raw, (b[2], b[3]), 1, (255, 0, 255), 4)
        cv2.putText(img_raw, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255))
        # landms
        cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 8)
        cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 8)
        cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 8)
        cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 8)
        cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 8)
    
    #plt.imshow(cropped.permute(1, 2, 0).cpu().numpy().astype(int))
    
    
    #cv2.imwrite("/test_cuda/2.jpg", img_raw)
    
    
    #plt.imshow(cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB))
    
    if len(dets) == 0:
        final_size = (160, 160)
        resized_tensor = resize(img, final_size)
        image_array = resized_tensor.squeeze(0).permute(1,2,0).cpu().numpy()
        cropped_image = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
        cv2.imwrite(final_dir, cropped_image)
        
    else:
        cropped = crop_align(img, dets, 0.1, net, cfg, device, final_dir, save)
    
    #plt.imshow(cropped.squeeze().permute(1, 2, 0).cpu().numpy().astype(int))
    
    if cropped.is_cuda: print("tensor in GPU")
    # show image
    


In [89]:
#------------- Splitting dataset -------------
#from dataset_split import DatasetSplitter
#import torchvision.io as io
#
#splitter = DatasetSplitter('/app/datasets/', '/app/data/', split = [80,20]) #Class isntance
#splitter.split_dataset() #Splitting dataset into train, test (and val if needed)
#
#train_dir, test_dir = splitter.data_dir()
#
#data_transform = transforms.Compose([
#    # Resize the images to 64x64
#    #transforms.Resize(size=(64, 64)),
#    # Flip the images randomly on the horizontal
#    #transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance
#    # Turn the image into a torch.Tensor
#    transforms.Lambda(lambda image: torch.tensor(np.array(image).astype(np.float32)).unsqueeze(0)) # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0 
#])
#
#train_data = datasets.ImageFolder(root=train_dir, # target folder of images
#                                  transform=data_transform, # transforms to perform on data (images)
#                                  target_transform=None) # transforms to perform on labels (if necessary)
#
#test_data = datasets.ImageFolder(root=test_dir, 
#                                 transform=data_transform
#                                )
#
#print(f"Train data:\n{train_data}\nTest data:\n{test_data}")

In [90]:
# Debug mode
# Image 52 from identity 9902 results in a poor crop
net, cfg, device = detection_model()
file_path = "/test_cuda/Pytorch_Retinaface/test/4.jpg"
final_dir = "/test_cuda/Pytorch_Retinaface/test/4.jpg"
img_raw = cv2.imread(file_path, cv2.IMREAD_COLOR)
#img_raw_rgb = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
img = np.float32(img_raw)

face_detection(net, cfg, device, img, final_dir, img_raw, save=False)

Loading pretrained model from ./weights/Resnet50_Final.pth
Model loaded to GPU
Left eye is higher, therefore, a clock-wise rotation of 4.1849161251184155 degrees is applied
tensor in GPU


In [None]:
# Testing folder
#root = "/test_cuda/data"
dest_dir = "/test_cuda/digiface_cropped"
root="/test_cuda/Pytorch_Retinaface/test/4.jpg"

In [19]:
net, cfg, device = detection_model()

t_tic = time.time()

while True:
    if os.path.exists(dest_dir) and os.path.isdir(dest_dir):
        break
    else:
        os.makedirs(dest_dir, exist_ok=True)
                
for stage in os.listdir(root):
    data_path = os.path.join(root, stage)
    for identity in os.listdir(data_path):
        id_path = os.path.join(root, stage, identity)
        cropped_id_path = os.path.join(dest_dir, stage, identity)
        os.makedirs(cropped_id_path, exist_ok=True)
        for files in os.listdir(id_path):
            if not files.startswith('.'):
                file_path = os.path.join(id_path, files)
                final_dir = os.path.join(cropped_id_path, files)
                
                print(file_path)
                
                img_raw = cv2.imread(file_path, cv2.IMREAD_COLOR)
                #cropped_img_raw = c_crop(img_raw, 22)
                img_raw_rgb = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)

                img = np.float32(img_raw_rgb)
                face_detection(net, cfg, device, img, final_dir, img_raw, save=False)
                        
print('Total time: {:.4f}'.format(time.time() - t_tic)) 

Loading pretrained model from ./weights/Resnet50_Final.pth
Model loaded to GPU


NotADirectoryError: [Errno 20] Not a directory: '/test_cuda/Pytorch_Retinaface/test/4.jpg'