In [1]:
# imports
import os
import sys
import argparse
import cv2
import yaml
import glob
from tqdm import tqdm
import numpy as np
import json
import pickle

from FaceBoxes import FaceBoxes
from TDDFA import TDDFA
from utils.render import render
#from utils.render_ctypes import render  # faster
from utils.depth import depth
from utils.pncc import pncc
from utils.uv import uv_tex
from utils.pose import calc_pose, P2sRt, matrix2angle
from utils.serialization import ser_to_ply, ser_to_obj
from utils.functions import draw_landmarks, get_suffix
from utils.tddfa_util import str2bool

from recrop_images import find_center_bbox, crop_image, eg3dcamparams, crop_final

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _build_parser():
    """Assemble the command-line parser used to configure this notebook.

    Returns:
        argparse.ArgumentParser: parser exposing the input/output paths, crop
        size, compute mode, model config path and the two boolean switches.
    """
    cli = argparse.ArgumentParser(description='The demo of still image of 3DDFA_V2')

    # One row per option, listed in the order they are registered:
    # (flag spellings, keyword arguments handed to add_argument).
    option_table = (
        (('-i', '--input_path'), {'type': str, 'default': 'examples/test/data.pkl'}),
        (('-o', '--output'), {'type': str, 'default': 'examples/test/quads.pkl'}),
        (('-j', '--output_json'), {'type': str, 'default': 'dataset.json'}),
        (('-p', '--prefix'), {'type': str, 'default': ''}),
        (('--size',), {'type': int, 'default': 1024}),
        (('--out_dir',), {'type': str, 'default': './crop_samples/img'}),
        (('--mode',), {'type': str, 'default': 'gpu', 'help': 'gpu or cpu mode'}),
        (('--config',), {'type': str, 'default': 'configs/mb1_120x120.yml'}),
        (('--individual',), {'action': 'store_true', 'default': False}),
        (('--onnx',), {'action': 'store_true', 'default': False}),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli


parser = _build_parser()

# parse_known_args hands back unrecognised arguments instead of exiting, so
# extra argv entries do not abort the cell.
args, unknown = parser.parse_known_args()

In [12]:
# Load the model configuration. The context manager closes the file handle
# (the previous bare open() left it to the garbage collector), and
# yaml.safe_load is the shorthand for yaml.load(..., Loader=yaml.SafeLoader).
with open(args.config) as cfg_file:
    cfg = yaml.safe_load(cfg_file)  # alternative config: mb05_120x120

# Init FaceBoxes and TDDFA, recommend using onnx flag
if args.onnx:
    # These environment variables are set before the ONNX modules are imported.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    os.environ['OMP_NUM_THREADS'] = '4'

    # Imported lazily so the ONNX variants are only required when --onnx is given.
    from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
    from TDDFA_ONNX import TDDFA_ONNX

    face_boxes = FaceBoxes_ONNX()
    tddfa = TDDFA_ONNX(**cfg)
else:
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
    face_boxes = FaceBoxes()


# Load the {image path: landmarks} mapping that drives the cropping loop.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only point --input_path at files from a trusted source.
with open(args.input_path, "rb") as f:
    inputs = pickle.load(f, encoding="latin1").items()


In [4]:
def get_crop_bound(lm, method="ffhq"):
    """Compute a crop quadrilateral from 2D facial landmarks.

    Args:
        lm: sequence or array of 2D landmark points. Two layouts are
            supported, selected by length: 106 points or 68 points.
            Lists and integer arrays are accepted; they are converted to
            float64 internally. Floating-point arrays are used unchanged.
        method: "ffhq" (quad derived from the eye and mouth positions,
            centred slightly below the eyes) or "default" (quad derived
            from the eye and nose positions, centred on ``center``).

    Returns:
        tuple ``(quad, c, x, y, crop_keypoint_dict, quad_dict)`` where
        ``quad`` is a float32 array stacking the four corners
        ``c - x - y, c - x + y, c + x + y, c + x - y``; ``c`` is the quad
        centre; ``x`` and ``y`` are the two half-extent vectors;
        ``crop_keypoint_dict`` holds the key points picked from ``lm``;
        ``quad_dict`` holds four quad corners under the keys
        "left", "right", "top" and "bottom".

    Raises:
        ValueError: if ``lm`` has neither 106 nor 68 entries, or if
            ``method`` is not one of the supported names.
    """
    # Coerce to a float array so list input and integer pixel coordinates
    # work: the in-place division below fails on integer arrays, and plain
    # lists would be concatenated instead of added.
    lm = np.asarray(lm)
    if not np.issubdtype(lm.dtype, np.floating):
        lm = lm.astype(np.float64)

    if len(lm) == 106:
        left_e = lm[104]
        right_e = lm[105]
        nose = lm[49]
        left_m = lm[84]
        right_m = lm[90]
        center = (lm[1] + lm[31]) * 0.5
    elif len(lm) == 68:
        # Each eye is the mean of its six contour points in this layout.
        left_e = np.mean(lm[36:42], axis=0)
        right_e = np.mean(lm[42:48], axis=0)
        nose = lm[33]
        left_m = lm[48]
        right_m = lm[54]
        center = (lm[0] + lm[16]) * 0.5
    else:
        raise ValueError(f"Unknown type of keypoints with a length of {len(lm)}")

    # Quantities shared by both crop methods.
    eye_to_eye = right_e - left_e
    eye_avg = (left_e + right_e) * 0.5

    if method == "ffhq":
        mouth_avg = (left_m + right_m) * 0.5
        eye_to_mouth = mouth_avg - eye_avg
        # Combine the eye axis with the eye-to-mouth vector rotated by 90
        # degrees (flipud swaps the components, [-1, 1] flips one sign).
        x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
        x /= np.hypot(*x)
        x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
        y = np.flipud(x) * [-1, 1]  # x rotated by 90 degrees
        c = eye_avg + eye_to_mouth * 0.1
    elif method == "default":
        eye_to_nose = nose - eye_avg
        x = eye_to_eye.copy()
        x /= np.hypot(*x)
        x *= max(np.hypot(*eye_to_eye) * 2.4, np.hypot(*eye_to_nose) * 2.75)
        y = np.flipud(x) * [-1, 1]  # x rotated by 90 degrees
        c = center
    else:
        raise ValueError('%s crop method not supported yet.' % method)

    crop_keypoint_dict = {
        "left_eye": left_e,
        "right_eye": right_e,
        "nose": nose,
        "left_mouth": left_m,
        "right_mouth": right_m,
        "center": center,
    }
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    # Same four corner points as ``quad``, keyed by name (order differs).
    quad_dict = {
        "left": c - x - y,
        "right": c + x - y,
        "top": c - x + y,
        "bottom": c + x + y,
    }
    return quad.astype(np.float32), c, x, y, crop_keypoint_dict, quad_dict

In [20]:
# pyplot is imported once here instead of on every loop iteration.
from matplotlib import pyplot as plt

size = 512  # side length in pixels of the square working crop (args.size is not used here)
results_quad = {}
results_meta = {}
for i, item in enumerate(tqdm(inputs)):

    # get initial cropping box(quad) using landmarks
    img_path, landmarks = item  # img_path: str, landmarks: np.ndarray
    img_path = args.prefix + img_path
    img_orig = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    if img_orig is None:
        print('Cannot load image')
        continue
    quad, quad_c, quad_x, quad_y, kpd, qdd = get_crop_bound(landmarks)

    # draw keypoints
    # NOTE(review): the markers are drawn directly onto img_orig, so they are
    # present in every crop taken from it afterwards.
    for landmark_name, coordinates in qdd.items():
        x, y = coordinates
        # Draw a point (circle) on the image
        cv2.circle(img_orig, (int(x), int(y)), 20, (0, 255, 0), -1)  # You can adjust the color and size here
        # Add the landmark name as text near the point
        cv2.putText(img_orig, landmark_name, (int(x) + 10, int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 255), 5)

    skip = False
    for iteration in range(1):
        # Map the first three quad corners onto the corners of a size x size square.
        bound = np.array([[0, 0], [0, size-1], [size-1, size-1], [size-1, 0]], dtype=np.float32)
        mat = cv2.getAffineTransform(quad[:3], bound[:3])
        img = crop_image(img_orig, mat, size, size)  # after cropping, resize to 512x512
        h, w = img.shape[:2]

        # Detect faces, get 3DMM params and roi boxes
        boxes = face_boxes(img)  # why detect face second time
        # Check for an empty detection result BEFORE indexing boxes[0];
        # otherwise a crop without a detectable face raises IndexError
        # instead of being skipped.
        if len(boxes) == 0:
            print('No face detected')
            skip = True
            break

        # Draw the first detected box for visual inspection.
        xmin, ymin, xmax, ymax, score = boxes[0]
        color = (0, 255, 0)  # Define the color (BGR format, so (0, 255, 0) is green)
        thickness = 2  # Define the thickness of the bounding box
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, thickness)

        param_lst, roi_box_lst = tddfa(img, boxes)
        box_idx = find_center_bbox(roi_box_lst, w, h)

        param = param_lst[box_idx]
        P = param[:12].reshape(3, -1)  # camera matrix
        s_relative, R, t3d = P2sRt(P)
        pose = matrix2angle(R)
        pose = [p * 180 / np.pi for p in pose]  # radians -> degrees

        # Adjust z-translation in object space
        R_ = param[:12].reshape(3, -1)[:, :3]
        u = tddfa.bfm.u.reshape(3, -1, order='F')
        trans_z = np.array([ 0, 0, 0.5*u[2].mean() ]) # Adjust the object center
        trans = np.matmul(R_, trans_z.reshape(3,1))
        t3d += trans.reshape(3)

    # Show the working crop for this image (with the detection box when a
    # face was found).
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Camera extrinsic estimation for GAN training.
#
# This cell sits outside the cropping loop, so it operates on the variables
# left behind by that loop's last iteration (roi_box_lst, t3d, s_relative,
# pose, R, quad_c, quad_x, quad_y, img_orig, img_path, w, h, i, skip).
# NOTE(review): it reassigns quad_c/quad_x/quad_y and mutates t3d in place, so
# re-running it without re-running the loop cell compounds the adjustment.

if not skip:
    # Only run when the loop cell did not already flag the image (e.g. no
    # face detected), since the pose variables would then be missing or stale.

    # Normalize P to fit in the original image (before 3DDFA cropping)
    # NOTE(review): the parameters were taken from param_lst[box_idx] but the
    # ROI box is read from index 0 -- confirm this is intended.
    sx, sy, ex, ey = roi_box_lst[0]
    scale_x = (ex - sx) / tddfa.size
    scale_y = (ey - sy) / tddfa.size
    t3d[0] = (t3d[0]-1) * scale_x + sx
    t3d[1] = (tddfa.size-t3d[1]) * scale_y + sy
    t3d[0] = (t3d[0] - 0.5*(w-1)) / (0.5*(w-1)) # Normalize to [-1,1]
    t3d[1] = (t3d[1] - 0.5*(h-1)) / (0.5*(h-1)) # Normalize to [-1,1], y is flipped for image space
    t3d[1] *= -1
    t3d[2] = 0 # orthogonal camera is agnostic to Z (the model always outputs 66.67)

    s_relative = s_relative * 2000
    scale_x = (ex - sx) / (w-1)
    scale_y = (ey - sy) / (h-1)
    s = (scale_x + scale_y) / 2 * s_relative
    # print(f"[{iteration}] s={s} t3d={t3d}")

    # Quality gates: reject crops whose scale, pose angles or translation
    # fall outside the accepted ranges.
    if s < 0.7 or s > 1.3:
        print(f"Skipping[{i+1-len(results_quad)}/{i+1}]: {img_path} s={s}")
        skip = True
    if abs(pose[0]) > 90 or abs(pose[1]) > 80 or abs(pose[2]) > 50:
        print(f"Skipping[{i+1-len(results_quad)}/{i+1}]: {img_path} pose={pose}")
        skip = True
    if abs(t3d[0]) > 1. or abs(t3d[1]) > 1.:
        print(f"Skipping[{i+1-len(results_quad)}/{i+1}]: {img_path} pose={pose} t3d={t3d}")
        skip = True

# Created unconditionally so the directory exists even when the image is skipped.
os.makedirs(args.out_dir, exist_ok=True)

if not skip:
    # Previously a rejected image was still recorded and written to disk
    # despite the "Skipping" message; now only accepted images are stored.

    # Shift the quad centre by the normalized translation and rescale it.
    quad_c = quad_c + quad_x * t3d[0]
    quad_c = quad_c - quad_y * t3d[1]
    quad_x = quad_x * s
    quad_y = quad_y * s
    c, x, y = quad_c, quad_x, quad_y
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]).astype(np.float32)

    # final projection matrix: rotation with zero translation, padded to 4x4
    s = 1
    t3d = 0 * t3d
    R[:,:3] = R[:,:3] * s
    P = np.concatenate([R,t3d[:,None]],1)
    P = np.concatenate([P, np.array([[0,0,0,1.]])],0)
    results_meta[img_path] = eg3dcamparams(P.flatten())
    results_quad[img_path] = quad

    # Save cropped images
    cropped_img = crop_final(img_orig, size=size, quad=quad)
    cv2.imwrite(os.path.join(args.out_dir, os.path.basename(img_path).replace(".png",".jpg")), cropped_img)

In [None]:
# Persist the crop quadrilaterals (image path -> quad) as a pickle file.
num_quads = len(results_quad)
print("results:", num_quads)
with open(args.output, mode='wb') as quad_file:
    pickle.dump(results_quad, quad_file)

In [None]:
# Save meta data: one (image file name, camera parameters) pair per image,
# with every parameter rendered as a string with six decimal places.
# A comprehension with its own variable names is used so this cell no longer
# overwrites the `img` and `P` variables produced by earlier cells.
results_new = [
    (os.path.basename(path), [format(value, '.6f') for value in cam_params])
    for path, cam_params in results_meta.items()
]

# Make sure the target directory exists instead of relying on another cell
# having created it.
os.makedirs(args.out_dir, exist_ok=True)
with open(os.path.join(args.out_dir, args.output_json), 'w') as outfile:
    json.dump({"labels": results_new}, outfile, indent="\t")