# 3D Sparse Reconstruction
This notebook showcases how sparse face reconstruction with the Basel Face Model can be done directly in 3D.  
The resulting optimization problem is simpler because it does not involve projecting between 3D and 2D. However, more preprocessing is necessary.  
We need RGB-D data to be able to generate a pointcloud. In this notebook, the `BIWI Kinect Dataset` is used. It can be downloaded here: https://www.kaggle.com/kmader/biwi-kinect-head-pose-database  
As the depth and color channels are typically not aligned, we first have to register them.  
Finally, we can detect the landmarks in the 2D color image, back-project them to 3D using the depth information, and then proceed with the optimization process.

In [None]:
# Change the working directory to the parent folder (presumably the project root, so that
# the `face_reconstruction` imports below resolve).
# NOTE(review): `%cd ..` is relative, so re-running this cell moves up one more level each time.
%cd ..
# Enable autoreload in mode 2 so edited modules are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pyrender
from scipy import optimize

from face_reconstruction.data.biwi import BiwiDataLoader
from face_reconstruction.data.iphone import IPhoneDataLoader
from face_reconstruction.graphics import draw_pixels_to_image, register_rgb_depth, backproject_points, interpolate_around, get_perspective_camera, setup_standard_scene, backproject_image
from face_reconstruction.landmarks import load_bfm_landmarks, detect_landmarks
from face_reconstruction.model import BaselFaceModel
from face_reconstruction.optim import BFMOptimization
from face_reconstruction.utils.math import geometric_median

# 1. Face Model

In [None]:
# Load the Basel Face Model and its landmark annotations.
bfm = BaselFaceModel.from_h5("model2019_face12.h5")
bfm_landmarks = load_bfm_landmarks("model2019_face12_landmarks_v2")
# Keep only the mapping's values as an array. They are used further down to index
# `face_mesh.vertices`, i.e. presumably one BFM vertex index per facial landmark.
bfm_landmark_indices = np.array(list(bfm_landmarks.values()))

In [None]:
# Coefficient counts reported by the model for each parameter group.
# They size the zero-initialised coefficient lists used for optimization and rendering below.
n_shape_coefficients = bfm.get_n_shape_coefficients()
n_expression_coefficients = bfm.get_n_expression_coefficients()
n_color_coefficients = bfm.get_n_color_coefficients()

# 2. Input RGB-D Image 

In [None]:
run_id = 1    # Recording passed to the BIWI loader (unused for the iPhone loader)
frame_id = 4  # Frame to reconstruct

# Select the data source with a flag instead of commenting code in and out.
# Later cells branch on the loader type, so this is the only place to switch datasets.
use_biwi = False
loader = BiwiDataLoader(run_id) if use_biwi else IPhoneDataLoader()
frame = loader.get_frame(frame_id)

In [None]:
# Image dimensions as reported by the loader; used for the depth mask, the camera and the viewers.
img_width = loader.get_image_width()
img_height = loader.get_image_height()

In [None]:
# Color image of the selected frame. It is also the input for landmark detection
# and the background for the final overlay.
img = frame.get_color_image()
plt.imshow(img)
plt.show()

In [None]:
# Depth image of the same frame. Later cells treat a value of 0 as "no depth available".
depth_img = frame.get_depth_image()
plt.imshow(depth_img)
plt.colorbar()  # Shows the depth value range
plt.show()

## 2.1 Depth and RGB channels are not aligned (only BIWI)

In [None]:
# Black out every color pixel whose depth value is 0 to visualise how well both channels overlap.
# Work on a copy: masking the array returned by get_color_image() in place could also
# blank those pixels in `img` if the loader hands out the same buffer on every call.
masked_color_image = np.array(frame.get_color_image())
masked_color_image[frame.get_depth_image() == 0] = 0
plt.imshow(masked_color_image)
plt.show()

## 2.2 Align Depth and RGB channels (Registration, only BIWI)

In [None]:
# Registration only applies to BIWI recordings (see section title). The iPhone path
# builds its point cloud directly from the depth image in section 3.
# Uses `loader` (the original referenced an undefined `biwi_loader`).
if isinstance(loader, BiwiDataLoader):
    points, colors, screen_positions = register_rgb_depth(
        frame.get_depth_image(),
        frame.get_color_image(),
        loader.get_depth_intrinsics(),
        loader.get_rgb_intrinsics(),
        loader.get_rgb_extrinsics(),
    )

In [None]:
# `screen_positions` only exists after BIWI registration, so skip this step for other loaders.
if isinstance(loader, BiwiDataLoader):
    # Start from an all-zero mask and let draw_pixels_to_image mark the registered pixel
    # positions (presumably with non-zero values; the next cell treats 0 as "no depth").
    img_depth_mask = np.zeros((img_height, img_width))
    draw_pixels_to_image(img_depth_mask, screen_positions)

In [None]:
# The registered depth mask is only built for BIWI recordings.
if isinstance(loader, BiwiDataLoader):
    # Copy before masking so the frame's color image (and `img`) cannot be altered in place.
    masked_color_image = np.array(frame.get_color_image())
    masked_color_image[img_depth_mask == 0] = 0
    plt.imshow(masked_color_image)
    plt.show()

# 3. Render Point Cloud

In [None]:
if isinstance(loader, IPhoneDataLoader):
    depth_threshold = 0.5  # Drop all points behind that threshold

    intrinsics = frame.get_intrinsics()
    points = backproject_image(intrinsics, depth_img)
    # Take a copy of the first three columns. A plain slice is a view, so the flip and
    # scaling below would silently modify `points` as well.
    points_to_render = np.array(points[:, :3])
    points_to_render[:, 2] = -points_to_render[:, 2]  # Invert z-coordinate
    points_to_render *= 1000  # meter to millimeter
    # Just flatten color image.
    # NOTE(review): assumes backproject_image returns one point per pixel in the same
    # row-major order as this reshape - confirm.
    colors = img.reshape(-1, 3)

    # Keep only points (and their colors) closer than the depth threshold
    foreground_mask = depth_img.reshape(-1) < depth_threshold
    points_to_render = points_to_render[foreground_mask]
    colors = colors[foreground_mask]
else:
    # BIWI: `points` and `colors` come from the registration step above
    intrinsics = loader.get_rgb_intrinsics()
    points_to_render = np.array(points)  # Copy, `points` is still needed for the depth lookup image
    points_to_render[:, 2] = -points_to_render[:, 2]  # Invert z-coordinate for easier rendering (point cloud will be right in front of camera)

In [None]:
# Build a camera from the intrinsics and image size, and set up the project's standard scene around it.
perspective_camera = get_perspective_camera(intrinsics, img_width, img_height)
scene = setup_standard_scene(perspective_camera)
# Add the colored point cloud
scene.add(pyrender.Mesh.from_points(points_to_render, colors=colors))

In [None]:
# Open the pyrender viewer on the point cloud scene, with the viewport sized like the input image.
# NOTE(review): this presumably needs a display and blocks until the window is closed - confirm for headless runs.
pyrender.Viewer(scene, use_raymond_lighting=True, viewport_size=(img_width, img_height))

# 4. Detect 3D Landmarks

In [None]:
# Detect facial landmarks in the color image. The result is iterated as pixel positions below.
landmarks_img = detect_landmarks(img)

In [None]:
if not isinstance(loader, IPhoneDataLoader):
    # Rasterise the registered points into a depth image so that depth values can be
    # looked up by color-pixel position. Pixels without a registered point stay 0.
    rgb_depth_img = np.zeros((img_height, img_width))
    for registered_point, pixel_xy in zip(points, screen_positions):
        pixel_x, pixel_y = pixel_xy[0], pixel_xy[1]
        rgb_depth_img[pixel_y, pixel_x] = registered_point[2]
else:
    # The iPhone depth image is used as-is for the lookup
    rgb_depth_img = depth_img

In [None]:
# Because the RGB and depth channels are not aligned, a color pixel may have no exact depth value.
# Hence, the depth of each landmark is interpolated from its neighbourhood in the depth image.
# NOTE(review): `interpolation_size` presumably is the neighbourhood radius in pixels - confirm in interpolate_around.
interpolation_size = 1
rgb_depth_values = [interpolate_around(rgb_depth_img, pixel, interpolation_size) for pixel in landmarks_img]

In [None]:
# Lift the 2D landmarks to 3D using their interpolated depth values
landmark_points_3d = backproject_points(intrinsics, rgb_depth_values, landmarks_img)

# Separate copy for rendering/optimization: z is negated so the landmarks end up right in
# front of the camera, matching the rendered point cloud.
landmark_points_3d_render = np.array(landmark_points_3d)
landmark_points_3d_render[:, 2] = np.negative(landmark_points_3d_render[:, 2])
if isinstance(loader, IPhoneDataLoader):
    # meter to millimeter
    landmark_points_3d_render *= 1000

In [None]:
# Geometric median of the landmark cloud, used as the reference point for outlier rejection.
landmark_points_3d_median = geometric_median(landmark_points_3d_render)
# Euclidean distance of every landmark to that median (one value per landmark)
distances_from_median = np.linalg.norm(landmark_points_3d_render - landmark_points_3d_median, axis=1)

In [None]:
# Bad depth information can place a back-projected landmark far away from all the others.
# Such landmarks are ignored. The threshold is in the units of `landmark_points_3d_render`
# (millimeters on the iPhone path).
threshold_landmark_deviation = 500

landmark_has_depth = np.array(rgb_depth_values) != 0
landmark_near_median = distances_from_median < threshold_landmark_deviation
# Indices of the landmarks that pass both checks
valid_landmark_points_3d = np.where(landmark_has_depth & landmark_near_median)[0]

In [None]:
# Compare against the number of detected landmarks instead of assuming exactly 68.
# Note: the count also includes landmarks rejected for lying too far from the median.
pixels_without_depth = len(rgb_depth_values) - len(valid_landmark_points_3d)
if pixels_without_depth > 0:
    print(f"There are {pixels_without_depth} pixels without depth information.")

In [None]:
# Fresh scene with the point cloud plus the valid 3D landmarks in red
scene = setup_standard_scene(perspective_camera)
scene.add(pyrender.Mesh.from_points(points_to_render, colors=colors))

# Size the color list from the array that is actually rendered. The original indexed
# `landmark_points_3d`, which is not guaranteed to support index-array selection.
valid_landmarks_render = landmark_points_3d_render[valid_landmark_points_3d]
scene.add(pyrender.Mesh.from_points(valid_landmarks_render, colors=[[255, 0, 0] for _ in range(len(valid_landmarks_render))]))

In [None]:
# Inspect the point cloud together with the detected 3D landmarks in the pyrender viewer.
pyrender.Viewer(scene, use_raymond_lighting=True, viewport_size=(img_width, img_height))

# 5. Optimization

In [None]:
# Number of shape / expression parameters and the weights passed to BFMOptimization below.
# NOTE(review): the trailing comments look like alternative settings that were tried - confirm.
n_params_shape = 3 # 20
n_params_expression = 3 # 10
weight_shape_params = 100 # 10000
weight_expression_params = 100 # 1000

In [None]:
# Configure the optimizer with the chosen parameter counts and weights
sparse_optimization = BFMOptimization(bfm, n_params_shape, n_params_expression, weight_shape_params=weight_shape_params, weight_expression_params=weight_expression_params)
# Sparse 3D loss built from the valid landmarks only: their BFM landmark indices are
# paired with the corresponding back-projected 3D landmark positions.
loss = sparse_optimization.create_sparse_loss_3d(bfm_landmark_indices[valid_landmark_points_3d], landmark_points_3d_render[valid_landmark_points_3d])

In [None]:
# 4x4 pose matrix: identity rotation with a translation of -500 along z.
initial_camera_pose = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, -500], [0, 0, 0, 1]]) # position camera just in front of face

In [None]:
# Start from all-zero shape and expression coefficients and the initial pose.
# NOTE(review): the lists have the model's full coefficient counts, while the optimizer was
# configured with n_params_shape / n_params_expression. Presumably create_parameters
# truncates them - confirm.
initial_params = sparse_optimization.create_parameters(
    [0] * n_shape_coefficients,
    [0] * n_expression_coefficients,
    initial_camera_pose,
)

In [None]:
# This typically takes 20 seconds
# NOTE(review): max_nfev / verbose / x_scale look like scipy.optimize.least_squares options
# that are presumably forwarded to the solver - confirm in BFMOptimization.
sparse_context = sparse_optimization.create_optimization_context(loss, initial_params, max_nfev=100, verbose=2, x_scale='jac')
result = sparse_context.run_optimization()

# 6. Render

In [None]:
# Convert the solver's flat solution vector back into structured parameters
# (shape coefficients, expression coefficients and camera pose are read from it below).
params = sparse_context.create_parameters_from_theta(result.x)

In [None]:
# Build the face mesh from the optimized shape and expression coefficients.
# Color is not optimized here, so all color coefficients are set to zero.
face_mesh = bfm.draw_sample(
    shape_coefficients=params.shape_coefficients,
    expression_coefficients=params.expression_coefficients,
    color_coefficients=[0] * n_color_coefficients,
)

In [None]:
# Mesh vertices of the fitted face at the landmark indices that were used in the loss
bfm_landmark_vertices = np.array(face_mesh.vertices)[bfm_landmark_indices[valid_landmark_points_3d]]

In [None]:
def setup_scene(show_landmarks=True, show_pointcloud=True, show_mask=True):
    """Assemble a pyrender scene visualising the reconstruction result.

    Reads the notebook globals `perspective_camera`, `points_to_render`, `colors`, `bfm`,
    `face_mesh`, `params`, `landmark_points_3d_render`, `valid_landmark_points_3d` and
    `bfm_landmark_vertices`, so the cells defining them must have been run first.

    Args:
        show_landmarks: Add the valid input landmarks (red) and the corresponding
            landmark vertices of the fitted face (green).
        show_pointcloud: Add the colored input point cloud.
        show_mask: Add the fitted face mesh.

    Returns:
        The scene created by `setup_standard_scene` with the requested content added.
    """
    scene = setup_standard_scene(perspective_camera)
    if show_pointcloud:
        scene.add(pyrender.Mesh.from_points(points_to_render, colors=colors))
    if show_mask:
        # The mesh is placed with the optimized pose
        scene.add(pyrender.Mesh.from_trimesh(bfm.convert_to_trimesh(face_mesh)), pose=params.camera_pose)
    if show_landmarks:
        # Size the color list from the array that is actually rendered. The original
        # indexed `landmark_points_3d`, which may not support index-array selection.
        target_landmarks = landmark_points_3d_render[valid_landmark_points_3d]
        scene.add(pyrender.Mesh.from_points(target_landmarks, colors=[[255, 0, 0] for _ in range(len(target_landmarks))]))
        # Model landmarks get the same pose as the face mesh
        scene.add(pyrender.Mesh.from_points(bfm_landmark_vertices, colors=[[0, 255, 0] for _ in range(len(bfm_landmark_vertices))]), pose=params.camera_pose)
    return scene

## 6.1. Render Interactive 3d

In [None]:
# Full scene for interactive inspection: point cloud, fitted face mesh and both landmark sets
scene = setup_scene(show_landmarks=True, show_pointcloud=True, show_mask=True)

In [None]:
# Open the pyrender viewer on the reconstruction result, with the viewport sized like the input image.
pyrender.Viewer(scene, use_raymond_lighting=True, viewport_size=(img_width, img_height))

## 6.2. Render onto Input Image

In [None]:
# Only the fitted face mesh, so that it can be rendered on top of the input image
scene = setup_scene(show_landmarks=False, show_pointcloud=False, show_mask=True)

In [None]:
# Render the scene off-screen at the resolution of the input image.
r = pyrender.OffscreenRenderer(img_width, img_height)
try:
    color, depth = r.render(scene)
finally:
    # Always release the renderer, even if rendering fails
    r.delete()

In [None]:
# Pixels where the renderer produced geometry (non-zero depth)
rendered_pixels = depth != 0

# Paste the rendered face over a copy of the input image; everything else stays untouched
img_with_mask = np.array(img)
img_with_mask[rendered_pixels] = color[rendered_pixels]
plt.imshow(img_with_mask)
plt.show()