This notebook implements facial landmark detection, lip segmentation, and lighting estimation for lipstick rendering.

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
import json
import os

## Step 1: Load CelebAMask-HQ Image Paths

In [14]:
def load_celeba_data(image_dir = r"CelebAMask-HQ\CelebAMask-HQ\CelebA-HQ-img"):
    """Collect the paths of the ``.jpg`` files stored directly in ``image_dir``.

    The default is written as a raw string: ``"\\C"`` is not a valid escape
    sequence, so the plain literal only produced the intended path by accident
    and triggers a warning on recent Python versions. The value is unchanged.

    Args:
        image_dir: Directory to scan. The default is a Windows-style relative
            path (backslash separators); pass an explicit directory on other
            platforms.

    Returns:
        A sorted list of ``os.path.join(image_dir, name)`` strings, one per
        directory entry whose name ends with ``.jpg``. The list is empty when
        ``image_dir`` does not exist.
    """
    image_files = []
    dir_exists = os.path.exists(image_dir)
    print("path ",image_dir + " exists ",dir_exists)
    if dir_exists:
        # os.listdir() returns entries in arbitrary order; sort so that repeated
        # runs process the images in the same sequence.
        image_files = sorted(
            os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.jpg')
        )
    return image_files


# Quick check of the loader with its default directory; the printed line reports
# whether that path exists. `files` is not referenced by the later cells shown in
# this notebook (the main pipeline calls load_celeba_data() again).
files = load_celeba_data()

path  CelebAMask-HQ\CelebAMask-HQ\CelebA-HQ-img exists  True


## Step 2: Image Processing Functions

In [15]:
def preprocess_image(image_path):
    """Load an image from disk as a 512x512 RGB array.

    The image is resized to exactly 512x512 without preserving its aspect
    ratio.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image converted from OpenCV's BGR channel order to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path`` (missing file,
            unreadable file, or data that is not a decodable image).
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; stop here
    # with the offending path rather than with an opaque error from cv2.resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_resized = cv2.resize(image, (512, 512))
    return cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

def lip_segmentation(image_rgb):
    """Segment the lips of the first detected face with MediaPipe FaceMesh.

    Args:
        image_rgb: RGB image array; its first two dimensions are used as
            (height, width) to scale the normalized landmark coordinates.

    Returns:
        A ``(lip_mask, mask)`` tuple. ``mask`` is a uint8 array of shape
        (height, width) holding 255 inside the filled lip polygon and 0
        elsewhere. ``lip_mask`` is ``image_rgb`` with every pixel outside
        ``mask`` zeroed. Returns ``(None, None)`` when no face is detected.
    """
    # Four landmark runs concatenated into a single polygon: two from 61 to 291
    # and two from 78 to 308 (presumably the outer and inner lip contours --
    # confirm against the FaceMesh landmark map).
    lip_indices = [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375,
        291, 61, 185, 40, 39, 37, 0, 267, 269, 270,
        409, 291, 78, 95, 88, 178, 87, 14, 317, 402,
        318, 324, 308, 78, 191, 80, 81, 82, 13, 312,
        311, 310, 415, 308
    ]

    # Use the context manager so the FaceMesh instance is closed after this
    # call; creating one per call without closing it leaks its resources.
    with mp.solutions.face_mesh.FaceMesh(
            static_image_mode=True, max_num_faces=1, refine_landmarks=True) as face_mesh:
        results = face_mesh.process(image_rgb)

    if not results.multi_face_landmarks:
        return None, None

    face_landmarks = results.multi_face_landmarks[0]
    height, width = image_rgb.shape[:2]

    # Landmark coordinates are normalized; scale them to integer pixel positions.
    points = np.array(
        [
            (int(face_landmarks.landmark[i].x * width),
             int(face_landmarks.landmark[i].y * height))
            for i in lip_indices
        ],
        dtype=np.int32,
    )

    mask = np.zeros((height, width), dtype=np.uint8)
    cv2.fillPoly(mask, [points], 255)
    lip_mask = cv2.bitwise_and(image_rgb, image_rgb, mask=mask)
    return lip_mask, mask

## Step 3: Lighting Estimation

In [22]:
import torch

def evaluate_sh_basis_gpu(theta, phi):
    """Evaluate the nine real spherical-harmonic basis functions of bands 0-2.

    With x = sin(theta)cos(phi), y = sin(theta)sin(phi) and z = cos(theta), the
    columns are, in order: constant, x, y, z, xy, xz, 3z^2 - 1, yz, x^2 - y^2,
    each scaled by its normalization constant.

    Args:
        theta: 1-D tensor of polar angles in radians.
        phi: 1-D tensor of azimuthal angles in radians, same length as
            ``theta``.

    Returns:
        Tensor of shape (len(theta), 9) in the default floating dtype, on the
        same device as ``theta``.
    """
    sin_theta = torch.sin(theta)
    cos_theta = torch.cos(theta)
    sin_phi = torch.sin(phi)
    cos_phi = torch.cos(phi)

    sh_basis = torch.zeros((theta.shape[0], 9), device=theta.device)

    # Band 0
    sh_basis[:, 0] = 0.282095 * torch.ones_like(theta)
    # Band 1: x, y, z
    sh_basis[:, 1] = 0.488603 * sin_theta * cos_phi
    sh_basis[:, 2] = 0.488603 * sin_theta * sin_phi
    sh_basis[:, 3] = 0.488603 * cos_theta
    # Band 2
    # xy term: sin^2(theta) * sin(phi) * cos(phi). It previously used
    # cos(2*phi), which made this column a multiple of column 8 and left the
    # basis rank-deficient for the least-squares fit.
    sh_basis[:, 4] = 1.092548 * sin_theta**2 * sin_phi * cos_phi
    sh_basis[:, 5] = 1.092548 * sin_theta * cos_theta * cos_phi
    sh_basis[:, 6] = 0.315392 * (3*cos_theta**2 - 1)
    sh_basis[:, 7] = 1.092548 * sin_theta * cos_theta * sin_phi
    # x^2 - y^2 term
    sh_basis[:, 8] = 0.546274 * sin_theta**2 * torch.cos(2*phi)

    return sh_basis

In [23]:
import torch
import numpy as np

def estimate_lighting_gpu(image_rgb, face_landmarks, device='cuda'):
    """Fit spherical-harmonic lighting coefficients to the pixels of a face.

    The face region is the convex hull of the landmark pixel positions. Each
    pixel inside it is assigned a pseudo-direction from its image position
    (theta = arccos of the y coordinate scaled to [-1, 1], phi = atan2 of the
    x coordinate scaled to [-1, 1] and 1). The nine-term SH basis evaluated at
    those angles is then fitted to the pixel colors by least squares.

    Args:
        image_rgb: NumPy image array indexed as [row, column, channel].
        face_landmarks: Object whose ``landmark`` sequence holds entries with
            normalized ``x`` and ``y`` attributes (e.g. a MediaPipe FaceMesh
            result).
        device: Torch device to compute on. When a CUDA device is requested
            but CUDA is unavailable, the CPU is used instead.

    Returns:
        float32 NumPy array of shape (9, channels) with one row of coefficients
        per SH basis function.

    Raises:
        ValueError: If the landmark hull covers no pixel of the image.
    """
    # Allow the notebook to run on machines without a GPU.
    if str(device).startswith('cuda') and not torch.cuda.is_available():
        device = 'cpu'

    image_tensor = torch.from_numpy(image_rgb).float().to(device)
    image_h, image_w = image_rgb.shape[:2]

    # Landmark coordinates are normalized; convert them to integer pixels.
    points = np.array(
        [[landmark.x * image_w, landmark.y * image_h]
         for landmark in face_landmarks.landmark],
        dtype=np.float32,
    ).astype(np.int32)

    # fillConvexPoly expects the ordered vertices of a convex polygon. The raw
    # landmarks come in mesh-index order, so fill their convex hull instead.
    hull = cv2.convexHull(points)
    mask = np.zeros((image_h, image_w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, hull, 1)
    face_mask = torch.from_numpy(mask).to(device)

    # Sample the colors of every pixel inside the face region.
    y_coords, x_coords = torch.where(face_mask > 0)
    if y_coords.numel() == 0:
        raise ValueError("Face landmarks do not cover any pixel of the image")
    colors = image_tensor[y_coords, x_coords].float()

    # Map pixel positions to [-1, 1] and derive the pseudo-angles from them.
    x_norm = x_coords.float() / image_w * 2 - 1
    y_norm = y_coords.float() / image_h * 2 - 1
    theta = torch.arccos(torch.clamp(y_norm, -1, 1))
    phi = torch.arctan2(x_norm, torch.ones_like(x_norm))

    sh_basis = evaluate_sh_basis_gpu(theta, phi)
    assert sh_basis.dtype == colors.dtype, f"Dtype mismatch: sh_basis {sh_basis.dtype} vs colors {colors.dtype}"

    # Solve the least-squares fit through its 9x9 normal equations in float64.
    # The only lstsq driver available on CUDA assumes a full-rank matrix, so the
    # small system is solved on the CPU with a rank-revealing driver instead.
    basis64 = sh_basis.double()
    gram = (basis64.T @ basis64).cpu()
    rhs = (basis64.T @ colors.double()).cpu()
    sh_coefficients = torch.linalg.lstsq(gram, rhs, driver='gelsd').solution

    return sh_coefficients.float().numpy()



## Step 4: Physically-Based Lipstick Rendering

In [None]:
from OpenGL.GL import *
from OpenGL.GL.shaders import compileShader, compileProgram
import glfw

# Load and compile GLSL shader from file
def load_shader(shader_path, shader_type):
    """Read GLSL source from ``shader_path`` and compile it.

    Args:
        shader_path: Path of the text file containing the shader source.
        shader_type: Shader stage constant passed through to ``compileShader``.

    Returns:
        The value returned by ``compileShader`` for the file's contents.
    """
    with open(shader_path, 'r') as source_file:
        glsl_source = source_file.read()
    return compileShader(glsl_source, shader_type)

# Compile shaders into OpenGL program
def create_shader_program(vertex_shader_path, fragment_shader_path):
    """Compile a vertex and a fragment shader file and link them into a program.

    Args:
        vertex_shader_path: Path of the vertex shader source file.
        fragment_shader_path: Path of the fragment shader source file.

    Returns:
        The value returned by ``compileProgram`` for the two compiled shaders.
    """
    stages = (
        (vertex_shader_path, GL_VERTEX_SHADER),
        (fragment_shader_path, GL_FRAGMENT_SHADER),
    )
    # The vertex stage is compiled first, then the fragment stage.
    compiled_stages = [load_shader(path, stage) for path, stage in stages]
    return compileProgram(*compiled_stages)

# Shader file paths, relative to the current working directory. They are read as
# module-level globals by render_lip_texture().
vertex_shader_path = os.path.join('shaders', 'vertex_shader.glsl')
fragment_shader_path = os.path.join('shaders', 'fragment_shader.glsl')

# Integrate with the Main Pipeline
def render_lip_texture(image_rgb, sh_coefficients, base_color, specular_color, glossiness):
    """Run the lip shader off-screen and read back a 512x512 RGB image.

    Args:
        image_rgb: Source image. Currently unused by this function.
        sh_coefficients: Indexable with 0..8; each entry is uploaded as a
            3-component vector to the ``shCoefficients[i]`` uniform.
        base_color: 3-component value for the ``baseColor`` uniform.
        specular_color: 3-component value for the ``specularColor`` uniform.
        glossiness: Float value for the ``glossiness`` uniform.

    Returns:
        uint8 NumPy array of shape (512, 512, 3) read from the framebuffer.

    Raises:
        Exception: If GLFW cannot be initialized.
        RuntimeError: If the window cannot be created or the framebuffer is
            incomplete.
    """
    if not glfw.init():
        raise Exception("GLFW could not be initialized")

    context_ready = False
    framebuffer = None
    texture = None
    try:
        # Keep the window invisible: it only exists to provide a GL context.
        glfw.window_hint(glfw.VISIBLE, glfw.FALSE)
        window = glfw.create_window(512, 512, "Lip Renderer", None, None)
        if not window:
            raise RuntimeError("GLFW window could not be created")
        glfw.make_context_current(window)
        context_ready = True

        # Compile and link shaders
        shader_program = create_shader_program(vertex_shader_path, fragment_shader_path)
        glUseProgram(shader_program)

        # Per-coefficient uniforms
        for i in range(9):
            glUniform3fv(glGetUniformLocation(shader_program, f"shCoefficients[{i}]"), 1, sh_coefficients[i])
        # Material and camera uniforms only need to be uploaded once.
        glUniform3fv(glGetUniformLocation(shader_program, "baseColor"), 1, base_color)
        glUniform3fv(glGetUniformLocation(shader_program, "specularColor"), 1, specular_color)
        glUniform1f(glGetUniformLocation(shader_program, "glossiness"), glossiness)
        glUniform3f(glGetUniformLocation(shader_program, "viewPos"), 0.0, 0.0, 3.0)

        # Render target: a 512x512 RGB texture attached to an off-screen framebuffer.
        texture = glGenTextures(1)
        glBindTexture(GL_TEXTURE_2D, texture)
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_BYTE, None)

        framebuffer = glGenFramebuffers(1)
        # The framebuffer must be bound BEFORE the texture is attached;
        # otherwise the attachment targets the default framebuffer.
        glBindFramebuffer(GL_FRAMEBUFFER, framebuffer)
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0)
        if glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE:
            raise RuntimeError("Off-screen framebuffer is incomplete")

        glViewport(0, 0, 512, 512)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

        # NOTE(review): no geometry is drawn between the clear and the read-back,
        # so the result only contains the clear color. A draw call matching the
        # vertex shader's inputs (not visible here) is still needed.
        # NOTE(review): glReadPixels returns rows starting at the bottom of the
        # framebuffer; confirm whether the result needs a vertical flip.
        result = glReadPixels(0, 0, 512, 512, GL_RGB, GL_UNSIGNED_BYTE)
        result_image = np.frombuffer(result, dtype=np.uint8).reshape(512, 512, 3)
        return result_image
    finally:
        # Release GL objects while the context still exists, then shut GLFW down
        # even when an error occurred above.
        if context_ready:
            glBindFramebuffer(GL_FRAMEBUFFER, 0)
            if framebuffer is not None:
                glDeleteFramebuffers(1, [framebuffer])
            if texture is not None:
                glDeleteTextures([texture])
        glfw.terminate()

## Main Processing Pipeline

In [24]:
import os

# Collect the input image paths.
# Raw string: "\C" is not a valid escape sequence, so the plain literal only
# produced this path by accident. The value itself is unchanged.
# metadata_path = 'celeba-dataset-metadata.json'
image_files = load_celeba_data(image_dir=r"CelebAMask-HQ\CelebAMask-HQ\CelebA-HQ-img") # change this on different devices

# Create output directory if it doesn't exist
output_dir = 'lip-segmentation-outputs'
os.makedirs(output_dir, exist_ok=True)

# Lip BRDF constants (change these to alter the rendered lipstick). They do not
# depend on the image, so they are defined once instead of on every iteration.
base_color = np.array([1.0, 0.3, 0.4], dtype=np.float32)  # Lip base color
specular_color = np.array([1.0, 1.0, 1.0], dtype=np.float32)
glossiness = 32.0

# Process each image. A single FaceMesh instance is reused for the whole run:
# constructing one per image is costly, and static_image_mode=True treats every
# input as an independent image.
mp_face_mesh = mp.solutions.face_mesh
with mp_face_mesh.FaceMesh(
    static_image_mode=True, max_num_faces=1, refine_landmarks=True) as face_mesh:
    for image_path in image_files:
        # Get base filename without extension
        base_name = os.path.splitext(os.path.basename(image_path))[0]

        image_rgb = preprocess_image(image_path)
        lip_mask, lip_binary_mask = lip_segmentation(image_rgb)
        results = face_mesh.process(image_rgb)

        # lip_segmentation returns (None, None) when it finds no face; skip the
        # image in that case too, so the save step never receives None.
        if lip_mask is None or not results.multi_face_landmarks:
            print(f'No face detected in {image_path}')
            continue

        face_landmarks = results.multi_face_landmarks[0]
        sh_coefficients = estimate_lighting_gpu(image_rgb, face_landmarks)

        lip_texture_img = render_lip_texture(image_rgb, sh_coefficients, base_color, specular_color, glossiness)
        # Clear the figure first so images from earlier iterations do not pile
        # up in memory; only the most recent render stays displayed.
        plt.clf()
        plt.imshow(lip_texture_img)

        # Save outputs
        output_path = os.path.join(output_dir, f'{base_name}')
        cv2.imwrite(f'{output_path}_lip_mask.png', cv2.cvtColor(lip_mask, cv2.COLOR_RGB2BGR))
        cv2.imwrite(f'{output_path}_binary_mask.png', lip_binary_mask)
        np.save(f'{output_path}_lighting.npy', sh_coefficients)

        print(f'Processed and saved outputs for {base_name}')

path  CelebAMask-HQ\CelebAMask-HQ\CelebA-HQ-img exists  True
Processed and saved outputs for 0
Processed and saved outputs for 1
Processed and saved outputs for 10
Processed and saved outputs for 100
Processed and saved outputs for 1000
Processed and saved outputs for 10000
Processed and saved outputs for 10001
Processed and saved outputs for 10002
Processed and saved outputs for 10003
Processed and saved outputs for 10004
Processed and saved outputs for 10005
Processed and saved outputs for 10006
Processed and saved outputs for 10007
Processed and saved outputs for 10008
Processed and saved outputs for 10009
Processed and saved outputs for 1001
Processed and saved outputs for 10010
Processed and saved outputs for 10011
Processed and saved outputs for 10012
Processed and saved outputs for 10013
Processed and saved outputs for 10014
Processed and saved outputs for 10015
Processed and saved outputs for 10016
Processed and saved outputs for 10017
Processed and saved outputs for 10018
Proc

KeyboardInterrupt: 

# Next step: parallelize the processing with DataLoader-style multiprocessing, perhaps by applying the preprocessing as an image transform.