In [1]:
import scipy
import numpy as np
import cv2 as cv
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import threading, json, os, io


In [2]:
@staticmethod
def get_homography(X, W):
    assert(x.shape == w.shape)

    u = W[0,:]
    v = W[1,:]

    x = X[0,:]
    y = X[1,:]

    I = u.shape[0]

    A = np.zeros((2*I, 9))
    j = 0
    for i in range(0,I):
        A[j,:]  = [0, 0, 0, -u[i], -v[i], -1, y[i]*u[i], y[i]*v[i], y[i]]
        A[j+1,:]  = [u[i], v[i], 1, 0, 0, 0, -x[i]*u[i], -x[i]*v[i], -x[i]]
        j = j + 2

    U, D, V = np.linalg.svd(A)

    phi_h = V.T[:,-1]

    phi_hr = scipy.optimize.minimize(obj_funct,x0=phi_h, args=(x, y, u, v))

    phi = np.reshape(phi_hr.x, (3,3))
    
    return phi


@staticmethod
def obj_funct(phi, x, y, u, v):

    I = x.shape[0]

    sum_squares = 0.0

    for i in range(0,I):
        # Denominator is common to both x and y produced by model.
        d = phi[6] * u[i] + phi[7] * v[i] + phi[8]

        # Numerator of x from model
        n1 = phi[0] * u[i] + phi[1] * v[i] + phi[2]
        x_model = n1 / d

        # Numerator of y from model
        n2 = phi[3] * u[i] + phi[4] * v[i] + phi[5]
        y_model = n2 / d

        # Squared norm
        squared_norm = ( x[i] - x_model )**2 +  ( y[i] - y_model )**2

        # Sum of squared norms
        sum_squares = sum_squares + squared_norm

    return sum_squares


@staticmethod
def get_pose_hom(lam, X, W):
    hom = get_homography(X, W)
    lam_inv = np.linalg.inv(lam)

    hom_ext = np.dot(lam_inv, hom)

    U, L, V = np.linalg.svd(hom_ext[:, 0:2])
    new_L = np.array([[1, 0], [0, 1], [0, 0]])
    rotation = U @ new_L @ V

    r_column = np.cross(rotation[:, 0], rotation[:, 1])
    rotation = np.c_[rotation, r_column]

    if np.linalg.det(rotation) < 0:
        rotation[:, 2] *= -1

    scale = np.sum(hom_ext[:, 0:2] / rotation[:, 0:2]) / 6

    translation = hom_ext[:, 2] / scale
    
    return rotation, translation

@staticmethod
def get_pose_cv(lam, X, W):
    # Image is undistorted so dist can be zeros
    dist = np.zeros(4)

    _, rvec, translation = cv.solvePnP(W, X, lam, dist, flags=cv.SOLVEPNP_ITERATIVE)

    rotation, _ = cv.Rodrigues(rvec)
    
    return rotation, translation

@staticmethod
def read_json(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)  # Load the JSON data

    # Convert the specific keys to NumPy arrays
    lam = np.array(data['lambda'])  # Convert the 'mtx' list to a NumPy array
    dist = np.array(data['distortion'])  # Convert the 'dist' list to a NumPy array

    return lam, dist  # Return both arrays



class Clicks:
    @staticmethod
    def capture(points_list):
        return np.array(points_list, dtype=np.float32) if points_list else np.array([]).reshape(0, 2)
    
    @staticmethod
    def draw_numbered(image, points, radius=8):
        if isinstance(image, np.ndarray): image = Image.fromarray(image)
        img = image.copy()
        draw = ImageDraw.Draw(img)
        try: font = ImageFont.truetype("arial.ttf", 16)
        except: font = ImageFont.load_default()
        for i, (x, y) in enumerate(points):
            draw.ellipse([x-radius, y-radius, x+radius, y+radius], outline=(255, 0, 0), width=2, fill=(255, 255, 255))
            draw.text((x-5, y-8), str(i+1), fill=(0, 0, 0), font=font)
        return img

In [5]:
def create_pose_estimation_demo():
    # State
    state = {'points': [], 'image': None}
    
    def on_click(evt: gr.SelectData, img):
        if img is None: 
            return img, "No image", ""
    
        x, y = evt.index
        state['points'].append([int(x), int(y)])
    
        # Set the original image if it's not already set
        if state['image'] is None:
            state['image'] = img.copy()  # Store a copy of the original image
    
        # Draw points on the original image
        updated = Clicks.draw_numbered(state['image'], state['points'])  
        return np.array(updated), f"Selected {len(state['points'])} points", json.dumps(state['points'])

    def undo_last(img):
        if state['points']: 
            state['points'].pop()  # Remove the last point
        if img is not None:
            updated = Clicks.draw_numbered(state['image'], state['points'])  # Redraw the image with the updated points
            return np.array(updated), f"Selected {len(state['points'])} points", json.dumps(state['points'])
        return img, "No points selected", "[]"

    def clear_points(img):
        state['points'].clear()  # Clear all points
        if img is not None:
            updated = Clicks.draw_numbered(state['image'], [])  # Redraw the image with no points
            return np.array(updated), "Cleared", "[]"
        return img, "No points selected", "[]"

    def estimate_poses():
        
        return
    
    with gr.Blocks(title="Pose Estimation Demo") as demo:
        gr.Markdown("# Pose Estimation from 2D-3D Correspondences")
        
        with gr.Row():
            with gr.Column(scale=2):
                img = gr.Image(label="Upload Image (click to add points)", type="numpy", interactive=True)
                with gr.Row():
                    undo_btn = gr.Button("Undo", size="sm")
                    clear_btn = gr.Button("Clear", size="sm")
                status = gr.Textbox(label="Status", interactive=False)
                points_json = gr.Textbox(label="Selected Points",value="[]", interactive=False)
        
        with gr.Row():
            estimate_btn = gr.Button("Process Points", variant="primary", size="lg")
        
        with gr.Row():
            with gr.Column():
                homo_result = gr.Textbox(label="Homography to Pose", lines=6, interactive=False)
                cv_result = gr.Textbox(label="OpenCV solvePnP", lines=6, interactive=False)
            with gr.Column():
                axes_img = gr.Image(label="Coordinate Axes Overlay", interactive=False)
                plot_3d = gr.Image(label="3D Camera Pose", interactive=False)          
        
        # Event handlers
        img.select(on_click, inputs=[img], outputs=[img, status, points_json])
        undo_btn.click(lambda img: undo_last(img), inputs=[img], outputs=[img, status, points_json])
        clear_btn.click(lambda img: clear_points(img), inputs=[img], outputs=[img, status, points_json])
        estimate_btn.click(estimate_poses, outputs=[homo_result, cv_result, axes_img, plot_3d])

    return demo

In [6]:
# Launch the demo
demo = create_pose_estimation_demo()
demo.launch(debug=False, share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://7a822a8371a8462b72.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




  image = Image.fromarray(img, 'RGB')
Traceback (most recent call last):
  File "/home/addk3/anaconda3/envs/vision/lib/python3.13/site-packages/gradio/queueing.py", line 667, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
    )
    ^
  File "/home/addk3/anaconda3/envs/vision/lib/python3.13/site-packages/gradio/route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<11 lines>...
    )
    ^
  File "/home/addk3/anaconda3/envs/vision/lib/python3.13/site-packages/gradio/blocks.py", line 2284, in process_api
    data = await self.postprocess_data(block_fn, result["prediction"], state)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/addk3/anaconda3/envs/vision/lib/python3.13/site-packages/gradio/blocks.py", line 2007, in postprocess_data
    self.validate_output