In [1]:
import os
import sys
import torch
import numpy as np
import cv2
from pathlib import Path
from PIL import Image

# Install PyTorch3D if needed
if 'pytorch3d' not in sys.modules:
    !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'

from pytorch3d.transforms.so3 import so3_exp_map, so3_relative_angle
from pytorch3d.renderer.cameras import SfMPerspectiveCameras

Collecting git+https://github.com/facebookresearch/pytorch3d.git@stable
  Cloning https://github.com/facebookresearch/pytorch3d.git (to revision stable) to /private/var/folders/wv/_x9hjmys03x5gnbfl70ry2sr0000gn/T/pip-req-build-b75njgs1
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/pytorch3d.git /private/var/folders/wv/_x9hjmys03x5gnbfl70ry2sr0000gn/T/pip-req-build-b75njgs1
  Running command git checkout -q 75ebeeaea0908c5527e7b1e305fbc7681382db47
  Resolved https://github.com/facebookresearch/pytorch3d.git to commit 75ebeeaea0908c5527e7b1e305fbc7681382db47
  Preparing metadata (setup.py) ... [?25ldone

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
def load_images(image_dir):
    """Load every ``*.jpg`` / ``*.png`` image in a directory and compute SIFT features.

    Files are processed in sorted path order so that the index of each image
    (and therefore of each camera derived from it) is the same on every run;
    raw ``glob`` order depends on the filesystem.

    Args:
        image_dir: Directory to search (non-recursively) for images.

    Returns:
        A tuple ``(images, features)`` of two equally long lists:
        ``images[k]`` is the array returned by ``cv2.imread`` and
        ``features[k]`` is the ``(keypoints, descriptors)`` pair returned by
        ``detectAndCompute`` for that image. Files that cannot be decoded are
        skipped with a warning, so both lists may be shorter than the number of
        matching files.
    """
    import warnings

    directory = Path(image_dir)
    image_paths = sorted([*directory.glob('*.jpg'), *directory.glob('*.png')])

    sift = cv2.SIFT_create()
    images = []
    features = []

    for path in image_paths:
        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; converting None to grayscale would crash.
            warnings.warn(f'Skipping unreadable image: {path}')
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        kp, des = sift.detectAndCompute(gray, None)
        images.append(img)
        features.append((kp, des))

    return images, features

In [3]:
def compute_relative_transforms(images, features, focal_length=1000.0,
                                ratio=0.7, min_matches=20):
    """Estimate pairwise relative camera poses from matched SIFT features.

    Every image pair ``(i, j)`` with ``i < j`` is matched with a brute-force
    matcher and Lowe's ratio test. Pairs with more than ``min_matches``
    surviving matches get an essential matrix and a recovered pose.

    Conventions:
        * OpenCV's ``recoverPose`` returns ``R, t`` for column vectors
          (``x_j = R @ x_i + t``). The rotation is stored transposed so that it
          can be used with PyTorch3D's row-vector convention
          (``X_j = X_i @ R + T``).
        * The translation recovered from an essential matrix is only defined
          up to scale; it is stored as returned by OpenCV.

    Args:
        images: Sequence of image arrays; only ``images[0].shape`` is read, to
            place the principal point at the image centre.
        features: Sequence of ``(keypoints, descriptors)`` pairs, one per image.
            ``descriptors`` may be ``None`` for images without keypoints.
        focal_length: Approximate focal length in pixels used for the intrinsics.
        ratio: Ratio-test threshold; a match is kept when its distance is below
            ``ratio`` times the distance of the second-best match.
        min_matches: A pair is used only if it has more than this many good matches.

    Returns:
        ``(R_relative, T_relative, edges)`` with shapes ``(E, 3, 3)``,
        ``(E, 3)`` and ``(E, 2)``, where ``E`` is the number of accepted pairs
        and ``edges[k] = [i, j]``. When no pair is accepted (including fewer
        than two images) the tensors are empty (``E == 0``) instead of raising.
    """
    n_images = len(images)
    edges = []
    R_relative = []
    T_relative = []

    if n_images >= 2:
        height, width = images[0].shape[:2]
        # Camera intrinsic matrix (approximate): principal point at the centre.
        K = np.array([
            [focal_length, 0, width / 2],
            [0, focal_length, height / 2],
            [0, 0, 1],
        ], dtype=np.float64)

        matcher = cv2.BFMatcher()

        for i in range(n_images):
            kp1, des1 = features[i]
            if des1 is None or len(des1) == 0:
                continue  # no descriptors: nothing to match against
            for j in range(i + 1, n_images):
                kp2, des2 = features[j]
                if des2 is None or len(des2) == 0:
                    continue

                # knnMatch can return fewer than k neighbours for a query
                # (e.g. when the other image has a single descriptor), so the
                # pair length is checked before applying the ratio test.
                matches = matcher.knnMatch(des1, des2, k=2)
                good_matches = [
                    pair[0] for pair in matches
                    if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
                ]
                if len(good_matches) <= min_matches:
                    continue

                pts1 = np.float32([kp1[m.queryIdx].pt for m in good_matches])
                pts2 = np.float32([kp2[m.trainIdx].pt for m in good_matches])

                E, mask = cv2.findEssentialMat(pts1, pts2, K)
                # The estimator may fail (None) or return several stacked
                # candidate matrices; only a single 3x3 solution is usable.
                if E is None or E.shape != (3, 3):
                    continue

                # Pass the RANSAC inlier mask so outlier matches do not take
                # part in the cheirality check that selects the pose.
                _, R, t, _ = cv2.recoverPose(E, pts1, pts2, K, mask=mask)

                edges.append([i, j])
                # Transpose: column-vector (OpenCV) -> row-vector (PyTorch3D).
                R_relative.append(
                    torch.from_numpy(np.ascontiguousarray(R.T, dtype=np.float32))
                )
                T_relative.append(torch.from_numpy(t[:, 0].astype(np.float32)))

    if not edges:
        # torch.stack raises on an empty list; return well-shaped empty tensors.
        return (
            torch.zeros((0, 3, 3), dtype=torch.float32),
            torch.zeros((0, 3), dtype=torch.float32),
            torch.zeros((0, 2), dtype=torch.long),
        )

    return torch.stack(R_relative), torch.stack(T_relative), torch.tensor(edges)

In [4]:
def optimize_camera_positions(R_relative, T_relative, relative_edges, n_cameras,
                              device='cpu', n_iter=1000, lr=0.01):
    """Optimize absolute camera poses so that they reproduce given relative poses.

    Each camera is parameterised by an axis-angle rotation and a translation.
    For every edge ``(i, j)`` the relative transform implied by the current
    absolute poses (inverse of camera ``i``'s world-to-view transform composed
    with camera ``j``'s) is compared with the supplied relative pose, and the
    summed rotation + translation discrepancy is minimised with Adam.

    Camera 0 is the reference frame: its parameters are multiplied by a zero
    mask on every iteration, so it stays at identity rotation / zero
    translation and receives no gradient. Cameras that appear in no edge do not
    enter the loss and therefore keep their random initial pose.

    Args:
        R_relative: ``(E, 3, 3)`` relative rotations.
        T_relative: ``(E, 3)`` relative translations.
        relative_edges: ``(E, 2)`` integer camera index pairs ``[i, j]``.
        n_cameras: Number of absolute cameras to estimate.
        device: Torch device on which to run the optimization.
        n_iter: Number of optimizer steps.
        lr: Adam learning rate.

    Returns:
        ``(R_absolute, T_absolute)`` with shapes ``(n_cameras, 3, 3)`` and
        ``(n_cameras, 3)``, detached from the graph and evaluated from the
        final parameter values.

    Raises:
        ValueError: If ``n_cameras`` is not positive, if there are no edges
            (the mean loss would be NaN), or if an edge references a camera
            index outside ``[0, n_cameras)``.
    """
    if n_cameras < 1:
        raise ValueError('n_cameras must be at least 1')

    edges = torch.as_tensor(relative_edges, dtype=torch.long, device=device)
    if edges.numel() == 0:
        raise ValueError('No relative transforms were provided; cannot optimize camera poses.')
    if int(edges.min()) < 0 or int(edges.max()) >= n_cameras:
        raise ValueError('relative_edges references a camera index outside [0, n_cameras)')

    R_target = R_relative.to(device)
    T_target = T_relative.to(device)

    log_R_absolute = torch.randn(n_cameras, 3, device=device)
    T_absolute = torch.randn(n_cameras, 3, device=device)

    # First camera at origin
    log_R_absolute[0, :] = 0.
    T_absolute[0, :] = 0.

    log_R_absolute.requires_grad = True
    T_absolute.requires_grad = True

    # Zeroing camera 0 only at initialisation is not enough: the optimizer
    # would still move it. The mask pins it (and blocks its gradient).
    camera_mask = torch.ones(n_cameras, 1, device=device)
    camera_mask[0] = 0.

    optimizer = torch.optim.Adam([log_R_absolute, T_absolute], lr=lr)

    for iteration in range(n_iter):
        optimizer.zero_grad()

        R_absolute = so3_exp_map(log_R_absolute * camera_mask)
        T_masked = T_absolute * camera_mask

        # World-to-view transforms of the two cameras at the ends of each edge.
        trans_i, trans_j = [
            SfMPerspectiveCameras(
                R=R_absolute[edges[:, end]],
                T=T_masked[edges[:, end]],
                device=device
            ).get_world_to_view_transform()
            for end in (0, 1)
        ]
        # view_i -> world -> view_j
        matrix_rel = trans_i.inverse().compose(trans_j).get_matrix()
        R_composed = matrix_rel[:, :3, :3]
        T_composed = matrix_rel[:, 3, :3]  # translation sits in the last row

        R_loss = (1. - so3_relative_angle(R_composed, R_target, cos_angle=True)).mean()
        T_loss = ((T_composed - T_target)**2).sum(1).mean()

        loss = R_loss + T_loss
        loss.backward()
        optimizer.step()

        if iteration % 100 == 0:
            print(f'Iteration {iteration}, Loss: {loss.item():.6f}')

    # Re-evaluate after the last step so rotation and translation come from
    # the same parameter state (and are defined even when n_iter == 0).
    with torch.no_grad():
        R_final = so3_exp_map(log_R_absolute * camera_mask)
        T_final = T_absolute * camera_mask

    return R_final.detach(), T_final.detach()

In [None]:
# Main execution
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The pose optimization starts from random values; seed for reproducible runs.
torch.manual_seed(0)

# Load images and their SIFT features.
# load_images returns (images, features): lists of NumPy arrays and
# (keypoints, descriptors) pairs, so there is nothing to move to `device` here.
images, features = load_images('media')

# Compute relative transforms between every sufficiently well-matched pair
R_relative, T_relative, relative_edges = compute_relative_transforms(images, features)

if len(relative_edges) == 0:
    print('No image pair produced enough feature matches; camera poses cannot be estimated.')
else:
    # Optimize camera positions
    R_absolute, T_absolute = optimize_camera_positions(
        R_relative,
        T_relative,
        relative_edges,
        n_cameras=len(images),
        device=device
    )

    # Print results
    for i in range(len(images)):
        print(f'\nCamera {i}:')
        print(f'Rotation:\n{R_absolute[i]}')
        print(f'Translation: {T_absolute[i]}')