# Mount Drive (Optional)

In [None]:
# Mount Google Drive at /content/drive (optional; only available on Colab).
from google.colab import drive
drive.mount('/content/drive')

# Clone the main repository

In [None]:
# Clone the workshop repository into ./Hands-On-SDM; the paths used in the
# cells below are relative to this folder.
!git clone https://github.com/arielz001/Hands-On-SDM.git

In [None]:
# Sanity check: print the repository path and list its contents.
!cd Hands-On-SDM && pwd && ls


### Download pretrained model

In [None]:
# gdown downloads files from Google Drive by id. The downloaded archive is
# presumably `checkpoint.zip`, which is what the next cell unzips.
!pip install gdown
!gdown "https://drive.google.com/uc?id=1VX-Kg8yrFLgTGVLdVFuPYuE9lLecLmOb"

In [None]:
# Extract the pretrained weights (the next cell expects a ./checkpoint folder).
!unzip checkpoint.zip

In [17]:
# Move the weights next to the SDM-UniPS code; main.py is later called with
# --checkpoint Hands-On-SDM/SDM-UniPS/checkpoint/.
!mv ./checkpoint ./Hands-On-SDM/SDM-UniPS/

# Now we are going to capture images with the camera, using your cellphone's flashlight as the light source

### First: Write the name of your object

In [None]:
import re

# The name is interpolated into folder paths and into `!shell` commands in the
# cells below, where whitespace or shell metacharacters would break them.
# Every run of characters other than letters, digits, "_", "." and "-" is
# therefore collapsed into a single "_".
obj_name = re.sub(r"[^\w.-]+", "_", input("Add object's name: ").strip())
if not obj_name:
    raise ValueError("The object's name must not be empty.")

### If you are using COLAB you should run this code

In [None]:
from IPython.display import display, Javascript
from google.colab import output
import base64
import PIL.Image
import io
import time
import os
import numpy as np
# JavaScript helper evaluated in the notebook output frame. `takePhoto()` shows
# a live webcam preview with a "Capture" button and resolves to the captured
# frame encoded as a JPEG data URL (quality 0.9).
js = """
async function takePhoto() {
  const div = document.createElement('div');
  const capture = document.createElement('button');
  capture.textContent = 'Capture';
  div.appendChild(capture);

  const video = document.createElement('video');
  video.style.display = 'block';
  const stream = await navigator.mediaDevices.getUserMedia({video: true});

  // Attach the widget only once camera access was granted, so a denied
  // permission does not leave an orphan button in the output.
  document.body.appendChild(div);
  div.appendChild(video);
  video.srcObject = stream;
  // Wait until playback has started; videoWidth/videoHeight are 0 before that.
  await video.play();

  // Resize the output frame so the whole preview is visible (Colab only).
  if (window.google && google.colab && google.colab.output) {
    google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
  }

  // Wait for the Capture button to be clicked.
  await new Promise((resolve) => capture.onclick = resolve);
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  canvas.getContext('2d').drawImage(video, 0, 0);
  stream.getTracks().forEach(track => track.stop());
  div.remove();
  return canvas.toDataURL('image/jpeg', 0.9);
}
"""
def take_photo():
    """Capture ten webcam photos for one view and save them as PNG files.

    Prompts for the view number, makes sure the folder
    ``Hands-On-SDM/SDM-UniPS/data/<obj_name>/<view>.data`` exists and then,
    ten times in a row, shows the in-browser capture widget and stores the
    returned frame as ``L (<k>).png`` with ``k`` running from 0 to 9.

    Relies on the notebook globals ``obj_name`` and ``js`` and on
    ``google.colab.output``, so it only works inside Colab.
    """
    vista = input("Add view's number: ")

    # os.makedirs builds the whole folder chain in one call, does nothing when
    # the folders already exist (e.g. when the cell is re-run) and, unlike
    # `!mkdir`, is not affected by spaces or shell metacharacters in the names.
    view_dir = f"Hands-On-SDM/SDM-UniPS/data/{obj_name}/{vista}.data"
    os.makedirs(view_dir, exist_ok=True)

    for light in range(10):
        display(Javascript(js))
        data = output.eval_js('takePhoto()')
        # `data` is a data URL ("data:image/jpeg;base64,<payload>"): keep the payload.
        img_data = base64.b64decode(data.split(",")[1])
        image = PIL.Image.open(io.BytesIO(img_data))

        # The former `light += 1` ran after the name was built and was
        # overwritten by the next loop iteration, so it had no effect.
        filename = f"{view_dir}/L ({light}).png"
        image.save(filename)
        print(f"Saved image as: {filename}")


# Capture one view: asks for the view number, then takes the ten photos.
take_photo()


# Generate Masks

In [None]:
!pip install transformers segmentation_refinement
!pip install git+https://github.com/facebookresearch/segment-anything.git
!pip install opencv-python pycocotools matplotlib
!pip install transformers
!pip install OpenEXR IMath

#### Now we are going to generate the masks needed to estimate the normals

In [None]:
# Download the SAM ViT-B checkpoint into the working directory; it is loaded
# below through sam_model_registry["vit_b"].
!wget -O sam_vit_b_01ec64.pth https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth

#### Generating a mask with HSV thresholding is difficult in a real environment, and the *Segment Anything* network (SAM) applied directly to the photo is not sufficient for this task.

#### So, we are going to use a different approach to generate the mask.

#### We will use the *Depth Anything* network to generate a depth map; on the depth image it is easier to generate the mask with SAM.

In [None]:
# Imports for the mask-generation cell (duplicates and the stray commented-out
# import removed).
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
from transformers import pipeline

# NOTE: SAM used to be loaded here as well, but that instance was overwritten
# by the load in the "LOAD SAM" section further down before it was ever used,
# so the redundant checkpoint load was dropped.


# --------------------------------------------------
# DEPTH ANYTHING (transformers pipeline)
# --------------------------------------------------
# Hugging Face depth-estimation pipeline; get_depth() below calls it.
DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
pipe = pipeline(task="depth-estimation", model=DEPTH_MODEL_ID)

def get_depth(image_pil):
    """Estimate depth for a PIL image using the module-level ``pipe``.

    Returns the pipeline's ``"depth"`` output (a PIL image) converted to a
    float32 NumPy array, and prints the array's shape.
    """
    prediction = pipe(image_pil)
    depth_np = np.array(prediction["depth"]).astype(np.float32)
    print("[INFO] depth map:", depth_np.shape)
    return depth_np


# --------------------------------------------------
# INPUT
# --------------------------------------------------
# Ask which captured view to segment (uses the `obj_name` entered earlier).
vista = input("Add view's number to mask: ")

# The photo with index 6 of that view is the one fed to depth estimation and SAM.
img_path = f"Hands-On-SDM/SDM-UniPS/data/{obj_name}/{vista}.data/L (6).png"
print("Files inside folder:")
!ls "Hands-On-SDM/SDM-UniPS/data/{obj_name}/{vista}.data/"

# --------------------------------------------------
# Load image
# --------------------------------------------------

# cv2.imread returns None (no exception) when the file is missing or
# unreadable, so fail here with the offending path instead of inside cvtColor.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

# --------------------------------------------------
# DEPTH
# --------------------------------------------------
depth_map = get_depth(Image.fromarray(img_rgb))

# Normalize depth to [0, 1]; a constant depth map would otherwise divide by zero.
depth_min = depth_map.min()
depth_range = depth_map.max() - depth_min
if depth_range > 0:
    depth_norm = (depth_map - depth_min) / depth_range
else:
    depth_norm = np.zeros_like(depth_map)
depth_uint8 = (depth_norm * 255).astype(np.uint8)
# SAM is run on this 3-channel version of the depth map.
depth_rgb = cv2.cvtColor(depth_uint8, cv2.COLOR_GRAY2RGB)

# --------------------------------------------------
# LOAD SAM
# --------------------------------------------------
# Load SAM and move it to the GPU when one is available. Without the explicit
# .to(device), a freshly loaded model stays on the CPU and mask generation
# runs there even on a GPU runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b_01ec64.pth")
sam.to(device)
mask_generator = SamAutomaticMaskGenerator(
    sam,
    points_per_side=30,
    pred_iou_thresh=0.88,
    stability_score_thresh=0.92,
    min_mask_region_area=10
)

# --------------------------------------------------
# RUN SAM ON DEPTH MAP
# --------------------------------------------------
# SAM segments the 3-channel depth image (depth_rgb), not the original photo.
print("[INFO] Generating SAM masks on depth map...")
masks = mask_generator.generate(depth_rgb)
print(f"[INFO] Total masks: {len(masks)}")

# --------------------------------------------------
# SHOW MASKS
# --------------------------------------------------
# Preview at most 24 candidate masks on a fixed 8x3 grid. Each panel shows the
# depth image with one mask painted green and is titled with the mask index;
# mask_dict maps that index to the 0/255 uint8 mask for the selection step.
mask_dict = {}
max_masks = min(len(masks), 24)
n_cols, n_rows = 3, 8

fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 30))
axes = axes.flatten()

for idx, (panel, candidate) in enumerate(zip(axes, masks[:max_masks])):
    mask_uint8 = candidate['segmentation'].astype(np.uint8) * 255

    overlay = depth_rgb.copy()
    overlay[mask_uint8 > 0] = [0, 255, 0]

    panel.imshow(overlay)
    panel.set_title(f"{idx}", fontsize=10)
    panel.axis('off')

    mask_dict[idx] = mask_uint8

# Hide the grid cells that have no mask to show.
for ax in axes[max_masks:]:
    ax.axis("off")

plt.tight_layout()
plt.show()

# --------------------------------------------------
# SELECT ONE MASK AND SAVE
# --------------------------------------------------
# Nothing can be selected when SAM returned no masks.
if not mask_dict:
    raise RuntimeError("SAM produced no masks for this view; nothing to select.")

selected_idx = int(input(f"Select SAM mask index (0-{max_masks-1}): "))
if selected_idx not in mask_dict:
    raise ValueError(
        f"Mask index {selected_idx} is out of range (0-{max_masks-1})."
    )
selected_mask = mask_dict[selected_idx]

mask_path = f"Hands-On-SDM/SDM-UniPS/data/{obj_name}/{vista}.data/mask.png"
# cv2.imwrite reports failure through its return value rather than raising.
if not cv2.imwrite(mask_path, selected_mask):
    raise IOError(f"Could not write mask to {mask_path}")
print(f"[INFO] Saved mask {selected_idx} → {mask_path}")


### Get Normals from SDM Uni-PS

In [None]:
# Run SDM-UniPS on the captured object. --test_dir is the object's data folder
# (presumably every `<view>.data` folder inside it is processed - confirm in
# main.py); the later cells read the outputs from
# Hands-On-SDM/SDM-UniPS/results/<obj_name>/results/.
!python Hands-On-SDM/SDM-UniPS/main.py \
    --session_name Hands-On-SDM/SDM-UniPS/results/{obj_name}  \
    --test_dir Hands-On-SDM/SDM-UniPS/data/{obj_name} \
    --checkpoint Hands-On-SDM/SDM-UniPS/checkpoint/ \
    --target normal_and_brdf

Show the estimated normals, base color, roughness and metallic maps

In [None]:
import cv2
import matplotlib.pyplot as plt

# Result folders are named after the view folders ("<view>.data"), so use the
# view selected in the mask cell when there is one. Fall back to view 0, the
# previously hard-coded value, when `vista` has not been defined.
try:
    view_name = f"{vista}.data"
except NameError:
    view_name = "0.data"
path_results = f'Hands-On-SDM/SDM-UniPS/results/{obj_name}/results/{view_name}'

# Panel title -> file name inside the result folder.
result_maps = {
    'Base Color': 'baseColor.png',
    'Roughness': 'roughness.png',
    'Metallic': 'metallic.png',
    'Normals': 'normal.png',
}

fig, ax = plt.subplots(2, 2, figsize=(12, 8))

for panel, (title, file_name) in zip(ax.flat, result_maps.items()):
    img_path = f'{path_results}/{file_name}'
    img = cv2.imread(img_path)
    if img is None:
        # Keep the layout, but make the missing map obvious instead of leaving
        # an empty framed axis behind.
        print(f"Image not found: {img_path}")
        panel.set_title(f"{title} (not found)")
        panel.axis('off')
        continue
    # OpenCV loads BGR; matplotlib expects RGB.
    panel.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    panel.set_title(title)
    panel.axis('off')

plt.tight_layout()
plt.show()


We can also relight the scene

In [None]:
# Relight one view from its SDM-UniPS outputs.
# NOTE(review): the view folder is hard-coded to `0.data`; if the captured view
# was given another number, this path has to be changed to match.
!python Hands-On-SDM/SDM-UniPS/relighting.py --datadir ./Hands-On-SDM/SDM-UniPS/results/{obj_name}/results/0.data

# Depth

### Depth Map Generation from a Normal Map

To generate a depth map from a normal map, there are several approaches. The most common method is the Frankot-Chellappa integration, although any consistent integration technique can work well.

In this example, we solve the problem using a **least-squares system** that estimates the depth $z(x,y)$ from the gradients derived from the normal map:

$$
p = \frac{\partial z}{\partial x} = -\frac{n_x}{n_z}, \quad
q = \frac{\partial z}{\partial y} = -\frac{n_y}{n_z}
$$

A sparse linear system is constructed, including local constraints for each valid pixel according to the object mask. For each pixel, constraints with its neighbors (top, bottom, left, and right) are imposed, resulting in an over-determined system $A z = b$.

Finally, the depth is obtained by solving the system using **sparse least squares** (`scipy.sparse.linalg.lsmr`). The solution is normalized so that the minimum depth is zero, and the mask is applied to set pixels outside the object to zero.


In [38]:
import torch
import numpy as np
import scipy.sparse
import scipy.sparse.linalg


def comp_depth_4edge_sparse(mask, normal):
    """Integrate a normal map into a depth map by sparse least squares.

    The gradients ``p = -nx / nz`` and ``q = -ny / nz`` are computed per pixel.
    For every pair of 4-connected neighbours that both lie inside the mask,
    two equations ``z[next] - z[prev] = g`` are added, one using the gradient
    at each end of the edge (``p`` for horizontal edges, ``q`` for vertical
    ones). The over-determined system ``A z = b`` is solved with
    ``scipy.sparse.linalg.lsmr``.

    Args:
        mask: 2-D array or torch tensor of shape (h, w); values > 0.5 mark
            valid pixels.
        normal: array or torch tensor of shape (h, w, 3+) holding
            (nx, ny, nz) in its first three channels.

    Returns:
        (h, w) float array: the solution shifted by its minimum, with pixels
        outside the mask set to 0. All zeros when the mask yields no
        constraint (empty mask or only isolated pixels).
    """
    if isinstance(mask, torch.Tensor):
        mask = mask.cpu().numpy()
    if isinstance(normal, torch.Tensor):
        normal = normal.cpu().numpy()

    h, w = mask.shape
    n_pixels = h * w
    valid = mask > 0.5

    nx = normal[:, :, 0].reshape(-1)
    ny = normal[:, :, 1].reshape(-1)
    nz = normal[:, :, 2].reshape(-1) + 1e-8  # avoid division by zero

    # Surface gradients implied by the normals.
    p = -nx / nz
    q = -ny / nz

    # Flat (row-major) pixel index of every grid position.
    pixel_idx = np.arange(n_pixels).reshape(h, w)

    # Horizontal edges: pixel (i, j) and its right neighbour, both valid.
    horiz = valid[:, :-1] & valid[:, 1:]
    left_idx = pixel_idx[:, :-1][horiz]
    right_idx = pixel_idx[:, 1:][horiz]

    # Vertical edges: pixel (i, j) and the pixel below it, both valid.
    vert = valid[:-1, :] & valid[1:, :]
    up_idx = pixel_idx[:-1, :][vert]
    down_idx = pixel_idx[1:, :][vert]

    # Each edge contributes two rows "z[next] - z[prev] = gradient": once with
    # the gradient at the first pixel and once with the gradient at the second.
    # This is the same equation set as visiting the right/down/left/up
    # neighbours of every pixel, assembled without a Python loop.
    prev_idx = np.concatenate([left_idx, left_idx, up_idx, up_idx])
    next_idx = np.concatenate([right_idx, right_idx, down_idx, down_idx])
    b = np.concatenate(
        [p[left_idx], p[right_idx], q[up_idx], q[down_idx]]
    ).astype(np.float64)

    n_eq = b.size
    if n_eq == 0:
        # No pair of neighbouring valid pixels: nothing to integrate.
        return np.zeros((h, w), dtype=np.float64)

    eq_idx = np.arange(n_eq)
    rows = np.concatenate([eq_idx, eq_idx])
    cols = np.concatenate([prev_idx, next_idx])
    data = np.concatenate([-np.ones(n_eq), np.ones(n_eq)])
    A = scipy.sparse.coo_matrix(
        (data, (rows, cols)), shape=(n_eq, n_pixels)
    ).tocsr()

    # LSMR works on A directly, so the normal equations (A.T @ A, A.T @ b) are
    # never formed; the previous version built them without using them.
    x = scipy.sparse.linalg.lsmr(A, b)[0]

    depth = x.reshape(h, w)
    depth -= depth.min()
    depth[~valid] = 0.0

    return depth

In [43]:
import sys
# Make the repository's `utils` package importable. The path assumes the repo
# was cloned under /content (the Colab default) - adjust it to your location.
sys.path.append('/content/Hands-On-SDM')  # Adjust to your path
from utils import obj_functions as ob

def depth_process(normal, mask=None, albedo=None, path=None):
    """Integrate a normal map into depth and optionally export a coloured PLY.

    Args:
        normal: (h, w, 3) NumPy array of normals. It is not modified; the
            function works on a copy.
        mask: optional (h, w) array; pixels equal to 0 are treated as
            background. Defaults to a full mask.
        albedo: optional colour image, cast to uint8 and passed to
            ``ob.save_as_ply``.
        path: optional output folder; the mesh is written to
            ``<path>/normals_3D.ply``.

    Returns:
        The depth map from ``comp_depth_4edge_sparse`` with a trailing
        singleton channel, i.e. shape (h, w, 1).

    The PLY export is skipped when ``albedo`` or ``path`` is None. These are
    the defaults, and they previously caused a crash or a write to a folder
    literally named "None".
    """
    if mask is None:
        mask = np.ones(normal.shape[:2], dtype=np.uint8) * 255
    print(f"mask.shape in depth_process: {mask.shape}")

    # Copy first so the caller's array is left untouched.
    normal = normal.copy()
    normal[mask == 0] = [0.0, 0.0, 0.0]
    # Negate the y component before integrating (presumably to match the
    # image row direction - TODO confirm the normal-map convention).
    normal[:, :, 1] = -normal[:, :, 1]

    # solve depth
    depth = comp_depth_4edge_sparse(mask, normal)

    if depth.ndim == 2:
        depth = depth[:, :, None]

    if albedo is None or path is None:
        print("depth_process: albedo or path not given, skipping PLY export")
        return depth

    # Depth to 3D: only the triangle list is needed for the export.
    _, tri = ob.Depth2VerTri(depth, mask)
    temp_albedo = albedo.astype(np.uint8)
    ob.save_as_ply(f"{path}/normals_3D.ply", depth, normal, temp_albedo, mask, tri)
    return depth

In [46]:
def normals2depth(path_results, path_mask):
    """Compute and display a depth map for every view found in ``path_results``.

    For each sub-folder ``<view>`` of ``path_results`` that contains a
    ``normal.png``, the normal map is decoded to [-1, 1], masked with
    ``<path_mask>/<view>/mask.png`` and handed to ``depth_process`` together
    with ``baseColor.png`` as albedo, with the view's result folder as the
    output path. The resulting depth map is shown in grayscale.

    Args:
        path_results: folder containing one result sub-folder per view.
        path_mask: folder containing the matching data sub-folders (with the
            masks).
    """
    # Sorted so views are processed in a reproducible order.
    for vista in sorted(os.listdir(path_results)):
        view_dir = os.path.join(path_results, vista)
        normal_file = os.path.join(view_dir, 'normal.png')
        if not os.path.isfile(normal_file):
            # Stray files or folders without a predicted normal map.
            print(f"Skipping {vista}: normal.png not found")
            continue

        # matplotlib returns PNG data as floats in [0, 1].
        normal = plt.imread(normal_file).astype(np.float32)
        normal = normal[:, :, :3]  # drop the alpha channel if present
        normal = normal * 2.0 - 1.0  # [0, 1] -> [-1, 1]

        mask_file = os.path.join(path_mask, vista, 'mask.png')
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # depth_process falls back to a full mask when given None.
            print(f"No mask found for {vista} ({mask_file}); using every pixel")
        elif mask.shape != normal.shape[:2]:
            # The mask is indexed against the normal map, so the sizes must
            # match. Nearest-neighbour keeps the mask binary.
            mask = cv2.resize(
                mask,
                (normal.shape[1], normal.shape[0]),
                interpolation=cv2.INTER_NEAREST,
            )

        color_file = os.path.join(view_dir, 'baseColor.png')
        # NOTE(review): OpenCV loads this image in BGR channel order - confirm
        # that is what ob.save_as_ply expects for the vertex colours.
        color_img = cv2.imread(color_file, cv2.IMREAD_UNCHANGED)
        if color_img is None:
            print(f"baseColor.png not found for {vista} ({color_file})")

        normals = normal.copy()
        if mask is not None:
            normals[mask == 0] = 0

        depth = depth_process(normals, mask, albedo=color_img, path=view_dir)

        plt.imshow(depth, cmap='gray')
        plt.show()



In [None]:
# Integrate the normals of every view of this object; masks are read from the
# matching view folders under data/<obj_name>.
normals2depth(f'Hands-On-SDM/SDM-UniPS/results/{obj_name}/results', path_mask=f'Hands-On-SDM/SDM-UniPS/data/{obj_name}')


### Visualize reconstruction

In [None]:
# trimesh loads and displays the reconstructed mesh.
!pip install trimesh pyglet
import trimesh
import os
# `path_results` is the single-view result folder defined in the cell that
# shows the normals and base color; depth_process writes normals_3D.ply there.
ply_file = os.path.join(path_results, 'normals_3D.ply')
mesh = trimesh.load(ply_file)
mesh.show()
