# Import

In [1]:
import cv2
import torch
import numpy as np
from moge.model.v2 import MoGeModel
import os

# Setup

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Load the model from huggingface hub

In [3]:


print("Loading MoGe-2 model...")
model = MoGeModel.from_pretrained("Ruicheng/moge-2-vitl-normal").to(device)
print("Model loaded successfully!")


Loading MoGe-2 model...
Model loaded successfully!



# Input and output paths

In [4]:

input_path = "/root/moge_work/images/ test101.png"  # Change this to your image path
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)


# Read the input image and convert to tensor (3, H, W) with RGB values normalized to [0, 1]

when you write imread the cv2.imread() takes image in BGR format but most of the model wants RGB format if you dont do it then color will be swapped 

A red apple in BGR: [0, 0, 255] (high blue value)
Same apple in RGB: [255, 0, 0] (high red value)

for the second line 
The Doc said to normalize them and use permute like
* PyTorch CNNs expect channel-first format: (C, H, W) 
* OpenCV/PIL use channel-last format: (H, W, C)

In [5]:
print(f"Reading image from {input_path}...")
input_image_bgr = cv2.imread(input_path)
if input_image_bgr is None:
    raise ValueError(f"Could not read image at {input_path}")

input_image_rgb = cv2.cvtColor(input_image_bgr, cv2.COLOR_BGR2RGB)
input_image = torch.tensor(input_image_rgb / 255, dtype=torch.float32, device=device).permute(2, 0, 1)


print(f"Image shape: {input_image.shape}")

Reading image from /root/moge_work/images/ test101.png...
Image shape: torch.Size([3, 1070, 722])



# Infer
The part where model does it work

In [17]:
print("Running inference...")
with torch.no_grad():  # Save memory
    output = model.infer(input_image)


print("Inference complete!")
print(f"Output keys: {output.keys()}")
# print(f"just output {output['normal']}")

Running inference...
Inference complete!
Output keys: dict_keys(['points', 'intrinsics', 'depth', 'mask', 'normal'])


In [8]:

# Save outputs
base_name = os.path.splitext(os.path.basename(input_path))[0]

## 1. Save depth map

Distance from the camera to each pixel in the scene


In [21]:
print(f"Depth Shape (H,W) {output['depth'].shape}")

Depth Shape (H,W) torch.Size([1070, 722])


In [None]:
depth = output["depth"].cpu().numpy() # inorder to process with numpy you need to move the tensor to cpu first else you will get an error
depth_normalized = ((depth - depth.min()) / (depth.max() - depth.min()) * 255).astype(np.uint8) # .astype(np.uint8): Converts to 8-bit integers for image format
depth_colored = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_INFERNO)
cv2.imwrite(f"{output_dir}/{base_name}_depth.png", depth_colored)
print(f"Saved depth map to {output_dir}/{base_name}_depth.png")


Saved depth map to output/ test101_depth.png


  depth_normalized = ((depth - depth.min()) / (depth.max() - depth.min()) * 255).astype(np.uint8) # .astype(np.uint8): Converts to 8-bit integers for image format
  depth_normalized = ((depth - depth.min()) / (depth.max() - depth.min()) * 255).astype(np.uint8) # .astype(np.uint8): Converts to 8-bit integers for image format


## 2. Save normal map

In [25]:
if "normal" in output:
    normal = output["normal"].cpu().numpy()
    # Convert from [-1, 1] to [0, 255]
    normal_vis = ((normal + 1) / 2 * 255).astype(np.uint8)
    normal_vis_bgr = cv2.cvtColor(normal_vis, cv2.COLOR_RGB2BGR)
    cv2.imwrite(f"{output_dir}/{base_name}_normal.png", normal_vis_bgr)
    print(f"Saved normal map to {output_dir}/{base_name}_normal.png")

Saved normal map to output/ test101_normal.png


Surface orientation (which direction the surface is facing) at each pixel

its actually in range [1 , -1] so cahanged it to 0 to 255 for img png 

Shape: (Height, Width, 3) - 3D unit vector per pixel
Coordinate System: Same as points (OpenCV camera coords)
Values: Unit vectors (length = 1) in range [-1, 1] for each component

Points perpendicular to the surface
Used to understand surface orientation


Usage:

Relighting
Surface analysis
Better 3D reconstruction
Material/texture estimation


Red channel: x-direction (left-right)
Green channel: y-direction (up-down)
Blue channel: z-direction (forward-backward)

In [35]:
print(f"just output of normal {output['normal'].shape}")
print(f"just output of a random point {output['normal'][289][277]}")

just output of normal torch.Size([1070, 722, 3])
just output of a random point tensor([-0.3092,  0.0696, -0.9484], device='cuda:0')


## 3. Save mask

In [None]:
mask = output["mask"].cpu().numpy()
mask_vis = (mask * 255).astype(np.uint8) # It was in [0,1], convert to [0,255]
cv2.imwrite(f"{output_dir}/{base_name}_mask.png", mask_vis)
print(f"Saved mask to {output_dir}/{base_name}_mask.png")

Saved mask to output/ test101_mask.png


In [38]:
print(f"Visualization of mask shape: {mask_vis.shape}")

print(f"see a random point of mask: {mask_vis[289][277]}")

Visualization of mask shape: (1070, 722)
see a random point of mask: 255


# 4. Save point cloud as PLY

In [45]:
points = output["points"].cpu().numpy()
mask_np = mask.astype(bool)



### Get valid points and colors

In [46]:
valid_points = points[mask_np]
valid_colors = input_image_rgb[mask_np]

# Write PLY file


### **Complete PLY File Structure:**
```
ply                           ← File type
format ascii 1.0              ← Text format
element vertex 514136         ← Number of points
property float x              ← Property definitions
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header                    ← End of header
1.234 -0.567 3.890 255 128 64 ← Point 1: position + color
0.123 0.456 2.789 120 200 50  ← Point 2: position + color
-0.987 1.234 5.678 30 60 90   ← Point 3: position + color
...                           ← 514,133 more lines

In [47]:
ply_path = f"{output_dir}/{base_name}_pointcloud.ply"
with open(ply_path, 'w') as f:
    f.write("ply\n")
    f.write("format ascii 1.0\n")
    f.write(f"element vertex {len(valid_points)}\n")
    f.write("property float x\n")
    f.write("property float y\n")
    f.write("property float z\n")
    f.write("property uchar red\n")
    f.write("property uchar green\n")
    f.write("property uchar blue\n")
    f.write("end_header\n")
    for point, color in zip(valid_points, valid_colors):
        f.write(f"{point[0]} {point[1]} {point[2]} {color[0]} {color[1]} {color[2]}\n")

print(f"Saved point cloud to {ply_path}")


Saved point cloud to output/ test101_pointcloud.ply


  x y z red green blue <br>
   1.2 0.5 3.4 255 128 64


# 5. Print camera intrinsics


- **What it is**: Camera parameters that describe how 3D points project to 2D pixels
- **Shape**: `(3, 3)` matrix
- **Format**: Standard camera intrinsic matrix:
```
[[fx,  0, cx],
 [ 0, fy, cy],
 [ 0,  0,  1]]

fx, fy: Focal lengths (in pixels)
cx, cy: Principal point (usually image center)
Usage:

Converting between 3D coordinates and 2D pixels
Camera calibration
3D reconstruction pipelines
Computing field of view (FOV)

In [39]:
intrinsics = output["intrinsics"].cpu().numpy()
print("\nCamera Intrinsics:")
print(intrinsics)

print("\n✅ All outputs saved successfully!")
print(f"Point map shape: {output['points'].shape}")
print(f"Depth map shape: {output['depth'].shape}")
print(f"Normal map shape: {output['normal'].shape if 'normal' in output else 'N/A'}")


Camera Intrinsics:
[[4.409797 0.       0.5     ]
 [0.       2.975583 0.5     ]
 [0.       0.       1.      ]]

✅ All outputs saved successfully!
Point map shape: torch.Size([1070, 722, 3])
Depth map shape: torch.Size([1070, 722])
Normal map shape: torch.Size([1070, 722, 3])
