# Model Evaluator
Use This Notebook to evaluate model runs.

In [1]:
import torch
import torch.nn as nn
import open3d as o3d
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

device = 'cuda' if torch.cuda.is_available() else 'cpu'

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
class VoxelAutoEncoder(nn.Module):
    """
    A simple 3D CNN-based encoder-decoder model.
    Input/Output shape (B, 1, 64, 128, 64).
    """
    def __init__(self):
        super(VoxelAutoEncoder, self).__init__()

        # Encoder path
        self.enc1 = self.double_conv(1, 32)  # Input channels = 1, output channels = 32
        self.enc2 = self.double_conv(32, 64)
        self.enc3 = self.double_conv(64, 128)
        self.enc4 = self.double_conv(128, 256)
        
        # Downsampling layers
        self.pool = nn.MaxPool3d(2)
        
        # Bottleneck
        self.bottleneck = self.double_conv(256, 512)
        
        # Decoder path
        self.upconv4 = nn.ConvTranspose3d(512, 256, kernel_size=2, stride=2)
        self.dec4 = self.double_conv(512, 256)  # Concatenate + Conv
        self.upconv3 = nn.ConvTranspose3d(256, 128, kernel_size=2, stride=2)
        self.dec3 = self.double_conv(256, 128)
        self.upconv2 = nn.ConvTranspose3d(128, 64, kernel_size=2, stride=2)
        self.dec2 = self.double_conv(128, 64)
        self.upconv1 = nn.ConvTranspose3d(64, 32, kernel_size=2, stride=2)
        self.dec1 = self.double_conv(64, 32)
        
        # Final output layer
        self.final = nn.Conv3d(32, 1, kernel_size=1)  # Output channels = 1 (filled voxel volume)

        # Sigmoid
        self.sigmoid = nn.Sigmoid()

    def double_conv(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv3d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # Encoder
        enc1 = self.enc1(x)  # Output: (32, 64, 128, 64)
        enc2 = self.enc2(self.pool(enc1))  # Output: (64, 32, 64, 32)
        enc3 = self.enc3(self.pool(enc2))  # Output: (128, 16, 32, 16)
        enc4 = self.enc4(self.pool(enc3))  # Output: (256, 8, 16, 8)
        
        # Bottleneck
        bottleneck = self.bottleneck(self.pool(enc4))  # Output: (512, 4, 8, 4)
        
        # Decoder
        dec4 = self.upconv4(bottleneck)  # Output: (256, 8, 16, 8)
        dec4 = self.dec4(torch.cat((dec4, enc4), dim=1))  # Skip connection
        
        dec3 = self.upconv3(dec4)  # Output: (128, 16, 32, 16)
        dec3 = self.dec3(torch.cat((dec3, enc3), dim=1))  # Skip connection
        
        dec2 = self.upconv2(dec3)  # Output: (64, 32, 64, 32)
        dec2 = self.dec2(torch.cat((dec2, enc2), dim=1))  # Skip connection
        
        dec1 = self.upconv1(dec2)  # Output: (32, 64, 128, 64)
        dec1 = self.dec1(torch.cat((dec1, enc1), dim=1))  # Skip connection
        
        # Final output layer
        out = self.final(dec1)  # Output: (1, 64, 128, 64)
        
        return self.sigmoid(out)

In [3]:
"""
Returns the tensor given a point cloud.
Also uses min and max bound to avoid empty space in the tensor/pcd
copied from: alex_ml_model_experiments_voxel_grid notebook dataset class
"""
def get_3d_tensor_from_pcd(pcd):
        points = np.asarray(pcd.points)
        min_bound = np.min(points, axis=0)
        max_bound = np.max(points, axis=0)
        grid_size = 32 # TODO IN PARAMS
        voxel_size = (max_bound - min_bound) / grid_size
        
        normalized_points = (points - min_bound) / voxel_size
        grid_points = np.floor(normalized_points).astype(int)
        grid_points = np.clip(grid_points, 0, grid_size - 1)
        grid_tensor = torch.zeros((grid_size, grid_size, grid_size), dtype=torch.int32)
        for point in grid_points:
            grid_tensor[tuple(point)] = 1
        return grid_tensor.float()

def visualize_3d_tensor(voxel_tensor, threshold=0.5):
    normalized_tensor = torch.where(voxel_tensor > threshold, 1, 0)
    occupied_indices = np.argwhere(normalized_tensor.numpy() > 0)
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(occupied_indices)
    o3d.visualization.draw_geometries([point_cloud], width=500, height=500)

"""
Visualize the results of a PCD using a given model
"""
def predict_and_visualize(input_pcd_path, truth_pcd_path, model, threshold=0.5):
    input_pcd = o3d.io.read_point_cloud(input_pcd_path)
    truth_pcd = o3d.io.read_point_cloud(truth_pcd_path)
    input_tensor = get_3d_tensor_from_pcd(input_pcd).to(device)
    if truth_pcd_path != "":
        truth_tensor = get_3d_tensor_from_pcd(truth_pcd).to(device)
    
    model.eval()
    with torch.no_grad():
        input_tensor = input_tensor.unsqueeze(0).unsqueeze(0) # Add batch dimension + channel
        reconstructed_tensor = model(input_tensor)
        # Visualize
        voxel_tensor = input_tensor.squeeze(0).squeeze(0).cpu()
        visualize_3d_tensor(voxel_tensor, threshold)
        voxel_tensor = reconstructed_tensor.squeeze(0).squeeze(0).cpu()
        visualize_3d_tensor(voxel_tensor, threshold)
        #print(truth_tensor.shape)
        if truth_tensor is not None:
            visualize_3d_tensor(truth_tensor.cpu())
        

In [7]:
# If the whole model was saved
#model = torch.load("../assets/model_exports/model_epoch_9.pth")
#model = torch.load("../models/voxel_weights_6.pth")
#model = model.to(device)

# If only the state dict was saved
model = VoxelAutoEncoder()
state_dict = torch.load("../models/voxel_weights_6.pth")
model.load_state_dict(state_dict)

  state_dict = torch.load("../models/voxel_weights_6.pth")


<All keys matched successfully>

In [8]:
#torch.save(model.state_dict(), f'./state_dict_alex_model_v2.pth')

In [9]:
input_path = "../assets/voxel32/4_cut.ply"
full_path = "../assets/voxel32/4_full.ply"
predict_and_visualize(input_path, full_path, model, threshold=0.2)

#input_path = "../assets/unclean-estimated-pcds/pc_generator_frame_50.ply"
#full_path = ""
#predict_and_visualize(input_path, full_path, model, threshold=0.2)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same