# Project imports

In [1]:
"""
All needed imports included here
"""
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import matplotlib as plt
import torch
import pytorch_lightning as pl

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Data Loading step

In [2]:
"""
Create data loaders and augmentations needed here
"""
from Data.ShapeNetDataLoader import ShapeNetVoxelData
from Utils.visualization import visualize_occupancy

overfit = False

shapenet_core_path = Path("Data/ShapeNetCoreVoxel32")
shapenet_splits_csv_path = Path("Data/shapenet_splits.csv")
voxel_filename = "model_3.binvox"
# load only models from some synsets
synset_id_filter = ["04379243"]  # tables
train_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="train", 
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Train Set Size: {len(train_data)}")
val_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="val",
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Validation Set Size: {len(val_data)}")
test_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="test",
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Test Set Size: {len(test_data)}")

train_sample = train_data[0]
print(f'Voxel Dimensions: {train_sample.shape}')

visualize_occupancy(train_sample.squeeze(), flip_axes=True)

Train Set Size: 1368
Validation Set Size: 236
Test Set Size: 383
Voxel Dimensions: (1, 32, 32, 32)


Output()

# Create Autoencoder

In [3]:
#%env CUDA_LAUNCH_BLOCKING=1
"""
AutoEncoder Models and/or different techniques used to encode the mesh to a smaller dimensions
"""
from Networks.VoxelAutoencoder import VoxelAutoencoder
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# lower kl_divergence_scale -> smoother latent space but worse reconstruction
# however a higher kl_divergence_scale causes too much overlap between latent space distributions which is impractical for retrieval
kl_divergence_scale=0.05
latent_dim = 64 # tried 128 which gives slightly better reconstructions, but worse retrieval
model = VoxelAutoencoder(train_data, val_data, test_data, device, kl_divergence_scale=kl_divergence_scale, latent_dim=latent_dim)

logger = TensorBoardLogger("tb_logs", name="my_model")
model_checkpoint = ModelCheckpoint(
    monitor="val_loss",
    dirpath="Assets/Models/VoxelAutoencoder/",
    filename="voxel-autoencoder-02-{epoch:0004d}-{val_loss:.4f}",
    save_top_k=3,
    every_n_epochs=8,
    mode="min",
)
tqdm_progess_bar = TQDMProgressBar(refresh_rate=1)
early_stopping = EarlyStopping(monitor="val_loss", patience=32, mode="min")
trainer = pl.Trainer(
    max_epochs=1024,
    gpus=1 if torch.cuda.is_available() else None,
    log_every_n_steps=1,
    logger=logger,
    callbacks=[model_checkpoint, tqdm_progess_bar, early_stopping],
    profiler="simple"
)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


# Training

In [None]:
trainer.fit(model)

# Visualize Reconstruction

In [4]:
best_model_path = "D:/Models/VoxelAutoencoder/voxel-autoencoder-01-epoch=0279-val_loss=0.0346.ckpt"
# best_model_path = model_checkpoint.best_model_path  # TODO: uncomment this line if you retrain
model = VoxelAutoencoder.load_from_checkpoint(
    best_model_path, train_set=train_data, val_set=val_data, test_set=test_data, device=device, kl_divergence_scale=kl_divergence_scale, latent_dim=latent_dim
)
model.eval()


VoxelAutoencoder(
  (_model): Network(
    (_encoder): Sequential(
      (0): Conv3d(1, 8, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ELU(alpha=1.0)
      (3): Conv3d(8, 16, kernel_size=(4, 4, 4), stride=(2, 2, 2), padding=(1, 1, 1))
      (4): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ELU(alpha=1.0)
      (6): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (7): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ELU(alpha=1.0)
      (9): Conv3d(32, 64, kernel_size=(4, 4, 4), stride=(2, 2, 2), padding=(1, 1, 1))
      (10): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (11): ELU(alpha=1.0)
      (12): Flatten(start_dim=1, end_dim=-1)
      (13): Linear(in_features=32768, out_features=512, bias=True)
      (14):

In [5]:


# visualize reconstruction
train_test_sample = train_data[1]

visualize_occupancy(train_test_sample.squeeze(), flip_axes=True)

sample_tensor = torch.from_numpy(train_test_sample[np.newaxis, :])


with torch.no_grad():
    decoded_test = model(sample_tensor)
    print(model.encode(sample_tensor)[0].shape)

tmp_decoded = decoded_test.clone()
tmp_decoded[decoded_test<0.5] = 0
tmp_decoded[decoded_test>=0.5] = 1

decoded_test_np = tmp_decoded.squeeze().detach().numpy()

visualize_occupancy(decoded_test_np, flip_axes=True)

Output()

torch.Size([64])


Output()

# compute latent vectors of training samples

In [6]:
latent_vectors = {}
model.eval()
with torch.no_grad():
    for i, train_sample in enumerate(train_data):
        sample_tensor = torch.from_numpy(train_sample[np.newaxis, :])
        vec = torch.zeros(64)
        for i in range(4):
            vec += model.encode(sample_tensor)[0]
        vec /= 4
        latent_vectors[vec] = (train_sample, train_data._model_paths[i])

In [11]:
# compute latent vector of test sample
test_sample = train_data[40]
model.eval()
with torch.no_grad():
    # result is stochastic -> can be run multiple times to get different results
    sample_tensor = torch.from_numpy(test_sample[np.newaxis, :])
    test_latent_vector = torch.zeros(64)
    for i in range(4):
        test_latent_vector += model.encode(sample_tensor)[0]
    test_latent_vector /= 4

print("Test sample:")
visualize_occupancy(test_sample.squeeze(), flip_axes=True)  

# find closest latent vector
min_distance = float('inf')
best_voxel_match_0 = None
best_match_path = None
best_voxel_match_1 = None
best_voxel_match_2 = None
for train_latent_vector, data in latent_vectors.items():
    train_voxel, train_path = data
    distance = torch.dist(test_latent_vector, train_latent_vector)
    if (distance < min_distance):
        min_distance = distance
        best_voxel_match_2 = best_voxel_match_1
        best_voxel_match_1 = best_voxel_match_0
        best_voxel_match_0 = train_voxel
        best_match_path = train_path

print(f"Best match path: {best_match_path}")
print("Retrieved object 1:")
visualize_occupancy(best_voxel_match_0.squeeze(), flip_axes=True)
print("Retrieved object 2:")
visualize_occupancy(best_voxel_match_1.squeeze(), flip_axes=True)
print("Retrieved object 3:") 
visualize_occupancy(best_voxel_match_2.squeeze(), flip_axes=True)   

Test sample:


Output()

Best match path: Data\ShapeNetCoreVoxel32\04379243\1d43a3a22ee451e62511fca00e0288b\model_3.binvox
Retrieved object 1:


Output()

Retrieved object 2:


# Test retrieval using reconstructed point clouds

In [6]:
val_latent_vectors = {}
model.eval()
with torch.no_grad():
    for i, train_sample in enumerate(val_data):
        sample_tensor = torch.from_numpy(train_sample[np.newaxis, :])
        vec = torch.zeros(64)
        for i in range(4):
            vec += model.encode(sample_tensor)[0]
        vec /= 4
        val_latent_vectors[vec] = (train_sample, val_data._model_paths[i])

In [20]:
from Networks.obj2pointcloud import *
from Networks.pointclouddataset import *
from Networks.mesh2mesh import *
from pyntcloud import PyntCloud
import pandas as pd

model.double()
def point2vox(points,dims=(32,32,32)):
    w,h,d = dims
    points = pd.DataFrame(points, columns=['x','y','z'])
    cloud = PyntCloud(points)
    voxelgrid_id = cloud.add_structure("voxelgrid", n_x=w, n_y=h, n_z=d)
    voxelgrid = cloud.structures[voxelgrid_id]
    x_cords = voxelgrid.voxel_x
    y_cords = voxelgrid.voxel_y
    z_cords = voxelgrid.voxel_z
    voxel = np.zeros((w, h, d))
    for x, y, z in zip(x_cords, y_cords, z_cords):
        voxel[x][y][z] = np.float32(1.0)
    return voxel

encoder = PointNetEncoder(numpoints=1000)
encoder.load_state_dict(torch.load(f'./models/runs/pcd_stage2/encoder_stage2_best.pth'))
encoder.eval()
decoder = PointNetDecoder(numpoints=4000)
decoder.load_state_dict(torch.load(f'./models/runs/pcd_stage1/decoder_best.pth'))
decoder.eval()
val_dataset = retrieval_dataset_voxel(split='val')
val_dataloader = torch.utils.data.DataLoader(
        val_dataset,     # Datasets return data one sample at a time; Dataloaders use them and aggregate samples into batches
        batch_size=1,   # The size of batches is defined here
        shuffle=False,   # During validation, shuffling is not necessary anymore
        num_workers=4,   # Data is usually loaded in parallel by num_workers
        pin_memory=True  # This is an implementation detail to speed up data uploading to the GPU
    )  

correct_count = 0
with torch.no_grad():
    for i, batch in enumerate(val_dataloader):
        input = batch['partial'].permute(0,2,1)
        recon = decoder(encoder(input))
        out = recon.permute(0,2,1).detach().numpy()

        voxels = point2vox(out[0])

        voxels = voxels[np.newaxis, :]
        
        print(voxels.shape)
        # encode voxels
        sample_tensor = torch.from_numpy(voxels[np.newaxis, :])
        test_latent_vector = torch.zeros(64)
        for x in range(2):
            test_latent_vector += model.encode(sample_tensor)[0]
        test_latent_vector /= 2

        best_match_path: str
        best_voxel_match_0 = None
        min_distance = float('inf')
        for val_latent_vector, data in val_latent_vectors.items():
            train_voxel, train_path = data
            distance = torch.dist(test_latent_vector, val_latent_vector)
            if (distance < min_distance):
                min_distance = distance
                best_voxel_match_0 = train_voxel
                best_match_path = train_path
        print(str(best_match_path).split("\\")[3])
        print(str(batch["full"][0]).split("/")[3])
        if (str(best_match_path).split("\\")[3] == str(batch["full"][0]).split("/")[3]):
            print("x")
            correct_count += 1

       # visualize_occupancy(voxels.squeeze(), flip_axes=True)
        
        print(i)

print(correct_count)


(1, 32, 32, 32)
0
(1, 32, 32, 32)
1
(1, 32, 32, 32)
2
(1, 32, 32, 32)
3
(1, 32, 32, 32)
4
(1, 32, 32, 32)
5
(1, 32, 32, 32)
6
(1, 32, 32, 32)
7
(1, 32, 32, 32)
8
(1, 32, 32, 32)
9
(1, 32, 32, 32)
10
(1, 32, 32, 32)
11
(1, 32, 32, 32)
12
(1, 32, 32, 32)
13
(1, 32, 32, 32)
14
(1, 32, 32, 32)
15
(1, 32, 32, 32)
16
(1, 32, 32, 32)
17
(1, 32, 32, 32)
18
(1, 32, 32, 32)
19
(1, 32, 32, 32)
20
(1, 32, 32, 32)
21
(1, 32, 32, 32)
22
(1, 32, 32, 32)
23
(1, 32, 32, 32)
24
(1, 32, 32, 32)
25
(1, 32, 32, 32)
26
(1, 32, 32, 32)
27
(1, 32, 32, 32)
28
(1, 32, 32, 32)
29
(1, 32, 32, 32)
30
(1, 32, 32, 32)
31
(1, 32, 32, 32)
32
(1, 32, 32, 32)
33
(1, 32, 32, 32)
34
(1, 32, 32, 32)
35
(1, 32, 32, 32)
36
(1, 32, 32, 32)
37
(1, 32, 32, 32)
38
(1, 32, 32, 32)
39
(1, 32, 32, 32)
40
(1, 32, 32, 32)
41
(1, 32, 32, 32)
42
(1, 32, 32, 32)
43
(1, 32, 32, 32)
44
(1, 32, 32, 32)
45
(1, 32, 32, 32)
46
(1, 32, 32, 32)
47
(1, 32, 32, 32)
48
(1, 32, 32, 32)
49
(1, 32, 32, 32)
50
(1, 32, 32, 32)
51
(1, 32, 32, 32)
52
(1,

KeyboardInterrupt: 