# Project imports

In [67]:
"""
All needed imports included here
"""
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import matplotlib as plt
import torch
import pytorch_lightning as pl

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
cuda


# Data Loading step

In [68]:
"""
Create data loaders and augmentations needed here
"""
from Data.ShapeNetDataLoader import ShapeNetVoxelData
from Utils.visualization import visualize_occupancy

overfit = False

shapenet_core_path = Path("D:\ShapeNetCoreVoxel32")
shapenet_splits_csv_path = Path("Data/shapenet_splits.csv")
voxel_filename = "model_3.binvox"
# load only models from some synsets
synset_id_filter = ["04379243"]  # tables
train_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="train", 
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Train Set Size: {len(train_data)}")
val_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="val",
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Validation Set Size: {len(val_data)}")
test_data = ShapeNetVoxelData(shapenet_core_path=shapenet_core_path, shapenet_splits_csv_path=shapenet_splits_csv_path, split="test",
    overfit=overfit, synset_id_filter=synset_id_filter, voxel_filename=voxel_filename
)
print(f"Test Set Size: {len(test_data)}")

train_sample = train_data[0]
print(f'Voxel Dimensions: {train_sample.shape}')

visualize_occupancy(train_sample.squeeze(), flip_axes=True)

Train Set Size: 1368
Validation Set Size: 236
Test Set Size: 383
Voxel Dimensions: (1, 32, 32, 32)


Output()

# Reconstruction Networks

# Purifying predicted Meshes

In [70]:
"""
Code to purify meshes predicted by the previous networks to be used in the retrieval step
"""

'\nCode to purify meshes predicted by the previous networks to be used in the retrieval step\n'

# Mesh Encoding

In [69]:
#%env CUDA_LAUNCH_BLOCKING=1
"""
AutoEncoder Models and/or different techniques used to encode the mesh to a smaller dimensions
"""
from Networks.VoxelAutoencoder import VoxelAutoencoder
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# lower kl_divergence_scale -> smoother latent space but worse reconstruction
# however a higher kl_divergence_scale causes too much overlap between latent space distributions which is impractical for retrieval
kl_divergence_scale=0.05
latent_dim = 64 # tried 128 which gives slightly better reconstructions, but worse retrieval
model = VoxelAutoencoder(train_data, val_data, test_data, device, kl_divergence_scale=kl_divergence_scale, latent_dim=latent_dim)

logger = TensorBoardLogger("tb_logs", name="my_model")
model_checkpoint = ModelCheckpoint(
    monitor="val_loss",
    dirpath="D:/Models/VoxelAutoencoder/",
    filename="voxel-autoencoder-01-{epoch:0004d}-{val_loss:.4f}",
    save_top_k=3,
    every_n_epochs=8,
    mode="min",
)
tqdm_progess_bar = TQDMProgressBar(refresh_rate=1)
early_stopping = EarlyStopping(monitor="val_loss", patience=32, mode="min")
trainer = pl.Trainer(
    max_epochs=1024,
    gpus=1 if torch.cuda.is_available() else None,
    log_every_n_steps=1,
    logger=logger,
    callbacks=[model_checkpoint, tqdm_progess_bar, early_stopping],
    profiler="simple"
)

trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type    | Params
-----------------------------------
0 | _model | Network | 34.1 M
-----------------------------------
34.1 M    Trainable params
0         Non-trainable params
34.1 M    Total params
136.242   Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 28:  12%|█▏        | 3/26 [2:22:04<18:09:15, 2841.56s/it, loss=0.0603, v_num=257]
Epoch 293: 100%|██████████| 26/26 [00:07<00:00,  3.58it/s, loss=0.034, v_num=258] 


FIT Profiler Report

Action                             	|  Mean duration (s)	|Num calls      	|  Total time (s) 	|  Percentage %   	|
----------------------------------------------------------------------------------------------------------------------------------------
Total                              	|  -              	|_              	|  2253.2         	|  100 %          	|
----------------------------------------------------------------------------------------------------------------------------------------
run_training_epoch                 	|  7.6579         	|294            	|  2251.4         	|  99.92          	|
run_training_batch                 	|  0.25499        	|6468           	|  1649.3         	|  73.195         	|
optimizer_step_with_closure_0      	|  0.12885        	|6468           	|  833.39         	|  36.986         	|
training_step_and_backward         	|  0.11862        	|6468           	|  767.22         	|  34.05          	|
model_forward                  

In [71]:
# visualize reconstruction
train_test_sample = train_data[1]

visualize_occupancy(train_test_sample.squeeze(), flip_axes=True)

sample_tensor = torch.from_numpy(train_test_sample[np.newaxis, :])

model.eval()
with torch.no_grad():
    decoded_test = model(sample_tensor)
    print(model.encode(sample_tensor)[0].shape)

tmp_decoded = decoded_test.clone()
tmp_decoded[decoded_test<0.5] = 0
tmp_decoded[decoded_test>=0.5] = 1

decoded_test_np = tmp_decoded.squeeze().detach().numpy()

visualize_occupancy(decoded_test_np, flip_axes=True)

Output()

torch.Size([64])


Output()

In [106]:
# compute latent vectors of training samples
latent_vectors = {}
model.eval()
with torch.no_grad():
    for train_sample in train_data:
        sample_tensor = torch.from_numpy(train_sample[np.newaxis, :])
        vec = torch.zeros(64)
        for i in range(2):
            vec += model.encode(sample_tensor)[0]
        vec /= 2
        latent_vectors[vec] = train_sample

for latent_vector in latent_vectors.items():
    print(latent_vector)
    break

(tensor([ 0.1903, -0.6447, -0.1866, -0.5829, -0.1963,  0.2412,  0.1207,  0.0647,
        -0.5527, -0.5833, -0.3085, -0.1395,  1.3158,  0.1276, -0.0217, -0.2293,
        -0.1428,  2.3342, -1.1617,  0.0449,  0.7585, -1.4124,  0.4898, -0.0788,
         0.1833, -0.9383,  0.6033,  0.6963, -0.5918,  0.4943,  1.0437, -0.3674,
         0.4529, -1.0308, -1.2869,  0.7310, -1.3705, -0.1673,  0.9079, -0.5481,
        -0.1301,  0.1290, -1.0288, -0.8064, -0.4644,  0.6228, -0.7107,  0.1057,
        -0.7725, -0.8885, -0.1140, -0.0827, -0.5144, -1.1386,  1.0071, -0.0679,
         1.8071, -0.5854, -1.1766, -0.7938, -0.2865, -0.3643,  0.5412, -0.6091]), array([[[[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]],

        [[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0.,

In [113]:
# compute latent vector of test sample
test_sample = test_data[42]
model.eval()
with torch.no_grad():
    # result is stochastic -> can be run multiple times to get different results
    sample_tensor = torch.from_numpy(test_sample[np.newaxis, :])
    test_latent_vector = torch.zeros(64)
    for i in range(2):
        test_latent_vector += model.encode(sample_tensor)[0]
    test_latent_vector /= 2

print("Test sample:")
visualize_occupancy(test_sample.squeeze(), flip_axes=True)  

# find closest latent vector
min_distance = float('inf')
best_voxel_match_0 = None
best_voxel_match_1 = None
best_voxel_match_2 = None
for train_latent_vector, train_voxel in latent_vectors.items():
    distance = torch.dist(test_latent_vector, train_latent_vector)
    if (distance < min_distance):
        min_distance = distance
        best_voxel_match_2 = best_voxel_match_1
        best_voxel_match_1 = best_voxel_match_0
        best_voxel_match_0 = train_voxel

print("Retrieved object 1:")
visualize_occupancy(best_voxel_match_0.squeeze(), flip_axes=True)
print("Retrieved object 2:")
visualize_occupancy(best_voxel_match_1.squeeze(), flip_axes=True)
print("Retrieved object 3:") 
visualize_occupancy(best_voxel_match_2.squeeze(), flip_axes=True)   

Test sample:


Output()

Retrieved object 1:


Output()

Retrieved object 2:


Output()

Retrieved object 3:


Output()

# Mesh Retreival Networks

In [None]:
"""
Models/Techniques to use the previous encoding steps to retreive objects from a specified database
"""
# TODO: store latent vector of all shapenet models
# TODO: encode voxel and find obj that has the closest latent vector

# Inference and Full Testing

In [None]:
"""
Testing the entire pipeline implemented with added visualizations and discussions.
"""

# Citations

[1].....