In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import open3d as o3d
import os
from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass
import sys
from pathlib import Path
from pytorch3d.loss import chamfer_distance


sys.path.append(str(Path.cwd().parent))

from Helpers.data import PointCloudDataset
import Helpers.PointCloudOpen3d as pc

# Choose the compute backend in order of preference: CUDA, then Apple MPS
# (only probed when this torch build exposes `torch.backends.mps`), then CPU.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
    else "cpu"
)

print(f'Using: {device}')

Using: cuda


In [None]:
def count_train_models(dataset_root):
    """Count the training meshes (``.off`` files) per object class.

    Walks ``dataset_root`` recursively and considers every ``.off`` file that
    lives below a directory named ``train``.

    Args:
        dataset_root: Directory to scan (e.g. the ModelNet40 root).

    Returns:
        dict mapping class name -> number of training ``.off`` files found.
        Empty if the directory does not exist or holds no matching files.
    """
    counts = {}
    for root, _, files in os.walk(dataset_root):
        # Match "train" as a whole directory component (relative to the
        # dataset root) instead of as a substring anywhere in the path.
        if 'train' not in os.path.relpath(root, dataset_root).split(os.sep):
            continue
        for file in files:
            if not file.endswith('.off'):
                continue
            # Files are named "<class>_<index>.off". Split on the LAST
            # underscore so multi-word classes such as "night_stand" or
            # "flower_pot" keep their full name.
            name = file.rsplit('_', 1)[0]
            # Start at 1 for the first file so the stored value is the real
            # number of meshes (not count - 1).
            counts[name] = counts.get(name, 0) + 1
    return counts


names = count_train_models("../Data/ModelNet40")

# Classes with more than 190 training meshes.
big_object_classes = []
for name, count in names.items():
    if count > 190:
        print(f'{name} : {count}')
        big_object_classes.append(name)
print(len(names))

In [21]:
# `point_size` is handed to PointCloudDataset and later to the models
# (presumably the number of points per cloud — confirm in Helpers.data).
point_size = 2048

# Build the 'train' split first, then 'test'; object_classes=None is passed
# through unchanged for both.
train_dataset, test_dataset = (
    PointCloudDataset("../Data/ModelNet40", point_size, split, object_classes=None)
    for split in ('train', 'test')
)

print(len(test_dataset))
print(len(train_dataset))

  scaled_points = ((points - min_coords) / range_coords) #scales points on range [0, 1]
  scaled_points = (scaled_points * (2**num_bits - 1)).astype(int) # scales points to [0, 2^num_bits]


2468
9843


In [23]:
# Mini-batch loaders: 128 clouds per training batch, 32 per test batch, both
# reshuffled on every pass.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)

# The test split doubles as the validation set, so both names refer to the
# very same loader object.
val_loader = test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)

In [136]:
class ConvDecoder_5700T(nn.Module):
    """Fully connected decoder that expands a latent vector into a point cloud.

    Five linear layers (latent_dim -> 768 -> 1024 -> 2048 -> 3072 ->
    point_size * 3) with GELU after every layer except the last; the final
    activations are reshaped to ``(-1, point_size, 3)``.

    Args:
        point_size: Number of points in each reconstructed cloud.
        latent_dim: Width of the latent vector fed to ``forward``.
    """

    def __init__(self, point_size, latent_dim):
        super().__init__()

        self.point_size = point_size

        # Register the layers as l1..l5 (in that order) so parameter names
        # and initialisation order match existing checkpoints.
        widths = (latent_dim, 768, 1024, 2048, 3072, point_size * 3)
        for idx, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"l{idx}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Decode latent vectors ``x`` into a ``(-1, point_size, 3)`` tensor."""
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.gelu(hidden(x))
        # No activation on the output layer: coordinates are unbounded.
        return self.l5(x).view(-1, self.point_size, 3)
    
class ConvEncoder_5700T(nn.Module):
    """1-D convolutional encoder that maps a point cloud to a latent vector.

    Pipeline:
      * two pointwise (kernel 1) convolutions lift each point from 3 to 64
        channels,
      * four length-preserving convolutions (kernel 9, padding 4) mix
        neighbouring positions along the point axis,
      * five stride-2 convolutions (kernel 8, padding 3) each halve the
        length (floor division),
      * three linear layers reduce the flattened features to ``latent_size``.

    Args:
        point_size: Number of points per input cloud. Must be at least 32 so
            that every stride-2 convolution still has a valid input length.
        latent_size: Width of the returned latent vector.

    Raises:
        ValueError: If ``point_size`` is smaller than 32.

    Note:
        For ``point_size`` divisible by 32 the layer shapes (and therefore the
        state-dict) are identical to the previous ``point_size * 2`` sizing.
    """

    def __init__(self, point_size, latent_size):
        super().__init__()

        if point_size < 32:
            raise ValueError(
                f"point_size must be >= 32 for five stride-2 convolutions, got {point_size}"
            )

        self.point_size = point_size

        # Lift the per-point representation from 3 to 32 to 64 channels.
        self.conv1 = nn.Conv1d(3, 32, 1)
        self.conv2 = nn.Conv1d(32, 64, 1)

        # Points talk to each other without downsampling.
        self.conv3 = nn.Conv1d(64, 64, kernel_size=9, stride=1, padding=4)
        self.conv4 = nn.Conv1d(64, 64, kernel_size=9, stride=1, padding=4)
        self.conv5 = nn.Conv1d(64, 64, kernel_size=9, stride=1, padding=4)
        self.conv6 = nn.Conv1d(64, 64, kernel_size=9, stride=1, padding=4)

        # Downsampling: each layer maps length L to L // 2.
        self.conv7 = nn.Conv1d(64, 64, kernel_size=8, stride=2, padding=3)
        self.conv8 = nn.Conv1d(64, 64, kernel_size=8, stride=2, padding=3)
        self.conv9 = nn.Conv1d(64, 64, kernel_size=8, stride=2, padding=3)
        self.conv10 = nn.Conv1d(64, 64, kernel_size=8, stride=2, padding=3)
        self.conv11 = nn.Conv1d(64, 64, kernel_size=8, stride=2, padding=3)

        # After five halvings there are point_size // 32 positions of 64
        # channels each. This equals point_size * 2 only when point_size is a
        # multiple of 32, so compute it from the conv arithmetic instead of
        # assuming that.
        self.flat_features = 64 * (point_size // 32)

        # Linear head.
        self.lin1 = nn.Linear(self.flat_features, 1024)
        self.lin2 = nn.Linear(1024, 768)
        self.lin3 = nn.Linear(768, latent_size)

    def forward(self, x):
        """Encode ``x`` of shape (batch, 3, point_size) into (batch, latent_size).

        An unbatched (3, point_size) input is treated as a batch of one.
        """
        if x.dim() == 2:
            x = x.unsqueeze(0)

        for conv in (
            self.conv1, self.conv2,
            self.conv3, self.conv4, self.conv5, self.conv6,
            self.conv7, self.conv8, self.conv9, self.conv10, self.conv11,
        ):
            x = F.gelu(conv(x))

        # Flatten per sample. Unlike view(-1, K) this can never regroup
        # features across the batch dimension; a wrong input length surfaces
        # as a shape error in lin1 instead of a silently wrong batch size.
        x = x.flatten(1)

        x = F.gelu(self.lin1(x))
        x = F.gelu(self.lin2(x))
        x = self.lin3(x)

        return x


class ConvAE_5700T(nn.Module):
    """Autoencoder that chains ConvEncoder_5700T and ConvDecoder_5700T.

    Args:
        point_size: Number of points per cloud, forwarded to both halves.
        latent_size: Width of the bottleneck shared by encoder and decoder.
    """

    def __init__(self, point_size, latent_size):
        super().__init__()

        self.encoder = ConvEncoder_5700T(point_size=point_size, latent_size=latent_size)
        self.decoder = ConvDecoder_5700T(point_size=point_size, latent_dim=latent_size)

    def forward(self, x):
        """Encode ``x`` to the latent space and return the decoded cloud."""
        return self.decoder(self.encoder(x))

In [139]:
# Count the scalar weights owned by the encoder half of a 2048-point,
# 256-dimensional-latent autoencoder.
model = ConvAE_5700T(2048, 256)
total = sum(
    weights.numel()
    for label, weights in model.named_parameters()
    if "encoder" in label
)

print(total)

5493504


In [None]:

# Train the 5700T autoencoder with Chamfer distance and keep the weights that
# achieve the lowest validation loss.
checkpoint_path = './trained_models/ConvEnc_LinDec/ConvAutoEncoder_ModelNet40_5700T'
# torch.save does not create missing directories; make sure the target exists
# up front so the first improvement does not crash the run.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

model = ConvAE_5700T(2048, 512).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0001)

min_val_loss = np.inf

epochs = 500

for epoch in range(epochs):

    # ---- Train one epoch ----
    model.train()
    train_loss = 0
    train_samples = 0

    for data in train_loader:

        x = data['points'].to(device)
        batch_size = x.size(0)

        optimizer.zero_grad()

        # Model expects point clouds as (batch, 3, num_points).
        reconstructed_x = model(x.permute(0, 2, 1))

        loss, _ = chamfer_distance(x, reconstructed_x)

        loss.backward()

        optimizer.step()

        # chamfer_distance returns a batch mean; weight it by the batch size
        # so a short final batch does not get a full batch's share of the
        # epoch average.
        train_loss += loss.item() * batch_size
        train_samples += batch_size

    train_loss /= train_samples

    # ---- Validation loss ----
    model.eval()
    val_loss = 0
    val_samples = 0

    with torch.no_grad():
        for data in val_loader:

            x = data['points'].to(device)
            batch_size = x.size(0)

            reconstructed_x = model(x.permute(0, 2, 1))
            loss, _ = chamfer_distance(x, reconstructed_x)

            # Sample-weighted so the result does not depend on how the
            # (shuffled) loader happens to fill the last partial batch.
            val_loss += loss.item() * batch_size
            val_samples += batch_size

    val_loss /= val_samples

    print(f'\nEpoch {epoch+1} \t Train Loss: {train_loss:.5f} \t Val Loss: {val_loss:.5f}')

    # Save best model
    if val_loss < min_val_loss:
        print(f'Val Loss Decreased({min_val_loss:.6f} ---> {val_loss:.5f}) \t Saving The Model')
        min_val_loss = val_loss

        torch.save(model.state_dict(), checkpoint_path)
    


Epoch 1 	 Train Loss: 0.22954 	 Val Loss: 0.06668
Val Loss Decreased(inf ---> 0.06668) 	 Saving The Model

Epoch 2 	 Train Loss: 0.06493 	 Val Loss: 0.06580
Val Loss Decreased(0.066678 ---> 0.06580) 	 Saving The Model

Epoch 3 	 Train Loss: 0.06474 	 Val Loss: 0.06567
Val Loss Decreased(0.065796 ---> 0.06567) 	 Saving The Model

Epoch 4 	 Train Loss: 0.06397 	 Val Loss: 0.05653
Val Loss Decreased(0.065671 ---> 0.05653) 	 Saving The Model

Epoch 5 	 Train Loss: 0.05071 	 Val Loss: 0.04757
Val Loss Decreased(0.056530 ---> 0.04757) 	 Saving The Model

Epoch 6 	 Train Loss: 0.04764 	 Val Loss: 0.04430
Val Loss Decreased(0.047573 ---> 0.04430) 	 Saving The Model

Epoch 7 	 Train Loss: 0.04369 	 Val Loss: 0.04267
Val Loss Decreased(0.044302 ---> 0.04267) 	 Saving The Model

Epoch 8 	 Train Loss: 0.04153 	 Val Loss: 0.03951
Val Loss Decreased(0.042667 ---> 0.03951) 	 Saving The Model

Epoch 9 	 Train Loss: 0.04021 	 Val Loss: 0.03930
Val Loss Decreased(0.039508 ---> 0.03930) 	 Saving The Mod

KeyboardInterrupt: 

In [100]:
# Reload the best checkpoint written by the training cell. The architecture
# and latent size must match what was trained there (ConvAE_5700T, 2048
# points, 512-dim latent); the previously referenced ConvAE_3200T class is not
# defined anywhere in this notebook and fails on a fresh kernel.
best_model = ConvAE_5700T(2048, 512)
best_model.load_state_dict(
    torch.load(
        './trained_models/ConvEnc_LinDec/ConvAutoEncoder_ModelNet40_5700T',
        map_location=device,  # lets a CUDA-saved checkpoint load on CPU/MPS
        weights_only=True,
    )
)
best_model.to(device)
best_model.eval()
pc.visualize_random_reconstruction(best_model, val_loader, device)

In [111]:
# Repeat the visualisation for `best_model` (loaded in the cell above) using
# the validation loader. NOTE(review): the helper's name suggests it picks a
# random sample on each call — confirm in Helpers.PointCloudOpen3d.
pc.visualize_random_reconstruction(best_model, val_loader, device)

In [146]:
# Same visualisation for `model`, i.e. whatever network is currently bound to
# that name in the kernel (after the training cell: its latest weights, which
# are not necessarily the best saved checkpoint).
pc.visualize_random_reconstruction(model, val_loader, device)

In [None]:
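# Results (best validation loss reached by each architecture):
#   Medium conv net: 0.0029
#   4700T:           0.00194
#   3200T:           0.00186
#   5700T:           not recorded yet (the logged run above was interrupted
#                    after epoch 9 at a validation loss of 0.0393)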