In [2]:
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import open3d as o3d
import os
from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass
import sys
from pathlib import Path
from pytorch3d.loss import chamfer_distance


sys.path.append(str(Path.cwd().parent))

from Helpers.data import PointCloudDataset
import Helpers.PointCloudOpen3d as pc

if torch.cuda.is_available():
    device = "cuda"

elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f'Using: {device}')

from Helpers.data2 import PointCloudDataset2

Using: cuda


In [73]:
names = {}
for root, _, files in os.walk("../Data/ModelNet40"):
    for file in files:
        if file.endswith('.off'):
            full_path = os.path.join(root, file)
            if f'train' in full_path:
                name = file.split('_')[0]
                if name in names:
                    names[name] = names[name] + 1
                else:
                    names[name] = 0

big_object_classes = []
for name in names.keys():
    if names[name] > 190: 
        print(f'{name} : {names[name]}')
        big_object_classes.append(name)
print(len(names))

airplane : 625
bed : 514
bookshelf : 571
bottle : 334
car : 196
chair : 888
desk : 199
dresser : 199
mantel : 283
monitor : 464
night : 199
piano : 230
plant : 239
sofa : 679
table : 391
toilet : 343
tv : 266
vase : 474
40


In [14]:
t2 = PointCloudDataset2("../Data/ModelNet40", 1024, 'train', object_classes = ['vase'] )
t = PointCloudDataset("../Data/ModelNet40", 1024, 'train', object_classes = ['vase'] )




In [51]:
point_size = 1024
# train_dataset = PointCloudDataset2("../Data/ModelNet40", point_size, 'train', object_classes = big_object_classes )
# train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
test_dataset = PointCloudDataset2("../Data/ModelNet40", point_size, 'test', object_classes=big_object_classes)
# test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True)
print(len(test_dataset))
print(len(train_dataset))

  scaled_points = ((points - min_coords) / range_coords) #scales points on range [0, 1]
  scaled_points = (scaled_points * (2**num_bits - 1)).astype(int) # scales points to [0, 2^num_bits]


1758
7112


In [43]:
train_loader = DataLoader(train_dataset, batch_size = 128, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True)


In [15]:
x = next(iter(train_loader))['points'][0]

cloud = pc.get_point_cloud(x)
pc.visualize_point_cloud(cloud)

In [19]:
point_size = 1024
# train_dataset = PointCloudDataset("../Data/ModelNet40", point_size, 'train', object_classes = ['chair', 'sofa'])
# train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
test_dataset = PointCloudDataset("../Data/ModelNet40", point_size, 'test', object_classes=['chair', 'sofa'])
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True)
# print(len(test_dataset))
print(len(train_dataset))


1569


In [40]:
class ConvDecoder(nn.Module):
    
    def __init__(self, point_size, latent_dim):
        super().__init__()
        
        self.point_size = point_size

        self.l1 = nn.Linear(latent_dim, 768)
        self.l2 = nn.Linear(768 , 1024)
        self.l3 = nn.Linear(1024, 2048)
        self.l4 = nn.Linear(2048, 3072)
        self.l5 = nn.Linear(3072, point_size * 3)

    def forward(self, x):
        x = F.gelu(self.l1(x))
        x = F.gelu(self.l2(x))
        x = F.gelu(self.l3(x))
        x = F.gelu(self.l4(x))
        x = self.l5(x)
        x = x.view(-1, self.point_size, 3)
        return x    
    
class ConvEncoder(nn.Module):
    
    def __init__(self, point_size, latent_size):
        super().__init__()

        # Blowup point representation from 3 to 32
        self.conv1 = nn.Conv1d(3, 16, 1)
        self.conv2 = nn.Conv1d(16, 32, 1)

        # Points talk to each other wo/ downsampling 
        self.conv3 = nn.Conv1d(32, 32, kernel_size = 9, stride= 1, padding= 4)
        self.conv4 = nn.Conv1d(32, 32, kernel_size = 9, stride = 1, padding = 4)
        self.conv5 = nn.Conv1d(32, 32, kernel_size = 9, stride = 1, padding = 4)

        # Downsampling 
        self.conv6 = nn.Conv1d(32, 32, kernel_size = 8, stride = 2, padding = 3)
        self.conv7 = nn.Conv1d(32, 32, kernel_size = 8, stride = 2, padding = 3)
        self.conv8 = nn.Conv1d(32, 32, kernel_size = 8, stride = 2, padding = 3)
        self.conv9 = nn.Conv1d(32, 32, kernel_size = 8, stride = 2, padding = 3)

        # Linear 
        self.lin1 = nn.Linear(2048, 1024)
        self.lin2 = nn.Linear(1024, 768)
        self.lin3 = nn.Linear(768, latent_size)

    def forward(self, x):
        
        x = F.gelu(self.conv1(x))
        x = F.gelu(self.conv2(x))

        x = F.gelu(self.conv3(x))
        x = F.gelu(self.conv4(x))
        x = F.gelu(self.conv5(x))

        x = F.gelu(self.conv6(x))
        x = F.gelu(self.conv7(x))
        x = F.gelu(self.conv8(x))
        x = F.gelu(self.conv9(x))

        x = x.view(-1, 2048)

        x = F.gelu(self.lin1(x))
        x = F.gelu(self.lin2(x))
        x = F.gelu(self.lin3(x))

        return x


class ConvAE(nn.Module):
    
    def __init__(self, point_size, latent_size):
        super().__init__()

        self.encoder = ConvEncoder(point_size, latent_size)
        self.decoder = ConvDecoder(point_size, latent_size)

    def forward(self, x):
        latent_rep = self.encoder(x)
        reconstructed_cloud = self.decoder(latent_rep)
        return reconstructed_cloud


In [12]:
class PointCloudAE(nn.Module):
    def __init__(self, point_size, latent_size):
        super(PointCloudAE, self).__init__()
        
        self.latent_size = latent_size
        self.point_size = point_size
        
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, self.latent_size, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(self.latent_size)
        
        self.dec1 = nn.Linear(self.latent_size,256)
        self.dec2 = nn.Linear(256,256)
        self.dec3 = nn.Linear(256,self.point_size*3)

    def encoder(self, x): 
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, self.latent_size)
        return x
    
    def decoder(self, x):
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = self.dec3(x)
        return x.view(-1, self.point_size, 3)
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [52]:

# net = PointCloudAE(1024, 256).to(device)
# net = ConvAE(1024, 512).to(device)

optimizer = optim.AdamW(net.parameters(), lr=0.0001)

def train_epoch():
    epoch_loss = 0
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        
        x = data['points'].to(device)

        output = net(x.permute(0,2,1)) # transpose data for NumberxChannelxSize format

        # loss = F.mse_loss(x, output)
        loss, _  = chamfer_distance(x, output)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        
    if i == 0:
        return epoch_loss / 1
    return epoch_loss/i


train_loss_list = []  
test_loss_list = []  

for i in range(40000) :

    startTime = time.time()
    
    train_loss = train_epoch() #train one epoch, get the average loss
    train_loss_list.append(train_loss)

    
    epoch_time = time.time() - startTime
    
    if i % 100 == 0:
        print(f'Epoch {i} Train_loss: {train_loss}')

Epoch 0 Train_loss: 0.00023084146219348026
Epoch 100 Train_loss: 0.000217502844911492
Epoch 200 Train_loss: 0.0002131844547958198
Epoch 300 Train_loss: 0.0002102990555365316
Epoch 400 Train_loss: 0.00021030257127925076
Epoch 500 Train_loss: 0.00020553628432522103
Epoch 600 Train_loss: 0.0002082797288577157
Epoch 700 Train_loss: 0.00020384389155713673
Epoch 800 Train_loss: 0.00020241157908458264
Epoch 900 Train_loss: 0.00020250683115922253
Epoch 1000 Train_loss: 0.0001977582440965555
Epoch 1100 Train_loss: 0.00020172991472381082


KeyboardInterrupt: 

In [27]:
torch.save(net.state_dict(), 'trained_models/furniture_10k')

In [49]:
torch.save(net, 'trained_models/furniture_model_weights_50k')

In [50]:
model = torch.load('trained_models/furniture_model_weights_50k', weights_only = False)

In [None]:
model = net
x = next(iter(train_loader))['points'][0]

cloud = pc.get_point_cloud(x)
pc.visualize_point_cloud(cloud)

with torch.no_grad():
    data = x.unsqueeze(0).permute(0,2,1).to(device)

    rec_x = np.array(model(data)[0].to('cpu'))
    cloud = pc.get_point_cloud(rec_x)
    pc.visualize_point_cloud(cloud)
# x = next(iter(train_loader))['points'][0]

# cloud = pc.get_point_cloud(x)
# pc.visualize_point_cloud(cloud)

# with torch.no_grad():
#     data = x.unsqueeze(0).permute(0,2,1).to(device)

#     rec_x = np.array(model(data)[0].to('cpu'))
#     cloud = pc.get_point_cloud(rec_x)
#     pc.visualize_point_cloud(cloud)

In [22]:
model = net
x = next(iter(test_loader))['points'][0]

cloud = pc.get_point_cloud(x)
pc.visualize_point_cloud(cloud)

with torch.no_grad():
    data = x.unsqueeze(0).permute(0,2,1).to(device)

    rec_x = np.array(model(data)[0].to('cpu'))
    cloud = pc.get_point_cloud(rec_x)
    pc.visualize_point_cloud(cloud)