In [2]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import open3d as o3d
import os
import torch
from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass
import sys
from pathlib import Path
import time

sys.path.append(str(Path.cwd().parent))

from Helpers.data import PointCloudDataset
import Helpers.PointCloudOpen3d as pc

if torch.cuda.is_available():
    device = "cuda"

elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f'Using: {device}')

Using: mps


In [3]:
train_dataset = PointCloudDataset("../ModelNet40", 5000, 'train', ['person'])
train_loader = DataLoader(train_dataset, batch_size = 4, shuffle = False)

In [4]:
print(len(train_dataset))

88


In [5]:
class MLPEncoder(nn.Module):

    def __init__(self, config):
        super().__init__()

        self.fc1 = nn.Linear(config.input_dim, config.hidden_dim1)
        self.fc2 = nn.Linear(config.hidden_dim1, config.hidden_dim2)
        self.fc3 = nn.Linear(config.hidden_dim2, config.latent_dim)

    def forward(self, x):
        x = F.gelu(self.fc1(x))
        x = F.gelu(self.fc2(x))
        x = self.fc3(x)

        return x

class MLPDecoder(nn.Module):

    def __init__(self, config):
        super().__init__()

        self.fc1 = nn.Linear(config.latent_dim, config.hidden_dim2)
        self.fc2 = nn.Linear(config.hidden_dim2, config.hidden_dim1)
        self.fc3 = nn.Linear(config.hidden_dim1, config.input_dim)

    def forward(self, x):
        x = F.gelu(self.fc1(x))
        x = F.gelu(self.fc2(x))
        x = self.fc3(x)

        return x


class Autoencoder(nn.Module):

    def __init__(self, config):
        super().__init__()

        self.encoder = MLPEncoder(config)
        self.decoder = MLPDecoder(config)

    def forward(self,x):
        latent_rep = self.encoder(x)
        out = self.decoder(latent_rep)
        return out


# class Autoencoder(nn.Module):

#     def __init__(self, config):
#         super().__init__()

#         self.fc = nn.Linear(config.input_dim, config.input_dim)

#     def forward(self,x):
#         y = self.fc(x)
#         return x + (.00001 * y)
    

class PointCloudAutoencoder(nn.Module):
    def __init__(self, config):
        super(PointCloudAutoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(config.input_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.latent_dim)
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(config.latent_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.input_dim)
        )

    def forward(self, x):
        latent = self.encoder(x)
        reconstructed = self.decoder(latent)
        return reconstructed


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class PointCloudAE(nn.Module):
    def __init__(self, point_size, latent_size):
        super(PointCloudAE, self).__init__()
        
        self.latent_size = latent_size
        self.point_size = point_size
        
        self.conv1 = torch.nn.Conv1d(3, 1024, 1)
        self.conv2 = torch.nn.Conv1d(1024, 768, 1)
        self.conv3 = torch.nn.Conv1d(768, self.latent_size, 1)
        self.bn1 = nn.BatchNorm1d(1024)
        self.bn2 = nn.BatchNorm1d(768)
        self.bn3 = nn.BatchNorm1d(self.latent_size)
        
        self.dec1 = nn.Linear(self.latent_size,1024)
        self.dec2 = nn.Linear(1024,2048)
        self.dec3 = nn.Linear(2048,self.point_size*3)

    def encoder(self, x): 
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, self.latent_size)
        return x
    
    def decoder(self, x):
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = self.dec3(x)
        return x.view(-1, self.point_size, 3)
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    

In [None]:
x = next(iter(train_loader))['points'][0]
cloud = pc.get_point_cloud(x)
pc.visualize_point_cloud(cloud)

In [7]:
model = PointCloudAE(5000,768).to(device)


In [13]:
point_cloud_size = 5000 


# @dataclass 
# class MLPAutoEncoderConfig:
#     input_dim = point_cloud_size * 3
#     hidden_dim1 = 3072
#     hidden_dim2 = 1048
#     latent_dim = 512

# @dataclass 
# class MLPAutoEncoderConfig:
#     input_dim = point_cloud_size * 3
#     hidden_dim1 = point_cloud_size * 3
#     hidden_dim2 = point_cloud_size * 3
#     latent_dim = point_cloud_size * 3

# @dataclass 
# class MLPAutoEncoderConfig:
#     input_dim = point_cloud_size * 3
#     hidden_dim = 2048
#     latent_dim = 512


# config = MLPAutoEncoderConfig()

# model = Autoencoder(config).to(device)
# model = PointCloudAutoencoder(config).to(device)
# model = PointCloudAE(5000,256).to(device)

optim = torch.optim.AdamW(model.parameters(), lr= 1e-4)

epochs = 2000


report_rate = 600

s= time.time()

for epoch in range(epochs):

    running_loss = 0 

    batch_count = 0 

    for i, data in enumerate(train_loader):

        x = data['points'].to(device).transpose(1,2)
        # x = x.view(x.shape[0], -1).to(device)
        
        optim.zero_grad()

        pred = model(x).transpose(1,2)

        loss = F.mse_loss(pred, x)

        loss.backward()

        optim.step()

        running_loss += loss.item()

        batch_count +=1

    if epoch % 50 == 49:
        print(f'Epoch {epoch:<3} Epoch Loss: {running_loss / batch_count}')

        # if i % report_rate == report_rate - 1:
        #     print(f'Batch {i:<3} Running Loss: {running_loss / report_rate}')
        #     running_loss = 0

print(time.time() - s)
    

Epoch 49  Epoch Loss: 179824.39273626154
Epoch 99  Epoch Loss: 140382.34097151321
Epoch 149 Epoch Loss: 114006.89859286221
Epoch 199 Epoch Loss: 251136.1066194014
Epoch 249 Epoch Loss: 249616.58377699417
Epoch 299 Epoch Loss: 59660.905065363106
Epoch 349 Epoch Loss: 289422.68826363306
Epoch 399 Epoch Loss: 29257.697004491634
Epoch 449 Epoch Loss: 453240.9243177934
Epoch 499 Epoch Loss: 585186.1697689403
Epoch 549 Epoch Loss: 92474.45200486617
Epoch 599 Epoch Loss: 243859.16436854276
Epoch 649 Epoch Loss: 879324.6589587819
Epoch 699 Epoch Loss: 78364.81754996559
Epoch 749 Epoch Loss: 189705.5731232383
Epoch 799 Epoch Loss: 289108.6335027868
Epoch 849 Epoch Loss: 54135.53244313327
Epoch 899 Epoch Loss: 591263.5530154489
Epoch 949 Epoch Loss: 217342.05106995322
Epoch 999 Epoch Loss: 48297.966102426704
Epoch 1049 Epoch Loss: 189620.63842218573
Epoch 1099 Epoch Loss: 1156464.5088398673
Epoch 1149 Epoch Loss: 16823.673126220703
Epoch 1199 Epoch Loss: 707890.912905433
Epoch 1249 Epoch Loss: 7

In [18]:
x = next(iter(train_loader))['points'][2]
cloud = pc.get_point_cloud(x)
pc.visualize_point_cloud(cloud)


with torch.no_grad():
    x = x.T.unsqueeze(0).to(device)
    rec_x = np.array(model(x)[0].to('cpu'))
    cloud = pc.get_point_cloud(rec_x)
    pc.visualize_point_cloud(cloud)

  rec_x = np.array(model(x)[0].to('cpu'))


In [None]:
x.view(x.shape[0], -1).shape

torch.Size([16, 15000])