### Imports

In [1]:
from IPython.display import clear_output

In [2]:
!pip install -q path.py
!pip install -q pytorch3d
# https://github.com/facebookresearch/pifuhd/issues/77
!pip install -q 'torch==1.6.0+cu101' -f https://download.pytorch.org/whl/torch_stable.html
!pip install -q 'torchvision==0.7.0+cu101' -f https://download.pytorch.org/whl/torch_stable.html
!pip install -q 'pytorch3d==0.2.5'
!pip install -q Ninja
clear_output()

In [3]:
import numpy as np
import math
import random
import os
import torch
import scipy.spatial.distance
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, utils
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pytorch3d

import plotly.graph_objects as go
import plotly.express as px

from path import Path

from pytorch3d.loss import chamfer

random.seed = 42

In [4]:
!wget http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip # /ModelNet40.zip - 40 classes
!unzip -q ModelNet10.zip

path = Path("ModelNet10")

folders = [dir for dir in sorted(os.listdir(path)) if os.path.isdir(path/dir)]

clear_output()
classes = {folder: i for i, folder in enumerate(folders)}
# classes

#### Imports from helping.py

In [5]:
!gdown https://drive.google.com/uc?id=1CVwVxdfUfP6TRcVUjjJvQeRcgCGcnSO_
from helping import *
clear_output()

### Load Data


In [None]:
from google.colab import drive
drive.mount('/content/drive')
clear_output()

beds_train = torch.load('drive/MyDrive/Thesis/dataloaders/dataloader_beds_both/trainloader.pth')
beds_valid = torch.load('drive/MyDrive/Thesis/dataloaders/dataloader_beds_both/validloader.pth')

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Autoencoders

### PCAutoEncoder

In [9]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.1)

In [55]:
class PointNetAE(nn.Module):
    def __init__(self, num_points=1024, z_dim=100):
        super(PointNetAE, self).__init__()
        self.num_points = num_points

        self.encoder = PointEncoder(num_points, z_dim=z_dim)
        self.decoder = PointDecoder(num_points, z_dim=z_dim)

    def reparameterize(self, mu, log_var):
        std = torch.exp(log_var / 2)
        eps = torch.randn_like(std)
        return mu + std * eps

    def forward(self, x):
        x, mu, logvar = self.encoder(x)
        # x = self.reparameterize(mu, logvar)
        x = self.decoder(x)
        return x


class PointEncoder(nn.Module):
    def __init__(self, num_points, z_dim):
        super(PointEncoder, self).__init__()
        self.num_points = num_points
        self.feature_dim = z_dim
        self.convs = nn.Sequential(
            nn.Conv1d(3, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, num_points, 1),
            nn.BatchNorm1d(num_points),
        )

        self.dense = nn.Sequential(
            nn.Linear(num_points, 512),
            nn.ReLU(),
            nn.Linear(512, self.feature_dim)
        )

        self.dense.apply(init_weights)

        self.mu_fc = nn.Linear(self.feature_dim, z_dim)
        self.log_var_fc = nn.Linear(self.feature_dim, z_dim)

    def forward(self, x):
        x = self.convs(x)
        x, _ = torch.max(x, 2) # instead of maxpool
        x = x.view(-1, self.num_points)
        x = self.dense(x)
        x_relu = torch.relu(x)
        mu, log_var = self.mu_fc(x_relu), self.log_var_fc(x_relu)
        return x, mu, log_var


'''
use only dense layers in decoder
'''
# class PointDecoder(nn.Module):
#     def __init__(self, num_points, z_dim):
#         super(PointDecoder, self).__init__()
#         self.num_points = num_points
#         self.dense_layers = nn.Sequential(
#             nn.Linear(z_dim, 256),
#             nn.Dropout(0.1),
#             nn.ReLU(),
#             nn.Linear(256, 512),
#             nn.Dropout(0.2),
#             nn.ReLU(),
#             nn.Linear(512, num_points),
#             nn.Dropout(0.3),
#             nn.Linear(num_points, num_points*3),
#             nn.Tanh()
#         )
#         self.dense_layers.apply(init_weights)

#     def forward(self, x):
#         batchsize = x.size()[0]
#         x = self.dense_layers(x)
#         x = x.view(batchsize, 3, self.num_points)
#         return x

'''
apply Conv1d to increase dimensionality (1 -> 3), 3: x, y, z
'''
# class PointDecoder(nn.Module):
#     def __init__(self, num_points, z_dim):
#         super(PointDecoder, self).__init__()
#         self.num_points = num_points
#         self.dense_layers = nn.Sequential(
#             nn.Linear(z_dim, 256),
#             nn.Dropout(0.1),
#             nn.ReLU(),
#             nn.Linear(256, 512),
#             nn.Dropout(0.2),
#             nn.ReLU(),
#             nn.Linear(512, num_points),
#             nn.Dropout(0.3),
#             nn.Tanh()
#         )
#         self.conv = nn.Sequential(
#             nn.Conv1d(1, 3, 1),
#         )
#         self.dense_layers.apply(init_weights)

#     def forward(self, x):
#         batchsize = x.size()[0]
#         x = self.dense_layers(x).reshape(batchsize, 1, self.num_points)
#         x = self.conv(x)
#         return x


'''
apply Conv1d to increase number of points (to 1024)
'''
class PointDecoder(nn.Module):
    def __init__(self, num_points, z_dim):
        super(PointDecoder, self).__init__()
        self.num_points = num_points
        self.z_dim = z_dim
        self.conv = nn.Sequential(
            nn.Conv1d(100, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, num_points, 1),
            nn.BatchNorm1d(num_points),
            nn.ReLU(),
        )
        self.dense_layers = nn.Sequential(
            nn.Linear(num_points, num_points*3),
            nn.Dropout(0.3),
            nn.Tanh()
        )
        self.dense_layers.apply(init_weights)

    def forward(self, x):
        batchsize = x.size()[0]
        x = x.reshape(batchsize, self.z_dim, 1)
        x = self.conv(x).reshape(batchsize, self.num_points)
        x = self.dense_layers(x)
        x = x.reshape(batchsize, 3, self.num_points)
        return x

In [65]:
# encoder = PointEncoder(1024, 100)
# decoder = PointDecoder(1024, 100)
pc_autoencoder = PointNetAE(num_points=1024, z_dim=100)
for x, _ in beds_train:
    x = x.float().permute(0, 2, 1)
    res = pc_autoencoder(x).detach().cpu()
    res = res[0]
    pcshow(res[0], res[1], res[2])
    break
pc_autoencoder

PointNetAE(
  (encoder): PointEncoder(
    (convs): Sequential(
      (0): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
      (6): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (dense): Sequential(
      (0): Linear(in_features=1024, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=100, bias=True)
    )
    (mu_fc): Linear(in_features=100, out_features=100, bias=True)
    (log_var_fc): Linear(in_features=100, out_features=100, bias=True)
  )
  (decoder): PointDecoder(
    (conv): Sequential(
      (0): Conv1d(100, 128, kernel_size=(1,), stride=(1,))
      (1): Batc

In [11]:
def train_pcautoencoder(autoencoder, x, loss_func, optimizer):
    '''
    loss function must be chamfer distance
    '''
    optimizer.zero_grad()
    x = x.float().to(device).permute(0, 2, 1)
    output = autoencoder(x)
    dist1, dist2 = loss_func(x, output)

    try:
        # dist2 might be None if x_normals and y_normals (args to loss_func) are None
        loss = (torch.mean(dist1)) + (torch.mean(dist2))
    except:
        loss = (torch.mean(dist1))

    loss.backward()
    optimizer.step()

    return loss.data.item()


def validate_pcautoencoder(autoencoder, x, loss_func):
    '''
    loss function must be chamfer distance
    '''
    with torch.no_grad():
        x = x.float().to(device).permute(0, 2, 1)
        output = autoencoder(x)
        dist1, dist2 = loss_func(x, output)

        try:
            # dist2 might be None if x_normals and y_normals (args to loss_func) are None
            loss = (torch.mean(dist1)) + (torch.mean(dist2))
        except:
            loss = (torch.mean(dist1))

        return loss.data.item()

In [None]:
# import gc
# gc.collect() # garbace collector
# torch.cuda.empty_cache()

In [12]:
import gc

def train_with_chamfer_dist(autoencoder, train_loader, loss_func, optimizer, valid_loader, train_func, validate_func, epochs=100, print_every_e=5, valid_every=5, scheduler=None):
    autoencoder.train()
    for epoch in range(1, epochs+1):
        losses = []
        for x, _ in train_loader:
            loss = train_func(autoencoder, x, loss_func, optimizer)
            losses.append(loss)
        if scheduler:
            scheduler.step()

        if epoch % print_every_e == 0 or epoch == 1:
            # torch.save(autoencoder.state_dict(), f"models_autoencoder/model_{epoch}.pth")
            # torch.save(D.state_dict(), f"models2/D_{epoch}.pth")
            print(f'{epoch}:\ttrain loss: {np.mean(losses)}')
        if epoch % valid_every == 0:
            valid_losses = []
            for x, _ in valid_loader:
                valid_loss = validate_func(autoencoder, x, loss_func)
                valid_losses.append(valid_loss)
            print(f'\tvalidation loss: {np.mean(valid_losses)}')

In [58]:
pc_autoencoder = PointNetAE(num_points=1024, z_dim=100)
pc_autoencoder.to(device)

optimizer = optim.AdamW(pc_autoencoder.parameters(), lr=0.005, betas=(0.8, 0.8))
# optimizer = optim.SGD(pc_autoencoder.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.1)
loss_func = chamfer.chamfer_distance


train_with_chamfer_dist(pc_autoencoder, beds_train, loss_func, optimizer, valid_loader=beds_valid,
                        train_func=train_pcautoencoder, validate_func=validate_pcautoencoder, epochs=1000, print_every_e=20, valid_every=20, scheduler=scheduler)

1:	train loss: 985.104118347168
20:	train loss: 132.90518140792847
	validation loss: 126.20811208089192
40:	train loss: 124.97280502319336
	validation loss: 123.52557627360027
60:	train loss: 124.68352890014648
	validation loss: 123.78553263346355
80:	train loss: 124.38706111907959
	validation loss: 123.76788838704427
100:	train loss: 124.17557001113892
	validation loss: 123.92074584960938
120:	train loss: 124.56211948394775
	validation loss: 124.03826649983723
140:	train loss: 123.83728551864624
	validation loss: 123.60908762613933
160:	train loss: 123.84712076187134
	validation loss: 124.56482950846355
180:	train loss: 123.56409358978271
	validation loss: 124.40093994140625
200:	train loss: 123.66000175476074
	validation loss: 124.93374125162761
220:	train loss: 122.01719617843628
	validation loss: 123.59573618570964
240:	train loss: 122.10397005081177
	validation loss: 123.7787602742513
260:	train loss: 122.17292499542236
	validation loss: 123.80077107747395
280:	train loss: 121.763

KeyboardInterrupt: ignored

In [64]:
local_sample = None
for sample, _ in beds_valid:
    sample = sample.permute(0, 2, 1)
    local_sample = sample[0]
    x, y, z = local_sample[:][0], local_sample[:][1], local_sample[:][2]
    # pcshow(x, y, z)
    break
pc_autoencoder.eval()
with torch.no_grad():
    samplee = local_sample.unsqueeze(0).float().to(device)
    # samplee = sample.float().to(device)
    out = pc_autoencoder(samplee)
first = out[0].detach().cpu()
x, y, z = first[:][0], first[:][1], first[:][2]
pcshow(x, z, y)

In [None]:
local_sample = None
for sample, _ in beds_train:
    sample = sample.permute(0, 2, 1)
    local_sample = sample[0]
    x, y, z = local_sample[:][0], local_sample[:][1], local_sample[:][2]
    pcshow(x, y, z)
    break
# pc_autoencoder.eval()
with torch.no_grad():
    samplee = local_sample.unsqueeze(0).float().to(device)
    # samplee = sample.float().to(device)
    out = pc_autoencoder(samplee)
first = out[0].detach().cpu()
x, y, z = first[:][0], first[:][1], first[:][2]
pcshow(x, z, y)