In [21]:
import argparse
import os, sys
import time
import copy
import math
import pickle
import statistics

import numpy as np
import pandas as pd
import open3d as o3d
import torchvision
import pytorch3d
from tqdm import tqdm
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.nn.modules.utils import _single, _pair, _triple

# Import toolkits
from summer2022_toolbox.visualization_3D_objects import *
from summer2022_toolbox.preprocessing import *
from summer2022_toolbox.read_object import *
from summer2022_toolbox.model_averaging import *
from summer2022_toolbox.model_PCA import *
from summer2022_toolbox.morphable_model import *
from summer2022_toolbox.model_evaluation import *

In [42]:
"""
Model by Dhiraj, inspired by Charles
"""

import torch
import torch.nn as nn
import torch.nn.functional as F


class PCAutoEncoder(nn.Module):
    """Autoencoder for point clouds.

    The encoder applies per-point 1x1 convolutions followed by a max pool
    over the point axis, which yields a 1024-dimensional global embedding.
    The decoder is a three-layer MLP that emits a fixed-size point cloud.

    Args:
        point_dim: number of coordinates per point (input channels).
        num_points: number of points produced by the decoder.

    Input:
        x: batch of point clouds of shape (B, point_dim, N). Because of the
           max pool the encoder accepts any N; the decoder always emits
           ``num_points`` points.

    Output:
        (reconstructed_points, global_feat) with shapes
        (B, point_dim, num_points) and (B, 1024).
    """
    def __init__(self, point_dim, num_points):
        super(PCAutoEncoder, self).__init__()

        # remembered so forward() can shape the decoder output independently
        # of the size of the incoming cloud
        self.point_dim = point_dim
        self.num_points = num_points

        # encoder: shared per-point MLP implemented as 1x1 convolutions
        self.conv1 = nn.Conv1d(in_channels=point_dim, out_channels=64, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=1)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=1)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1)
        self.conv5 = nn.Conv1d(in_channels=128, out_channels=1024, kernel_size=1)

        # decoder: the last layer emits point_dim values for each output point
        # (previously hard-coded to 3, which broke any point_dim != 3)
        self.fc1 = nn.Linear(in_features=1024, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=1024)
        self.fc3 = nn.Linear(in_features=1024, out_features=num_points * point_dim)

        # batch norm
        # NOTE(review): bn1 is applied after conv1, conv2 and conv3, and bn3
        # after conv5, fc1 and fc2, so those layers share affine parameters and
        # running statistics. Left unchanged here so the state_dict keys stay
        # the same; giving each layer its own BatchNorm1d is worth considering.
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, x):
        """Encode ``x`` of shape (B, point_dim, N) and decode it again.

        Returns:
            reconstructed_points: tensor of shape (B, point_dim, num_points).
            global_feat: tensor of shape (B, 1024), the pooled embedding.
        """
        batch_size = x.shape[0]

        # encoder
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn1(self.conv2(x)))
        x = F.relu(self.bn1(self.conv3(x)))
        x = F.relu(self.bn2(self.conv4(x)))
        x = F.relu(self.bn3(self.conv5(x)))

        # max pool over the point axis -> (B, 1024) global embedding
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        global_feat = x

        # decoder
        x = F.relu(self.bn3(self.fc1(x)))
        x = F.relu(self.bn3(self.fc2(x)))
        reconstructed_points = self.fc3(x)

        # reshape using the sizes the decoder was built for, not the input's,
        # so the element count always matches fc3's output width
        reconstructed_points = reconstructed_points.reshape(
            batch_size, self.point_dim, self.num_points
        )

        return reconstructed_points, global_feat

### Sanity Check

In [43]:
# Sanity check: build a small model, push one random batch through it and
# list the weight shape / parameter count of every Conv1d and Linear layer.

# GPU check
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    print("Run on GPU...")
else:
    print("Run on CPU...")

# Model definition
net = PCAutoEncoder(3, 2048)
net = net.to(device)

# Forward a random batch of 5 clouds with 2048 points each.
# Call the module itself (not .forward) so nn.Module hooks are honoured, and
# skip autograd bookkeeping since only the output shape is inspected.
data = torch.randn(5, 3, 2048).to(device)
with torch.no_grad():
    out = net(data)

# The model returns (reconstructed_points, global_feat); check the first.
assert tuple(out[0].shape) == (5, 3, 2048)
print("Forward pass successful")

# Print the weight shape and number of weight parameters of each layer
for name, module in net.named_modules():
    if isinstance(module, (nn.Conv1d, nn.Linear)):
        weight_shape = tuple(module.weight.shape)
        num_Param = module.weight.numel()

        print(f'{name:10} {str(weight_shape):20} {str(num_Param):10}')
#############################################

Run on GPU...
Forward pass successful
conv1      (64, 3, 1)           192       
conv2      (64, 64, 1)          4096      
conv4      (128, 64, 1)         8192      
conv5      (1024, 128, 1)       131072    
fc1        (1024, 1024)         1048576   
fc3        (6144, 1024)         6291456   


### Import Preprocessed ModelNet40 cars

In [44]:
# Load all aligned cars (the files are pickles despite the .txt extension).
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The context managers make sure both file handles are closed after reading.
with open('data/preprocessed/car/train_aligned.txt', 'rb') as f1:
    X_train = pickle.load(f1)
with open('data/preprocessed/car/test_aligned.txt', 'rb') as f2:
    X_test = pickle.load(f2)

# shape of a single training sample
print(X_train[0].shape)

(3, 4929)


In [45]:
# Mini-batch sizes for the training and validation loaders
TRAIN_BATCH_SIZE = 12
VAL_BATCH_SIZE = 10

# Training batches are reshuffled every epoch and loaded by 4 worker processes
train_loader = DataLoader(X_train, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=4)

# Validation batches keep the dataset order; also loaded by 4 worker processes
val_loader = DataLoader(X_test, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=4)

### Deploy model on GPU

In [46]:
# GPU check (falls back to CPU when CUDA is unavailable)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    print("Run on GPU...")
else:
    print("Run on CPU...")

# Model definition: size the network from the data instead of hard-coding it.
# X_train[0] has shape (point_dim, num_points), i.e. (3, 4929) for the cars.
point_dim, num_points = X_train[0].shape
model = PCAutoEncoder(point_dim, num_points)
model = model.to(device)

# Check the parameters ended up on the selected device. Asserting `.is_cuda`
# unconditionally would crash on the CPU fallback path above.
assert next(model.parameters()).device.type == device

Run on GPU...


In [47]:
from pytorch3d.loss import chamfer_distance

# initial learning rate for the Adam optimizer
INITIAL_LR = 0.001

# NOTE(review): MOMENTUM and REG are defined but not passed to the Adam
# optimizer below, so they currently have no effect on training. If L2
# regularization is intended, pass weight_decay=REG -- confirm before changing.
# momentum for optimizer = 0.9
MOMENTUM = 0.9

# L2 regularization strength = 0.0001
REG = 1e-4


def criterion(recon_x, x):
    """Chamfer-distance loss between a reconstruction and its target.

    Args:
        recon_x: reconstructed point clouds, shape (B, P1, D).
        x: target point clouds, shape (B, P2, D).

    Returns:
        Scalar tensor holding the Chamfer distance, reduced with
        chamfer_distance's default batch and point reductions.
    """
    # chamfer_distance returns a (loss, loss_normals) tuple, so calling
    # `.to(device)` on its result raises; the loss already lives on the
    # device of its inputs. loss_normals is unused (no normals are passed).
    loss, _ = chamfer_distance(recon_x, x)
    return loss


# Add optimizer
optimizer = optim.Adam(model.parameters(), lr=INITIAL_LR)

### Training

In [48]:
# some hyperparameters
# total number of training epochs
EPOCHS = 2
# the learning rate is multiplied by DECAY every DECAY_EPOCHS epochs
DECAY_EPOCHS = 70
DECAY = 0.7

# the folder where the trained model is saved
CHECKPOINT_FOLDER = "./saved_model"


def _chamfer_loss(net, batch):
    """Return the scalar Chamfer loss between ``batch`` and its reconstruction.

    Args:
        net: the autoencoder; called as ``net(batch)`` and expected to return
            ``(reconstructed_points, global_feat)``.
        batch: point clouds of shape (B, point_dim, N) as yielded by the loaders.
    """
    batch = batch.float().to(device)
    # the model returns a tuple; only the reconstruction enters the loss
    recon, _ = net(batch)
    # chamfer_distance expects (B, P, D) clouds, hence the transposes, and
    # returns (loss, loss_normals); loss_normals is None without normals.
    loss, _ = chamfer_distance(
        recon.transpose(1, 2).contiguous(),
        batch.transpose(1, 2).contiguous(),
    )
    return loss


# start the training/validation process
best_loss = 1e20
current_learning_rate = INITIAL_LR

start = time.time()
print("==> Training starts!")
print("="*50)

# store loss learning curve (one float per epoch)
train_loss_lst = []
valid_loss_lst = []

for i in range(0, EPOCHS):
    # handle the learning rate scheduler.
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)

    # switch to train mode
    model.train()

    print("Epoch %d:" % i)

    train_loss = 0.0

    # Train the model for 1 epoch.
    for inputs in train_loader:
        loss = _chamfer_loss(model, inputs)

        # zero the gradient, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate a Python float: summing the tensor itself would keep
        # every batch's autograd graph alive and grow GPU memory all epoch
        train_loss += loss.item()

    avg_loss = train_loss / len(train_loader)
    print("Training loss: %.4f" % (avg_loss))
    train_loss_lst.append(avg_loss)

    # switch to eval mode
    model.eval()

    val_loss = 0.0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for inputs in val_loader:
            val_loss += _chamfer_loss(model, inputs).item()

    avg_loss = val_loss / len(val_loader)
    print("Validation loss: %.4f" % (avg_loss))

    valid_loss_lst.append(avg_loss)

    # save the model checkpoint only when the validation loss improves
    if avg_loss < best_loss:
        # update the value that is compared against; previously it was never
        # updated, so every epoch overwrote the checkpoint
        best_loss = avg_loss
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': model.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        # NOTE(review): the file name says resnet20 but this is the point-cloud
        # autoencoder; kept unchanged in case other code loads this path.
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'resnet20.pth'))

    print('')

# legacy name kept for compatibility; it holds a loss, not an accuracy
best_val_acc = best_loss

print("="*50)
print(f"==> Optimization finished in {time.time() - start:.2f}s! Best validation loss: {best_loss:.4f}")

==> Training starts!
Epoch 0:


RuntimeError: CUDA out of memory. Tried to allocate 232.00 MiB (GPU 0; 3.94 GiB total capacity; 2.22 GiB already allocated; 181.12 MiB free; 2.53 GiB reserved in total by PyTorch)