In [7]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Resize, Compose, ToTensor, Normalize
import datetime as dt
import numpy as np

import matplotlib.pyplot as plt

import time
import trimesh

import math
from math import sqrt
import random

#mind import order
from mesh_to_sdf import get_surface_point_cloud
from mesh_to_sdf.utils import sample_uniform_points_in_unit_sphere

# Select the EGL backend for PyOpenGL in this process.
# NOTE(review): this assignment runs after mesh_to_sdf has already been imported
# above; if the rendering stack reads PYOPENGL_PLATFORM at import time, the
# variable has to be set before that import on a fresh kernel — confirm.
os.environ['PYOPENGL_PLATFORM'] = 'egl' #opengl seems to only work with TPU
# Shell check (IPython `!` syntax): import OpenGL.EGL in a separate Python process
# that has PYOPENGL_PLATFORM=egl set.
!PYOPENGL_PLATFORM=egl python -c 'from OpenGL import EGL'
# Echo the value that is now set for this kernel.
print(os.environ['PYOPENGL_PLATFORM'])

# Set to False to force CPU execution even when CUDA is available.
USE_GPU = True

# Floating point type used throughout this notebook.
dtype = torch.float32

# Use CUDA only when it is both requested and actually available; otherwise CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

egl


In [2]:
class MLP(nn.Module):
    """Fully connected network with ``LeakyReLU(0.1)`` activations.

    The stack consists of an input ``Linear`` followed by an activation,
    ``hidden_layers`` further ``Linear`` + activation pairs of width
    ``hidden_features``, and a final ``Linear`` without activation.

    Args:
        in_features: size of each input sample.
        hidden_features: width of every hidden ``Linear`` layer.
        hidden_layers: number of hidden-to-hidden ``Linear`` layers.
        out_features: size of each output sample.
    """

    def __init__(self, in_features, hidden_features, hidden_layers, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_layers = hidden_layers
        self.hidden_features = hidden_features

        # Input projection.
        stack = [nn.Linear(in_features, hidden_features), nn.LeakyReLU(0.1)]

        # Hidden-to-hidden blocks.
        for _ in range(hidden_layers):
            stack.extend((nn.Linear(hidden_features, hidden_features), nn.LeakyReLU(0.1)))

        # Output projection; no activation is applied to the result.
        stack.append(nn.Linear(hidden_features, out_features))

        self.layers = nn.Sequential(*stack)

    def forward(self, coords):
        """Run ``coords`` through the layer stack and return the raw output."""
        return self.layers(coords)

    def _linear_layers(self):
        """Return the ``nn.Linear`` modules of the stack in order."""
        return [module for module in self.layers if isinstance(module, nn.Linear)]

    def weights(self):
        """Return the detached weight tensor of every ``Linear`` layer, in order."""
        return [linear.weight.data.detach() for linear in self._linear_layers()]

    def biases(self):
        """Return the detached bias tensor of every ``Linear`` layer, in order."""
        return [linear.bias.data.detach() for linear in self._linear_layers()]

In [3]:
class SDFMesh():
    """Signed-distance samples for a mesh loaded from disk.

    The mesh is loaded with ``trimesh.load`` and turned into a scan-based
    surface point cloud. Two batches of ``nsamples // 2`` query points are then
    evaluated: points returned by ``sample_sdf_near_surface`` and points
    returned by ``sample_uniform_points_in_unit_sphere``.

    Attributes:
        corners: ``[min - 1e-3, max + 1e-3]`` of the mesh vertices along axis 0.
        coords: float32 array of query points, near-surface batch first.
        samples: float32 array of SDF values aligned with ``coords``.
    """

    def __init__(self, filename, nsamples):
        mesh = trimesh.load(filename)

        # Slightly padded bounding box of the vertices.
        lower = mesh.vertices.min(0) - 1e-3
        upper = mesh.vertices.max(0) + 1e-3
        self.corners = [lower, upper]

        # Recentring/rescaling of the vertices and the 'sample' surface point
        # method were disabled alternatives in the original cell; the mesh is
        # used as loaded and the surface is obtained by scanning.
        half = nsamples // 2
        cloud = get_surface_point_cloud(mesh, surface_point_method='scan', scan_count=20, scan_resolution=400)

        # The near-surface batch is drawn before the unit-sphere batch; the
        # order is kept so that random number consumption is unchanged.
        near_points, near_sdf = cloud.sample_sdf_near_surface(half, use_scans=False, sign_method='normal')
        sphere_points = sample_uniform_points_in_unit_sphere(half)
        sphere_sdf = cloud.get_sdf_in_batches(sphere_points, use_depth_buffer=False)

        self.coords = np.concatenate([near_points, sphere_points]).astype(np.float32)
        self.samples = np.concatenate([near_sdf, sphere_sdf]).astype(np.float32)

        print(self.corners, self.coords.shape, self.samples.shape)

class SDFDataset(Dataset):
    """Map-style dataset over (query point, SDF value) pairs.

    Args:
        coords: numpy array of query points, indexed along its first axis.
        samples: numpy array of SDF values, one entry per row of ``coords``.
    """

    def __init__(self, coords, samples):
        super().__init__()
        # from_numpy shares memory with the input arrays (no copy is made).
        self.samples = torch.from_numpy(samples)
        self.coords = torch.from_numpy(coords)

    def __len__(self):
        """Return the number of query points."""
        return self.coords.shape[0]

    def __getitem__(self, index):
        """Return ``(coords[index], tensor([samples[index]]))``.

        Raises:
            IndexError: if ``index`` is at or beyond the end of the dataset.
        """
        # Valid positions are 0 .. len-1, so `index == len` must be rejected too
        # (the previous `>` comparison let it through to the tensor indexing).
        if index >= len(self):
            raise IndexError(
                'index {} is out of range for dataset of size {}'.format(index, len(self)))
        return self.coords[index, :], torch.tensor([self.samples[index]])

In [4]:
def train_OverfitShape(sdf, config, worker_fn):
    """Fit an MLP to the SDF samples of one shape and return the trained model.

    Args:
        sdf: object exposing ``coords`` and ``samples`` numpy arrays, which are
            wrapped in an ``SDFDataset``.
        config: dict providing ``epochs``, ``learning_rate``, ``batch_size``,
            ``hidden_layers``, ``hidden_features``, ``shuffle_dataset`` and
            ``weight_decay``.
        worker_fn: callable forwarded to the ``DataLoader`` as ``worker_init_fn``.

    Returns:
        The trained ``MLP`` (3 inputs, 1 output), left in training mode on the
        module-level ``device``.
    """
    epochs = config['epochs']
    learning_rate = config['learning_rate']
    batch_size = config['batch_size']

    model = MLP(in_features=3, out_features=1,
                hidden_layers=config['hidden_layers'],
                hidden_features=config['hidden_features'])

    # Put the model on the same device the batches are moved to below. Using
    # `device` (instead of calling .cuda() whenever USE_GPU is set) also works
    # when a GPU is requested but CUDA is not available.
    model.to(device)
    model.train(True)

    dataset = SDFDataset(sdf.coords, sdf.samples)
    dataloader = DataLoader(dataset=dataset,
                            batch_size=batch_size,
                            shuffle=config['shuffle_dataset'],
                            num_workers=0,
                            # Pinned host memory is only useful for CUDA transfers.
                            pin_memory=(device.type == 'cuda'),
                            worker_init_fn=worker_fn)

    # Number of batches actually produced per epoch, including a partial last
    # batch; this no longer relies on config['samples'] matching the dataset.
    num_batches = len(dataloader)

    optimizer = torch.optim.Adam(lr=learning_rate, params=model.parameters(),
                                 weight_decay=config['weight_decay'])
    loss_func = nn.MSELoss(reduction='mean')

    ### Actual training loop
    for e in range(epochs):
        count = 0
        epoch_loss = 0

        for batch_idx, (x_train, y_train) in enumerate(dataloader):
            x_train, y_train = x_train.to(device), y_train.to(device)
            # Count the samples really seen; the last batch may be smaller.
            count += x_train.shape[0]
            optimizer.zero_grad()
            y_pred = model(x_train)
            loss = loss_func(y_pred, y_train)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Mean of the per-batch losses; skipped for an empty dataset to avoid
        # dividing by zero.
        if num_batches:
            epoch_loss /= num_batches

        ## Logging (every second epoch)
        if e % 2 == 0:
            print('Logging epoch ')
            msg = '{}\tEpoch: {}:\t[{}/{}]\tloss: {:.6f}'.format(
                "monke", e + 1, count, len(dataset), epoch_loss)
            print(msg)

    return model

In [5]:
# Mesh name -> path of the mesh file.
mesh_files = dict(sphere1='sphere1.obj')

# Total number of SDF samples requested per mesh (65536).
samples = 4 * 128 * 128

# Baseline settings handed to train_OverfitShape.
default_config = {
    'mesh': '',
    'architecture': 'OverfitShape',
    'epochs': 10,
    'samples': samples,
    'batch_size': 64,
    'learning_rate': 0.001,
    'weight_decay': 0.0001,
    'hidden_layers': 3,
    'hidden_features': 16,
    'shuffle_dataset': False,
}

def plt_weights(sdf):
    """Show each tensor returned by ``sdf.weights()`` as a grayscale image.

    One figure is displayed per tensor, in the order ``weights()`` yields them.
    """
    for weight in sdf.weights():
        image = dump_data(weight)
        plt.imshow(image, cmap='gray')
        plt.show()
        
def dump_data(dat):
    """Return ``dat`` as a NumPy array after moving it to the CPU and detaching it."""
    return dat.cpu().detach().numpy()

# Determinism test

Before setting the use_deterministic_algorithms flag to true, one needs to restart the notebook kernel and
set a CUDA-related environment variable: CUBLAS_WORKSPACE_CONFIG=:4096:8

see https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility for more info.

- Enabling this debug workspace for CUDA alone does not seem to influence the determinism of training.
- The DataLoader needs to have num_workers=0 or a worker init function in which np.random.seed is set.
- Enabling torch.backends.cudnn.benchmark can lead to nondeterminism, but in this case it didn't.
- Setting torch.backends.cudnn.deterministic didn't change anything about the training randomness.
- Neither did torch.use_deterministic_algorithms.

https://github.com/pytorch/pytorch/issues/7068#issuecomment-484918113


In [8]:
# Seed every random number generator used in this notebook with the same value
# (torch CPU and CUDA generators, NumPy and Python's `random`) before any
# sampling or weight initialisation happens.
torch.cuda.manual_seed_all(25)
torch.manual_seed(25)
torch.cuda.manual_seed(25)
np.random.seed(25)
random.seed(25)

# Keep the DataLoader iteration order fixed for this experiment.
default_config['shuffle_dataset']=False

# Backend flags under test: cuDNN autotuning is on and the deterministic modes
# are off for this run.
torch.backends.cudnn.benchmark = True
torch.use_deterministic_algorithms(False)
torch.backends.cudnn.deterministic = False

# Passed to the DataLoader as worker_init_fn; it re-seeds NumPy with the same
# value and ignores worker_id.
# NOTE(review): train_OverfitShape builds the loader with num_workers=0, so this
# is presumably never invoked — confirm.
def _init_fn(worker_id):
   np.random.seed(int(25))

# Timestamp the run, build the SDF samples for the sphere mesh, then train on them.
print("Run Time: {}".format(dt.datetime.now()))
sphere1_sdfmesh = SDFMesh(mesh_files['sphere1'], samples)
sphere1 = train_OverfitShape(sphere1_sdfmesh, default_config, _init_fn)
print("training done.")

Run Time: 2021-11-04 17:51:39.560132
[TrackedArray([-0.701, -0.701, -0.701]), TrackedArray([0.701, 0.701, 0.701])] (65536, 3) (65536,)
Logging epoch 
monke	Epoch: 1:	[65536/65536]	loss: 0.005563
Logging epoch 
monke	Epoch: 3:	[65536/65536]	loss: 0.000359
Logging epoch 
monke	Epoch: 5:	[65536/65536]	loss: 0.000248
Logging epoch 
monke	Epoch: 7:	[65536/65536]	loss: 0.000218
Logging epoch 
monke	Epoch: 9:	[65536/65536]	loss: 0.000203
training done.


In [None]:
# Show the weight matrix of every Linear layer of the trained sphere model.
plt_weights(sphere1)