In [1]:
import deeppy_project as dp

import torch
import numpy as np


# Notebook-local aliases for the deeppy_project components used in the cells below.
LearnFrame, SANE = dp.LearnFrame, dp.models.cv.Sane
IngpDataset, UniquePerBatchSampler = dp.dataset.IngpDataset, dp.sampler.UniquePerBatchSampler
DatasetLoader = dp.DatasetLoader


import shutil
# Start every run from an empty log directory.
# Best-effort on purpose: a missing (or undeletable) 'logs' folder must not stop
# the notebook. `ignore_errors=True` keeps that behaviour without a bare
# `except:`, which would also have swallowed KeyboardInterrupt and real bugs.
shutil.rmtree('logs', ignore_errors=True)

# Configure Hyperparameters

In [2]:
# --- Sequence length -------------------------------------------------------
# A window holds 380 tokens in total; whatever is left after the pos and mlp
# tokens is assigned to hash tokens.
window_size = 380
pos_token_size = 10
mlp_token_size = 53
hash_token_size = window_size - pos_token_size - mlp_token_size

# --- Training parameters ---------------------------------------------------
batch_size = 32
gradient_accumulation_steps = 1
lr = 2e-4
total_epocs = 20
# Four coefficients, rescaled so that they sum to 1.
gamma = np.asarray([0.4, 0.05, 0.53, 0.02])
gamma = gamma / gamma.sum()

# --- Transformer parameters ------------------------------------------------
input_dim = 256
embed_dim = 4 * 256
latent_dim = 128
projection_dim = 128
num_heads = 4
num_layers = 4
dropout = 0.3
bias = False





In [3]:
# Two-part configuration: one section for the hash encoding, one for the MLP.
data_config = dict(
    hash_encoding=dict(
        num_levels=16,
        level_dim=2,
        input_dim=3,
        log2_hashmap_size=19,
        base_resolution=16,
    ),
    mlp=dict(
        num_layers=3,   # Number of layers in geometric MLP
        hidden_dim=64,  # Hidden dimension size
    ),
)

# Initialize CUDA environment

In [4]:
# Switch on the `use_amp` and `torch_compile` flags of the deeppy environment
# config, then echo the device, log directory and checkpoint directory it
# reports (one per line).
dp.env_config.use_amp = True
dp.env_config.torch_compile = True
print(
    dp.env_config.device,
    dp.env_config.log_dir,
    dp.env_config.checkpoint_dir,
    sep="\n",
)

cuda
logs
checkpoints


# Configure DatasetLoader

In [5]:
# Build the IngpDataset.
# It is used like a torch dataset, i.e. through __len__ and __getitem__(idx).
dataset_config = dict(
    data_path="../data/",
    config=data_config,
    window_size=window_size,
    token_size=input_dim,
    pos_token_size=pos_token_size,
    mlp_token_size=mlp_token_size,
    permutation_augment=True,
    data_buffer_size=1,
)

dataset = IngpDataset(**dataset_config)

# Keyword arguments for the torch dataloaders.
# Shuffling is switched off here on purpose: UniquePerBatchSampler is the one
# that shuffles the indices.
dataloader_args = dict(
    num_workers=24,
    prefetch_factor=3,
    shuffle=False,
    pin_memory=True,
    persistent_workers=True,
    drop_last=True,
)

# Arguments for the deeppy DatasetLoader, which
#   - creates the dataset splits automatically, and
#   - creates and manages the dataloaders.
#
# UniquePerBatchSampler:
#   - grows the apparent dataset length from len(dataset) to len(dataset) * repeat;
#   - still samples indices without replacement until the dataset is exhausted;
#   - guarantees at most one instance of an object per batch, so different
#     views of the same object are never compared to each other as negatives.
datasetloader_args = dict(
    data=dataset,
    batch_size=batch_size,
    splits=[0.88, 0.12, 0],  # presumably train / test / validation fractions - confirm order
    dataloader_args=dataloader_args,
    sampler=UniquePerBatchSampler,
    sampler_args={"num_repeats": 20000},
)


# Build the splits and their dataloaders.
data = DatasetLoader(**datasetloader_args)

# Report the length of each split's dataset and of its dataloader.
# (The last label used to read "test dataset size" although it prints the
# length of the test *loader*.)
print(f"train dataset size : {len(data.train_dataset)}")
print(f"train dataloader size : {len(data.train_loader)}")
print(f"test dataset size : {len(data.test_dataset)}")
print(f"test dataloader size : {len(data.test_loader)}")

# Read from the dataset wrapped by the train split; passed to the model later.
max_positions = data.train_dataset.dataset.max_positions


train dataset size : 302
train dataloader size : 180000
test dataset size : 41
test dataset size : 20000


In [6]:
# Estimate how many optimizer steps make up one pass over all parameters in
# the training data, then scale that to the requested number of epochs.
n_param = 7_000_000
n_object = 3 * len(data.train_dataset)  # One object and n augmentation
t_param = n_param * n_object

# Amount of parameters consumed per step.
pass_param = hash_token_size * (input_dim / 2) * (batch_size * gradient_accumulation_steps)

# Whole steps per epoch, times the number of epochs.
epochs = total_epocs * int(t_param / pass_param)

# Round the step count up to the next multiple of save_freq.
save_freq = 5000
steps = -(-epochs // save_freq) * save_freq
print(f"epoch : {total_epocs} - steps : {steps}")


epoch : 20 - steps : 100000


# SANE Config

In [7]:
# OneCycleLR scheduler settings (consumed by deeppy.nn.optimizer.scheduler).
# "scheduler" and "auto_step" sit alongside the OneCycleLR arguments
# (max_lr, total_steps, ...).
Scheduler_params = dict(
    scheduler=torch.optim.lr_scheduler.OneCycleLR,
    auto_step=True,
    max_lr=lr,
    total_steps=steps,
    pct_start=0.20,
    anneal_strategy="cos",
    cycle_momentum=True,
    base_momentum=0.85,
    max_momentum=0.95,
    div_factor=25,
    final_div_factor=10000.0,
    three_phase=False,
    last_epoch=-1,
)

# Optimizer settings (consumed by deeppy.nn.optimizer.optimizer):
# AdamW, gradient-norm clipping, the scheduler settings and the
# gradient accumulation factor.
Optimizer_params = dict(
    optimizer=torch.optim.AdamW,
    optimizer_args=dict(lr=lr, weight_decay=1e-3, fused=True, amsgrad=False),
    clipper=torch.nn.utils.clip_grad_norm_,
    clipper_params=dict(max_norm=5.0),
    scheduler_params=Scheduler_params,
    gradient_accumulation_steps=gradient_accumulation_steps,
)

# Collect every SANE constructor argument in one place, then build the model.
Sane_params = dict(
    optimizer_params=Optimizer_params,
    max_positions=max_positions,
    input_dim=input_dim,
    latent_dim=latent_dim,
    projection_dim=projection_dim,
    embed_dim=embed_dim,
    num_heads=num_heads,
    num_layers=num_layers,
    context_size=window_size,  # the model's context is one full token window
    dropout=dropout,
    bias=bias,
    gamma=gamma,
    ntx_temp=0.1,
    pos_token_size=pos_token_size,
    noise_augment=0.1,
)

model = SANE(**Sane_params)



In [8]:
# Show sub-network 0 and check that its parameters are registered with the optimizer.
print("Autoencoder")
net = 0
print(model.nets[net])

# Number of parameter tensors the sub-network itself reports.
len_parameters = sum(1 for _ in model.nets[net].parameters())

# dpname of every optimizer parameter whose dpname contains "Net<net>".
op_params = [
    param.dpname
    for param_group in model.optimizer.optimizer.param_groups
    for param in param_group["params"]
    if f"Net{net}" in param.dpname
]
print(f"Number of parameters (as tensors) : {len_parameters}")
print(f"Number of parameters in optimizer (as tensors) : {len(op_params)}")

Autoencoder
Network(
  (model): Sequential(
    (0): SaneLinearTokenizerBeforePosition(
      (linear_hash): Linear(in_features=256, out_features=1024, bias=True)
      (linear_mlp): Linear(in_features=256, out_features=1024, bias=True)
    )
    (1): SaneXYZPositionalEmbedding(
      (hash_xyz): Linear(in_features=3, out_features=1024, bias=True)
      (hash_index_global): ChunkwisePositionalEmbedding(
        (positional_embedding): Embedding(6098109, 8)
      )
      (hash_layer): ChunkwisePositionalEmbedding(
        (positional_embedding): Embedding(16, 8)
      )
      (hash_index_layerwise): ChunkwisePositionalEmbedding(
        (positional_embedding): Embedding(700000, 8)
      )
      (mlp_embed): Embedding(63, 1024)
    )
    (2): Dropout(p=0.3, inplace=False)
    (3): TransformerEncoder(
      (layers): ModuleList(
        (0-3): 4 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=1024, o

In [9]:
# Show sub-network 1 and check that its parameters are registered with the optimizer.
print("NTX HEAD")
net = 1
print(model.nets[net])

# Number of parameter tensors the sub-network itself reports.
len_parameters = sum(1 for _ in model.nets[net].parameters())

# dpname of every optimizer parameter whose dpname contains "Net<net>".
op_params = [
    param.dpname
    for param_group in model.optimizer.optimizer.param_groups
    for param in param_group["params"]
    if f"Net{net}" in param.dpname
]
print(f"Number of parameters (as tensors) : {len_parameters}")
print(f"Number of parameters in optimizer (as tensors) : {len(op_params)}")

NTX HEAD
Network(
  (model): OptimizedModule(
    (_orig_mod): Sequential(
      (0): AttentionPooling()
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): ReLU()
      (3): Linear(in_features=128, out_features=64, bias=True)
      (4): Identity()
    )
  )
)
Number of parameters (as tensors) : 5
Number of parameters in optimizer (as tensors) : 5


In [10]:
# Show sub-network 2 and check that its parameters are registered with the optimizer.
print("Rotation HEAD")
net = 2
print(model.nets[net])

# Number of parameter tensors the sub-network itself reports.
len_parameters = sum(1 for _ in model.nets[net].parameters())

# dpname of every optimizer parameter whose dpname contains "Net<net>".
op_params = [
    param.dpname
    for param_group in model.optimizer.optimizer.param_groups
    for param in param_group["params"]
    if f"Net{net}" in param.dpname
]
print(f"Number of parameters (as tensors) : {len_parameters}")
print(f"Number of parameters in optimizer (as tensors) : {len(op_params)}")

Rotation HEAD
Network(
  (model): OptimizedModule(
    (_orig_mod): Sequential(
      (0): concatInputsWithPosition(
        (unique_pos): Embedding(10, 128)
        (layer_pos): Embedding(2, 128)
        (rot_token): Embedding(1, 128)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.3, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.3, inplace=False)
        (dropout2): Dropout(p=0.3, inplace=False)
        (activation): GELU(approximate='none')
      )
      (2): getFirstTokenOutput()
      (3): Linear(in_features=128, out_feature

# Create a LearnFrame

In [11]:
# Wrap the model and the dataset loader in a LearnFrame, which drives training below.
# NOTE(review): the keyword is spelled `initialze` - confirm it matches the
# LearnFrame signature and is not silently ignored.
lf = LearnFrame(model,data, initialze= True)

Checkpoint directory already exists


In [None]:
# Train for `steps` steps (the rounded-up count computed earlier).
# NOTE(review): save_freq=10000 here differs from the `save_freq = 5000`
# variable that `steps` was rounded to - confirm which interval is intended.
# NOTE(review): test_steps is tied to gradient_accumulation_steps - confirm
# that this is the intended number of test steps.
lf.train(test_freq=250, save_freq=10000, steps = steps, test_steps=gradient_accumulation_steps)

0.05%|[30m[90m░░░░░░░░░░[0m| 000049/100000 [0:00:43<24:41:41, 1.12it/s]