In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import ChebConv  # Using Chebyshev convolution
from torch_geometric.nn import knn_graph
from torch_geometric.utils import get_laplacian, get_mesh_laplacian
# from torch_sparse import coalesce
from torch_geometric.utils import degree

from embedder_dataset import EmbedderDataset
# from torch.utils.data import DataLoader
from torch_geometric.data import Data, DataLoader

from models import *
from misc import compute_edge_indices, compute_laplacian#, compute_batched_edge_indices
from point_cloud_datasest import PointCloudDataset
from torch.utils.tensorboard import SummaryWriter
torch.__version__, "cuda available" if torch.cuda.is_available() else "cpu only"

('2.4.0+cu124', 'cuda')

In [2]:
device = "cpu" if not torch.cuda.is_available() else "cuda"

# ds_dir = "datasets_embedded"
# ds_fname = "ds_1024.pth" # 1024 points per shape
ds_path = r"C:\Projects\multiSetup\datasets_embedded\ds_1024.pth"
ds = torch.load(ds_path)
dataset_data = ds.data

print(f"Using {device} device, ds length - {len(ds)}, data.shape - {dataset_data.shape}")

EPOCHS = 200
BATCH_SIZE = 128
SPLIT_FACTOR = .8
TRAIN_MODE = (False, True)[1]
REMOVE_OLD_MODELS = True
POINTS_PER_SHAPE = 1024
POINT_DIM = 3


  ds = torch.load(ds_path)


Using cuda device, ds length - 27571, data.shape - (27571, 3072)


In [3]:
diffision_ae_ds = PointCloudDataset(torch.from_numpy(dataset_data), k=6)
len(diffision_ae_ds)
train_loader = DataLoader(diffision_ae_ds, batch_size=BATCH_SIZE, shuffle=True)



In [4]:
# batch = next(iter(train_loader))
# batch # DataBatch(x=[131072, 3], edge_index=[2, 786432], edge_weight=[786432], batch=[131072], ptr=[129])

In [5]:
def train_autoencoder(model, train_loader, device, epochs=1000, lr=1e-3, log_dir="runs/autoencoder"):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    # Initialize TensorBoard writer
    writer = SummaryWriter(log_dir=log_dir)

    best_loss = float('inf')  # Initialize best loss to infinity
    best_model_path = None

    for epoch in range(epochs):
        epoch_loss = 0.0
        for batch_idx, batch in enumerate(train_loader):
            x = batch.x  # Node features for all graphs in the batch
            edge_index = batch.edge_index  # Edge indices for all graphs in the batch
            edge_weight = batch.edge_weight  # Edge weights (Laplacian) for all graphs in the batch
            model.train()
            optimizer.zero_grad()

            # Move data to the specified device (GPU or CPU)
            x = x.to(device)  # torch.Size([128, 1024, 3])
            edge_index = edge_index.to(device)  # torch.Size([128, 2, 6144])
            edge_weight = edge_weight.to(device)

            # Forward pass
            reconstructed = model(x, edge_index, edge_weight=edge_weight)
            loss = loss_fn(reconstructed, x)

            # Backward pass
            loss.backward()
            optimizer.step()

            # Accumulate loss for the epoch
            epoch_loss += loss.item()

            # Log batch-level loss to TensorBoard
            writer.add_scalar('Loss/train_batch', loss.item(), epoch * len(train_loader) + batch_idx)

        # Average loss per epoch
        avg_epoch_loss = epoch_loss / len(train_loader)

        # Log epoch-level loss to TensorBoard
        writer.add_scalar('Loss/train_epoch', avg_epoch_loss, epoch)

        # Print progress
        print(f'Epoch {epoch}/{epochs}, Loss: {avg_epoch_loss}')

        # Check if the current model is the best one (based on loss)
        if avg_epoch_loss < best_loss:
            best_loss = avg_epoch_loss
            best_model_path = f"models/best_model_epoch_{epoch}.pth"
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved with loss {best_loss} at epoch {epoch}")

    # Close TensorBoard writer when training is complete
    writer.close()

    # Return the path to the best model for further usage
    return best_model_path

# Initialize and train the model
hidden_features = 64
latent_dim = 32

model = DiffusionNetAutoencoder(POINT_DIM, hidden_features, latent_dim).to(device)

# Call the training function with TensorBoard logging
best_model_path = train_autoencoder(model, train_loader, device)
print(f"Best model saved at: {best_model_path}")

Epoch 0/1000, Loss: 11.172327995300293
Epoch 1/1000, Loss: 11.42244815826416
Epoch 2/1000, Loss: 11.099620819091797
Epoch 3/1000, Loss: 11.34311294555664
Epoch 4/1000, Loss: 11.059738159179688
Epoch 5/1000, Loss: 11.078689575195312


KeyboardInterrupt: 

In [8]:
"get_laplacian: IndexError: The shape of the mask [2, 6144] at index 0 does not match the shape of the indexed tensor [128, 2, 7168] at index 0"
"norm_laplacian: IndexError: The shape of the mask [2, 6144] at index 0 does not match the shape of the indexed tensor [128, 1024, 1024] at index 0"
6144/1024, 7168/1024

(6.0, 7.0)