In [1]:
import torch

from points_dataset import EmbedderDataset
from torch_geometric.data import Data, DataLoader

from models import *
from point_cloud_dataset import PointCloudDataset
from torch.utils.tensorboard import SummaryWriter
# Cell output: the installed PyTorch version string plus a CUDA availability label.
(torch.__version__, "cpu only" if not torch.cuda.is_available() else "cuda available")

('2.4.0+cu124', 'cuda available')

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Earlier dataset locations, kept for reference:
# ds_path = r"C:\Projects\multiSetup\datasets_embedded\ds_1024.pth"
# point_cloud_ds_path = r"C:\Projects\multiSetup\datasets_embedded\ds_256.pth"
# Forward slashes are accepted on Windows and POSIX alike, so the notebook is not
# tied to one operating system.
point_cloud_ds_path = "dataset_points/ds_1024_head.pth"

# The loaded object is used below via `.data` and `len()`, i.e. it is a pickled
# Python object rather than a plain tensor/state-dict, so full unpickling is needed.
# `weights_only=False` is spelled out because torch 2.4 emits a FutureWarning when
# the argument is omitted and later releases change the default to True.
# SECURITY: full unpickling can execute arbitrary code - only load trusted files.
point_cloud_ds = torch.load(point_cloud_ds_path, weights_only=False)
point_cloud_dataset_data = point_cloud_ds.data

print(f"Using {device} device, ds length - {len(point_cloud_ds)}, data.shape - {point_cloud_dataset_data.shape}")

# Notebook-wide settings.
EPOCHS = 560
BATCH_SIZE = 128
SPLIT_FACTOR = .8
TRAIN_MODE = True
REMOVE_OLD_MODELS = True
POINTS_PER_SHAPE = 1024
POINT_DIM = 3


  point_cloud_ds = torch.load(point_cloud_ds_path)


Using cuda device, ds length - 27341, data.shape - (27341, 3072)


In [3]:
# Wrap the loaded numpy array in the project's PointCloudDataset. `k=6` is forwarded
# as-is (presumably the neighbour count used to build graph edges - TODO confirm in
# point_cloud_dataset.py).
diffision_ae_ds = PointCloudDataset(torch.from_numpy(point_cloud_dataset_data), k=6)
train_loader = DataLoader(diffision_ae_ds, batch_size=BATCH_SIZE, shuffle=True)

# Kept as the cell's final expression so the dataset size is actually displayed;
# a bare expression in the middle of a cell is evaluated and silently discarded.
len(diffision_ae_ds)



In [4]:
# Debug aid (intentionally commented out): pull one collated batch to inspect its layout.
# batch = next(iter(train_loader))
# batch  # -> DataBatch(x=[131072, 3], edge_index=[2, 786432], edge_weight=[786432], batch=[131072], ptr=[129])

In [5]:
def train_autoencoder(model, train_loader, device, epochs=EPOCHS, lr=1e-3, log_dir="runs/autoencoder",
                      model_dir="models"):
    """Train ``model`` to reconstruct its input and checkpoint it whenever the epoch loss improves.

    Each batch is expected to expose ``x``, ``edge_index`` and ``edge_weight``
    attributes. The model is called as ``model(x, edge_index, edge_weight=...)`` and
    must return a 2-tuple whose first element is the reconstruction; the MSE between
    that reconstruction and ``x`` is minimised with Adam.

    Args:
        model: Network to train. It must also expose ``encoder`` and ``decoder``
            sub-modules, whose state dicts are saved separately. The caller is
            responsible for placing the model on ``device``.
        train_loader: Sized iterable of batches (``len()`` is used for averaging).
        device: Device the batch tensors are moved to.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        log_dir: Directory for the TensorBoard event files.
        model_dir: Directory that receives the checkpoints; created if missing.

    Returns:
        Path of the most recently saved best checkpoint, or ``None`` if no epoch ran.

    Raises:
        ValueError: If training is requested (``epochs > 0``) on an empty loader.
    """
    import os  # local import keeps the function self-contained within its cell

    num_batches = len(train_loader)
    if epochs > 0 and num_batches == 0:
        # Fail early with a clear message instead of a ZeroDivisionError when averaging.
        raise ValueError("train_loader is empty; nothing to train on")

    # torch.save does not create missing parent directories.
    os.makedirs(model_dir, exist_ok=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Referenced through `torch` so the function does not depend on `nn` leaking in
    # from a star import.
    loss_fn = torch.nn.MSELoss()

    # Initialize TensorBoard writer
    writer = SummaryWriter(log_dir=log_dir)

    best_loss = float('inf')  # Initialize best loss to infinity
    best_model_path = None

    try:
        for epoch in range(epochs):
            # Nothing in the loop leaves training mode, so once per epoch is enough.
            model.train()
            epoch_loss = 0.0

            for batch_idx, batch in enumerate(train_loader):
                # Move the batch tensors to the target device. A previously inspected
                # batch was DataBatch(x=[131072, 3], edge_index=[2, 786432], ...), i.e.
                # the graphs of a batch appear concatenated along the node axis rather
                # than stacked - NOTE(review): confirm for other batch sizes.
                x = batch.x.to(device)
                edge_index = batch.edge_index.to(device)
                edge_weight = batch.edge_weight.to(device)

                optimizer.zero_grad()

                # Forward pass; the second return value is not used here.
                reconstructed, _ = model(x, edge_index, edge_weight=edge_weight)
                loss = loss_fn(reconstructed, x)

                # Backward pass
                loss.backward()
                optimizer.step()

                # Read the scalar once and reuse it for accumulation and logging.
                batch_loss = loss.item()
                epoch_loss += batch_loss

                # Log batch-level loss to TensorBoard with a globally increasing step.
                writer.add_scalar('Loss/train_batch', batch_loss, epoch * num_batches + batch_idx)

            # Average loss per epoch
            avg_epoch_loss = epoch_loss / num_batches

            # Log epoch-level loss to TensorBoard
            writer.add_scalar('Loss/train_epoch', avg_epoch_loss, epoch)

            # Print progress
            print(f'Epoch {epoch}/{epochs}, Loss: {avg_epoch_loss}')

            # Checkpoint whenever the (training) epoch loss improves.
            if avg_epoch_loss < best_loss:
                best_loss = avg_epoch_loss
                best_model_path = f"{model_dir}/best_model_epoch_{epoch}.pth"
                torch.save(model.state_dict(), best_model_path)
                torch.save(model.encoder.state_dict(), f"{model_dir}/encoder_{epoch}.pth")
                torch.save(model.decoder.state_dict(), f"{model_dir}/decoder_{epoch}.pth")

                print(f"Best model saved with loss {best_loss} at epoch {epoch}")
    finally:
        # Always close the writer so it is not leaked when training is interrupted
        # (e.g. KeyboardInterrupt) or fails part-way through.
        writer.close()

    # Return the path to the best model for further usage
    return best_model_path

# Model hyper-parameters, passed positionally to DiffusionNetAutoencoder after POINT_DIM.
hidden_features, latent_dim = 64, 32

# Build the network first, then place it on the selected device.
model = DiffusionNetAutoencoder(POINT_DIM, hidden_features, latent_dim)
model = model.to(device)

# Train with the remaining train_autoencoder defaults and report the returned checkpoint path.
best_model_path = train_autoencoder(model=model, train_loader=train_loader, device=device)
print(f"Best model saved at: {best_model_path}")

Epoch 0/560, Loss: 504.8459463386892
Best model saved with loss 504.8459463386892 at epoch 0
Epoch 1/560, Loss: 38.80685969379461
Best model saved with loss 38.80685969379461 at epoch 1
Epoch 2/560, Loss: 23.692380664504576
Best model saved with loss 23.692380664504576 at epoch 2
Epoch 3/560, Loss: 17.21518500497408
Best model saved with loss 17.21518500497408 at epoch 3
Epoch 4/560, Loss: 13.804099216639438
Best model saved with loss 13.804099216639438 at epoch 4
Epoch 5/560, Loss: 11.518624417135648
Best model saved with loss 11.518624417135648 at epoch 5
Epoch 6/560, Loss: 9.96742689275296
Best model saved with loss 9.96742689275296 at epoch 6
Epoch 7/560, Loss: 8.83157965847265
Best model saved with loss 8.83157965847265 at epoch 7
Epoch 8/560, Loss: 7.93142168098521
Best model saved with loss 7.93142168098521 at epoch 8
Epoch 9/560, Loss: 7.06536657565108
Best model saved with loss 7.06536657565108 at epoch 9
Epoch 10/560, Loss: 6.318931742249248
Best model saved with loss 6.31893

KeyboardInterrupt: 