In [1]:
import os
import torch

from points_dataset import EmbedderDataset
from torch_geometric.data import Data, DataLoader

from models import *
from point_cloud_dataset import PointCloudDataset
from torch.utils.tensorboard import SummaryWriter
# Cell output: a (version string, availability label) tuple showing which torch
# build is installed and whether torch can see a CUDA device in this kernel.
torch.__version__, "cuda available" if torch.cuda.is_available() else "cpu only"

('2.4.0+cu124', 'cuda available')

In [2]:
# ---------------------------------------------------------------------------
# Configuration cell: device selection, dataset loading, hyper-parameters.
# ---------------------------------------------------------------------------

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset location, relative to the notebook's working directory.
# os.path.join keeps the path valid on both Windows and POSIX (the previous
# raw string hard-coded a backslash separator).
# Alternative dumps previously tried here: datasets_embedded/ds_1024.pth and
# datasets_embedded/ds_256.pth.
point_cloud_ds_path = os.path.join("dataset_points", "ds_1024_head.pth")

# SECURITY: the file is read back as a full Python object (it exposes `.data`
# and `len()`), i.e. it is unpickled. Unpickling can execute arbitrary code, so
# only load files you produced yourself. `weights_only=False` is spelled out so
# the load neither emits torch's FutureWarning nor breaks when the default
# becomes `weights_only=True`.
point_cloud_ds = torch.load(point_cloud_ds_path, weights_only=False)
point_cloud_dataset_data = point_cloud_ds.data  # consumed later via torch.from_numpy

print(f"Using {device} device, ds length - {len(point_cloud_ds)}, data.shape - {point_cloud_dataset_data.shape}")

# --- Training hyper-parameters ------------------------------------------------
EPOCHS = 1500
BATCH_SIZE = 128
SPLIT_FACTOR = .8          # NOTE(review): not referenced in the visible cells
TRAIN_MODE = True          # set to False to skip training
REMOVE_OLD_MODELS = True   # NOTE(review): not referenced in the visible cells
# POINTS_PER_SHAPE = 256
POINT_DIM = 3

# --- Model dimensions ---------------------------------------------------------
in_features = POINT_DIM    # one input feature per point coordinate
hidden_features = 32
latent_dim = 2

# Per-configuration output directory, e.g. "models3322" for the values above.
# NOTE(review): train_autoencoder (defined further down) writes its checkpoints
# under "models/" by default, not under model_dir -- confirm which is intended.
model_dir = f"models{POINT_DIM}{hidden_features}{latent_dim}"
os.makedirs(model_dir, exist_ok=True)
    

  point_cloud_ds = torch.load(point_cloud_ds_path)


Using cuda device, ds length - 27341, data.shape - (27341, 3072)


In [3]:
# Wrap the loaded numpy array as a tensor and build the graph dataset from it.
# NOTE(review): k=6 is presumably the neighbour count per point (the DataBatch
# repr noted in the next cell shows 786432 edges for 131072 nodes = 6 per node)
# -- confirm against PointCloudDataset.
diffision_ae_ds = PointCloudDataset(torch.from_numpy(point_cloud_dataset_data), k=6)

# Every sample goes into the training loader; no train/validation split is made here.
train_loader = DataLoader(diffision_ae_ds, batch_size=BATCH_SIZE, shuffle=True)

# Kept as the final expression so the dataset size is shown as the cell output
# (it was previously a mid-cell expression whose value was silently discarded).
len(diffision_ae_ds)



In [4]:
# batch = next(iter(train_loader))
# batch # DataBatch(x=[131072, 3], edge_index=[2, 786432], edge_weight=[786432], batch=[131072], ptr=[129])

: 

In [5]:
def train_autoencoder(model, train_loader, device, epochs=EPOCHS, lr=1e-3,
                      save_dir="models", save_after_epoch=300):
    """Train ``model`` to reconstruct its input node features, checkpointing the best epoch.

    Each batch is expected to expose ``x``, ``edge_index`` and ``edge_weight``
    attributes. The model is called as ``model(x, edge_index, edge_weight=...)``
    and must return a 2-tuple whose first element is the reconstruction of
    ``x``; the second element is ignored here. The loss is the MSE between the
    reconstruction and ``x``, optimised with Adam.

    Batch- and epoch-level losses are logged to TensorBoard under
    ``Loss/train_batch`` and ``Loss/train_epoch``.

    Args:
        model: Module to train; it is updated in place.
        train_loader: Sized iterable of batches (``len()`` must work).
        device: Device the batch tensors are moved to. The model itself is NOT
            moved here; the caller must already have placed it on ``device``.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        save_dir: Directory for checkpoints; created on first save if missing.
        save_after_epoch: Checkpoints are only written for epochs strictly
            greater than this index. Losses of earlier epochs do not take part
            in the "best" comparison.

    Returns:
        Path of the last checkpoint written (the epoch with the lowest average
        loss among eligible epochs), or ``None`` if no epoch was eligible,
        e.g. when ``epochs <= save_after_epoch + 1``.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    n_batches = len(train_loader)
    if n_batches == 0:
        # Previously this surfaced as a ZeroDivisionError when averaging the loss.
        raise ValueError("train_loader yields no batches; nothing to train on")

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Referenced through `torch` explicitly rather than relying on an `nn` name
    # leaking in via `from models import *`.
    loss_fn = torch.nn.MSELoss()

    best_loss = float('inf')
    best_model_path = None

    writer = SummaryWriter()
    try:
        for epoch in range(epochs):
            model.train()  # once per epoch is enough; nothing below switches to eval
            epoch_loss = 0.0

            for batch_idx, batch in enumerate(train_loader):
                # NOTE(review): per the DataBatch repr recorded in this notebook,
                # x is (total_nodes, 3) and edge_index is (2, total_edges), with
                # all graphs of the batch concatenated -- confirm.
                x = batch.x.to(device)
                edge_index = batch.edge_index.to(device)
                edge_weight = batch.edge_weight.to(device)

                optimizer.zero_grad()
                reconstructed, _ = model(x, edge_index, edge_weight=edge_weight)
                loss = loss_fn(reconstructed, x)
                loss.backward()
                optimizer.step()

                # Read the scalar once: each .item() forces a host/device sync.
                batch_loss = loss.item()
                epoch_loss += batch_loss
                writer.add_scalar('Loss/train_batch', batch_loss, epoch * n_batches + batch_idx)

            avg_epoch_loss = epoch_loss / n_batches
            writer.add_scalar('Loss/train_epoch', avg_epoch_loss, epoch)
            print(f'Epoch {epoch}/{epochs}, Loss: {avg_epoch_loss}')

            # Only epochs past the warm-up threshold are eligible checkpoints.
            if avg_epoch_loss < best_loss and epoch > save_after_epoch:
                best_loss = avg_epoch_loss
                # torch.save does not create missing parent directories.
                os.makedirs(save_dir, exist_ok=True)
                best_model_path = f"{save_dir}/autoencoder_epoch_{epoch}.pth"
                torch.save(model.state_dict(), best_model_path)

                print(f"Best model saved with loss {best_loss} at epoch {epoch}")
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    return best_model_path



# Build the autoencoder from the dimensions chosen in the configuration cell
# and place it on the selected device.
model = DiffusionNetAutoencoder(in_features, hidden_features, latent_dim).to(device)

# Honour the TRAIN_MODE switch from the configuration cell: the flag was
# defined but never consulted, so training ran unconditionally.
# best_model_path stays None when training is skipped.
best_model_path = None
if TRAIN_MODE:
    # Trains with TensorBoard logging; returns the best checkpoint path (or None).
    best_model_path = train_autoencoder(model, train_loader, device)
    print(f"Best model saved at: {best_model_path}")

Epoch 0/1500, Loss: 317.0971529238692
Epoch 1/1500, Loss: 49.84152556802625
Epoch 2/1500, Loss: 35.88661191173803
Epoch 3/1500, Loss: 28.485988153475468
Epoch 4/1500, Loss: 23.62676059865506
Epoch 5/1500, Loss: 20.214736207623346
Epoch 6/1500, Loss: 17.623887436412204
Epoch 7/1500, Loss: 15.666801162969286
Epoch 8/1500, Loss: 14.182738206096898
Epoch 9/1500, Loss: 13.062825657496942
Epoch 10/1500, Loss: 12.066845421479128
Epoch 11/1500, Loss: 11.191836704717618
Epoch 12/1500, Loss: 10.427334776548582
Epoch 13/1500, Loss: 9.851999220447006
Epoch 14/1500, Loss: 9.484412901869444
Epoch 15/1500, Loss: 8.747686481921471
Epoch 16/1500, Loss: 8.340036846766962
Epoch 17/1500, Loss: 7.857043865685151
Epoch 18/1500, Loss: 7.44783677341782
Epoch 19/1500, Loss: 7.104765722684771
Epoch 20/1500, Loss: 6.722418693738563
Epoch 21/1500, Loss: 6.476321169149096
Epoch 22/1500, Loss: 6.128958191827079
Epoch 23/1500, Loss: 6.064482998625141
Epoch 24/1500, Loss: 5.6072931713032945
Epoch 25/1500, Loss: 5.407

In [None]:
# Scratch arithmetic in an unexecuted cell; evaluates to (71304, 294912).
# NOTE(review): the purpose of these numbers is not stated anywhere in the
# notebook -- document what is being compared or delete this cell.
23768*3, 768*384
