In [1]:
import os

# Move to the project root: the relative data path (`./data/...`) used below is
# resolved against the current working directory, and the project-local packages
# imported below are presumably found via the working directory as well.
# Set the MESH_BONES_TO_RIG_ROOT environment variable to run this notebook on
# another machine; the original location remains the default.
PROJECT_ROOT = os.environ.get("MESH_BONES_TO_RIG_ROOT", 'D:/mesh-and-bones-to-rig')
os.chdir(PROJECT_ROOT)

In [2]:
import torch
from torch_geometric.loader import DataLoader

In [3]:
from data.dataset import MeshBonesToRigDataset
from src.models.mesh_bones_to_rig import MeshBonesToRigNet
from noteboorks.training_utils import train_epoch, validate_epoch, test_model

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
# The preprocessed dataset files are assumed to be stored in this folder
# (relative to the current working directory); the cache directory is its
# "precomputed" sub-folder.
root_dir = "./data/model_data/ModelResource_MeshAndBonesToRig_preproccessed_reduced_subset/"
cache_dir = os.path.join(root_dir, "precomputed")

In [5]:
def _read_split_names(split_filename):
    """Return the model base names listed in a split file located in `root_dir`.

    Each non-empty line of the file is taken as one model base name
    (e.g. "123"). Surrounding whitespace is stripped, and blank lines are
    skipped so that a stray empty line cannot yield an empty model name.
    """
    with open(os.path.join(root_dir, split_filename), 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [name for name in stripped_lines if name]


# Read split files (each file contains a list of model base names, e.g., "123", "456", etc.)
train_names = _read_split_names("train_final.txt")
val_names = _read_split_names("val_final.txt")
test_names = _read_split_names("test_final.txt")

In [6]:
# Exclude mesh "1112" from training: we have no normals for it (see notebook 1).
# A filter is used instead of `list.remove` so that re-running this cell, or a
# split file that does not contain the name, does not raise ValueError.
train_names = [name for name in train_names if name != "1112"]

In [7]:
# Dry run for testing: keep only the first 4 names of every split.
train_names, val_names, test_names = (
    names[:4] for names in (train_names, val_names, test_names)
)

In [8]:
# Display the training names kept for the dry run (the first 4 of the train split).
train_names

['13173', '9811', '3624', '708']

In [9]:
# Build one dataset per split. All three share the same root/cache folders and k;
# they differ only in the names passed as `allowed_names`.
shared_dataset_args = dict(root_dir=root_dir, cache_dir=cache_dir, k=8)
train_dataset = MeshBonesToRigDataset(allowed_names=train_names, **shared_dataset_args)
val_dataset = MeshBonesToRigDataset(allowed_names=val_names, **shared_dataset_args)
test_dataset = MeshBonesToRigDataset(allowed_names=test_names, **shared_dataset_args)

In [10]:
# Start with a batch size of 1; a larger batch size is attempted further below.
batch_size = 1

In [11]:
# Wrap every split in a loader using the current batch size.
# Only the training loader shuffles its samples.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

In [12]:
# Inspect the first training sample to see which tensors it carries and their shapes.
# Uses the indexing operator rather than calling `__getitem__` directly.
train_loader.dataset[0]

Data(vertices=[1557, 3], edge_index_geodesic=[2, 12456], edge_attr_geodesic=[12456, 1], vertex_neighbors=[1557, 8], vertex_adj=[1557, 1557], vertex_normals=[1557, 3], bone_positions=[28, 3], bone_features=[28, 8], bone_adj=[28, 28], target_skin_weights=[1557, 28], volumetric_geodesic=[1557, 28], surface_geodesic=[1557, 1557], num_nodes=1557)

In [13]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [14]:
# Build the network with its default parameters and move it to the selected device.
# The trailing bare expression echoes the module structure as the cell output.
model = MeshBonesToRigNet().to(device)
model

MeshBonesToRigNet(
  (mesh_encoder): MeshEncoder(
    (convs): ModuleList(
      (0): SplineConv(3, 128, dim=1)
      (1): SplineConv(128, 128, dim=1)
      (2): SplineConv(128, 256, dim=1)
    )
    (activation): ReLU()
  )
  (bone_encoder): BoneEncoder(
    (convs): ModuleList(
      (0): GCNConv(8, 64)
      (1): GCNConv(64, 64)
    )
    (activation): ReLU()
  )
  (fusion): FusionModule(
    (proj_vertex): Linear(in_features=256, out_features=128, bias=True)
    (proj_bone): Linear(in_features=64, out_features=128, bias=True)
  )
  (refinement): RefinementModule()
)

In [15]:
# Put the model in training mode. As the last expression of the cell, the call's
# return value (the module itself) is echoed as the cell output.
model.train()

MeshBonesToRigNet(
  (mesh_encoder): MeshEncoder(
    (convs): ModuleList(
      (0): SplineConv(3, 128, dim=1)
      (1): SplineConv(128, 128, dim=1)
      (2): SplineConv(128, 256, dim=1)
    )
    (activation): ReLU()
  )
  (bone_encoder): BoneEncoder(
    (convs): ModuleList(
      (0): GCNConv(8, 64)
      (1): GCNConv(64, 64)
    )
    (activation): ReLU()
  )
  (fusion): FusionModule(
    (proj_vertex): Linear(in_features=256, out_features=128, bias=True)
    (proj_bone): Linear(in_features=64, out_features=128, bias=True)
  )
  (refinement): RefinementModule()
)

In [16]:
num_epochs = 3  # Set as needed.
learning_rate = 1e-4
# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
# Training loop: one training pass followed by one validation pass per epoch,
# then a one-line summary of both losses.
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, optimizer, device, epoch)
    val_loss = validate_epoch(model, val_loader, device)
    epoch_summary = f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}"
    print(epoch_summary)

Loss Skin: 9.162877, Loss Geo: 12.386421, Loss Smooth: 0.000129, Total loss: 21.549427
Loss Skin: 6.694206, Loss Geo: 11.882845, Loss Smooth: 0.000208, Total loss: 18.577259
Loss Skin: 8.906940, Loss Geo: 12.154030, Loss Smooth: 0.000215, Total loss: 21.061186
Loss Skin: 2.941505, Loss Geo: 9.127338, Loss Smooth: 0.000698, Total loss: 12.069541
Loss Skin: 5.145375, Loss Geo: 12.079281, Loss Smooth: 0.000318, Total loss: 17.224974
Loss Skin: 7.345552, Loss Geo: 11.310163, Loss Smooth: 0.000627, Total loss: 18.656343
Loss Skin: 6.726935, Loss Geo: 12.166236, Loss Smooth: 0.000281, Total loss: 18.893454
Loss Skin: 6.762665, Loss Geo: 12.170872, Loss Smooth: 0.000298, Total loss: 18.933834
Epoch 1/3 - Train Loss: 18.3144 - Val Loss: 18.4272
Loss Skin: 2.963648, Loss Geo: 9.119812, Loss Smooth: 0.000731, Total loss: 12.084190
Loss Skin: 8.259687, Loss Geo: 12.148945, Loss Smooth: 0.000288, Total loss: 20.408920
Loss Skin: 6.014644, Loss Geo: 11.864418, Loss Smooth: 0.000196, Total loss: 17.

In [18]:
# Testing: run the model over the test loader and report the returned value as the test loss.
test_loss = test_model(model, test_loader, device)
print(f"Test Loss: {test_loss:.4f}")

Loss Skin: 4.655128, Loss Geo: 11.870607, Loss Smooth: 0.000157, Total loss: 16.525892
Loss Skin: 5.974240, Loss Geo: 12.062287, Loss Smooth: 0.000241, Total loss: 18.036768
Loss Skin: 5.214540, Loss Geo: 12.567119, Loss Smooth: 0.000146, Total loss: 17.781803
Loss Skin: 5.257133, Loss Geo: 12.578658, Loss Smooth: 0.000142, Total loss: 17.835936
Test Loss: 17.5451


The pipeline seems to be okay.

In [19]:
# Now try a batch size of 2 to check whether batching more than one sample works.
batch_size = 2

In [20]:
# Re-create the data loaders so that they pick up the new batch size.
loader_args = {"batch_size": batch_size}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_args)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_args)

In [22]:
# Training loop with the batch-size-2 loaders.
# This currently fails with a RuntimeError while the loader assembles a batch
# (tensor sizes differ between samples). The error is the point of this cell, so
# it is caught and printed: the failure stays visible, but "Restart & Run All"
# still runs to completion.
# NOTE: as far as this notebook shows, `model` and `optimizer` are reused from the
# batch-size-1 run above rather than re-created.
try:
    for epoch in range(num_epochs):
        train_loss = train_epoch(model, train_loader, optimizer, device, epoch)
        val_loss = validate_epoch(model, val_loader, device)
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 1557 but got size 2811 for tensor number 1 in the list.

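Batch sizes greater than 1 fail with the default collation: the samples have different vertex counts (1557 vs. 2811 in the error above), so per-sample tensors whose second dimension also depends on the vertex count (most likely `vertex_adj` or `surface_geodesic`, both `[N, N]`) cannot be concatenated along dimension 0. I will have to write a custom collate function and experiment a bit more to make batch sizes greater than 1 work. If I have the time I will do it, but for now, since most NNs working with geometric data typically use small batch sizes like 2 or 3 anyway, I think I can continue with a batch size of 1. **TODO:** write a custom collate function for batch sizes > 1.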