In [1]:
import os
# Work from the project root: the dataset path configured later is relative ("./data/...").
# The checkout location can be overridden through the MESH_BONES_TO_RIG_ROOT environment
# variable; when it is not set, the original hard-coded location is used.
os.chdir(os.environ.get("MESH_BONES_TO_RIG_ROOT", 'D:/mesh-and-bones-to-rig'))

In [2]:
import os
import trimesh
import numpy as np
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display

import torch
import torch.nn as nn
import torch.nn.init as init
from torch_geometric.loader import DataLoader
from torch.utils.tensorboard import SummaryWriter

In [4]:
from data.dataset import parse_rig_info
from data.dataset import MeshBonesToRigDataset
from src.models.mesh_bones_to_rig import MeshBonesToRigNet
from noteboorks.training_utils import train_epoch, validate_epoch, test_model, train_model
from noteboorks.visualization_utils import display_view


In [5]:
# The preprocessed dataset files are expected in this folder; the cache folder sits inside it.
root_dir = "./data/model_data/ModelResource_MeshAndBonesToRig_preproccessed_reduced_subset/"
cache_dir = os.path.join(root_dir, "precomputed")

# Every hyperparameter lives in this single dictionary, filled in section by section.
hyperparams = {}

# Model architecture
hyperparams.update({
    'mesh_encoder_in_channels': 3,
    'mesh_encoder_hidden_channels': 128,
    'mesh_encoder_out_channels': 256,
    'mesh_encoder_kernel_size': 5,
    'mesh_encoder_num_layers': 3,
    'mesh_encoder_dim': 1,
    'bone_encoder_in_channels': 8,
    'bone_encoder_hidden_channels': 64,
    'bone_encoder_out_channels': 64,
    'bone_encoder_num_layers': 2,
    'fusion_common_dim': 128,
    'fusion_top_k': 4,  # Usually at most 4 bones influence a vertex.
    'fusion_alpha': 1.0,  # Default starting value; it is learnable (see next entry).
    'fusion_alpha_learnable': True,
    'refinement_gamma': 0.5,
    'with_refinement': False,
})

# Training settings
hyperparams.update({
    'mesh_name': "All training dataset apart from ",
    'num_epochs': 100,  # Author's note: would like to see how it performs after 500 epochs.
    'batch_size': 1,  # Kept at 1: stacking multiple Data objects whose tensors have variable dimensions is still unresolved.
    'learning_rate': 1e-4,  # Same learning rate as used in the RigNet paper.
    'optimizer': 'Adam',
    'initialization': 'xavier_uniform_',  # Trying out a different initialization scheme.
})

# Dataset settings
hyperparams.update({
    'k_neighbors': 8,  # Up to k nearest neighbours.
})

# Loss settings
hyperparams.update({
    'lambda_skin': 1.0,
    'lambda_geo': 1.0,
    'lambda_geo_alpha': 1.0,
    'lambda_smooth': 1.0,
})

In [6]:
def _read_name_list(split_file_name):
    """Read one split file from `root_dir` and return its mesh names.

    Each line of the file is stripped of surrounding whitespace; lines that are
    empty after stripping are skipped so that a trailing blank line does not
    produce an empty mesh name.

    Args:
        split_file_name: File name of the split list, relative to `root_dir`.

    Returns:
        A list of the stripped, non-empty lines, in file order.
    """
    # os.path.join works whether or not `root_dir` ends with a path separator.
    with open(os.path.join(root_dir, split_file_name), 'r') as split_file:
        stripped_lines = (line.strip() for line in split_file)
        return [name for name in stripped_lines if name]


train_names = _read_name_list("train_final.txt")
val_names = _read_name_list("val_final.txt")
test_names = _read_name_list("test_final.txt")

In [7]:
# Ignore mesh "1112": we have no normals for it (see notebook 1).
# Filtering instead of calling list.remove keeps this cell idempotent: running it a second
# time no longer raises ValueError because the name is already gone.
train_names = [name for name in train_names if name != "1112"]

We don't have the time to train on a lot of meshes, so maybe training on 99 will be okay? That means 9 meshes for validation and testing as well, if we want to be proportionate.

In [8]:
# Build one dataset per split; each is restricted to its split's names through allowed_names.
train_dataset, val_dataset, test_dataset = (
    MeshBonesToRigDataset(root_dir=root_dir, k=hyperparams["k_neighbors"], allowed_names=split_names)
    for split_names in (train_names, val_names, test_names)
)

In [9]:
# Data loaders for the three splits. Only the training loader shuffles its samples.
loader_options = dict(batch_size=hyperparams["batch_size"], num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [10]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [11]:
# Show which device was selected by the previous cell.
device

device(type='cuda')

The training is very bad because only batches of 1 mesh can be used (for now). The network itself is not complex, and I am pretty sure we can make the DataLoader more efficient. A lot of TODOs.

In [12]:
# No pre-built model is handed over: `None` is passed as the model argument.
# NOTE(review): presumably train_model constructs the network from `hyperparams` in that
# case - confirm in training_utils.
model = None
model = train_model(
    model,
    train_dataset,
    hyperparams,
    "runs/big_training",
    val_dataset,
    save_checkpoint=True,
    checkpoint_dir="checkpoints",
)

Parameter containing:
tensor(1., device='cuda:0', requires_grad=True)
Loss Skin: 2.812909, Loss Geo: 12.322889, Loss Smooth: 0.000603, Total loss: 15.136402
Parameter containing:
tensor(1.0001, device='cuda:0', requires_grad=True)
Loss Skin: 3.471351, Loss Geo: 13.028409, Loss Smooth: 0.000388, Total loss: 16.500149
Parameter containing:
tensor(1.0002, device='cuda:0', requires_grad=True)
Loss Skin: 3.534492, Loss Geo: 13.110827, Loss Smooth: 0.000184, Total loss: 16.645504
Parameter containing:
tensor(1.0003, device='cuda:0', requires_grad=True)
Loss Skin: 5.258833, Loss Geo: 8.654401, Loss Smooth: 0.000701, Total loss: 13.913937
Parameter containing:
tensor(1.0004, device='cuda:0', requires_grad=True)
Loss Skin: 1.450401, Loss Geo: 13.010146, Loss Smooth: 0.000240, Total loss: 14.460788
Parameter containing:
tensor(1.0005, device='cuda:0', requires_grad=True)
Loss Skin: 4.102818, Loss Geo: 12.232716, Loss Smooth: 0.000586, Total loss: 16.336119
Parameter containing:
tensor(1.0006, de

In [13]:
# Load the TensorBoard notebook extension so the %tensorboard magic becomes available.
%load_ext tensorboard

In [14]:
# Open TensorBoard inline on the `runs` directory (the training call above was given
# "runs/big_training", presumably as its log directory).
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 9876), started 23:28:17 ago. (Use '!kill 9876' to kill it.)

Let's evaluate the best model on the test set and visualize its predictions on one of the test meshes.

In [12]:
# Restore the best checkpoint into a freshly constructed network.
best_model_path = os.path.join("checkpoints", "best_model.pth")

# map_location remaps the stored tensors onto the currently selected `device`, so a
# checkpoint written during a GPU run can also be loaded on a CPU-only machine.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
checkpoint = torch.load(best_model_path, map_location=device)

# The constructor is called positionally; this tuple fixes the argument order explicitly.
model_arg_keys = (
    "mesh_encoder_in_channels",
    "mesh_encoder_hidden_channels",
    "mesh_encoder_out_channels",
    "mesh_encoder_kernel_size",
    "mesh_encoder_num_layers",
    "mesh_encoder_dim",
    "bone_encoder_in_channels",
    "bone_encoder_hidden_channels",
    "bone_encoder_out_channels",
    "bone_encoder_num_layers",
    "fusion_common_dim",
    "fusion_top_k",
    "fusion_alpha",
    "fusion_alpha_learnable",
    "refinement_gamma",
    "with_refinement",
)
best_model = MeshBonesToRigNet(*(hyperparams[key] for key in model_arg_keys))

best_model.load_state_dict(checkpoint["model_state_dict"])
best_model.to(device)
# Switch to evaluation mode; as the last expression this also displays the module structure.
best_model.eval()

MeshBonesToRigNet(
  (mesh_encoder): MeshEncoder(
    (convs): ModuleList(
      (0): SplineConv(3, 128, dim=1)
      (1): SplineConv(128, 128, dim=1)
      (2): SplineConv(128, 256, dim=1)
    )
    (activation): ReLU()
  )
  (bone_encoder): BoneEncoder(
    (convs): ModuleList(
      (0): GCNConv(8, 64)
      (1): GCNConv(64, 64)
    )
    (activation): ReLU()
  )
  (fusion): FusionModule(
    (proj_vertex): Linear(in_features=256, out_features=128, bias=True)
    (proj_bone): Linear(in_features=64, out_features=128, bias=True)
  )
  (refinement): RefinementModule()
)

In [13]:
# Evaluate the restored model on the test split. The loss weights are passed positionally
# in this order: skin, geo, smooth, geo_alpha.
loss_weight_keys = ("lambda_skin", "lambda_geo", "lambda_smooth", "lambda_geo_alpha")
loss_weights = [hyperparams[key] for key in loss_weight_keys]
test_loss = test_model(best_model, test_loader, device, *loss_weights)
print(f"Test Loss: {test_loss:.4f}")

Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 1.317057, Loss Geo: 11.153289, Loss Smooth: 0.000190, Total loss: 12.470536
Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 1.287701, Loss Geo: 11.807454, Loss Smooth: 0.000195, Total loss: 13.095349
Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 0.967564, Loss Geo: 11.472464, Loss Smooth: 0.000210, Total loss: 12.440238
Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 1.896680, Loss Geo: 11.937403, Loss Smooth: 0.000235, Total loss: 13.834317
Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 1.364156, Loss Geo: 12.432274, Loss Smooth: 0.000137, Total loss: 13.796567
Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)
Loss Skin: 1.558984, Loss Geo: 12.490712, Loss Smooth: 0.000157, Total loss: 14.049852
Parameter containing:
tensor(1.014

In [26]:
# One-mesh dataset used for visualization.
# NOTE: despite the variable name, it holds the first mesh of the *test* split.
# `k` is taken from the shared hyperparameters so it cannot drift from the value used
# for the train/val/test datasets.
train_dataset_one_mesh = MeshBonesToRigDataset(
    root_dir=root_dir,
    cache_dir=cache_dir,
    k=hyperparams["k_neighbors"],
    allowed_names=[test_names[0]],
)
data_mesh = train_dataset_one_mesh[0]

# Ground-truth skinning weights of that mesh, as a NumPy array for plotting.
skinning_weights = data_mesh["target_skin_weights"]
skinning_weights_np = skinning_weights.cpu().numpy()

In [27]:
# Peek at the ground-truth skinning weights array (NumPy abbreviates large arrays when displayed).
skinning_weights_np

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [28]:
# Load the remeshed OBJ of the first test mesh and open trimesh's viewer on it.
mesh_path = os.path.join(root_dir, "obj_remesh", f"{test_names[0]}.obj")
mesh = trimesh.load(mesh_path)
mesh.show()

In [29]:
# Move the sample to the selected device. The result is rebound explicitly so that later
# cells do not depend on `.to()` modifying the object in place.
data_mesh = data_mesh.to(device)
# Display the sample's fields and tensor shapes.
data_mesh

Data(vertices=[2312, 3], edge_index_geodesic=[2, 18496], edge_attr_geodesic=[18496, 1], vertex_neighbors=[2312, 8], vertex_adj=[2312, 2312], vertex_normals=[2312, 3], bone_positions=[22, 3], bone_features=[22, 8], bone_adj=[22, 22], target_skin_weights=[2312, 22], volumetric_geodesic=[2312, 22], surface_geodesic=[2312, 2312], num_nodes=2312)

In [30]:
# Run the restored model on the single test mesh. This is inference only, so gradient
# tracking is disabled to avoid building an autograd graph for the forward pass.
with torch.no_grad():
    predicted_skinning_weights = best_model(data_mesh["vertices"],
                                            data_mesh["edge_index_geodesic"],
                                            data_mesh["edge_attr_geodesic"],
                                            data_mesh["vertex_neighbors"],
                                            data_mesh["vertex_adj"],
                                            data_mesh["vertex_normals"],
                                            data_mesh["bone_features"],
                                            data_mesh["bone_adj"],
                                            data_mesh["volumetric_geodesic"],
                                            data_mesh["surface_geodesic"])

# Bring the prediction back to host memory as a NumPy array for plotting.
predicted_skinning_weights_np = predicted_skinning_weights.detach().cpu().numpy()

Parameter containing:
tensor(1.0148, device='cuda:0', requires_grad=True)


In [31]:
# Parse the rig description of the first test mesh. Only `bone_names` is really needed
# here (it feeds the dropdown); the other returned values are not used below.
rig_info_path = os.path.join(root_dir, "rig_info_remesh", f"{test_names[0]}.txt")
bone_positions_np, root_joint, bone_hierarchy, skin_weights_dict, bone_names = parse_rig_info(rig_info_path)

# Dropdown entries as (bone name, bone index) pairs.
bone_options = [(bone_name, bone_index) for bone_index, bone_name in enumerate(bone_names)]

In [32]:
# Bone selector widget; the value of each option is the bone's integer index, and the
# entry with value 0 is selected by default.
dropdown_settings = {
    "options": bone_options,
    "value": 0,
    "description": 'Bone Name:',
}
dropdown = widgets.Dropdown(**dropdown_settings)

In [33]:
# Show the mesh with the ground-truth and the predicted skinning weights, driven by the
# bone dropdown; the side view is fixed to off.
display_view(
    mesh,
    skinning_weights_np,
    predicted_skinning_weights_np,
    dropdown,
    side_view=widgets.fixed(False),
)

HBox(children=(HBox(children=(Dropdown(description='Bone Name:', options=(('Head', 0), ('Jaw', 1), ('LArm', 2)…

I would say that, in general, the network as it stands does not yet have the capacity to learn many of the meshes. It is fairly simple, and the dataset is not very large. The hyperparameters are not tuned, and there is still a lot of work to do.
A better approach would be to go through the dataset and categorize the meshes by their topology (say, humans as one category and dinosaurs as another) and train small networks of this sort, each on one specific category. In my opinion, this task will be quite difficult to generalize with the current state of the architecture.

I am happy with the results so far and I hope to continue working on this project.