In [6]:
#!pip install  -q git+https://github.com/MarcusLoppe/meshgpt-pytorch.git

In [2]:
project_name = 'model_M0001'
working_dir = f'{project_name}'
models_dir = f'{working_dir}/models'
dataset_dir = f'{working_dir}/datasets'
model_name = f"M0000_E11"

encoder_checkpoint = f'{models_dir}/2024-07-23-M0001-encoder-loss_0.170.pt'
transformer_checkpoint = f'{models_dir}/mesh-transformer.ckpt.epoch_0_avg_loss_0.903.pt'
dataset_path = f"{dataset_dir}/objverse_shapenet_modelnet_max_250faces_186M_tokens.npz"

# To extract labels from dataset
#from meshgpt_pytorch import MeshDataset 
#dataset = MeshDataset.load(dataset_path)#, map_location=torch.device('cpu')) 
#labels = list(set(item["texts"] for item in dataset.data))

In [3]:
import torch
import trimesh
import numpy as np
import os
import csv
import json
from collections import OrderedDict

from meshgpt_pytorch import (
    MeshTransformerTrainer,
    MeshAutoencoderTrainer,
    MeshAutoencoder,
    MeshTransformer
)
from meshgpt_pytorch.data import ( 
    derive_face_edges_from_faces
) 

Encoder

In [9]:
# 16k 2 4
autoencoder = MeshAutoencoder(     
    decoder_dims_through_depth =  (128,) * 6 + (192,) * 12 + (256,) * 24 + (384,) * 6,   
    dim_codebook = 192,  
    dim_area_embed = 16,
    dim_coor_embed = 16, 
    dim_normal_embed = 16,
    dim_angle_embed = 8,    
    attn_decoder_depth  = 4,
    attn_encoder_depth = 2
).to("cpu")

total_params = sum(p.numel() for p in autoencoder.parameters()) 
total_params = f"{total_params / 1000000:.1f}M"
print(f"Total parameters: {total_params}")

Total parameters: 50.7M


In [10]:
# 
# Trained on a dataset of 14k models that contains less then 250 faces (objverse + shapenet + Modelnet).
# Reached the loss of 0.399 MSE
pkg = torch.load(encoder_checkpoint) 
autoencoder.load_state_dict(pkg['model'])
for param in autoencoder.parameters():
    param.requires_grad = True

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 

### Transformer


In [5]:
import gc  
torch.cuda.empty_cache()
gc.collect()   
# max_seq = max(len(d["faces"]) for d in dataset if "faces" in d)  * (autoencoder.num_vertices_per_face * autoencoder.num_quantizers) 
# print("Max token sequence:" , max_seq)  
transformer = MeshTransformer(
    autoencoder,
    dim =768,
    coarse_pre_gateloop_depth = 6,  
    fine_pre_gateloop_depth= 4, 
    attn_depth = 24,  
    attn_heads = 16,
    dropout  = 0.0,
    max_seq_len = 1500,
    condition_on_text = True, 
    gateloop_use_heinsen = False,
    text_condition_model_types = "bge", 
    text_condition_cond_drop_prob = 0.0, 
).to("cpu") 

total_params = sum(p.numel() for p in transformer.decoder.parameters())
total_params = f"{total_params / 1000000:.1f}M"
print(f"Decoder total parameters: {total_params}")

pkg = torch.load(transformer_checkpoint, map_location=torch.device('cpu')) 
transformer.load_state_dict(pkg['model'])

Decoder total parameters: 321.5M


<All keys matched successfully>

## Generate and view mesh

**Using only text**

In [6]:
from meshgpt_pytorch import mesh_render 
from pathlib import Path
import datetime
 
folder = f'{working_dir}/renders'
obj_file_path = Path(folder)
obj_file_path.mkdir(exist_ok = True, parents = True)  

text_coords = [] 
text_coords.append(transformer.generate(texts = [text],  temperature = 0.0))   

current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_filename = f"{current_datetime}_{model_name}_test_results.obj"
mesh_render.save_rendering(f'{folder}/{results_filename}', text_coords)

AssertionError: `text` or `text_embeds` must be passed in if `condition_on_text` is set to True

In [None]:
 
# from meshgpt_pytorch import mesh_render 
# from pathlib import Path
# import datetime
 
# folder = f'{working_dir}/renders'
# obj_file_path = Path(folder)
# obj_file_path.mkdir(exist_ok = True, parents = True)  

# query = [
#     'tv table', 'office table', 'high chair', 'glass table',
#     'designer sloped chair', 'designer chair', 'corner table', 'circle chair', 'bar chair',
#     'shoe', 'cup', 'plate', 'vase', 'person'
# ]
    
# text_coords = [] 
# for text in (query):
#     print(f"Generating {text}") 
#     text_coords.append(transformer.generate(texts = [text],  temperature = 0.0))   

# current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# results_filename = f"{current_datetime}_{model_name}_test_results.obj"
# mesh_render.save_rendering(f'{folder}/{results_filename}', text_coords)

**Text + prompt of tokens**

**Prompt with 10% of codes/tokens**

In [None]:
# from pathlib import Path 
# from meshgpt_pytorch import mesh_render 
# folder = working_dir / f'renders/text+codes'
# obj_file_path = Path(folder)
# obj_file_path.mkdir(exist_ok = True, parents = True)  

# token_length_procent = 0.10 
# codes = []
# texts = []
# for label in labels:
#     for item in dataset.data: 
#         if item['texts'] == label:
#             tokens = autoencoder.tokenize(
#                 vertices = item['vertices'],
#                 faces = item['faces'],
#                 face_edges = item['face_edges']
#             ) 
#             num_tokens = int(tokens.shape[0] * token_length_procent)  
#             texts.append(item['texts']) 
#             codes.append(tokens.flatten()[:num_tokens].unsqueeze(0))  
#             break
        
# coords = []  
# for text, prompt in zip(texts, codes): 
#     print(f"Generating {text} with {prompt.shape[1]} tokens") 
#     coords.append(transformer.generate(texts = [text],  prompt = prompt, temperature = 0) )    
      
# mesh_render.save_rendering(f'{folder}/text+prompt_{token_length_procent*100}.obj', coords)

**Prompt with 0% to 80% of tokens**

In [None]:
# from pathlib import Path
# from meshgpt_pytorch import mesh_render 
 
# folder = working_dir / f'renders/text+codes_rows'
# obj_file_path = Path(folder)
# obj_file_path.mkdir(exist_ok = True, parents = True)   

# mesh_rows = []
# for token_length_procent in np.arange(0, 0.8, 0.1):
#     codes = []
#     texts = []
#     for label in labels:
#         for item in dataset.data: 
#             if item['texts'] == label:
#                 tokens = autoencoder.tokenize(
#                     vertices = item['vertices'],
#                     faces = item['faces'],
#                     face_edges = item['face_edges']
#                 ) 
#                 num_tokens = int(tokens.shape[0] * token_length_procent) 
                
#                 texts.append(item['texts']) 
#                 codes.append(tokens.flatten()[:num_tokens].unsqueeze(0))  
#                 break
            
#     coords = []   
#     for text, prompt in zip(texts, codes):  
#         print(f"Generating {text} with {prompt.shape[1]} tokens") 
#         coords.append(transformer.generate(texts = [text],  prompt = prompt, temperature = 0)) 
         
#     mesh_rows.append(coords)  
    
# mesh_render.save_rendering(f'{folder}/all.obj', mesh_rows)
 

**Just some testing for text embedding similarity**

In [None]:
# import numpy as np 
# texts = list(labels)
# vectors = [transformer.conditioner.text_models[0].embed_text([text], return_text_encodings = False).cpu().flatten() for text in texts]
 
# max_label_length = max(len(text) for text in texts)
 
# # Print the table header
# print(f"{'Text':<{max_label_length}} |", end=" ")
# for text in texts:
#     print(f"{text:<{max_label_length}} |", end=" ")
# print()

# # Print the similarity matrix as a table with fixed-length columns
# for i in range(len(texts)):
#     print(f"{texts[i]:<{max_label_length}} |", end=" ")
#     for j in range(len(texts)):
#         # Encode the texts and calculate cosine similarity manually
#         vector_i = vectors[i]
#         vector_j = vectors[j]
        
#         dot_product = torch.sum(vector_i * vector_j)
#         norm_vector1 = torch.norm(vector_i)
#         norm_vector2 = torch.norm(vector_j)
#         similarity_score = dot_product / (norm_vector1 * norm_vector2)
        
#         # Print with fixed-length columns
#         print(f"{similarity_score.item():<{max_label_length}.4f} |", end=" ")
#     print()