In [2]:
import torch

from shap_e.models.download import load_model
from shap_e.util.data_util import load_or_create_multimodal_batch
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget

In [2]:
import trimesh
import os
from pathlib import Path
from tqdm import tqdm
def convert_off_to_ply(off_filename, ply_filename):
    # Load the OFF file using Trimesh
    mesh = trimesh.load(off_filename, force='mesh')
    
    # Export the mesh to PLY format
    mesh.export(ply_filename, file_type='ply')

def count_files(directory, extension):
    return sum(1 for _ in Path(directory).rglob(f'*{extension}'))

def process_directory(input_root, output_root):
    input_root_path = Path(input_root)
    output_root_path = Path(output_root)
    
    # Count the total number of OFF files
    total_files = count_files(input_root, '.off')
    
    # Create a tqdm progress bar
    progress = tqdm(total=total_files, desc="Converting OFF to PLY")
    
    # Walk through all subdirectories and files in the input directory
    for subdir, _, files in os.walk(input_root_path):
        for file in files:
            if file.endswith('.off'):
                # Full path to the source OFF file
                full_file_path = Path(subdir) / file
                
                # Construct the corresponding output file path
                output_file_path = output_root_path / full_file_path.relative_to(input_root_path)
                output_file_path = output_file_path.with_suffix('.ply')
                
                # Ensure the output directory exists
                output_file_path.parent.mkdir(parents=True, exist_ok=True)
                
                # Convert and save the OFF file to PLY
                convert_off_to_ply(str(full_file_path), str(output_file_path))
                
                # Update progress bar after each file is processed
                progress.update(1)
    
    # Close the progress bar
    progress.close()

# Usage
process_directory("/home/yiftach/main/Research/shap-e/data/ModelNet40","/home/yiftach/main/Research/shap-e/data/ModelNet40_ply")


Converting OFF to PLY:   3%|▎         | 389/12311 [00:03<01:26, 137.05it/s]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f013c71a1d0>>
Traceback (most recent call last):
  File "/home/yiftach/anaconda3/envs/instantmesh/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 770, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 
Converting OFF to PLY:   4%|▍         | 526/12311 [00:15<17:32, 11.20it/s] 

In [2]:
import os
print("PATH:", os.getenv("PATH"))
print("LD_LIBRARY_PATH:", os.getenv("LD_LIBRARY_PATH"))
print("CUDA_HOME:", os.getenv("CUDA_HOME"))


PATH: /home/yiftach/anaconda3/envs/instantmesh/bin:/home/yiftach/.vscode-server/cli/servers/Stable-e170252f762678dec6ca2cc69aba1570769a5d39/server/bin/remote-cli:/usr/local/cuda-12.1/bin:/home/yiftach/anaconda3/envs/instantmesh/bin:/home/yiftach/anaconda3/condabin:/home/yiftach/bin:/home/yiftach/bin:/usr/local/cuda/bin:/home/yiftach/anaconda3/bin:/home/yiftach/anaconda3/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
LD_LIBRARY_PATH: /usr/local/cuda-12.1/lib64:
CUDA_HOME: None


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
xm = load_model('transmitter', device=device)

/home/yiftach/main/Research/shap-e/shap_e/examples/shap_e_model_cache


  0%|          | 0.00/1.78G [00:00<?, ?iB/s]

None is the location


In [5]:
model_path = "/home/yiftach/main/Research/shap-e/data/ModelNet10/chair/train/chair_0200.ply"

# This may take a few minutes, since it requires rendering the model twice
# in two different modes.
batch = load_or_create_multimodal_batch(
    device,
    model_path=model_path,
    mv_light_mode="basic",
    mv_image_size=256,
    cache_dir="example_data/chair_0082_2",
    verbose=True, # this will show Blender output during renders
)

creating point cloud...
extracting point cloud from multiview...
creating multiview...
Blender 3.4.1 (hash 55485cb379f7 built 2022-12-20 00:46:45)
Read prefs: /home/yiftach/.config/blender/3.4/config/userpref.blend
activated gpu NVIDIA RTX A6000
activated gpu NVIDIA RTX A6000

Successfully imported '/home/yiftach/main/Research/shap-e/data/ModelNet10/chair/train/chair_0200.ply' in 0.007 sec
GPU
Fra:1 Mem:20.01M (Peak 21.14M) | Time:00:00.17 | Syncing chair_0200
Fra:1 Mem:20.14M (Peak 21.14M) | Time:00:00.17 | Syncing Camera
Fra:1 Mem:20.14M (Peak 21.14M) | Time:00:00.17 | Rendering 1 / 1 samples
Fra:1 Mem:19.33M (Peak 28.05M) | Time:00:00.19 | Compositing
Fra:1 Mem:19.34M (Peak 28.05M) | Time:00:00.19 | Compositing | Determining resolution
Fra:1 Mem:19.34M (Peak 28.05M) | Time:00:00.19 | Compositing | Initializing execution
Fra:1 Mem:24.40M (Peak 28.05M) | Time:00:00.21 | Compositing | Tile 1-4
Fra:1 Mem:24.40M (Peak 28.05M) | Time:00:00.21 | Compositing | Tile 2-4
Fra:1 Mem:24.40M (Pea

In [4]:
import os
import random
from tqdm import tqdm

def sample_files_from_directories(base_dir, sample_size=25):
    # Initialize an empty list to store the file paths and their parent category
    sampled_file_paths = []
    
    # Get the list of category directories within the base directory
    categories = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
    print(categories)
    # Function to sample files from a directory
    def sample_files(directory):
        # List all files in the directory
        files = [os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
        # Sample files
        return random.sample(files, min(sample_size, len(files)))

    # Iterate over each category and sample files
    for category in categories:
        category_dir = os.path.join(base_dir, category,"train")
        # Sample files and create tuples (file_path, category)
        sampled_files = sample_files(category_dir)
        for file_path in sampled_files:
            sampled_file_paths.append((file_path, category))
    
    return sampled_file_paths

# sample_files_from_directories("/home/yiftach/main/Research/shap-e/data/ModelNet10")
category_to_latents = {}
for model_path, category in tqdm(sample_files_from_directories("/home/yiftach/main/Research/shap-e/data/ModelNet10")):
    if category not in category_to_latents:
        category_to_latents[category] = []
        
    batch = load_or_create_multimodal_batch(
        device,
        model_path=model_path,
        mv_light_mode="basic",
        mv_image_size=256,
        cache_dir=f"cache_data/{category}/{os.path.basename(model_path).split('.')[0]}",
        verbose=False,
    )
    latent = xm.encoder.encode_to_bottleneck(batch).cpu().detach().numpy()
    category_to_latents[category].append(latent)

    # break

['bed', 'sofa', 'bathtub', 'chair', 'toilet', 'monitor', 'table', 'desk', 'night_stand', 'dresser']


  0%|          | 0/250 [00:00<?, ?it/s]

  1%|          | 3/250 [05:46<7:59:19, 116.44s/it]

In [None]:
import numpy as np
category_to_latents = {k: np.stack(v).squeeze(axis=1) for k, v in category_to_latents.items()}


In [None]:
all_latents = np.concatenate([v for v in category_to_latents.values()], axis=0)
# if all_latents.is_cuda:
    # data_tensor = all_latents.cpu()
data_numpy = all_latents
tsne = TSNE(n_components=2, random_state=42,perplexity=29)
data_transformed = tsne.fit_transform(data_numpy)


In [None]:
plt.scatter(data_transformed[:, 0], data_transformed[:, 1])

In [None]:
import torch
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [None]:
with torch.no_grad():
    latent = xm.encoder.encode_to_bottleneck(batch)

    render_mode = 'nerf' # you can change this to 'nerf'
    size = 512 # recommended that you lower resolution when using nerf

    cameras = create_pan_cameras(size, device)
    images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)
    display(gif_widget(images))

In [None]:
with torch.no_grad():
    latent = xm.encoder.encode_to_bottleneck(batch)

    render_mode = 'nerf' # you can change this to 'nerf'
    size = 512 # recommended that you lower resolution when using nerf

    cameras = create_pan_cameras(size, device)
    images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)
    display(gif_widget(images))

In [None]:
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch

processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
# by default `from_pretrained` loads the weights in float32
# we load in float16 instead to save memory
blip2 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16) 
blip2 = blip2.to(device)

In [None]:
inputs = processor(images, return_tensors="pt").to(device, torch.float16)

generated_ids = blip2.generate(**inputs, max_new_tokens=20)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(generated_text)


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from shap_e.diffusion.sample import sample_latents, sample_latents_noised
# from shap_e.diffusion.gaussian_diffusion import ddim_inversion
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config,GaussianDiffusion
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
model = load_model('text300M', device=device)
diffusion = diffusion_from_config(load_config('diffusion'))

latents_noised = diffusion.ddim_inversion(model=model,latent=latent,clip_denoised=True, model_kwargs=dict(texts=["a table"] * 1))




In [None]:
from tqdm import tqdm

size = 64 # this is the size of the renders; higher values take longer to render.
cameras = create_pan_cameras(size, device)
# latents_noised
im2 = []
for i, l2 in tqdm(enumerate(latents_noised[100:101])):
    images = decode_latent_images(xm, l2, cameras, rendering_mode="nerf")
    display(gif_widget(images))

In [None]:
diffusion = diffusion_from_config(load_config('diffusion'))

batch_size = 4
guidance_scale = 12.5
diffusion.num_timesteps = 100
diffusion.alphas_cumprod = diffusion.alphas_cumprod[:100]
latents_new = sample_latents(
    batch_size=batch_size,
    model=model,
    diffusion=diffusion,
    guidance_scale=guidance_scale,
    model_kwargs=dict(texts=["a red table"] * batch_size),
    progress=True,
    clip_denoised=True,
    use_fp16=True,
    use_karras=True,
    karras_steps=64,
    sigma_min=1e-3,
    sigma_max=45,
    s_churn=0.1,
    noise=latents_noised[100].expand(batch_size, -1),
)

In [None]:
from tqdm import tqdm

size = 64 # this is the size of the renders; higher values take longer to render.
cameras = create_pan_cameras(size, device)
# latents_noised
im2 = []
for i, l2 in tqdm(enumerate(latents_new)):
    images = decode_latent_images(xm, l2, cameras, rendering_mode="nerf")
    display(gif_widget(images))
    