# Setup Model

In [5]:
import os
import time
import warnings

import numpy as np
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget, decode_latent_mesh
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera

# Prefer the GPU when one is visible; otherwise fall back to the CPU so the
# notebook can still be loaded and run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The earlier `assert(device != 'cuda')` compared a torch.device against a
# plain string, which does not test the device type, so it never guarded
# anything. Check `device.type` instead, and only warn so that CPU-only runs
# keep working as they did before.
if device.type != 'cuda':
    warnings.warn(
        'CUDA is not available; falling back to CPU. '
        'Sampling and rendering are expected to be much slower.',
        RuntimeWarning,
    )

# Path prefix under which exportLatentToObj writes its .obj files.
SAVE_LOCATION = 'models/'

# `xm` is the model handed to decode_latent_mesh / decode_latent_images;
# `model` and `diffusion` are the ones passed to sample_latents.
xm = load_model('transmitter', device=device)
model = load_model('text300M', device=device)
diffusion = diffusion_from_config(load_config('diffusion'))

def generateLatentFromPrompt(prompt, guidance_scale=15.0):
    """Sample a single latent for a text prompt.

    Args:
        prompt: Text description passed to the sampler under the ``texts`` key.
        guidance_scale: Guidance strength forwarded to ``sample_latents``.

    Returns:
        The first (and only) entry of the batch returned by ``sample_latents``.
    """
    n_samples = 1

    # Fixed sampler settings used for every prompt in this notebook.
    sampler_options = {
        'progress': True,
        'clip_denoised': True,
        'use_fp16': True,
        'use_karras': True,
        'karras_steps': 128,
        'sigma_min': 1e-3,
        'sigma_max': 160,
        's_churn': 0,
    }

    batch = sample_latents(
        batch_size=n_samples,
        model=model,
        diffusion=diffusion,
        guidance_scale=guidance_scale,
        model_kwargs={'texts': [prompt for _ in range(n_samples)]},
        **sampler_options,
    )

    return batch[0]

def exportLatentToObj(latent, name):
    """Decode ``latent`` into a triangle mesh and write it as an .obj file.

    The file is written to ``f'{SAVE_LOCATION}{name}.obj'``. The parent
    directory of that path is created if it does not exist yet, so a fresh
    checkout without the output folder no longer fails with FileNotFoundError.

    Args:
        latent: Latent to decode (as returned by generateLatentFromPrompt).
        name: File name stem, without the ``.obj`` extension.
    """
    obj_path = f'{SAVE_LOCATION}{name}.obj'

    # Make sure the destination folder exists before doing the decode.
    out_dir = os.path.dirname(obj_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    mesh = decode_latent_mesh(xm, latent).tri_mesh()
    # Text mode: the file is opened with 'w' for write_obj.
    with open(obj_path, 'w') as f:
        mesh.write_obj(f)

# Usage

In [2]:
# Sample one latent for the text prompt, leaving guidance_scale at its default (15.0).
latent = generateLatentFromPrompt('A penguin made of legos')

  0%|          | 0/64 [00:00<?, ?it/s]

In [3]:
def create_single_pan_camera(
    size: int,
    device: torch.device,
    theta: float) -> DifferentiableCameraBatch:
    """Build a one-camera batch looking at the scene from pan angle ``theta``.

    Args:
        size: Width and height of the camera image, in pixels.
        device: Torch device the camera tensors are moved to.
        theta: Pan angle in degrees (converted to radians internally).

    Returns:
        A DifferentiableCameraBatch of shape (1, 1) wrapping a single
        DifferentiableProjectiveCamera with x/y field of view 0.7.
    """
    angle = np.radians(theta)
    sin_t, cos_t = np.sin(angle), np.cos(angle)

    # Unit viewing direction, tilted downward by the fixed -0.5 component.
    forward = np.array([sin_t, cos_t, -0.5])
    forward = forward / np.linalg.norm(forward)

    # Horizontal right vector, and the up vector as forward x right.
    right = np.array([cos_t, -sin_t, 0.0])
    up = np.cross(forward, right)

    # The camera sits 4 units away, opposite to the viewing direction.
    position = -forward * 4

    def as_row(vec):
        # (3,) numpy vector -> (1, 3) float32 tensor on the target device.
        return torch.from_numpy(vec.reshape(1, 3)).float().to(device)

    camera = DifferentiableProjectiveCamera(
        origin=as_row(position),
        x=as_row(right),
        y=as_row(up),
        z=as_row(forward),
        width=size,
        height=size,
        x_fov=0.7,
        y_fov=0.7,
    )
    return DifferentiableCameraBatch(shape=(1, 1), flat_camera=camera)

In [7]:
# Time a single 512x512 render of `latent` from a camera at theta = 15 degrees.
now = time.time()
cameras = create_single_pan_camera(512, device, 15)
images = decode_latent_images(xm, latent, cameras, rendering_mode='stf')
display(gif_widget(images))
# Elapsed seconds, covering camera setup, decoding and displaying the widget.
print('Completed in,', time.time() - now)

HTML(value='<img src="data:image/gif;base64,R0lGODlhAAEAAYcAANvc3NjY19nX09bX2NbX19XX19XW19XW1tTW1tTV1tTV1dTV1N…

Completed in, 13.867968559265137


In [21]:
# Time only the latent -> triangle mesh decode; nothing is written to disk here.
now = time.time()
t = decode_latent_mesh(xm, latent).tri_mesh()
# To also save the mesh, open the file in text mode ('w'), as exportLatentToObj does:
# with open(SAVE_LOCATION + 'timing2.obj', 'w') as f:
#     t.write_obj(f)
# NOTE(review): an earlier version opened the file with 'wb', which presumably
# caused "TypeError: a bytes-like object is required, not 'str'" -- confirm.
print('Completed in,', time.time() - now)

TypeError: a bytes-like object is required, not 'str'

In [17]:
# Sanity check: show the shape of the sampled latent tensor.
print(latent.shape)

torch.Size([1048576])


In [6]:
# Decode the sampled latent and write it to f'{SAVE_LOCATION}legop.obj'.
exportLatentToObj(latent, 'legop')