# Setup Model

In [3]:
import os
import time
import warnings

import numpy as np
import PIL
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget, decode_latent_mesh
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
from shap_e.util.image_util import load_image

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Note: a torch.device never compares equal to a plain string, so a check such
# as `device != 'cuda'` is always true. Inspect `device.type` instead, and only
# warn (rather than abort) so the notebook still runs on CPU-only machines.
if device.type != 'cuda':
    warnings.warn('CUDA is not available; falling back to CPU.')

# Path prefix (including the trailing slash) under which .obj files are exported.
SAVE_LOCATION = 'models/'

# `xm` is passed to decode_latent_mesh / decode_latent_images to decode latents.
xm = load_model('transmitter', device=device)
# Conditional models used by generateLatentImage and generateLatentPrompt.
imageModel = load_model('image300M', device=device)
textModel = load_model('text300M', device=device)
# Diffusion process shared by every sample_latents call in generateLatent.
diffusion = diffusion_from_config(load_config('diffusion'))

def generateLatentImage(image, guidance_scale=3.0):
    """Sample one latent conditioned on a single image, using ``imageModel``.

    Args:
        image: Conditioning image; forwarded to the sampler as a one-element
            list under the ``images`` key.
        guidance_scale: Guidance scale forwarded to ``generateLatent``.

    Returns:
        The value returned by ``generateLatent`` (the first sampled latent).
    """
    conditioning = {'images': [image]}
    return generateLatent(conditioning, imageModel, guidance_scale)

def generateLatentPrompt(prompt, guidance_scale=15.0):
    """Sample one latent conditioned on a text prompt, using ``textModel``.

    Args:
        prompt: Text prompt; forwarded to the sampler as a one-element list
            under the ``texts`` key.
        guidance_scale: Guidance scale forwarded to ``generateLatent``.

    Returns:
        The value returned by ``generateLatent`` (the first sampled latent).
    """
    conditioning = {'texts': [prompt]}
    return generateLatent(conditioning, textModel, guidance_scale)

def generateLatent(args, model, guidance_scale):
    """Run ``sample_latents`` once and return the first latent of the batch.

    Args:
        args: Conditioning keyword arguments, passed through as ``model_kwargs``.
        model: The conditional model to sample from.
        guidance_scale: Guidance scale passed to the sampler.

    Returns:
        Element 0 of the batch returned by ``sample_latents`` (batch size is 1).
    """
    # Fixed sampler configuration; only the model, its conditioning and the
    # guidance scale vary between calls.
    sampler_settings = dict(
        batch_size=1,
        model=model,
        diffusion=diffusion,
        guidance_scale=guidance_scale,
        model_kwargs=args,
        progress=True,
        clip_denoised=True,
        use_fp16=True,
        use_karras=True,
        karras_steps=64,
        sigma_min=1e-3,
        sigma_max=160,
        s_churn=0,
    )
    sampled_batch = sample_latents(**sampler_settings)
    return sampled_batch[0]

def exportLatentToObj(latent, name):
    """Decode ``latent`` into a triangle mesh and write it as a Wavefront .obj file.

    The file is written to ``f'{SAVE_LOCATION}{name}.obj'``. The containing
    directory is created if it does not exist yet.

    Args:
        latent: Latent to decode with the ``xm`` model.
        name: File name (without extension) appended to ``SAVE_LOCATION``.
    """
    mesh = decode_latent_mesh(xm, latent).tri_mesh()
    obj_path = f'{SAVE_LOCATION}{name}.obj'
    # open(..., 'w') raises FileNotFoundError when the target directory is
    # missing, so make sure it exists before writing.
    os.makedirs(os.path.dirname(obj_path) or '.', exist_ok=True)
    with open(obj_path, 'w') as f:
        mesh.write_obj(f)

In [6]:
def create_single_pan_camera(
    size: int,
    device: torch.device,
    theta: float) -> DifferentiableCameraBatch:
    """Build a batch containing one camera placed at pan angle ``theta``.

    Args:
        size: Used for both the width and the height of the camera.
        device: Torch device the camera tensors are moved to.
        theta: Pan angle in degrees.

    Returns:
        A ``DifferentiableCameraBatch`` of shape ``(1, 1)`` wrapping a single
        ``DifferentiableProjectiveCamera``.
    """
    angle = np.radians(theta)  # trigonometric functions below expect radians
    sin_a, cos_a = np.sin(angle), np.cos(angle)

    # Unit viewing direction: rotates with the angle, fixed -0.5 third component.
    forward = np.array([sin_a, cos_a, -0.5])
    forward = forward / np.linalg.norm(forward)

    # The camera sits 4 units back along the viewing direction.
    position = forward * -4

    # Right vector has a zero third component; up is forward x right.
    right = np.array([cos_a, -sin_a, 0.0])
    up = np.cross(forward, right)

    def as_row_tensor(vec):
        # (3,) float64 array -> (1, 3) float32 tensor on the requested device.
        return torch.from_numpy(vec[np.newaxis, :]).float().to(device)

    camera = DifferentiableProjectiveCamera(
        origin=as_row_tensor(position),
        x=as_row_tensor(right),
        y=as_row_tensor(up),
        z=as_row_tensor(forward),
        width=size,
        height=size,
        x_fov=0.7,
        y_fov=0.7,
    )
    return DifferentiableCameraBatch(shape=(1, 1), flat_camera=camera)

# Usage

## Text To 3D

In [14]:
# `title` is both the text prompt and the base file name used when saving.
title = 'An airplane'
# Override the default guidance scale of generateLatentPrompt (15.0) with 3.
latent = generateLatentPrompt(title, guidance_scale=3)

  0%|          | 0/64 [00:00<?, ?it/s]

## Image To 3D

In [19]:
# `title` is only used as the base file name when saving the results.
title = 'An orange airplane - reconstructed'
# Load the conditioning image and sample a latent with the default guidance scale.
image = load_image('Images/an airplane.png')
latent = generateLatentImage(image=image)

  0%|          | 0/64 [00:00<?, ?it/s]

## Generate and Save Model

In [20]:
# Path prefix (including the trailing slash) for the rendered 2D preview image.
SLICE_LOCATION = '2DSlice/'

# Render the current `latent` from a single 128x128 camera at a 60 degree pan angle.
cameras = create_single_pan_camera(128, device, 60)
images = decode_latent_images(xm, latent, cameras, rendering_mode='stf')
display(gif_widget(images))

# Saving fails if the output directory is missing, so create it first.
os.makedirs(SLICE_LOCATION, exist_ok=True)
images[0].save(f'{SLICE_LOCATION}{title}.png')
print(f'Saved 2D Slice To: {SLICE_LOCATION}{title}')

# Export the same latent as a mesh under SAVE_LOCATION.
exportLatentToObj(latent, title)
print(f'Saved 3D Model To: {SAVE_LOCATION}{title}')

HTML(value='<img src="data:image/gif;base64,R0lGODlhgACAAIcAAOexb+KqauioZd2vbdioauKlZdmlZ9akZNOkZsylaeWgXt2hZN…

Saved 2D Slice To: 2DSlice/An orange airplane - reconstructed
Saved 3D Model To: models/An orange airplane - reconstructed
