# Shap-E Text-to-3D & Image-to-3D

Generate 3D assets with a text prompt or an image.

In [None]:
# Install required libraries
!pip install -q diffusers transformers accelerate trimesh gradio

In [None]:
# Import modules
# import spaces
import gradio as gr
import numpy as np
import PIL.Image
import random
import tempfile
import torch
import trimesh
from diffusers import ShapEPipeline, ShapEImg2ImgPipeline
from diffusers.utils import export_to_ply

In [None]:
# Pick the compute device: CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Show which device was selected
print("device:", device)

In [None]:
# Build text-to-3D pipeline using Diffusers library.
# Half precision is requested only when CUDA is available: this notebook falls
# back to the CPU otherwise, and float16 pipelines are not expected to run there.
# The "fp16" weight variant is still the one loaded; it is cast to the requested dtype.
pipe_text3d = ShapEPipeline.from_pretrained(
    "openai/shap-e",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    variant="fp16"
)

In [None]:
# Build image-to-3D pipeline using Diffusers library.
# Half precision is requested only when CUDA is available: this notebook falls
# back to the CPU otherwise, and float16 pipelines are not expected to run there.
# The "fp16" weight variant is still the one loaded; it is cast to the requested dtype.
pipe_image3d = ShapEImg2ImgPipeline.from_pretrained(
    "openai/shap-e-img2img",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    variant="fp16"
)

In [None]:
# Move both Shap-E pipelines onto the selected device
pipe_text3d, pipe_image3d = pipe_text3d.to(device), pipe_image3d.to(device)

## Text-to-3D

In [None]:
# Inputs for the text-to-3D run: one prompt per asset, plus the guidance scale
# handed to the pipeline
prompt = [
    "A firecracker",
    "A birthday cupcake",
]
guidance_scale = 15.0

In [None]:
# Run the text-to-3D pipeline on the prompt list and keep its `.images` output.
# The results are indexed per prompt when they are exported (images[0], images[1]).
images = pipe_text3d(
    prompt,
    guidance_scale=guidance_scale,
    num_inference_steps=64,
    frame_size=256
).images

In [None]:
from diffusers.utils import export_to_gif

# Write one GIF per prompt, in the same order as the `prompt` list
# (index 0: "A firecracker", index 1: "A birthday cupcake")
export_to_gif(images[0], "firecracker_3d.gif")
export_to_gif(images[1], "cake_3d.gif")

## Image-to-3D

### Generate new image

In [None]:
# Load Kandinsky 3 model pipeline (used to create the input picture for image-to-3D)
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-3",
    torch_dtype=torch.float16,
    variant="fp16"
)
# NOTE(review): model CPU offload presumably requires a GPU and the `accelerate`
# package installed at the top of the notebook - confirm before running on CPU only.
pipe.enable_model_cpu_offload()

In [None]:
# Prompt for the text-to-image step; this rebinds `prompt`, replacing the
# list used in the Text-to-3D section
prompt = "A cheeseburger, white background"

In [None]:
# Seed a generator on the selected device so the picture is reproducible
generator = torch.Generator(device=device)
generator.manual_seed(0)

# Generate a single picture from the prompt and keep the first result
image = pipe(prompt, generator=generator, num_inference_steps=25).images[0]

In [None]:
# Persist the generated picture; it is reloaded from this path in the next section
image.save("burger.png")

### Pass image to the Image-to-3D pipeline (`ShapEImg2ImgPipeline`)

In [None]:
# Guidance scale used for the image-to-3D run
guidance_scale = 3.0
# Reload the saved picture and resize it to 256x256.
# Only `PIL.Image` is imported in this notebook, so the module must be
# referenced by its full name (a bare `Image` is undefined here).
image = PIL.Image.open("burger.png").resize((256, 256))

In [None]:
# Run the image-to-3D pipeline on the resized picture and keep its `.images` output
images = pipe_image3d(
    image,
    guidance_scale=guidance_scale,
    num_inference_steps=64,
    frame_size=256,
).images

In [None]:
# Export the first (only) result as a GIF and keep the value returned by export_to_gif.
# `export_to_gif` is imported in the Text-to-3D section above.
gif_path = export_to_gif(images[0], "burger_3d.gif")

## Gradio Application

In [None]:
# Largest seed offered in the UI: the maximum value of a signed 32-bit integer
MAX_SEED = np.iinfo(np.dtype("int32")).max

In [None]:
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """
    Returns the seed to use for the next generation.

    When `randomize_seed` is truthy, a new seed is drawn with
    `random.randint(0, MAX_SEED)`, i.e. from the closed interval
    [0, MAX_SEED]. Otherwise the given `seed` is returned unchanged.
    """
    if not randomize_seed:
        return seed
    return random.randint(0, MAX_SEED)

In [None]:
# @spaces.GPU
def ply_to_glb(ply_path: str) -> str:
    """
    Converts a ply file to a glb file.

    The mesh is loaded with trimesh, rotated by -pi/2 about the X axis and then
    by pi about the Y axis, and exported in glb format to a new temporary file.

    Args:
        ply_path: Path of the ply file to convert.

    Returns:
        Path of the generated glb file. The file is created with
        `delete=False`, so it is not removed automatically.
    """
    mesh = trimesh.load(ply_path)
    rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
    mesh = mesh.apply_transform(rot)
    rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
    mesh = mesh.apply_transform(rot)

    # Only a unique path is needed here. Close the handle immediately so the
    # descriptor is not leaked and the file can be reopened by name for export.
    with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as glb_file:
        glb_path = glb_file.name
    mesh.export(glb_path, file_type="glb")

    return glb_path

In [None]:
# @spaces.GPU
def text_to_3d(prompt: str, seed: int = 0, guidance_scale: float = 15.0, num_steps: int = 64) -> str:
    """
    Generates 3D assets with a text prompt.

    Args:
        prompt: Text description of the asset to generate.
        seed: Seed for the random generator.
        guidance_scale: Guidance scale passed to the pipeline.
        num_steps: Number of inference steps.

    Returns:
        Path of a temporary glb file containing the generated mesh.
    """
    # Slider values coming from the UI may arrive as floats, while
    # `manual_seed` and the step count need integers.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    images = pipe_text3d(
        prompt,
        generator=generator,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_steps),
        output_type="mesh"
    ).images

    # Reserve a unique path and close the handle right away, so the descriptor
    # is not leaked and the file can be reopened by name for writing.
    with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as ply_file:
        ply_path = ply_file.name
    export_to_ply(images[0], ply_path)

    return ply_to_glb(ply_path)

In [None]:
# @spaces.GPU
def image_to_3d(
    image: PIL.Image.Image, seed: int = 0, guidance_scale: float = 3.0, num_steps: int = 64
    ) -> str:
    """
    Generates 3D assets with an image.

    Args:
        image: Input picture the asset is generated from.
        seed: Seed for the random generator.
        guidance_scale: Guidance scale passed to the pipeline.
        num_steps: Number of inference steps.

    Returns:
        Path of a temporary glb file containing the generated mesh.
    """
    # Slider values coming from the UI may arrive as floats, while
    # `manual_seed` and the step count need integers.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    images = pipe_image3d(
        image,
        generator=generator,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_steps),
        output_type="mesh"
    ).images

    # Reserve a unique path and close the handle right away, so the descriptor
    # is not leaked and the file can be reopened by name for writing.
    with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as ply_file:
        ply_path = ply_file.name
    export_to_ply(images[0], ply_path)

    return ply_to_glb(ply_path)

In [None]:
# Example prompts offered in the Text-to-3D tab
examples = ["A bird", "A shark", "A bowl of vegetables"]
# The two prompts also used in the Text-to-3D section of this notebook
examples += ["A firecracker", "A birthday cupcake"]

In [None]:
# Define Gradio application.
# Only the Text-to-3D tab is active; the Image-to-3D tab that follows this
# block is commented out.
# NOTE: the component variables created here (`prompt`, `seed`,
# `guidance_scale`, ...) rebind notebook-level names used by earlier cells
# (e.g. the `prompt` and `guidance_scale` values of the sections above).
with gr.Blocks() as demo:
    gr.Markdown("# Shap-E Text-to-3D & Image-to-3D")
    with gr.Tabs():
        with gr.Tab(label="Text-to-3D"):
            with gr.Group():
                # Prompt box and Run button side by side
                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter prompt",
                        container=False
                    )
                    run_button = gr.Button("Run", scale=0)
                # 3D viewer that receives the glb path returned by text_to_3d
                result = gr.Model3D(
                    label="Result",
                    show_label=False
                )
                # Generation settings, collapsed by default; the slider defaults
                # match the keyword defaults of text_to_3d
                with gr.Accordion(label="Options", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0
                    )
                    randomize_seed = gr.Checkbox(
                        label="Randomize seed",
                        value=True
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=1,
                        maximum=20,
                        step=0.1,
                        value=15.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=2,
                        maximum=100,
                        step=1,
                        value=64
                    )

            # Example prompts wired to the prompt box.
            # NOTE(review): `fn`/`outputs` are presumably only used when example
            # caching or run-on-click is enabled - confirm against the Gradio docs.
            gr.Examples(
                examples=examples,
                inputs=prompt,
                outputs=result,
                fn=text_to_3d
            )

            # On submitting the prompt or clicking Run: first write the seed
            # returned by randomize_seed_fn back to the seed slider, then run
            # text_to_3d with the current prompt and option values.
            gr.on(
                triggers=[prompt.submit, run_button.click],
                fn=randomize_seed_fn,
                inputs=[seed, randomize_seed],
                outputs=seed,
                api_name=False,
                concurrency_limit=None
            ).then(
                fn=text_to_3d,
                inputs=[
                    prompt,
                    seed,
                    guidance_scale,
                    num_inference_steps
                ],
                outputs=result,
                api_name="text-to-3d",
                # Generation events share the "gpu" concurrency group, limited to 1
                concurrency_id="gpu",
                concurrency_limit=1
            )
        # with gr.Tab(label="Image-to-3D"):
        #     with gr.Group():
        #         image = gr.Image(
        #             label="Input image",
        #             show_label=False,
        #             type="pil"
        #         )
        #         run_button = gr.Button(value="Run")
        #         result = gr.Model3D(
        #             label="Result",
        #             show_label=False
        #         )
        #         with gr.Accordion(label="Options", open=False):
        #             seed = gr.Slider(
        #                 label="Seed",
        #                 minimum=0,
        #                 maximum=MAX_SEED,
        #                 step=1,
        #                 value=0
        #             )
        #             randomize_seed = gr.Checkbox(
        #                 label="Randomize seed",
        #                 value=True
        #             )
        #             guidance_scale = gr.Slider(
        #                 label="Guidance scale",
        #                 minimum=1,
        #                 maximum=20,
        #                 step=0.1,
        #                 value=3.0
        #             )
        #             num_inference_steps = gr.Slider(
        #                 label="Number of inference steps",
        #                 minimum=2,
        #                 maximum=100,
        #                 step=1,
        #                 value=64
        #             )

        #     gr.Examples(
        #         examples=examples_images,
        #         inputs=image,
        #         outputs=result,
        #         fn=image_to_3d
        #     )

        #     run_button.click(
        #         fn=randomize_seed_fn,
        #         inputs=[seed, randomize_seed],
        #         outputs=seed,
        #         api_name=False,
        #         concurrency_limit=None
        #     ).then(
        #         fn=image_to_3d,
        #         inputs=[
        #             image,
        #             seed,
        #             guidance_scale,
        #             num_inference_steps
        #         ],
        #         outputs=result,
        #         api_name="image-to-3d",
        #         concurrency_id="gpu",
        #         concurrency_limit=1
        #     )

In [None]:
# Enable the request queue with Gradio's default settings
demo.queue()

In [None]:
# Start Gradio application in debug mode, without requesting a public share link.
# NOTE(review): with debug=True the call presumably blocks this cell until the
# server is stopped - confirm against the Gradio `launch` documentation.
demo.launch(debug=True, share=False)

In [None]:
# Close Gradio application (run this once the demo is no longer needed)
demo.close()