# Parameters

The structure of the shared folder must be as follows:

```
shared
│
├── original_imgs --- 360 deg images of the original mesh
│   └── fish.obj
│       ├── 0.png
│       ├── 1.png
│       └── ...
│
├── original_meshes --- The original .obj files produced by Shap-E
│   └── fish.obj
│
├── edited_imgs --- 360 deg images after being passed through Pix2Pix
│   └── fish.obj - make it metallic
│       ├── 0.png
│       ├── 1.png
│       └── ...
│
├── edited_meshes --- The reconstructed .obj files from the Pix2Pix edited_imgs
│   └── fish.obj
│
└── alternate_meshes --- Alternative to running through Pix2Pix by re-prompting Shap-E
    └── fish.obj
```

In [24]:
# Root of the shared working directory described in the folder layout above.
SHARED_FOLDER = r'C:\Users\Almog\Dev\shap-e\project\shared'

# Earlier experiment set, kept for reference only (not executed):
# INSTRUCTIONS = {
#     "A houseplant": "make the plant metallic",
#     "An apple": "turn the apple into a crystal",
#     "A coffee mug": "turn the mug into a bowl",
#     "A bicycle": "make the bicycle wooden",
#     "An office chair": "turn the office chair into a velvet style",
#     "A notebook": "turn the notebook into leather",
#     "An umbrella": "make umbrella minecraft style",
#     "A wristwatch": "make the watch gold",
#     "A pair of shoes": "make the shoes rubber",
#     "A toothbrush": "make the toothbrush into bamboo",
#     "A teapot": "make the teapot out of silver",
#     "A backpack": "turn the backpack into denim",
#     "A smartphone": "make the smartphone plastic",
#     "A desk lamp": "add brass to the desk lamp",
#     "A water bottle": "make the water bottle out of legos"
# }

# Maps each text-to-3D prompt to the edit instruction applied to its render.
INSTRUCTIONS = {
    "a cheeseburger": "Make the bun out of chocolate",
    "an apple": "Transform the apple into a green delicious apple",
    "a traffic cone": "Give the traffic cone a futuristic, neon-light design",
    "a pumpkin": "Carve a spooky face on the pumpkin",
    "a banana": "Add chocolate syrup",
    "a water bottle": "Give the water bottle a metallic finish",
    "a golden retriever dog": "Turn the dog into a husky breed",
    "a birthday cake": "Add a lot of birthday candles to the brithday cake",
    "a red couch": "Make the couch into lego",
    "a rubber duck": "Make the rubber duck minecraft style"
}

# NOTE: this reassignment replaces the ten-entry table above, so only the
# single apple experiment is active when the cell runs top to bottom.
INSTRUCTIONS = {
    "an apple": "Make the apple plaid",
}

# Parallel lists (same order as INSTRUCTIONS): prompts and their instructions.
PROMPTS = list(INSTRUCTIONS.keys())
EDITED_INSTRUCTIONS = list(INSTRUCTIONS.values())

# Prompts that describe the edited object directly; used to generate the
# 'alternate_meshes' without going through the image-editing step.
COMBINED_PROMPTS = [
    "A chocolate and candy cheeseburger",
    "A green delicious apple",
    "A futuristic, neon-light designed traffic cone",
    "A pumpkin with a spooky carved face",
    "A banana with whipped cream and chocolate syrup",
    "A water bottle with a metallic finish",
    "A husky breed dog",
    "A birthday cake with birthday candles",
    "A red couch with cushions on top",
    "A Minecraft-style rubber duck",
]

# Guidance scale passed to the text-conditioned sample_latents calls.
GUIDANCE_SCALE = 15.0
# When True, the generation loops stop after their first item.
TRIAL = False

import torch

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The earlier check, `assert(device != 'cuda')`, compared a torch.device with
# a plain string. Those never compare equal, so the assertion could not fail
# on any machine. Inspect the device type explicitly and warn instead, since
# later cells request fp16 sampling and move a pipeline to "cuda".
if device.type != 'cuda':
    print('WARNING: CUDA is not available; running on the CPU. '
          'Cells that require "cuda" will fail.')

# Generate Models

## Setup

In [2]:
import torch
import os
from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget, decode_latent_mesh

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The earlier check, `assert(device != 'cuda')`, compared a torch.device with
# a plain string. Those never compare equal, so the assertion could not fail
# on any machine. Inspect the device type explicitly and warn instead, since
# the sampling cells below request fp16.
if device.type != 'cuda':
    print('WARNING: CUDA is not available; running on the CPU. '
          'Cells that require "cuda" will fail.')

In [3]:
# Load the pretrained Shap-E components onto `device`.
# 'transmitter' is the model handed to decode_latent_mesh / decode_latent_images below.
xmModel = load_model('transmitter', device=device)
# 'text300M' is the model handed to sample_latents together with text prompts.
textModel = load_model('text300M', device=device)
# Diffusion process built from the 'diffusion' config; also passed to sample_latents.
diffusionModel = diffusion_from_config(load_config('diffusion'))

## Generate and Save .OBJ Files

In [28]:
# Target folder for the (currently disabled) .obj export further down.
folder = os.path.join(SHARED_FOLDER, 'original_meshes')

# Sampler settings that are identical for every prompt.
sampler_options = dict(
    batch_size=1,
    model=textModel,
    diffusion=diffusionModel,
    guidance_scale=GUIDANCE_SCALE,
    progress=True,
    clip_denoised=True,
    use_fp16=True,
    use_karras=True,
    karras_steps=128,  # Originally 64
    sigma_min=1e-3,
    sigma_max=160,
    s_churn=0,
)

# One latent per prompt, in PROMPTS order; a trial run samples only the first.
latents = []
for prompt in (PROMPTS[:1] if TRIAL else PROMPTS):
    # batch_size is 1, so keep the single sampled latent.
    latent = sample_latents(model_kwargs=dict(texts=[prompt]), **sampler_options)[0]
    latents.append(latent)
    # mesh = decode_latent_mesh(xmModel, latent).tri_mesh()
    # try:
    #     with open(os.path.join(folder, f'{prompt}.obj'), 'w') as f:
    #         mesh.write_obj(f)
    #         print(f'Successfully saved "{prompt}.obj".')
    # except IOError as e:
    #     print(f'Failed to save "{prompt}.obj".')

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

# Generate 1 View from Latent

## Setup

In [12]:
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
import numpy as np

In [29]:
def create_single_pan_camera(
    size: int,
    device: torch.device,
    theta: float) -> DifferentiableCameraBatch:
    """Build a batch holding one camera placed at pan angle ``theta``.

    The camera sits 4 units from the world origin and looks straight at it.

    Args:
        size: Width and height, in pixels, of the square view.
        device: Torch device the camera tensors are moved to.
        theta: Pan angle in degrees.

    Returns:
        A DifferentiableCameraBatch of shape (1, 1) wrapping a single
        DifferentiableProjectiveCamera with x_fov = y_fov = 0.7.
    """
    angle = np.radians(theta)
    sin_t, cos_t = np.sin(angle), np.cos(angle)

    # Unit viewing direction: rotates with the pan angle and carries a fixed
    # -0.5 third component before normalisation.
    forward = np.array([sin_t, cos_t, -0.5])
    forward = forward / np.linalg.norm(forward)

    # Step back 4 units against the viewing direction, so the view ray
    # passes through the world origin.
    position = -forward * 4

    # Camera basis: "right" has no third component, "up" completes the frame.
    right = np.array([cos_t, -sin_t, 0.0])
    up = np.cross(forward, right)

    def as_row(vec):
        # (3,) float64 array -> (1, 3) float32 tensor on the target device.
        return torch.from_numpy(vec.reshape(1, 3)).float().to(device)

    camera = DifferentiableProjectiveCamera(
        origin=as_row(position),
        x=as_row(right),
        y=as_row(up),
        z=as_row(forward),
        width=size,
        height=size,
        x_fov=0.7,
        y_fov=0.7,
    )
    return DifferentiableCameraBatch(shape=(1, 1), flat_camera=camera)

## Generate

In [30]:
# Render one 512x512 view (pan angle 15 degrees) of every sampled latent and
# save it as <SHARED_FOLDER>/original_imgs_512/<prompt>.obj/<index>.png.
folder = os.path.join(SHARED_FOLDER, 'original_imgs_512')

# The camera does not depend on the prompt, so build it once.
cameras = create_single_pan_camera(512, device, 15)

for prompt, latent in zip(PROMPTS, latents):
    imgs = decode_latent_images(xmModel, latent, cameras, rendering_mode='stf')

    # One sub-folder per prompt. makedirs also creates `folder` itself when it
    # is missing, and does not fail if the sub-folder already exists.
    prompt_dirname = f'{prompt}.obj'
    os.makedirs(os.path.join(folder, prompt_dirname), exist_ok=True)

    for i, img in enumerate(imgs):
        # Build paths with os.path.join instead of a hard-coded backslash
        # (the old '\{i}' was an invalid escape sequence and Windows-only).
        relative_path = os.path.join(prompt_dirname, f'{i}.png')
        img.save(os.path.join(folder, relative_path))
        print(f'Saved {relative_path}')

    if TRIAL:
        break

Saved a cheeseburger.obj\0.png
Saved an apple.obj\0.png
Saved a traffic cone.obj\0.png
Saved a pumpkin.obj\0.png
Saved a banana.obj\0.png
Saved a water bottle.obj\0.png
Saved a golden retriever dog.obj\0.png
Saved a birthday cake.obj\0.png
Saved a red couch.obj\0.png
Saved a rubber duck.obj\0.png


# Generate 360 Views

## Setup

In [8]:
import Viewer3D.Viewer3D as Viewer3D
import os

## Generate and Save .PNG Images

In [9]:
# Render the views returned by Viewer3D for every original mesh and save them
# as <SHARED_FOLDER>/original_imgs/<prompt>.obj/<index>.png.
folder = os.path.join(SHARED_FOLDER, 'original_imgs')
mesh_folder = os.path.join(SHARED_FOLDER, 'original_meshes')

for prompt in PROMPTS:
    mesh_filename = os.path.join(mesh_folder, f'{prompt}.obj')
    imgs = Viewer3D.generateImagesTriMesh(mesh_filename)

    # One sub-folder per prompt. makedirs also creates `folder` itself when it
    # is missing, and does not fail if the sub-folder already exists.
    prompt_folder = os.path.join(folder, f'{prompt}.obj')
    os.makedirs(prompt_folder, exist_ok=True)

    for i, img in enumerate(imgs):
        # os.path.join replaces the hard-coded backslash ('\{i}' was an
        # invalid escape sequence and only worked as a separator on Windows).
        img.save(os.path.join(prompt_folder, f'{i}.png'))

        # In trial mode, also show each rendered view inline in the notebook.
        if TRIAL:
            display(img.convert("RGB"))
    if TRIAL:
        break

# Generate Meshes Directly w/ Edit Instructions

Make sure to run the Setup cells of the "Generate Models" section first; this cell reuses the models loaded there.

In [18]:
# Generate a mesh straight from each combined prompt and write it to
# <SHARED_FOLDER>/alternate_meshes/<prompt>.obj.
folder = os.path.join(SHARED_FOLDER, 'alternate_meshes')
os.makedirs(folder, exist_ok=True)  # open() below does not create folders

for prompt in COMBINED_PROMPTS:

    # Generate a text-to-3d latent representations
    latent = sample_latents(
        batch_size=1,
        model=textModel,
        diffusion=diffusionModel,
        guidance_scale=GUIDANCE_SCALE,
        model_kwargs=dict(texts=[prompt]),
        progress=True,
        clip_denoised=True,
        use_fp16=True,
        use_karras=True,
        karras_steps=128, # Originally 64
        sigma_min=1e-3,
        sigma_max=160,
        s_churn=0,
    )[0]

    mesh = decode_latent_mesh(xmModel, latent).tri_mesh()
    try:
        with open(os.path.join(folder, f'{prompt}.obj'), 'w') as f:
            mesh.write_obj(f)
    except OSError as e:
        # Best effort: report the reason and carry on with the next prompt.
        print(f'Failed to save "{prompt}.obj": {e}')
    else:
        # Only report success once the file has been written and closed.
        print(f'Successfully saved "{prompt}.obj".')

    if TRIAL:
        break

  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A chocolate and candy cheeseburger.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A green delicious apple.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A futuristic, neon-light designed traffic cone.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A pumpkin with a spooky carved face.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A banana with whipped cream and chocolate syrup.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A water bottle with a metallic finish.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A husky breed dog.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A birthday cake with birthday candles.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A red couch with cushions on top.obj".


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "A Minecraft-style rubber duck.obj".


# Combine Images Into 3D Mesh

Takes the masked images from `edited_masked_imgs_512` and produces `edited_meshes`

## Setup

In [29]:
import torch
import os

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget
from shap_e.util.image_util import load_image
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget, decode_latent_mesh

In [30]:
# Load the pretrained Shap-E components onto `device`.
# 'transmitter' is the model handed to decode_latent_mesh in the cell below.
xmModel = load_model('transmitter', device=device)
# 'image300M' is the model handed to sample_latents together with an input image.
imageModel = load_model('image300M', device=device)
# Diffusion process built from the 'diffusion' config; also passed to sample_latents.
diffusionModel = diffusion_from_config(load_config('diffusion'))

## Generate 3D Mesh

In [31]:
# Reconstruct one mesh per edited prompt from its masked image and write it to
# <SHARED_FOLDER>/edited_meshes/<prompt> - <instruction>.obj.
folder = os.path.join(SHARED_FOLDER, 'edited_meshes')
os.makedirs(folder, exist_ok=True)  # open() below does not create folders

guidance_scale = 3.0

for imgs_foldername in [f'{prompt}.obj - {instruction}' for prompt, instruction in zip(PROMPTS, EDITED_INSTRUCTIONS)]:
    # os.path.join replaces the hard-coded backslash, which was an invalid
    # escape sequence and only worked as a separator on Windows.
    imgs_folder_path = os.path.join(SHARED_FOLDER, 'edited_masked_imgs_512', imgs_foldername)
    if not os.path.exists(imgs_folder_path):
        continue

    # Combining several views into one 3D model is future work, so only one
    # perspective is used. Sorting makes that choice deterministic instead of
    # depending on the order os.listdir happens to return.
    png_filenames = sorted(f for f in os.listdir(imgs_folder_path) if f.endswith('.png'))
    for img_filename in png_filenames[:1]:
        fullpath = os.path.join(imgs_folder_path, img_filename)
        print(fullpath)

        image = load_image(fullpath)

        latent = sample_latents(
            batch_size=1,
            model=imageModel,
            diffusion=diffusionModel,
            guidance_scale=guidance_scale,
            model_kwargs=dict(images=[image]),
            progress=True,
            clip_denoised=True,
            use_fp16=True,
            use_karras=True,
            karras_steps=128,  # originally 64
            sigma_min=1e-3,
            sigma_max=160,
            s_churn=0,
        )[0]

        mesh = decode_latent_mesh(xmModel, latent).tri_mesh()
        # '<prompt>.obj - <instruction>' -> '<prompt> - <instruction>.obj'
        output_filename = imgs_foldername.replace('.obj', '') + '.obj'
        try:
            with open(os.path.join(folder, output_filename), 'w') as f:
                mesh.write_obj(f)
        except OSError as e:
            # Best effort: report the reason and carry on with the next folder.
            print(f'Failed to save "{output_filename}": {e}')
        else:
            # Only report success once the file has been written and closed.
            print(f'Successfully saved "{output_filename}".')

    if TRIAL:
        break

C:\Users\Almog\Dev\shap-e\project\shared\edited_masked_imgs_512\an apple.obj - Make the apple plaid\0.png


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "an apple - Make the apple plaid.obj".


# Combine Original Images Into 3D Mesh

Takes images from `original_imgs` and produces `original_reconstructed_meshes`

## Setup

In [6]:
import torch
import os

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget
from shap_e.util.image_util import load_image
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget, decode_latent_mesh

In [7]:
# Load the pretrained Shap-E components onto `device`.
# 'transmitter' is the model handed to decode_latent_mesh in the cell below.
xmModel = load_model('transmitter', device=device)
# 'image300M' is the model handed to sample_latents together with an input image.
imageModel = load_model('image300M', device=device)
# Diffusion process built from the 'diffusion' config; also passed to sample_latents.
diffusionModel = diffusion_from_config(load_config('diffusion'))

## Generate 3D Mesh

In [22]:
# Reconstruct one mesh per prompt from a rendered image of the original mesh
# and write it to <SHARED_FOLDER>/original_reconstructed_meshes/<prompt>.obj.
folder = os.path.join(SHARED_FOLDER, 'original_reconstructed_meshes')
os.makedirs(folder, exist_ok=True)  # open() below does not create folders
guidance_scale = 3.0

for imgs_foldername in [f'{prompt}.obj' for prompt in PROMPTS]:
    # os.path.join replaces the hard-coded backslash, which was an invalid
    # escape sequence and only worked as a separator on Windows.
    imgs_folder_path = os.path.join(SHARED_FOLDER, 'original_imgs', imgs_foldername)
    if not os.path.exists(imgs_folder_path):
        continue

    # Combining several views into one 3D model is future work, so only one
    # perspective is used. Sorting makes that choice deterministic instead of
    # depending on the order os.listdir happens to return.
    png_filenames = sorted(f for f in os.listdir(imgs_folder_path) if f.endswith('.png'))
    for img_filename in png_filenames[:1]:
        fullpath = os.path.join(imgs_folder_path, img_filename)
        print(fullpath)

        image = load_image(fullpath)

        latent = sample_latents(
            batch_size=1,
            model=imageModel,
            diffusion=diffusionModel,
            guidance_scale=guidance_scale,
            model_kwargs=dict(images=[image]),
            progress=True,
            clip_denoised=True,
            use_fp16=True,
            use_karras=True,
            karras_steps=128,  # originally 64
            sigma_min=1e-3,
            sigma_max=160,
            s_churn=0,
        )[0]

        mesh = decode_latent_mesh(xmModel, latent).tri_mesh()
        # Normalises the name so that it ends in exactly one '.obj'.
        output_filename = imgs_foldername.replace('.obj', '') + '.obj'
        try:
            with open(os.path.join(folder, output_filename), 'w') as f:
                mesh.write_obj(f)
        except OSError as e:
            # Best effort: report the reason and carry on with the next folder.
            print(f'Failed to save "{output_filename}": {e}')
        else:
            # Only report success once the file has been written and closed.
            print(f'Successfully saved "{output_filename}".')

    if TRIAL:
        break


C:\Users\Almog\Dev\shap-e\project\shared\original_imgs\a birthday cake.obj\0.png


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "a birthday cake.obj".
C:\Users\Almog\Dev\shap-e\project\shared\original_imgs\a red couch.obj\0.png


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "a red couch.obj".
C:\Users\Almog\Dev\shap-e\project\shared\original_imgs\a rubber duck.obj\0.png


  0%|          | 0/128 [00:00<?, ?it/s]

Successfully saved "a rubber duck.obj".


# Mask The Original Images and Apply to Edited Images

In [27]:
import os
import cv2
import numpy as np

In [28]:
# For every edited image, cut out the region covered by the object's contour
# in the matching original render and save it (black elsewhere) under
# <SHARED_FOLDER>/edited_masked_imgs_512/<edited folder>/<image name>.
OUTPUT_IMGS = os.path.join(SHARED_FOLDER, 'edited_masked_imgs_512')
ORIGINAL_IMGS = os.path.join(SHARED_FOLDER, 'original_imgs_512')
EDITED_IMGS = os.path.join(SHARED_FOLDER, 'edited_imgs_512')

for folder_name in os.listdir(EDITED_IMGS):
    # Edited folders are named '<prompt>.obj - <instruction>'; the part before
    # ' - ' is the name of the folder holding the original renders.
    original_folder = os.path.join(ORIGINAL_IMGS, folder_name.split(' - ')[0])
    edited_folder = os.path.join(EDITED_IMGS, folder_name)
    output_folder = os.path.join(OUTPUT_IMGS, folder_name)

    if not os.path.exists(original_folder):
        print(f'Skipping {folder_name}')
        continue

    for img_name in os.listdir(edited_folder):
        full_org_img_path = os.path.join(original_folder, img_name)
        full_edit_img_path = os.path.join(edited_folder, img_name)

        if not os.path.exists(full_org_img_path):
            print(f'Skipping {folder_name}/{img_name}')
            continue

        # cv2.imread returns None (it does not raise) for unreadable files.
        org_img = cv2.imread(full_org_img_path)
        edit_img = cv2.imread(full_edit_img_path)
        if org_img is None or edit_img is None:
            print(f'Skipping {folder_name}/{img_name} (unreadable image)')
            continue

        print(f'Masking {folder_name}/{img_name}')

        # With threshold 0 and THRESH_BINARY_INV, pixels whose gray value is 0
        # become 255 and every other pixel becomes 0.
        org_gray = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)
        _, org_mask = cv2.threshold(org_gray, 0, 255, cv2.THRESH_BINARY_INV)
        org_contours, _ = cv2.findContours(org_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # The second-largest contour is used as the object outline.
        # NOTE(review): presumably the largest one is the background/frame -
        # confirm against the renders. Guard against images that do not have
        # two contours instead of failing with an IndexError.
        if len(org_contours) < 2:
            print(f'Skipping {folder_name}/{img_name} (no object contour found)')
            continue
        org_contours = [sorted(org_contours, key=cv2.contourArea, reverse=True)[1]]

        # Paint the contour as a filled white mask, then copy the edited
        # pixels into the masked area; everything outside stays black.
        new_img = np.zeros_like(org_img, dtype=np.uint8)
        cv2.drawContours(new_img, org_contours, -1, (255, 255, 255), -1)
        region = cv2.bitwise_and(edit_img, new_img)
        new_img[new_img != 0] = region[new_img != 0]

        # cv2.imshow('New Image', new_img)
        # cv2.waitKey(0)

        # makedirs also creates OUTPUT_IMGS itself when it does not exist yet.
        os.makedirs(output_folder, exist_ok=True)
        cv2.imwrite(os.path.join(output_folder, img_name), new_img)

cv2.destroyAllWindows()

Masking a banana.obj - Add chocolate syrup/0.png
Masking a birthday cake.obj - Add a lot of birthday candles to the brithday cake/0.png
Masking a cheeseburger.obj - Make the bun out of chocolate/0.png
Masking a golden retriever dog.obj - Turn the dog into a husky breed/0.png
Masking a pumpkin.obj - Carve a spooky face on the pumpkin/0.png
Masking a red couch.obj - Make the couch into lego/0.png
Masking a red couch.obj - Place couch cushions on top of the couch/0.png
Masking a rubber duck.obj - Make the rubber duck minecraft style/0.png
Masking a water bottle.obj - Give the water bottle a metallic finish/0.png
Masking an apple.obj - Make the apple plaid/0.png
Masking an apple.obj - Transform the apple into a green delicious apple/0.png


# Apply InstructPix2Pix

In [20]:
import torch
import os
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from PIL import Image

In [21]:
# Load the pretrained InstructPix2Pix pipeline with float16 weights.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    # No safety checker component is attached to the pipeline.
    safety_checker=None,
)
# NOTE(review): the target is hard-coded to "cuda" rather than the notebook's
# `device`; presumably this cell fails on machines without CUDA - confirm.
pipe.to("cuda")
# Replace the scheduler with an Euler-ancestral one built from the pipeline's
# existing scheduler configuration.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [26]:
# Run InstructPix2Pix on one render per prompt folder and save the result to
# <SHARED_FOLDER>/edited_imgs_512/<folder> - <instruction>/<image name>.
ORIGINAL_IMGS_DIR = os.path.join(SHARED_FOLDER, 'original_imgs_512')
EDITED_IMGS_DIR = os.path.join(SHARED_FOLDER, 'edited_imgs_512')

RESOLUTION = 512
STEPS = 100
CFG_TEXT = 11.5  # Default is 7.5
CFG_IMAGE = 3.5 # Default is 1.5

os.makedirs(EDITED_IMGS_DIR, exist_ok=True)

for folder in os.listdir(ORIGINAL_IMGS_DIR):
    # NOTE: hard-coded filter - only folders whose name contains 'apple' run.
    if 'apple' not in folder:
        continue
    folder_path = os.path.join(ORIGINAL_IMGS_DIR, folder)
    if folder == '.ipynb_checkpoints' or not os.path.isdir(folder_path):
        continue

    # Decide whether to skip before creating anything on disk, so skipped
    # folders do not leave an empty output directory behind.
    if 'traffic' in folder:
        print('Skipping Cone...')
        continue

    # Folders are named '<prompt>.obj'. A folder without a matching prompt in
    # INSTRUCTIONS is skipped instead of aborting the loop with a KeyError.
    edited_instruction = INSTRUCTIONS.get(folder.replace('.obj', ''))
    if edited_instruction is None:
        print(f'Skipping {folder}: no entry in INSTRUCTIONS')
        continue

    # Create a corresponding folder in edited images directory
    edited_folder_path = os.path.join(EDITED_IMGS_DIR, f'{folder} - {edited_instruction}')
    os.makedirs(edited_folder_path, exist_ok=True)

    # Future Work: Support for many images. For now only the first regular
    # file is edited; sorting makes that choice independent of listing order.
    img_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    for img_file in img_files[:1]:
        img_path = os.path.join(folder_path, img_file)

        print(f'Running {img_path}')
        output_path = os.path.join(edited_folder_path, img_file)
        # The context manager closes the source image file once the pipeline
        # has produced the edited image.
        with Image.open(img_path) as org_img:
            edit_img = pipe(
                edited_instruction,
                image=org_img,
                image_guidance_scale=CFG_IMAGE,
                guidance_scale=CFG_TEXT,
                num_inference_steps=STEPS
            ).images[0]
        edit_img.save(output_path)
    
    

Running C:\Users\Almog\Dev\shap-e\project\shared\original_imgs_512\an apple.obj\0.png


  0%|          | 0/100 [00:00<?, ?it/s]