In [4]:
%%capture
!pip install torch torchvision torchaudio
!pip install diffusers accelerate transformers open3d --extra-index-url https://download.pytorch.org/whl/cpu

import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline
from transformers import pipeline

# Configuration
# Sampling grid: the image is RESOLUTION x RESOLUTION pixels and every
# STRIDE-th pixel in each direction becomes one point of the cloud.
RESOLUTION = 128  # Keep low for 8GB RAM
STRIDE = 8        # Higher = fewer points

# All generated files are written to the current working directory.
OUTPUT_DIR = Path.cwd()

# Use the Apple-silicon (MPS) backend when PyTorch reports it, otherwise the CPU.
DEVICE = "cpu"
if torch.backends.mps.is_available():
    DEVICE = "mps"

def safe_3d_generation(prompt: str) -> Path:
    """Generate a coloured point cloud from a text prompt and save it as a PLY file.

    Three stages run in sequence; each model is released before the next loads:

    1. Stable Diffusion renders ``prompt`` to a RESOLUTION x RESOLUTION image,
       which is also saved to ``OUTPUT_DIR / "generated_image.png"``.
    2. A DPT depth-estimation pipeline predicts a depth map for that image.
    3. Every STRIDE-th pixel becomes one point
       ``(x / RESOLUTION, y / RESOLUTION, depth / 255)`` coloured with the
       pixel's colour value divided by 255.

    Args:
        prompt: Text description passed to the text-to-image model.

    Returns:
        Path of the written point cloud, ``OUTPUT_DIR / "output_3d.ply"``.

    Raises:
        RuntimeError: If Open3D reports that the PLY file could not be written.
    """
    on_mps = DEVICE == "mps"
    # Half precision only on the accelerator path: DEVICE can fall back to "cpu",
    # and diffusers warns that float16 pipelines are not expected to run there.
    dtype = torch.float16 if on_mps else torch.float32

    # Stage 1: Text-to-image
    sd_pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=dtype,
        variant="fp16",
        use_safetensors=True
    ).to(DEVICE)

    sd_pipe.enable_attention_slicing()
    image = sd_pipe(
        prompt,
        num_inference_steps=15,
        height=RESOLUTION,
        width=RESOLUTION
    ).images[0]

    # Save intermediate image
    img_path = OUTPUT_DIR / "generated_image.png"
    image.save(img_path)

    # Drop the diffusion model before loading the depth model to limit peak memory.
    del sd_pipe
    if on_mps:
        # MPS-specific call; must not run when DEVICE fell back to "cpu".
        torch.mps.empty_cache()

    # Stage 2: Depth estimation
    # The transformers pipeline is placed on a device via the `device` argument;
    # the returned Pipeline object does not offer a `.to()` method.
    depth_pipe = pipeline(
        "depth-estimation",
        "Intel/dpt-hybrid-midas",
        device=DEVICE,
        torch_dtype=dtype
    )

    depth_map = depth_pipe(image)["depth"]
    del depth_pipe
    if on_mps:
        torch.mps.empty_cache()

    # Stage 3: Point cloud generation
    depth_array = np.array(depth_map.resize((RESOLUTION, RESOLUTION))) / 255.0
    color_array = np.array(image.resize((RESOLUTION, RESOLUTION))) / 255.0

    # Row/column index grids of the sampled pixels. Raveling them walks rows (y)
    # in the outer position and columns (x) in the inner one, i.e. the same point
    # order a nested `for y: for x:` loop would produce.
    ys, xs = np.mgrid[0:RESOLUTION:STRIDE, 0:RESOLUTION:STRIDE]
    points = np.column_stack((
        xs.ravel() / RESOLUTION,
        ys.ravel() / RESOLUTION,
        depth_array[ys, xs].ravel()
    ))
    # assumes `image` is 3-channel RGB, so each sampled pixel yields one colour row
    colors = color_array[ys, xs].reshape(-1, 3)

    # Create and save point cloud
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)

    ply_path = OUTPUT_DIR / "output_3d.ply"
    # write_point_cloud signals failure through its boolean return value.
    if not o3d.io.write_point_cloud(str(ply_path), pcd):
        raise RuntimeError(f"Failed to write point cloud to {ply_path}")

    return ply_path

# Build the point cloud for a sample prompt, then report where the PLY file landed
output_file = safe_3d_generation(prompt="A modern office chair")
print(f"3D output saved to: {output_file}")

python(4900) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(4901) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


KeyboardInterrupt: 

In [None]:
%%capture
!pip install torch torchvision torchaudio
!pip install diffusers accelerate transformers open3d --extra-index-url https://download.pytorch.org/whl/cpu

import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverSinglestepScheduler

# Speed-optimized configuration
# Prefer the Apple-silicon (MPS) backend, but fall back to the CPU when PyTorch
# does not report it, instead of failing on the first `.to("mps")`.
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RESOLUTION = 128  # 128px (don't go lower than 96)
INFERENCE_STEPS = 12  # Minimum for decent quality
STRIDE = 6 # Balance detail/speed
OUTPUT_DIR = Path.cwd()

def fast_3d_generation(prompt: str) -> Path:
    """Generate a coloured point cloud from a text prompt and save it as a PLY file.

    Stages, each releasing its model before the next one loads:

    1. The "segmind/SSD-1B" checkpoint renders ``prompt`` to a
       RESOLUTION x RESOLUTION image, saved to ``OUTPUT_DIR / "preview.png"``.
    2. A DPT depth-estimation pipeline predicts a depth map for that image.
    3. Every STRIDE-th pixel becomes one point
       ``(x / RESOLUTION, y / RESOLUTION, depth / 255)`` coloured with the
       pixel's colour value divided by 255.

    Args:
        prompt: Text description passed to the text-to-image model.

    Returns:
        Path of the written point cloud, ``OUTPUT_DIR / "output_3d.ply"``.

    Raises:
        RuntimeError: If Open3D reports that the PLY file could not be written.
    """
    # Function-scope imports: the import block of this cell provides neither name,
    # so on a fresh kernel the depth stage would otherwise stop with a NameError.
    from diffusers import StableDiffusionXLPipeline
    from transformers import pipeline

    on_mps = DEVICE == "mps"
    # Half precision only on the accelerator path; float32 on any other device.
    dtype = torch.float16 if on_mps else torch.float32

    # 1. Text-to-image
    # SSD-1B is an SDXL-architecture checkpoint, so it has to be loaded with the
    # SDXL pipeline class rather than the SD 1.x `StableDiffusionPipeline`.
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "segmind/SSD-1B",
        torch_dtype=dtype,
        variant="fp16",
        use_safetensors=True
    ).to(DEVICE)

    pipe.scheduler = DPMSolverSinglestepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_attention_slicing()

    image = pipe(
        prompt,
        num_inference_steps=INFERENCE_STEPS,
        height=RESOLUTION,
        width=RESOLUTION
    ).images[0]

    # Save and clean up
    img_path = OUTPUT_DIR / "preview.png"
    image.save(img_path)
    del pipe
    if on_mps:
        # MPS-specific call; skipped on any other device.
        torch.mps.empty_cache()

    # 2. Depth estimation
    depth_pipe = pipeline(
        "depth-estimation",
        "Intel/dpt-hybrid-midas",
        device=DEVICE,
        torch_dtype=dtype
    )

    depth_map = depth_pipe(image)["depth"]
    del depth_pipe
    if on_mps:
        torch.mps.empty_cache()

    # 3. Vectorized point cloud creation
    depth_array = np.array(depth_map.resize((RESOLUTION, RESOLUTION))) / 255.0
    color_array = np.array(image.resize((RESOLUTION, RESOLUTION))) / 255.0

    # Vectorized grid sampling: x holds column indices, y holds row indices.
    x, y = np.meshgrid(np.arange(0, RESOLUTION, STRIDE),
                      np.arange(0, RESOLUTION, STRIDE))
    points = np.column_stack((
        x.flatten()/RESOLUTION,
        y.flatten()/RESOLUTION,
        depth_array[y, x].flatten()
    ))
    # assumes `image` is 3-channel RGB, so each sampled pixel yields one colour row
    colors = color_array[y, x].reshape(-1, 3)

    # Save final output
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)

    ply_path = OUTPUT_DIR / "output_3d.ply"
    # write_point_cloud signals failure through its boolean return value.
    if not o3d.io.write_point_cloud(str(ply_path), pcd):
        raise RuntimeError(f"Failed to write point cloud to {ply_path}")

    return ply_path

# Run the pipeline on a sample prompt (author's estimate: ~1-2 mins in total)
output_path = fast_3d_generation(prompt="A minimalist desk lamp")
print(f"Generated in {output_path}")

python(6064) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(6065) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Error while downloading from https://cdn-lfs.hf.co/repos/7c/29/7c2937659b52c19a0f95a3263e36d666c75eace5f0b2074383042784e0eee26a/40d8ea9159f3e875278dacc7879442d58c45850cf13c62f5e26681061c51829a?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27diffusion_pytorch_model.fp16.safetensors%3B+filename%3D%22diffusion_pytorch_model.fp16.safetensors%22%3B&Expires=1738195270&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczODE5NTI3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy83Yy8yOS83YzI5Mzc2NTliNTJjMTlhMGY5NWEzMjYzZTM2ZDY2NmM3NWVhY2U1ZjBiMjA3NDM4MzA0Mjc4NGUwZWVlMjZhLzQwZDhlYTkxNTlmM2U4NzUyNzhkYWNjNzg3OTQ0MmQ1OGM0NTg1MGNmMTNjNjJmNWUyNjY4MTA2MWM1MTgyOWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=iXGrIbkWJrXE8uY3Jp5ycDKZ-17

In [1]:
%%capture
import os
os.environ['MallocStackLogging'] = '0'

!pip install torch torchvision torchaudio
!pip install diffusers accelerate transformers open3d --extra-index-url https://download.pytorch.org/whl/cpu

import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

# Ultra-fast configuration
# Prefer the Apple-silicon (MPS) backend, but fall back to the CPU when PyTorch
# does not report it, instead of failing on the first `.to("mps")`.
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RES = 128  # Minimum viable resolution
STEPS = 8  # Fewer diffusion steps
STRIDE = 8  # Sparse sampling

def optimized_3d(prompt: str) -> Path:
    """Generate a coloured point cloud from a text prompt and save it as a PLY file.

    Stages, each releasing its model before the next one loads:

    1. The "OFA-Sys/small-stable-diffusion-v0" checkpoint renders ``prompt`` to
       a RES x RES image, saved to ``preview.png`` in the working directory.
    2. A DPT depth-estimation pipeline predicts a depth map for that image.
    3. Every pixel whose row and column are both multiples of STRIDE becomes one
       point ``(x / RES, y / RES, depth / 255)`` coloured with the pixel's
       colour value divided by 255.

    Args:
        prompt: Text description passed to the text-to-image model.

    Returns:
        Path of the written point cloud, ``output.ply`` in the working directory.

    Raises:
        RuntimeError: If Open3D reports that the PLY file could not be written.
    """
    # Function-scope import: the import block of this cell does not provide
    # `pipeline`, so on a fresh kernel the depth stage would raise a NameError.
    from transformers import pipeline

    on_mps = DEVICE == "mps"
    # Half precision only on the accelerator path; float32 on any other device.
    sd_dtype = torch.float16 if on_mps else torch.float32

    # 1. Text-to-image
    pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",  # author's note: verified working on MPS
        torch_dtype=sd_dtype
    ).to(DEVICE)

    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_attention_slicing()

    image = pipe(
        prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=3.0
    ).images[0]

    # Save and cleanup
    img_path = Path.cwd() / "preview.png"
    image.save(img_path)
    del pipe
    if on_mps:
        # MPS-specific call; skipped on any other device.
        torch.mps.empty_cache()

    # 2. Depth estimation
    depth_pipe = pipeline(
        "depth-estimation",
        "Intel/dpt-swinv2-tiny-256",  # Lightweight model
        device=DEVICE
    )
    depth_map = depth_pipe(image)["depth"]
    del depth_pipe
    if on_mps:
        torch.mps.empty_cache()

    # 3. Point cloud
    depth_array = np.array(depth_map.resize((RES, RES))) / 255.0
    color_array = np.array(image.resize((RES, RES))) / 255.0

    # Grid sampling: keep only pixels whose row and column are multiples of STRIDE.
    y, x = np.indices((RES, RES))
    mask = (x % STRIDE == 0) & (y % STRIDE == 0)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(np.column_stack((
        x[mask]/RES, y[mask]/RES, depth_array[mask]
    )))
    # assumes `image` is 3-channel RGB, so the boolean mask yields one colour row per point
    pcd.colors = o3d.utility.Vector3dVector(color_array[mask])

    ply_path = Path.cwd() / "output.ply"
    # write_point_cloud signals failure through its boolean return value.
    if not o3d.io.write_point_cloud(str(ply_path), pcd):
        raise RuntimeError(f"Failed to write point cloud to {ply_path}")

    return ply_path

# Run the pipeline on a sample prompt (author's estimate: ~55s-1.5m in total)
output = optimized_3d(prompt="A simple chair")
print(f"3D generated: {output}")

KeyboardInterrupt: 