In [1]:
import os
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

# Fast configuration
DEVICE = "mps"
RES = 96         # Lower resolution
STEPS = 6        # Fewer diffusion steps
STRIDE = 12      # Sparser sampling

def optimized_3d(prompt: str) -> Path:
    """Fast 3D generation (~37s) with reduced complexity."""
    
    # 1. Text-to-image generation (~20s)
    pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",
        torch_dtype=torch.float16
    ).to(DEVICE)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_attention_slicing()
    
    image = pipe(
        prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=3.0
    ).images[0]
    
    # Save preview image to the root directory
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache()
    
    # 2. Depth estimation (~10s)
    depth_pipe = StableDiffusionPipeline.from_pretrained(
        "Intel/dpt-swinv2-tiny-256",
        torch_dtype=torch.float16
    ).to(DEVICE)
    depth_map = depth_pipe(image)["depth"]
    del depth_pipe
    torch.mps.empty_cache()
    
    # 3. Point cloud generation (~7s)
    # Resize depth map to match RES and convert to numpy array
    depth_array = np.array(depth_map.resize((RES, RES))) / 255.0
    
    # Create a simple grid using slicing instead of detailed indices
    grid_x = np.linspace(0, 1, RES)[::STRIDE]
    grid_y = np.linspace(0, 1, RES)[::STRIDE]
    xx, yy = np.meshgrid(grid_x, grid_y)
    # Use the corresponding depth values with slicing
    depth_sliced = depth_array[::STRIDE, ::STRIDE]
    points = np.column_stack((xx.flatten(), yy.flatten(), depth_sliced.flatten()))
    
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    # Assign a constant gray color to all points (removing the per-pixel color computation)
    constant_color = np.tile(np.array([[0.7, 0.7, 0.7]]), (points.shape[0], 1))
    pcd.colors = o3d.utility.Vector3dVector(constant_color)
    
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    return ply_path

# Execute
output = optimized_3d("A simple chair")
print(f"3D generated: {output}")


  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

An error occurred while trying to fetch /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
The config attributes {'predict_ep

  0%|          | 0/6 [00:00<?, ?it/s]

  images = (images * 255).round().astype("uint8")
Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


EntryNotFoundError: 404 Client Error. (Request ID: Root=1-67b13769-26b34bc846746d5b7452d76b;facfb129-b345-4290-9017-982f344496cc)

Entry Not Found for url: https://huggingface.co/Intel/dpt-swinv2-tiny-256/resolve/main/model_index.json.

In [5]:
import os
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image

# Fast configuration
DEVICE = "mps"
RES = 96         # Lower resolution
STEPS = 6        # Fewer diffusion steps
STRIDE = 12      # Sparser sampling

def optimized_3d(prompt: str) -> Path:
    """Fast 3D generation (~37s) with reduced complexity using transformers for depth estimation."""
    
    # 1. Text-to-image generation (~20s)
    pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",
        torch_dtype=torch.float16
    ).to(DEVICE)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_attention_slicing()
    
    image = pipe(
        prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=3.0
    ).images[0]
    
    # Save preview image to the root directory
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache()
    
    # 2. Depth estimation (~10s) using transformers DPT model
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-swinv2-tiny")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny").to(DEVICE, torch.float16)
    inputs = feature_extractor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(DEVICE)
    with torch.no_grad():
        outputs = depth_model(pixel_values)
    predicted_depth = outputs.predicted_depth
    # Resize depth map to (RES, RES)
    predicted_depth = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(RES, RES),
        mode="bicubic",
        align_corners=False
    ).squeeze().cpu().numpy()
    # Normalize depth map to 0-1
    depth_min = predicted_depth.min()
    depth_max = predicted_depth.max()
    depth_array = (predicted_depth - depth_min) / (depth_max - depth_min)
    torch.mps.empty_cache()
    
    # 3. Point cloud generation (~7s)
    grid_x = np.linspace(0, 1, RES)[::STRIDE]
    grid_y = np.linspace(0, 1, RES)[::STRIDE]
    xx, yy = np.meshgrid(grid_x, grid_y)
    depth_sliced = depth_array[::STRIDE, ::STRIDE]
    points = np.column_stack((xx.flatten(), yy.flatten(), depth_sliced.flatten()))
    
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    # Assign a constant gray color to all points
    constant_color = np.tile(np.array([[0.7, 0.7, 0.7]]), (points.shape[0], 1))
    pcd.colors = o3d.utility.Vector3dVector(constant_color)
    
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    return ply_path

# Execute
output = optimized_3d("A simple chair")
print(f"3D generated: {output}")


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

An error occurred while trying to fetch /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /Users/adamaslan/.cache/huggingface/hub/models--OFA-Sys--small-stable-diffusion-v0/snapshots/38e10e5e71e8fbf717a47a81e7543cd01c1a8140/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
The config attributes {'predict_ep

  0%|          | 0/6 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


OSError: Intel/dpt-swinv2-tiny is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
import os
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image

# Configuration
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RES = 96         # Reduced resolution for speed
STEPS = 6        # Fewer inference steps
STRIDE = 12      # Point cloud sampling stride

def generate_3d_from_text(prompt: str) -> Path:
    """Generate 3D point cloud from text prompt with depth estimation."""
    
    # 1. Text-to-image generation
    pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",
        torch_dtype=torch.float16
    ).to(DEVICE)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    
    image = pipe(
        prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=3.0
    ).images[0]
    
    # Save preview
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache() if DEVICE == "mps" else torch.cuda.empty_cache()
    
    # 2. Depth estimation (corrected model ID)
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = depth_model.to(DEVICE).half()
    
    inputs = feature_extractor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(DEVICE, torch.float16)
    
    with torch.no_grad():
        outputs = depth_model(pixel_values)
    
    # Process depth map
    predicted_depth = outputs.predicted_depth
    predicted_depth = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(RES, RES),
        mode="bicubic",
        align_corners=False
    ).squeeze().cpu().numpy()
    
    # Normalize depth values
    depth_min, depth_max = predicted_depth.min(), predicted_depth.max()
    depth_array = (predicted_depth - depth_min) / (depth_max - depth_min)
    
    # 3. Create point cloud
    grid = np.mgrid[0:RES:STRIDE, 0:RES:STRIDE].reshape(2, -1).T
    normalized_coords = grid / RES
    depth_values = depth_array[grid[:, 0], grid[:, 1]]
    
    points = np.column_stack((
        normalized_coords[:, 1],   # X axis (width)
        1 - normalized_coords[:, 0],  # Y axis (height flipped)
        depth_values              # Z axis (depth)
    ))
    
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(np.full_like(points, 0.7))  # Gray color
    
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    return ply_path

# Example usage with user prompt
user_prompt = "A modern chair with minimalist design"  # <<-- CHANGE THIS TEXT to modify the output
result = generate_3d_from_text(user_prompt)
print(f"Generated 3D model: {result}")

KeyboardInterrupt: 

In [None]:
# might work but 3.4 gig model
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image

# Configuration
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RES = 256        # Increased resolution to reduce NSFW false positives
STEPS = 15       # Slightly more steps for better quality
STRIDE = 10      # Balanced point cloud density

def generate_3d_model(prompt: str) -> Path:
    """Generate 3D point cloud from text prompt with NSFW filter bypass"""
    
    # 1. Text-to-image generation with safety override
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16,
        safety_checker=None,  # Disable NSFW filter
        requires_safety_checker=False
    ).to(DEVICE)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    
    # Enhanced prompt engineering
    processed_prompt = f"{prompt}, clean 3D model, orthographic view, blender render, studio lighting"
    
    image = pipe(
        processed_prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=7.5,
        negative_prompt="text, watermark, low quality, blurry, unsafe"  # Extra safety
    ).images[0]
    
    # Save preview
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache() if DEVICE == "mps" else torch.cuda.empty_cache()
    
    # 2. Depth estimation with corrected model
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = depth_model.to(DEVICE).half()
    
    inputs = feature_extractor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(DEVICE, torch.float16)
    
    with torch.no_grad():
        outputs = depth_model(pixel_values)
    
    # Process depth map
    predicted_depth = outputs.predicted_depth
    predicted_depth = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(RES, RES),
        mode="bicubic",
        align_corners=False
    ).squeeze().cpu().numpy()
    
    # Normalize depth values
    depth_min, depth_max = predicted_depth.min(), predicted_depth.max()
    depth_array = (predicted_depth - depth_min) / (depth_max - depth_min)
    
    # 3. Create optimized point cloud
    grid = np.stack(np.meshgrid(np.linspace(0, 1, RES//STRIDE), 
                               np.linspace(0, 1, RES//STRIDE)), 
                axis=-1).reshape(-1, 2)
    
    x = grid[:, 0]
    y = grid[:, 1]
    z = depth_array[::STRIDE, ::STRIDE].flatten()
    
    points = np.column_stack((
        x,         # X-axis
        1 - y,     # Y-axis (flipped)
        z * 0.5    # Z-axis (scaled for better visualization)
    ))
    
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(np.tile([0.7, 0.7, 0.7], (len(points), 1)))
    
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    return ply_path

# Usage - Change this prompt to test different objects
user_input = "A simple wooden chair with four legs and backrest"
result = generate_3d_model(user_input)
print(f"Generated 3D model saved to: {result}")

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [10]:
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image

# Configuration
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RES = 96
STEPS = 15
STRIDE = 10

def generate_3d_model(prompt: str) -> Path:
    """Generate 3D point cloud from text prompt using SFW-gen-v2 model"""
    
    # 1. Text-to-image generation with NSFW-gen-v2
    try:
        pipe = DiffusionPipeline.from_pretrained(
            "UnfilteredAI/NSFW-gen-v2",
            torch_dtype=torch.float16,
            safety_checker=None,  # Disable safety checks
            requires_safety_checker=False
        ).to(DEVICE)
    except Exception as e:
        raise ValueError(f"Model loading failed: {str(e)}. Please check model availability on Hugging Face Hub")

    # Configure scheduler
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    
    # Enhanced prompt engineering
    processed_prompt = f"{prompt}, clean 3D model, orthographic view, blender render, studio lighting"
    
    # Generate image
    image = pipe(
        processed_prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=7.5,
        negative_prompt="text, watermark, low quality, blurry, unsafe"
    ).images[0]
    
    # Save preview
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache() if DEVICE == "mps" else torch.cuda.empty_cache()
    
    # 2. Depth estimation (remaining code stays the same)
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = depth_model.to(DEVICE).half()
    
    inputs = feature_extractor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(DEVICE, torch.float16)
    
    with torch.no_grad():
        outputs = depth_model(pixel_values)
    
    # Process depth map
    predicted_depth = outputs.predicted_depth
    predicted_depth = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(RES, RES),
        mode="bicubic",
        align_corners=False
    ).squeeze().cpu().numpy()
    
    # Normalize depth values
    depth_min, depth_max = predicted_depth.min(), predicted_depth.max()
    depth_array = (predicted_depth - depth_min) / (depth_max - depth_min)
    
    # 3. Point cloud generation (remaining code stays the same)
    grid = np.stack(np.meshgrid(np.linspace(0, 1, RES//STRIDE), 
                               np.linspace(0, 1, RES//STRIDE)), 
                axis=-1).reshape(-1, 2)
    
    x = grid[:, 0]
    y = grid[:, 1]
    z = depth_array[::STRIDE, ::STRIDE].flatten()
    
    points = np.column_stack((
        x,         # X-axis
        1 - y,     # Y-axis (flipped)
        z * 0.5    # Z-axis scaled
    ))
    
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(np.tile([0.7, 0.7, 0.7], (len(points), 1)))
    
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    return ply_path

# Usage
user_input = "A simple wooden chair with four legs and backrest"
result = generate_3d_model(user_input)
print(f"Generated 3D model saved to: {result}")

KeyboardInterrupt: 

In [None]:
import torch
import open3d as o3d
import numpy as np
from pathlib import Path
from diffusers import DiffusionPipeline
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image

# Configuration
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
RES = 128                  # Reduced resolution for faster processing
STEPS = 4                  # Fewer inference steps for LCM model
STRIDE = 8                 # Increased stride for point cloud downsampling

def generate_3d_model(prompt: str) -> Path:
    """Optimized 3D model generation pipeline for Apple Silicon"""
    
    # 1. Fast text-to-image generation with LCM
    try:
        pipe = DiffusionPipeline.from_pretrained(
            "SimianLuo/LCM_Dreamshaper_v7",
            torch_dtype=torch.float16,
            safety_checker=None,
        ).to(DEVICE)
    except Exception as e:
        raise ValueError(f"Model loading failed: {str(e)}")

    # Generate optimized prompt
    processed_prompt = f"{prompt}, clean 3D model, blender render, studio lighting, white background"
    
    # Generate image with LCM parameters
    image = pipe(
        processed_prompt,
        num_inference_steps=STEPS,
        height=RES,
        width=RES,
        guidance_scale=1.0,        # Lower guidance scale for LCM
        output_type="pil",
    ).images[0]
    
    # Save preview and cleanup
    preview_path = Path("preview.png")
    image.save(preview_path)
    del pipe
    torch.mps.empty_cache()

    # 2. Memory-efficient depth estimation
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256")
    depth_model = depth_model.to(DEVICE).half()  # Half-precision
    
    # Process image through feature extractor
    inputs = feature_extractor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(DEVICE, torch.float16)

    with torch.no_grad():
        outputs = depth_model(pixel_values)

    # Process depth map
    predicted_depth = outputs.predicted_depth
    predicted_depth = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(RES, RES),
        mode="bicubic",
        align_corners=False,
    ).squeeze().cpu().numpy()

    # Normalize depth values
    depth_min, depth_max = predicted_depth.min(), predicted_depth.max()
    depth_array = (predicted_depth - depth_min) / (depth_max - depth_min + 1e-6)

    # 3. Optimized point cloud generation
    n_points = RES // STRIDE
    grid = np.stack(np.meshgrid(
        np.linspace(0, 1, n_points),
        np.linspace(0, 1, n_points)
    ), axis=-1).reshape(-1, 2)

    x = grid[:, 0]
    y = grid[:, 1]
    z = depth_array[::STRIDE, ::STRIDE].flatten()

    points = np.column_stack((
        x,         # X-axis
        1 - y,     # Y-axis (flipped)
        z * 0.5    # Z-axis scaled
    ))

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(np.tile([0.7, 0.7, 0.7], (len(points), 1)))
    
    # Save output
    ply_path = Path("output.ply")
    o3d.io.write_point_cloud(str(ply_path), pcd)
    
    # Final cleanup
    del depth_model, feature_extractor, inputs
    torch.mps.empty_cache()
    
    return ply_path

# Usage example
user_input = "A simple wooden chair with four legs and backrest"
result = generate_3d_model(user_input)
print(f"Generated 3D model saved to: {result}")