In [16]:
from diffusers import StableDiffusionPipeline
from peft import get_peft_model, LoraConfig
import torch


In [17]:


# Load the base Stable Diffusion model (v1.4)
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
    revision="fp16"
).to("cuda")



 The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title 'CompVis/stable-diffusion-v1-4 is missing fp16 files' so that the correct variant file can be added.
Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]An error occurred while trying to fetch /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a6766a227587/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
Loading pipeline components...:  43%|████▎     | 3/7 [00:00<00:00, 13.05it/s]An error occurred while trying to fetch /root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/2880f2ca379f41b0226444936bb7a

In [18]:
def generate_infographic_prompt(entry):
    title = entry.get("title", "Untitled")
    organism = entry.get("organism", "Unknown organism")
    components = entry.get("functional_components", [])

    n_layers = len(components)

    # Build the prompt
    prompt = f"""A scientific infographic titled “{title}” centered at the top on one line.
The image shows a vertically exploded, isometric diagram with exactly {n_layers} layers, spaced evenly in scale-accurate order from top (largest) to bottom (smallest), against a neutral grey background (RGB #CCCCCC) with a light grey isometric grid overlay.

🔝 TOP LAYER — ORGANISM VISUAL + FUNCTION
* The top layer is a high-resolution, photo-realistic, full-color image of the organism “{organism}”.
* It is in isometric view, aligned with the orientation of all lower layers.
* The organism appears isolated — no natural or photographic environment, just the grey background.
* ✅ Must look photographic, naturalistic, and realistic
* 🚫 No cartoon, stylized, artistic, or 3D-rendered imagery
* 🚫 No environmental scenes (e.g., desert, leaf, water, landscape)
Note: This layer is the largest and includes no caption or scale label.

📚 LAYERS (Functional Components, Top to Bottom)
"""

    for i, comp in enumerate(components, start=1):
        name = comp.get("layer_name", f"Layer {i}")
        function = comp.get("function_caption", "No function provided")
        scale = comp.get("scale_label", "No scale")

        prompt += f"""
{i}. {name}
    * Visual: Draw using black linework only — clean, minimal, architectural-style.
        * Must follow scientifically accurate structure based on microscope or literature reference.
        * Render in isometric alignment.
        * 🚫 No shading, no 3D rendering, no color
    * Function Caption:
        * Position: Left of the layer and aligned on the left : "{function}"  
    * Scale Label:
        * Position: Right of the layer
        * Format: e.g., “{scale}”
        * Connected to a white vertical scientific ruler with tick marks matching all layers using logarithmic spacing
"""

    # Style rules
    prompt += """
🎨 VISUAL STYLE & RULES
* Background:
    * Flat grey (RGB #CCCCCC)
    * With light grey isometric grid overlay (engineering blueprint style)
* Ruler:
    * Positioned right side
    * Vertical, white, with black tick marks (logarithmic scale)
* Alignment Lines:
    * Thin, black vertical lines connecting the center of all layers
* Spacing:
    * Equal vertical spacing
    * Layer sizes must decrease top to bottom to reflect true scientific scale hierarchy
* Font:
    * Uniform sans-serif font, small size
    * No overlapping with visuals
    * Title: centered and bold; all other text regular weight

🚫 DO NOT INCLUDE:
* Any background behind the organism (no sand, water, plants, etc.)
* 3D renderings, shading, drop shadows, lighting effects
* Color in any layer except the organism
* Stylized or symbolic interpretations of structures
* Captions on the right or inside diagrams
* Layer sizes out of scientific order (no reverse or arbitrary size)
"""

    return prompt


In [19]:
import json


with open("leaf.json", "r") as f:
    data = json.load(f)
model = data["models"][0]
prompt = generate_infographic_prompt(model)
prompt

'A scientific infographic titled “Lotus Leaf Superhydrophobic Surface” centered at the top on one line.\nThe image shows a vertically exploded, isometric diagram with exactly 4 layers, spaced evenly in scale-accurate order from top (largest) to bottom (smallest), against a neutral grey background (RGB #CCCCCC) with a light grey isometric grid overlay.\n\n🔝 TOP LAYER — ORGANISM VISUAL + FUNCTION\n* The top layer is a high-resolution, photo-realistic, full-color image of the organism “Nelumbo nucifera (Sacred Lotus)”.\n* It is in isometric view, aligned with the orientation of all lower layers.\n* The organism appears isolated — no natural or photographic environment, just the grey background.\n* ✅ Must look photographic, naturalistic, and realistic\n* 🚫 No cartoon, stylized, artistic, or 3D-rendered imagery\n* 🚫 No environmental scenes (e.g., desert, leaf, water, landscape)\nNote: This layer is the largest and includes no caption or scale label.\n\n📚 LAYERS (Functional Components, Top t

In [20]:
# Generate an image from a prompt
#prompt = "a mountain landscape with a river and a fox in the foreground, highly detailed, photorealistic"
image = pipe(prompt, guidance_scale=7.5).images[0]

# Save the image
image.save("stable_diffusion.png")


Token indices sequence length is longer than the specified maximum sequence length for this model (921 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['grid overlay . 🔝 top layer — organism visual + function * the top layer is a high - resolution , photo - realistic , full - color image of the organism “ nelumbo nucifera ( sacred lotus )”. * it is in isometric view , aligned with the orientation of all lower layers . * the organism appears isolated — no natural or photographic environment , just the grey background . * ✅ must look photographic , naturalistic , and realistic * 🚫 no cartoon , stylized , artistic , or 3 d - rendered imagery * 🚫 no environmental scenes ( e . g ., desert , leaf , water , landscape ) note : this layer is the largest and includes no caption or scale label . 📚 layers ( functional components , top to bottom ) 1 . layer 1 * visual

100%|██████████| 50/50 [00:02<00:00, 21.16it/s]
