In [None]:
# %pip install "peft==0.13.1" "diffusers==0.26.0" "huggingface_hub==0.24.0" "diffusers==0.26.0" transformers accelerate
%pip install peft==0.14.0 diffusers==0.32.2 transformers==4.48.0

# Training command (fine-tunes the checkpoint loaded below)

```console
accelerate launch --num_cpu_threads_per_process 2 \
  "./sd-scripts/sdxl_train.py" \
  --pretrained_model_name_or_path "./models/gonzales20260217Dmd.dzxk.safetensors" \
  --train_data_dir "./test_train" \
  --output_dir "./outputs" \
  --output_name "SDXL_gonzales20260217Dmd" \
  --resolution "1024,1024" \
  --train_batch_size 1 \
  --gradient_accumulation_steps 4 \
  --max_train_steps 1500 \
  --save_every_n_steps 300 \
  --learning_rate 1e-6 \
  --optimizer_type "AdamW8bit" \
  --mixed_precision bf16 \
  --save_precision fp16 \
  --no_half_vae \
  --noise_offset 0.1 \
  --gradient_checkpointing \
  --sdpa \
  --cache_latents \
  --cache_text_encoder_outputs \
  --highvram
```

# Generate image from model

In [9]:
import os
import warnings
from enum import Enum

import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import DPMSolverMultistepScheduler
from diffusers import AutoencoderKL

class ModelType(Enum):
    """Identifier for the base-model family a checkpoint belongs to.

    Passed as the ``model_type`` argument of ``generate_image``. Each member's
    value is the same string as its name.
    """

    # Presumably Stable Diffusion 1.5 checkpoints (inferred from the name).
    SD15 = "SD15"

    # Presumably Stable Diffusion XL checkpoints (inferred from the name).
    SDXL = "SDXL"

def _indexed_output_name(output_name: str, index: int, repeat: int) -> str:
    """Return the file name for image number ``index`` in a run of ``repeat`` images.

    A single-image run keeps ``output_name`` unchanged; otherwise ``_<index>``
    is inserted immediately before the file extension.
    """
    if repeat <= 1:
        return output_name
    # splitext only inspects the last path component, so dots in directory
    # names (or a leading "./") are never mistaken for the extension.
    stem, ext = os.path.splitext(output_name)
    return f"{stem}_{index}{ext}"


def generate_image(model_type: ModelType, model_path: str, lora_path: str, output_name: str, prompt: str, negative_prompt: str = None, repeat: int = 1):
    """Load a single-file SDXL checkpoint and save ``repeat`` generated images.

    Args:
        model_type: Model family of the checkpoint. Only ``ModelType.SDXL`` is
            supported, because the pipeline class is hard-coded to SDXL.
        model_path: Path to the single-file checkpoint handed to
            ``StableDiffusionXLPipeline.from_single_file``.
        lora_path: Currently unused. A warning is emitted when a non-empty
            value is supplied so the caller knows no LoRA was applied.
        output_name: Destination image path. When ``repeat > 1`` the images are
            written as ``<stem>_1<ext>``, ``<stem>_2<ext>``, ...
        prompt: Positive prompt forwarded to the pipeline.
        negative_prompt: Optional negative prompt forwarded to the pipeline.
        repeat: Number of images to generate; must be at least 1.

    Raises:
        ValueError: If ``repeat`` is smaller than 1.
        NotImplementedError: If ``model_type`` is not ``ModelType.SDXL``.
    """
    # Validate cheap arguments first so mistakes surface before the
    # multi-gigabyte checkpoint is loaded onto the GPU.
    if repeat < 1:
        raise ValueError(f"repeat must be >= 1, got {repeat!r}")
    if model_type is not ModelType.SDXL:
        raise NotImplementedError(
            f"generate_image only supports ModelType.SDXL, got {model_type!r}"
        )
    if lora_path:
        warnings.warn(
            "lora_path is ignored: generate_image does not load LoRA weights",
            stacklevel=2,
        )

    # Create the destination directory up front; otherwise image.save() would
    # only fail after the model load and inference had already been paid for.
    output_dir = os.path.dirname(output_name)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print("正在加载全量自定义模型 (SDXL)...")

    # Optional fp16-safe VAE, left disabled by the original author:
    # vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.bfloat16).to("cuda")

    # Note: the full checkpoint is one large file, so it is loaded with
    # from_single_file rather than from_pretrained.
    pipe = StableDiffusionXLPipeline.from_single_file(
        model_path,
        # vae=vae,
        torch_dtype=torch.bfloat16,
        use_safetensors=True
    ).to("cuda")

    # Replace the scheduler once the pipeline is loaded, reusing its config.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        # use_karras_sigmas=True,
        # algorithm_type="sde-dpmsolver++",
    )

    # Enable FreeU. Original author's note: b1/b2 steer detail and s1/s2 steer
    # structure; intended to improve sharpness and colour depth.
    pipe.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)

    print("正在推理...")
    for index in range(1, repeat + 1):
        target_path = _indexed_output_name(output_name, index, repeat)
        with torch.inference_mode():
            # Sampling settings are hard-coded (11 steps, guidance 1.2, 1024x1024).
            image = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=11,
                guidance_scale=1.2,
                width=1024,
                height=1024,
                clip_skip=2,
            ).images[0]

        image.save(target_path)
        print(f"✅ 成功！全量模型生成的图片已保存: {target_path}")


# generate_image(
#     model_type=ModelType.SDXL,
#     model_path="./outputs/SDXL_DB_LYZ_Resume.ckpt",
#     lora_path=None,
#     output_name="./output_images/liying_zhao_5090_test_full_tune.png",
#     prompt="EWOLiyingZhao with long hair, wearing a blue shirt. She is posing for the camera with her hands on her cheeks, giving off a playful and confident vibe. The woman appears to be smiling, possibly for the camera. The background of the image is blurry, which helps to focus on the subject.",
# )

# generate_image(
#     model_type=ModelType.SDXL,
#     model_path="./models/gonzales20260217Dmd.dzxk.safetensors",
#     lora_path=None,
#     output_name="./output_images/random.png",
#     prompt="A sharp, photo-realistic candid shot of an asian woman at a desk, captured in a moment of distraction. She is seated, her body angled slightly, one hand raised to tuck a strand of her long, dark hair behind her ear. Her white blouse is partially unbuttoned, and the movement has caused the fabric to fall slightly more open, revealing a fuller view of her pale, soft breast in the warm lamplight. Her gold wedding ring catches the light as her hand moves near her face. The sole illumination is a classic 1950s gooseneck desk lamp with a chrome finish, its flexible arm positioned to cast light directly onto her upper body and the desk surface. Her expression is thoughtful, her gaze distant, caught in a brief reverie. The background is soft and dark. The scene feels utterly natural and unposed, a private moment captured. The atmosphere is quiet, warm, and elegantly sensual.",
#     negative_prompt="lowres, bad anatomy, text, error, extra digit, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, blurry, overexposed",
#     repeat=2,
# )

# generate_image(
#     model_type=ModelType.SDXL,
#     model_path="./outputs/SDXL_gonzales20260217Dmd-step00000300.ckpt",
#     lora_path=None,
#     output_name="./output_images/random.png",
#     prompt="A sharp, photo-realistic candid shot of EWOLiyingZhao at a desk, captured in a moment of distraction. She is seated, her body angled slightly, one hand raised to tuck a strand of her long, dark hair behind her ear. Her white blouse is partially unbuttoned, and the movement has caused the fabric to fall slightly more open, revealing a fuller view of her pale, soft breast in the warm lamplight. Her gold wedding ring catches the light as her hand moves near her face. The sole illumination is a classic 1950s gooseneck desk lamp with a chrome finish, its flexible arm positioned to cast light directly onto her upper body and the desk surface. Her expression is thoughtful, her gaze distant, caught in a brief reverie. The background is soft and dark. The scene feels utterly natural and unposed, a private moment captured. The atmosphere is quiet, warm, and elegantly sensual.",
#     negative_prompt="lowres, bad anatomy, text, error, extra digit, cropped, worst quality, low quality, "
#         "normal quality, jpeg artifacts, signature, watermark, username, blurry, overexposed, "
#         # --- 眼睛专项修复 ---
#         "bad eyes, deformed iris, deformed pupils, fused eyes, (misaligned eyes:1.2), (cross-eyed:1.2), "
#         "squint, (lazy eye:1.2), (blind:1.1), (dilated pupils:1.2), (heterochromia:1.1), eyeball protrusion, "
#         "half-blind, clouded eyes, bloodshot eyes, "
#         # --- 面部与皮肤细节 ---
#         "poorly drawn face, mutation, deformed, (unrealistic eyes:1.2), (plastic skin:1.2), "
#         "unnatural skin texture, (cluttered face:1.2), (morbid:1.1), (disfigured:1.1), "
#         # --- 肢体与整体逻辑 ---
#         "extra fingers, mutated hands, fused fingers, too many fingers, long neck, gross proportions, "
#         "malformed limbs, extra arms, extra legs, mutated, floating limbs, disconnected limbs",
#     repeat=10,
# )

# generate_image(
#     model_type=ModelType.SDXL,
#     model_path="./outputs/SDXL_gonzales20260217Dmd-step00001500.ckpt",
#     lora_path=None,
#     output_name="./output_images/random.png",
#     prompt="A sharp, photo-realistic candid shot of EWOLiyingZhao at a desk, captured in a moment of distraction. She is seated, her body angled slightly, one hand raised to tuck a strand of her long, dark hair behind her ear. Her white blouse is partially unbuttoned, and the movement has caused the fabric to fall slightly more open, revealing a fuller view of her pale, soft breast in the warm lamplight. Her gold wedding ring catches the light as her hand moves near her face. The sole illumination is a classic 1950s gooseneck desk lamp with a chrome finish, its flexible arm positioned to cast light directly onto her upper body and the desk surface. Her expression is thoughtful, her gaze distant, caught in a brief reverie. The background is soft and dark. The scene feels utterly natural and unposed, a private moment captured. The atmosphere is quiet, warm, and elegantly sensual.",
#     negative_prompt="lowres, bad anatomy, text, error, extra digit, cropped, worst quality, low quality, "
#         "normal quality, jpeg artifacts, signature, watermark, username, blurry, overexposed, "
#         # --- 眼睛专项修复 ---
#         "bad eyes, deformed iris, deformed pupils, fused eyes, (misaligned eyes:1.2), (cross-eyed:1.2), "
#         "squint, (lazy eye:1.2), (blind:1.1), (dilated pupils:1.2), (heterochromia:1.1), eyeball protrusion, "
#         "half-blind, clouded eyes, bloodshot eyes, "
#         # --- 面部与皮肤细节 ---
#         "poorly drawn face, mutation, deformed, (unrealistic eyes:1.2), (plastic skin:1.2), "
#         "unnatural skin texture, (cluttered face:1.2), (morbid:1.1), (disfigured:1.1), "
#         # --- 肢体与整体逻辑 ---
#         "extra fingers, mutated hands, fused fingers, too many fingers, long neck, gross proportions, "
#         "malformed limbs, extra arms, extra legs, mutated, floating limbs, disconnected limbs",
#     repeat=20,
# )

generate_image(
    model_type=ModelType.SDXL,
    model_path="./outputs/SDXL_gonzales20260217Dmd-step00001500.ckpt",
    lora_path=None,
    output_name="./output_images/random.png",
    prompt=(
        "((EWOLiyingZhao:1.4)), (full round face:1.3), high cranial top, "
        "short rounded chin, (large circular almond eyes:1.2), "
        "defined thick aegyo-sal, black pupils, soft curved eyebrows, "
        "small straight nose, short philtrum, soft feminine jawline, "
        "a photo-realistic candid shot at a desk, white blouse, "
        "natural skin texture, cinematic lighting, 8k resolution"
    ),
    negative_prompt=(
        "long face, v-shaped face, sharp chin, thin face, slanted eyes, "
        "narrow eyes, low cranial top, plastic skin, (worst quality, low quality:1.4), "
        "blurry, bad anatomy, deformed"
    ),
    repeat=2,
)

正在加载全量自定义模型 (SDXL)...


Fetching 17 files: 100%|██████████| 17/17 [00:00<00:00, 293428.67it/s]
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  7.31it/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (94 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['at a desk, white blouse, natural skin texture, cinematic lighting, 8 k resolution']
Token indices sequence length is longer than the specified maximum sequence length for this model (94 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['at a desk, white blouse, natural skin texture, cinematic lighting, 8 k resolution']


正在推理...


100%|██████████| 11/11 [00:00<00:00, 11.62it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['at a desk, white blouse, natural skin texture, cinematic lighting, 8 k resolution']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['at a desk, white blouse, natural skin texture, cinematic lighting, 8 k resolution']


✅ 成功！全量模型生成的图片已保存: ./output_images/random_1.png


100%|██████████| 11/11 [00:00<00:00, 11.68it/s]


✅ 成功！全量模型生成的图片已保存: ./output_images/random_2.png
