Skip to content

SD3.5-large. Why is it OK when calling with a single thread, but not with multiple threads? #9833

@EvanSong77

Description

@EvanSong77

Describe the bug

First, I created a SD3.5-large service:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import uuid
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, DDIMScheduler, DDPMParallelScheduler
from diffusers import StableDiffusion3Pipeline
import torch
from transformers import T5EncoderModel
import time 
from flask import request, jsonify
import logging
import sys
import flask

app = flask.Flask("sd_server")

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s in %(module)s: %(message)s"))
app.logger.handlers.clear()
app.logger.addHandler(handler)
app.logger.setLevel(logging.INFO)

# model pipeline
model_id = "../stable-diffusion-3.5-large"

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model_nf4 = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16
)
model_nf4 = model_nf4.to("cuda:0")
pipeline = StableDiffusion3Pipeline.from_pretrained(
    model_id, 
    transformer=model_nf4,
    torch_dtype=torch.bfloat16
)
# pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
# pipeline.scheduler = DDPMParallelScheduler.from_config(pipeline.scheduler.config)
pipeline = pipeline.to("cuda:0")

# # diffusers/t5-nf4
# t5_nf4 = T5EncoderModel.from_pretrained("text_encoder_3", torch_dtype=torch.bfloat16)
# t5_nf4 = t5_nf4.to("cuda:0")

# pipeline = StableDiffusion3Pipeline.from_pretrained(
#     model_id, 
#     transformer=model_nf4,
#     text_encoder_3=t5_nf4,
#     torch_dtype=torch.bfloat16
# )
# pipeline = pipeline.to("cuda:0")


def generate_uuid_filename(extension=".jpeg"):
    filename = f"{uuid.uuid4()}{extension}"
    
    return filename

def image_generation(prompt, negative_prompt, width, height, save_path, num_inference_steps=28, guidance_scale=4.5, max_sequence_length=512):
    image = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        max_sequence_length=max_sequence_length,
    ).images[0]
    file_name = generate_uuid_filename()
    image.save(os.path.join(save_path, file_name))
    torch.cuda.empty_cache()
    return f"{file_name}保存完毕..."
    

def update_prompt(req_data):
    trans = {"natural":["cinematic photo ```%s``` , photograph, film, bokeh, professional, 4k, highly detailed",
                       "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly"],
            "vivid":["HDR photo of ``%s``` . High dynamic range, vivid, rich details, clear shadows and highlights, realistic, intense, enhanced contrast, highly detailed",
                    "flat, low contrast, oversaturated, underexposed, overexposed, blurred, noisy"]}
    style = "natural"
    try:
        if req_data.get('style') != None:
            if req_data.get('style') in trans.keys():
                style = req_data.get('style')
    except:
        pass
    import re
    try:
        req_data["promptEnglish"] = re.findall(r'\\"(.+)\\"',req_data["promptEnglish"])[0]
    except:
        pass
    prompt = trans[style][0]%req_data["promptEnglish"]
    negative_prompt = trans[style][1]
    if req_data["negativePromptEnglish"] not in [None ,'']:
        negative_prompt = req_data["negativePromptEnglish"]
        
    return prompt, negative_prompt

@app.route('/api/text_to_img', methods=['POST'])
def route():
    res = {"id": "",
           "object": "image",
           "created":int(time.time()),
           "data":[]}
    
    req_data = request.json
    app.logger.info(req_data)

    prompt, negative_prompt = update_prompt(req_data)
    app.logger.info(prompt+"|"+negative_prompt)

    width = int(req_data["size"].split("x")[0]) 
    height= int(req_data["size"].split("x")[1])               

    res["data"] = image_generation(prompt, negative_prompt, width, height, './')
        
    return jsonify(res)


if __name__ == '__main__':
    app.run(host='0.0.0.0',port=12571,threaded=True, debug=False)

Then I called this service concurrently and the following problems occurred:

  [2024-11-01 07:32:12,370] INFO in app: {'prompt': '', 'promptEnglish': 'A capybara holding a sign that reads Hello Fast World', 'negative_prompt': '', 'negativePromptEnglish': None, 'style': 'natural', 'size': '1024x1024'}
[2024-11-01 07:32:12,371] INFO in app: cinematic photo ```A capybara holding a sign that reads Hello Fast World``` , photograph, film, bokeh, professional, 4k, highly detailed|drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly
  4%|███▋                                                                                                  | 1/28 [00:01<00:28,  1.05s/it][2024-11-01 07:32:14,858] INFO in app: {'prompt': '', 'promptEnglish': 'A capybara holding a sign that reads Hello Fast World', 'negative_prompt': '', 'negativePromptEnglish': None, 'style': 'natural', 'size': '1024x1024'}
[2024-11-01 07:32:14,859] INFO in app: cinematic photo ```A capybara holding a sign that reads Hello Fast World``` , photograph, film, bokeh, professional, 4k, highly detailed|drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly
 64%|████████████████████████████████████████████████████████████████▉                                    | 18/28 [00:32<00:18,  1.82s/it]
[2024-11-01 07:32:46,144] ERROR in app: Exception on /api/text_to_img [POST]                              | 15/28 [00:28<00:24,  1.86s/it]
Traceback (most recent call last):
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request
    rv = self.dispatch_request()
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/data/dev/model-checkpoint/stable-diffusion-3.5-large-turbo/app.py", line 121, in route
    res["data"] = image_generation(prompt, negative_prompt, width, height, './')
  File "/data/dev/model-checkpoint/stable-diffusion-3.5-large-turbo/app.py", line 66, in image_generation
    image = pipeline(
  File "/root/anaconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/root/anaconda3/lib/python3.10/site-packages/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py", line 902, in __call__
    latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
  File "/root/anaconda3/lib/python3.10/site-packages/diffusers/schedulers/scheduling_flow_match_euler_discrete.py", line 295, in step
    sigma_next = self.sigmas[self.step_index + 1]
IndexError: index 29 is out of bounds for dimension 0 with size 29
127.0.0.1 - - [01/Nov/2024 07:32:46] "POST /api/text_to_img HTTP/1.1" 500 -
 54%|██████████████████████████████████████████████████████                                               | 15/28 [00:29<00:25,  1.95s/it]
[2024-11-01 07:32:46,968] ERROR in app: Exception on /api/text_to_img [POST]
Traceback (most recent call last):
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request
    rv = self.dispatch_request()
  File "/root/anaconda3/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/data/dev/model-checkpoint/stable-diffusion-3.5-large-turbo/app.py", line 121, in route
    res["data"] = image_generation(prompt, negative_prompt, width, height, './')
  File "/data/dev/model-checkpoint/stable-diffusion-3.5-large-turbo/app.py", line 66, in image_generation
    image = pipeline(
  File "/root/anaconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/root/anaconda3/lib/python3.10/site-packages/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py", line 902, in __call__
    latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
  File "/root/anaconda3/lib/python3.10/site-packages/diffusers/schedulers/scheduling_flow_match_euler_discrete.py", line 295, in step
    sigma_next = self.sigmas[self.step_index + 1]
IndexError: index 29 is out of bounds for dimension 0 with size 29
127.0.0.1 - - [01/Nov/2024 07:32:46] "POST /api/text_to_img HTTP/1.1" 500 -

Why is it OK when calling with a single thread, but not with multiple threads?

Reproduction

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import uuid
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, DDIMScheduler, DDPMParallelScheduler
from diffusers import StableDiffusion3Pipeline
import torch
from transformers import T5EncoderModel
import time 
from flask import request, jsonify
import logging
import sys
import flask

app = flask.Flask("sd_server")

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s in %(module)s: %(message)s"))
app.logger.handlers.clear()
app.logger.addHandler(handler)
app.logger.setLevel(logging.INFO)

# model pipeline
model_id = "../stable-diffusion-3.5-large"

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model_nf4 = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16
)
model_nf4 = model_nf4.to("cuda:0")
pipeline = StableDiffusion3Pipeline.from_pretrained(
    model_id, 
    transformer=model_nf4,
    torch_dtype=torch.bfloat16
)
# pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
# pipeline.scheduler = DDPMParallelScheduler.from_config(pipeline.scheduler.config)
pipeline = pipeline.to("cuda:0")

# # diffusers/t5-nf4
# t5_nf4 = T5EncoderModel.from_pretrained("text_encoder_3", torch_dtype=torch.bfloat16)
# t5_nf4 = t5_nf4.to("cuda:0")

# pipeline = StableDiffusion3Pipeline.from_pretrained(
#     model_id, 
#     transformer=model_nf4,
#     text_encoder_3=t5_nf4,
#     torch_dtype=torch.bfloat16
# )
# pipeline = pipeline.to("cuda:0")


def generate_uuid_filename(extension=".jpeg"):
    # 使用UUID生成文件名
    filename = f"{uuid.uuid4()}{extension}"
    
    return filename

def image_generation(prompt, negative_prompt, width, height, save_path, num_inference_steps=28, guidance_scale=4.5, max_sequence_length=512):
    image = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        max_sequence_length=max_sequence_length,
    ).images[0]
    file_name = generate_uuid_filename()
    image.save(os.path.join(save_path, file_name))
    torch.cuda.empty_cache()
    return f"{file_name}保存完毕..."
    

def update_prompt(req_data):
    trans = {"natural":["cinematic photo ```%s``` , photograph, film, bokeh, professional, 4k, highly detailed",
                       "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly"],
            "vivid":["HDR photo of ``%s``` . High dynamic range, vivid, rich details, clear shadows and highlights, realistic, intense, enhanced contrast, highly detailed",
                    "flat, low contrast, oversaturated, underexposed, overexposed, blurred, noisy"]}
    style = "natural"
    try:
        if req_data.get('style') != None:
            if req_data.get('style') in trans.keys():
                style = req_data.get('style')
    except:
        pass
    import re
    try:
        req_data["promptEnglish"] = re.findall(r'\\"(.+)\\"',req_data["promptEnglish"])[0]
    except:
        pass
    prompt = trans[style][0]%req_data["promptEnglish"]
    negative_prompt = trans[style][1]
    if req_data["negativePromptEnglish"] not in [None ,'']:
        negative_prompt = req_data["negativePromptEnglish"]
        
    return prompt, negative_prompt

@app.route('/api/text_to_img', methods=['POST'])
def route():
    res = {"id": "",
           "object": "image",
           "created":int(time.time()),
           "data":[]}
    
    req_data = request.json
    app.logger.info(req_data)

    prompt, negative_prompt = update_prompt(req_data)
    app.logger.info(prompt+"|"+negative_prompt)

    width = int(req_data["size"].split("x")[0]) 
    height= int(req_data["size"].split("x")[1])               

    res["data"] = image_generation(prompt, negative_prompt, width, height, './')
        
    return jsonify(res)


if __name__ == '__main__':
    app.run(host='0.0.0.0',port=12571,threaded=True, debug=False)

Logs

No response

System Info

python app.py

Who can help?

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions