-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Open
Description
Enviornment
- CUDA: 11.6
- Python: 3.8.10
- PyTorch: 1.13.1
- DeepSpeed: 0.8.1
- Transformers: 4.26.1
- Diffusers: 0.13.0
- GPU: T4
Code
"""
Simple script to benchmark Stable Diffusion optimized with DeepSpeed inference.
References:
(1) https://github.com/microsoft/DeepSpeed-MII/blob/main/mii/models/load_models.py
(2) https://github.com/huggingface/diffusers/pull/2303 (benchmarking function)
"""
import os
import deepspeed
import torch
import torch.utils.benchmark as benchmark
from diffusers import StableDiffusionPipeline
def benchmark_torch_function(f, *args, **kwargs):
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)", globals={"args": args, "kwargs": kwargs, "f": f}
)
return round(t0.blocked_autorange(min_run_time=1).mean, 2)
# general parameters
world_size = int(os.getenv("WORLD_SIZE", "1"))
prompt = "A photo of an astronaut riding a horse on marse."
weight_dtype = torch.float16
num_inference_steps = 50
batch_size = 2
# deepspeed specific parameters
ds_kwargs = {
"mp_size": world_size,
"dtype": weight_dtype,
"replace_method": "auto",
"enable_cuda_graph": True,
"checkpoint": None,
# "config": None,
"training_mp_size": 1,
"replace_with_kernel_inject": False,
}
# load original pipeline
model_id = "CompVis/stable-diffusion-v1-4"
pipeline = StableDiffusionPipeline.from_pretrained(
model_id, torch_dtype=weight_dtype, safety_checker=None
).to("cuda")
pipeline.set_progress_bar_config(disable=True)
# benchmark
print("Running benchmark with vanilla pipeline...")
f = lambda: pipeline(
prompt, num_inference_steps=num_inference_steps, num_images_per_prompt=batch_size
).images
time_vanilla = benchmark_torch_function(f)
# optimize the pipeline with deepspeed
engine = deepspeed.init_inference(getattr(pipeline, "model", pipeline), **ds_kwargs)
if hasattr(pipeline, "model"):
pipeline.model = engine
# benchmark
print("Running benchmark with DeepSpeed pipeline...")
f = lambda: pipe(
prompt, num_inference_steps=num_inference_steps, num_images_per_prompt=batch_size
).images
time_ds_inference = benchmark_torch_function(f)
# log information
print(f"Vanilla Stable Diffusion: {time_vanilla} s")
print(f"DeepSpeed Stable Diffusion: {time_ds_inference} s")Error
[2023-02-20 06:28:39,725] [INFO] [logging.py:75:log_dist] [Rank -1] DeepSpeed info: version=0.8.1, git-hash=unknown, git-branch=unknown
[2023-02-20 06:28:39,726] [WARNING] [config_utils.py:74:_process_deprecated_field] Config parameter replace_method is deprecated. This parameter is no longer needed, please remove from your call to DeepSpeed-inference
[2023-02-20 06:28:39,726] [WARNING] [config_utils.py:74:_process_deprecated_field] Config parameter mp_size is deprecated use tensor_parallel.tp_size instead
[2023-02-20 06:28:39,727] [INFO] [logging.py:75:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1
Traceback (most recent call last):
File "benchmark_sd_ds_inference.py", line 58, in <module>
engine = deepspeed.init_inference(getattr(pipeline, "model", pipeline), **ds_kwargs)
File "/opt/conda/envs/deepspeed/lib/python3.8/site-packages/deepspeed/__init__.py", line 311, in init_inference
engine = InferenceEngine(model, config=ds_inference_config)
File "/opt/conda/envs/deepspeed/lib/python3.8/site-packages/deepspeed/inference/engine.py", line 137, in __init__
parser_dict = AutoTP.tp_parser(model)
File "/opt/conda/envs/deepspeed/lib/python3.8/site-packages/deepspeed/module_inject/auto_tp.py", line 68, in tp_parser
assert AutoTP.supported(model), "Automatic policy not supported for model. Please provide policy."
File "/opt/conda/envs/deepspeed/lib/python3.8/site-packages/deepspeed/module_inject/auto_tp.py", line 35, in supported
if key.group(1).lower() in unsupported:
AttributeError: 'NoneType' object has no attribute 'group'What am I missing out on?
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels