From bf286d479b9f73e006d30f295646d1bddf61a968 Mon Sep 17 00:00:00 2001 From: Lev Kurilenko Date: Fri, 13 Jan 2023 01:32:09 +0000 Subject: [PATCH 1/3] Add DeepSpeed Stable Diffusion Example --- inference/huggingface/README.md | 1 + .../huggingface/stable-diffusion/README.md | 24 +++++++++++++++ .../stable-diffusion/requirements.txt | 3 ++ .../stable-diffusion/test-stable-diffusion.py | 30 +++++++++++++++++++ 4 files changed, 58 insertions(+) create mode 100644 inference/huggingface/stable-diffusion/README.md create mode 100644 inference/huggingface/stable-diffusion/requirements.txt create mode 100644 inference/huggingface/stable-diffusion/test-stable-diffusion.py diff --git a/inference/huggingface/README.md b/inference/huggingface/README.md index 3e68d1647..f1dd12f55 100644 --- a/inference/huggingface/README.md +++ b/inference/huggingface/README.md @@ -32,6 +32,7 @@ The DeepSpeed huggingface inference examples are organized into their correspond | [`text-generation/run-generation-script`](./text-generation/run-generation-script/) | [`README`](./text-generation/run-generation-script/README.md) | [`requirements`](./text-generation/run-generation-script/requirements.txt) | | [`text2text-generation`](./text2text-generation/) | [`README`](./text2text-generation/README.md) | [`requirements`](./text2text-generation/requirements.txt) | | [`translation`](./translation/) | [`README`](./translation/README.md) | [`requirements`](./translation/requirements.txt) | +| [`stable-diffusion`](./stable-diffusion/) | [`README`](./stable-diffusion/README.md) | [`requirements`](./stable-diffusion/requirements.txt) | Most examples can be run as follows:
deepspeed --num_gpus [number of GPUs] test-[model].py
diff --git a/inference/huggingface/stable-diffusion/README.md b/inference/huggingface/stable-diffusion/README.md new file mode 100644 index 000000000..963d413b1 --- /dev/null +++ b/inference/huggingface/stable-diffusion/README.md @@ -0,0 +1,24 @@ + +# DeepSpeed Stable Diffusion Example + +# Setup +Python dependencies: +
+pip install -r requirements.txt
+
+ +# Usage +Examples can be run as follows: +
deepspeed --num_gpus [number of GPUs] test-[model].py
+ +# Example Output +Command: +
+deepspeed --num_gpus 1 test-stable-diffusion.py
+
+ +Output: +
+./baseline.png
+./deepspeed.png
+
diff --git a/inference/huggingface/stable-diffusion/requirements.txt b/inference/huggingface/stable-diffusion/requirements.txt new file mode 100644 index 000000000..ff891d908 --- /dev/null +++ b/inference/huggingface/stable-diffusion/requirements.txt @@ -0,0 +1,3 @@ +deepspeed +torch +diffusers diff --git a/inference/huggingface/stable-diffusion/test-stable-diffusion.py b/inference/huggingface/stable-diffusion/test-stable-diffusion.py new file mode 100644 index 000000000..8f1eb2e44 --- /dev/null +++ b/inference/huggingface/stable-diffusion/test-stable-diffusion.py @@ -0,0 +1,30 @@ +import deepspeed +import torch +import os + +from diffusers import DiffusionPipeline + +prompt = "a dog on a rocket" + +model = "prompthero/midjourney-v4-diffusion" +local_rank = int(os.getenv("LOCAL_RANK", "0")) +device = torch.device(f"cuda:{local_rank}") +world_size = int(os.getenv('WORLD_SIZE', '4')) + +pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.half) +pipe = pipe.to(device) + +baseline_image = pipe(prompt, guidance_scale=7.5).images[0] +baseline_image.save(f"baseline.png") + +pipe = deepspeed.init_inference( + pipe, + mp_size=1, + dtype=torch.half, + replace_method="auto", + replace_with_kernel_inject=False, + enable_cuda_graph=False, +) + +deepspeed_image = pipe(prompt, guidance_scale=7.5).images[0] +deepspeed_image.save(f"deepspeed.png") From 9dcdce9659e904344d1b5e91f8733ab35477b648 Mon Sep 17 00:00:00 2001 From: Lev Kurilenko Date: Thu, 30 Mar 2023 20:07:42 +0000 Subject: [PATCH 2/3] enable kernel inject, enable cuda graph, set generator seed --- .../stable-diffusion/test-stable-diffusion.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/inference/huggingface/stable-diffusion/test-stable-diffusion.py b/inference/huggingface/stable-diffusion/test-stable-diffusion.py index 8f1eb2e44..37825822b 100644 --- a/inference/huggingface/stable-diffusion/test-stable-diffusion.py +++ 
b/inference/huggingface/stable-diffusion/test-stable-diffusion.py @@ -10,21 +10,25 @@ local_rank = int(os.getenv("LOCAL_RANK", "0")) device = torch.device(f"cuda:{local_rank}") world_size = int(os.getenv('WORLD_SIZE', '4')) +generator = torch.Generator(device=device)  # keep the RNG on the same GPU as the pipeline (cuda:{local_rank}) pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.half) pipe = pipe.to(device) -baseline_image = pipe(prompt, guidance_scale=7.5).images[0] +generator.manual_seed(0xABEDABE7) +baseline_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0] baseline_image.save(f"baseline.png") +# NOTE: DeepSpeed inference supports local CUDA graphs for replaced SD modules pipe = deepspeed.init_inference( pipe, mp_size=1, dtype=torch.half, replace_method="auto", - replace_with_kernel_inject=False, - enable_cuda_graph=False, + replace_with_kernel_inject=True, + enable_cuda_graph=True, ) -deepspeed_image = pipe(prompt, guidance_scale=7.5).images[0] +generator.manual_seed(0xABEDABE7) +deepspeed_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0] deepspeed_image.save(f"deepspeed.png") From b05ebcd19dbd3d5b78ed5ae656c3f8e0ccd6174d Mon Sep 17 00:00:00 2001 From: Lev Kurilenko Date: Thu, 30 Mar 2023 20:12:53 +0000 Subject: [PATCH 3/3] Remove extra args --- inference/huggingface/stable-diffusion/test-stable-diffusion.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/inference/huggingface/stable-diffusion/test-stable-diffusion.py b/inference/huggingface/stable-diffusion/test-stable-diffusion.py index 37825822b..8959341fd 100644 --- a/inference/huggingface/stable-diffusion/test-stable-diffusion.py +++ b/inference/huggingface/stable-diffusion/test-stable-diffusion.py @@ -22,9 +22,7 @@ # NOTE: DeepSpeed inference supports local CUDA graphs for replaced SD modules pipe = deepspeed.init_inference( pipe, - mp_size=1, dtype=torch.half, - replace_method="auto", replace_with_kernel_inject=True, enable_cuda_graph=True, )