diff --git a/inference/huggingface/README.md b/inference/huggingface/README.md
index 3e68d1647..f1dd12f55 100644
--- a/inference/huggingface/README.md
+++ b/inference/huggingface/README.md
@@ -32,6 +32,7 @@ The DeepSpeed huggingface inference examples are organized into their correspond
 | [`text-generation/run-generation-script`](./text-generation/run-generation-script/) | [`README`](./text-generation/run-generation-script/README.md) | [`requirements`](./text-generation/run-generation-script/requirements.txt) |
 | [`text2text-generation`](./text2text-generation/) | [`README`](./text2text-generation/README.md) | [`requirements`](./text2text-generation/requirements.txt) |
 | [`translation`](./translation/) | [`README`](./translation/README.md) | [`requirements`](./translation/requirements.txt) |
+| [`stable-diffusion`](./stable-diffusion/) | [`README`](./stable-diffusion/README.md) | [`requirements`](./stable-diffusion/requirements.txt) |
 Most examples can be run as follows:
 <pre>
 deepspeed --num_gpus [number of GPUs] test-[model].py
diff --git a/inference/huggingface/stable-diffusion/README.md b/inference/huggingface/stable-diffusion/README.md
new file mode 100644
index 000000000..963d413b1
--- /dev/null
+++ b/inference/huggingface/stable-diffusion/README.md
@@ -0,0 +1,26 @@
+
+# DeepSpeed Stable Diffusion Example
+
+# Setup
+Python dependencies:
+<pre>
+pip install -r requirements.txt
+</pre>
+
+# Usage
+Examples can be run as follows:
+<pre>
+deepspeed --num_gpus [number of GPUs] test-[model].py
+</pre>
+
+# Example Output
+Command:
+<pre>
+deepspeed --num_gpus 1 test-stable-diffusion.py
+</pre>
+
+Output:
+<pre>
+./baseline.png
+./deepspeed.png
+</pre>
diff --git a/inference/huggingface/stable-diffusion/requirements.txt b/inference/huggingface/stable-diffusion/requirements.txt
new file mode 100644
index 000000000..ff891d908
--- /dev/null
+++ b/inference/huggingface/stable-diffusion/requirements.txt
@@ -0,0 +1,3 @@
+deepspeed
+torch
+diffusers
diff --git a/inference/huggingface/stable-diffusion/test-stable-diffusion.py b/inference/huggingface/stable-diffusion/test-stable-diffusion.py
new file mode 100644
index 000000000..8959341fd
--- /dev/null
+++ b/inference/huggingface/stable-diffusion/test-stable-diffusion.py
@@ -0,0 +1,33 @@
+import os
+
+import deepspeed
+import torch
+from diffusers import DiffusionPipeline
+
+prompt = "a dog on a rocket"
+model = "prompthero/midjourney-v4-diffusion"
+
+# Place the pipeline and the RNG on this process's GPU (set by the deepspeed launcher).
+local_rank = int(os.getenv("LOCAL_RANK", "0"))
+device = torch.device(f"cuda:{local_rank}")
+generator = torch.Generator(device=device)
+
+pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.half)
+pipe = pipe.to(device)
+
+# Fixed seed so the baseline and DeepSpeed runs are directly comparable.
+generator.manual_seed(0xABEDABE7)
+baseline_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0]
+baseline_image.save("baseline.png")
+
+# NOTE: DeepSpeed inference supports local CUDA graphs for replaced SD modules
+pipe = deepspeed.init_inference(
+    pipe,
+    dtype=torch.half,
+    replace_with_kernel_inject=True,
+    enable_cuda_graph=True,
+)
+
+generator.manual_seed(0xABEDABE7)
+deepspeed_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0]
+deepspeed_image.save("deepspeed.png")
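
The script saves the two images but never times the two pipelines, which is usually the point of enabling kernel injection and CUDA graphs. A minimal timing sketch that could be appended to test-stable-diffusion.py is shown below; the `benchmark` helper, the warm-up call, and the iteration count are illustrative assumptions, not part of the diff:

```python
import time

import torch


def benchmark(pipe, prompt, generator, iters=3):
    # Illustrative helper (not part of the diff): average end-to-end latency.
    # One warm-up call so CUDA graph capture and lazy kernel builds are excluded.
    pipe(prompt, guidance_scale=7.5, generator=generator)
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iters):
        pipe(prompt, guidance_scale=7.5, generator=generator)
    torch.cuda.synchronize()
    return (time.perf_counter() - start) / iters


# Hypothetical usage, once before and once after deepspeed.init_inference:
# print(f"latency: {benchmark(pipe, prompt, generator):.2f} s/image")
```

The `torch.cuda.synchronize()` calls matter because pipeline calls return after asynchronous kernel launches; without them the timer would under-report GPU work.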
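
Since the script re-seeds the generator before each run, the two saved images can also be compared numerically to sanity-check that kernel injection preserves output quality. A sketch of one way to do that follows; the use of a mean-absolute-difference metric is an assumption, since fused half-precision kernels are close to, but not bit-exact with, the baseline kernels:

```python
import numpy as np
from PIL import Image

# fp16 kernel fusion is not expected to be bit-exact with the baseline,
# so compare pixels by magnitude rather than strict equality.
baseline = np.asarray(Image.open("baseline.png"), dtype=np.float32)
accelerated = np.asarray(Image.open("deepspeed.png"), dtype=np.float32)
print("mean absolute pixel difference:", np.abs(baseline - accelerated).mean())
```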