diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml index a1c0c690eb94..2fec3af66525 100644 --- a/docs/source/ko/_toctree.yml +++ b/docs/source/ko/_toctree.yml @@ -3,191 +3,46 @@ title: "๐Ÿงจ Diffusers" - local: quicktour title: "ํ›‘์–ด๋ณด๊ธฐ" + - local: in_translation + title: Stable Diffusion - local: installation title: "์„ค์น˜" title: "์‹œ์ž‘ํ•˜๊ธฐ" + - sections: - sections: - local: in_translation - title: "Loading Pipelines, Models, and Schedulers" - - local: in_translation - title: "Using different Schedulers" - - local: in_translation - title: "Configuring Pipelines, Models, and Schedulers" - - local: in_translation - title: "Loading and Adding Custom Pipelines" - title: "๋ถˆ๋Ÿฌ์˜ค๊ธฐ & ํ—ˆ๋ธŒ (๋ฒˆ์—ญ ์˜ˆ์ •)" - - sections: - - local: in_translation - title: "Unconditional Image Generation" - - local: in_translation - title: "Text-to-Image Generation" - - local: in_translation - title: "Text-Guided Image-to-Image" + title: ๊ฐœ์š” - local: in_translation - title: "Text-Guided Image-Inpainting" + title: Unconditional ์ด๋ฏธ์ง€ ์ƒ์„ฑ - local: in_translation - title: "Text-Guided Depth-to-Image" + title: Textual Inversion + - local: training/dreambooth + title: DreamBooth + - local: training/text2image + title: Text-to-image + - local: training/lora + title: Low-Rank Adaptation of Large Language Models (LoRA) - local: in_translation - title: "Reusing seeds for deterministic generation" + title: ControlNet - local: in_translation - title: "Community Pipelines" - - local: in_translation - title: "How to contribute a Pipeline" - title: "์ถ”๋ก ์„ ์œ„ํ•œ ํŒŒ์ดํ”„๋ผ์ธ (๋ฒˆ์—ญ ์˜ˆ์ •)" - - sections: - - local: in_translation - title: "Reinforcement Learning" - - local: in_translation - title: "Audio" - - local: in_translation - title: "Other Modalities" - title: "Taking Diffusers Beyond Images" - title: "Diffusers ์‚ฌ์šฉ๋ฒ• (๋ฒˆ์—ญ ์˜ˆ์ •)" -- sections: - - local: in_translation - title: "Memory and Speed" - - local: in_translation - title: 
"xFormers" - - local: in_translation - title: "ONNX" - - local: in_translation - title: "OpenVINO" - - local: in_translation - title: "MPS" - - local: in_translation - title: "Habana Gaudi" - title: "์ตœ์ ํ™”/ํŠน์ˆ˜ ํ•˜๋“œ์›จ์–ด (๋ฒˆ์—ญ ์˜ˆ์ •)" -- sections: - - local: in_translation - title: "Overview" - - local: in_translation - title: "Unconditional Image Generation" - - local: in_translation - title: "Textual Inversion" - - local: in_translation - title: "Dreambooth" - - local: in_translation - title: "Text-to-image fine-tuning" - title: "ํ•™์Šต (๋ฒˆ์—ญ ์˜ˆ์ •)" + title: InstructPix2Pix ํ•™์Šต + title: ํ•™์Šต - sections: - local: in_translation - title: "Stable Diffusion" - - local: in_translation - title: "Philosophy" - - local: in_translation - title: "How to contribute?" - title: "๊ฐœ๋… ์„ค๋ช… (๋ฒˆ์—ญ ์˜ˆ์ •)" -- sections: - - sections: - - local: in_translation - title: "Models" - - local: in_translation - title: "Diffusion Pipeline" - - local: in_translation - title: "Logging" - - local: in_translation - title: "Configuration" - - local: in_translation - title: "Outputs" - title: "Main Classes" - - - sections: - - local: in_translation - title: "Overview" - - local: in_translation - title: "AltDiffusion" - - local: in_translation - title: "Cycle Diffusion" - - local: in_translation - title: "DDIM" - - local: in_translation - title: "DDPM" - - local: in_translation - title: "Latent Diffusion" - - local: in_translation - title: "Unconditional Latent Diffusion" - - local: in_translation - title: "PaintByExample" - - local: in_translation - title: "PNDM" - - local: in_translation - title: "Score SDE VE" - - sections: - - local: in_translation - title: "Overview" - - local: in_translation - title: "Text-to-Image" - - local: in_translation - title: "Image-to-Image" - - local: in_translation - title: "Inpaint" - - local: in_translation - title: "Depth-to-Image" - - local: in_translation - title: "Image-Variation" - - local: in_translation - title: 
"Super-Resolution" - title: "Stable Diffusion" - - local: in_translation - title: "Stable Diffusion 2" - - local: in_translation - title: "Safe Stable Diffusion" - - local: in_translation - title: "Stochastic Karras VE" - - local: in_translation - title: "Dance Diffusion" - - local: in_translation - title: "UnCLIP" - - local: in_translation - title: "Versatile Diffusion" - - local: in_translation - title: "VQ Diffusion" - - local: in_translation - title: "RePaint" - - local: in_translation - title: "Audio Diffusion" - title: "ํŒŒ์ดํ”„๋ผ์ธ (๋ฒˆ์—ญ ์˜ˆ์ •)" - - sections: - - local: in_translation - title: "Overview" - - local: in_translation - title: "DDIM" - - local: in_translation - title: "DDPM" - - local: in_translation - title: "Singlestep DPM-Solver" - - local: in_translation - title: "Multistep DPM-Solver" - - local: in_translation - title: "Heun Scheduler" - - local: in_translation - title: "DPM Discrete Scheduler" - - local: in_translation - title: "DPM Discrete Scheduler with ancestral sampling" - - local: in_translation - title: "Stochastic Kerras VE" - - local: in_translation - title: "Linear Multistep" - - local: in_translation - title: "PNDM" - - local: in_translation - title: "VE-SDE" - - local: in_translation - title: "IPNDM" - - local: in_translation - title: "VP-SDE" - - local: in_translation - title: "Euler scheduler" - - local: in_translation - title: "Euler Ancestral Scheduler" - - local: in_translation - title: "VQDiffusionScheduler" - - local: in_translation - title: "RePaint Scheduler" - title: "์Šค์ผ€์ค„๋Ÿฌ (๋ฒˆ์—ญ ์˜ˆ์ •)" - - sections: - - local: in_translation - title: "RL Planning" - title: "Experimental Features" - title: "API (๋ฒˆ์—ญ ์˜ˆ์ •)" + title: ๊ฐœ์š” + - local: optimization/fp16 + title: ๋ฉ”๋ชจ๋ฆฌ์™€ ์†๋„ + - local: in_translation + title: Torch2.0 ์ง€์› + - local: optimization/xformers + title: xFormers + - local: optimization/onnx + title: ONNX + - local: optimization/open_vino + title: OpenVINO + - local: 
optimization/mps + title: MPS + - local: optimization/habana + title: Habana Gaudi + title: ์ตœ์ ํ™”/ํŠน์ˆ˜ ํ•˜๋“œ์›จ์–ด \ No newline at end of file diff --git a/docs/source/ko/optimization/fp16.mdx b/docs/source/ko/optimization/fp16.mdx new file mode 100644 index 000000000000..593860581be3 --- /dev/null +++ b/docs/source/ko/optimization/fp16.mdx @@ -0,0 +1,410 @@ + + +# ๋ฉ”๋ชจ๋ฆฌ์™€ ์†๋„ + +๋ฉ”๋ชจ๋ฆฌ ๋˜๋Š” ์†๋„์— ๋Œ€ํ•ด ๐Ÿค— Diffusers *์ถ”๋ก *์„ ์ตœ์ ํ™”ํ•˜๊ธฐ ์œ„ํ•œ ๋ช‡ ๊ฐ€์ง€ ๊ธฐ์ˆ ๊ณผ ์•„์ด๋””์–ด๋ฅผ ์ œ์‹œํ•ฉ๋‹ˆ๋‹ค. +์ผ๋ฐ˜์ ์œผ๋กœ, memory-efficient attention์„ ์œ„ํ•ด [xFormers](https://github.com/facebookresearch/xformers) ์‚ฌ์šฉ์„ ์ถ”์ฒœํ•˜๊ธฐ ๋•Œ๋ฌธ์—, ์ถ”์ฒœํ•˜๋Š” [์„ค์น˜ ๋ฐฉ๋ฒ•](xformers)์„ ๋ณด๊ณ  ์„ค์น˜ํ•ด ๋ณด์„ธ์š”. + +๋‹ค์Œ ์„ค์ •์ด ์„ฑ๋Šฅ๊ณผ ๋ฉ”๋ชจ๋ฆฌ์— ๋ฏธ์น˜๋Š” ์˜ํ–ฅ์— ๋Œ€ํ•ด ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค. + +| | ์ง€์—ฐ์‹œ๊ฐ„ | ์†๋„ ํ–ฅ์ƒ | +| ---------------- | ------- | ------- | +| ๋ณ„๋„ ์„ค์ • ์—†์Œ | 9.50s | x1 | +| cuDNN auto-tuner | 9.37s | x1.01 | +| fp16 | 3.61s | x2.63 | +| Channels Last ๋ฉ”๋ชจ๋ฆฌ ํ˜•์‹ | 3.30s | x2.88 | +| traced UNet | 3.21s | x2.96 | +| memory-efficient attention | 2.63s | x3.61 | + + + NVIDIA TITAN RTX์—์„œ 50 DDIM ์Šคํ…์˜ "a photo of an astronaut riding a horse on mars" ํ”„๋กฌํ”„ํŠธ๋กœ 512x512 ํฌ๊ธฐ์˜ ๋‹จ์ผ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•˜์˜€์Šต๋‹ˆ๋‹ค. + + +## cuDNN auto-tuner ํ™œ์„ฑํ™”ํ•˜๊ธฐ + +[NVIDIA cuDNN](https://developer.nvidia.com/cudnn)์€ ์ปจ๋ณผ๋ฃจ์…˜์„ ๊ณ„์‚ฐํ•˜๋Š” ๋งŽ์€ ์•Œ๊ณ ๋ฆฌ์ฆ˜์„ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. Autotuner๋Š” ์งง์€ ๋ฒค์น˜๋งˆํฌ๋ฅผ ์‹คํ–‰ํ•˜๊ณ  ์ฃผ์–ด์ง„ ์ž…๋ ฅ ํฌ๊ธฐ์— ๋Œ€ํ•ด ์ฃผ์–ด์ง„ ํ•˜๋“œ์›จ์–ด์—์„œ ์ตœ๊ณ ์˜ ์„ฑ๋Šฅ์„ ๊ฐ€์ง„ ์ปค๋„์„ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค. 
+ +**์ปจ๋ณผ๋ฃจ์…˜ ๋„คํŠธ์›Œํฌ**๋ฅผ ํ™œ์šฉํ•˜๊ณ  ์žˆ๊ธฐ ๋•Œ๋ฌธ์— (๋‹ค๋ฅธ ์œ ํ˜•๋“ค์€ ํ˜„์žฌ ์ง€์›๋˜์ง€ ์•Š์Œ), ๋‹ค์Œ ์„ค์ •์„ ํ†ตํ•ด ์ถ”๋ก  ์ „์— cuDNN autotuner๋ฅผ ํ™œ์„ฑํ™”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```python +import torch + +torch.backends.cudnn.benchmark = True +``` + +### fp32 ๋Œ€์‹  tf32 ์‚ฌ์šฉํ•˜๊ธฐ (Ampere ๋ฐ ์ดํ›„ CUDA ์žฅ์น˜๋“ค์—์„œ) + +Ampere ๋ฐ ์ดํ›„ CUDA ์žฅ์น˜์—์„œ ํ–‰๋ ฌ๊ณฑ ๋ฐ ์ปจ๋ณผ๋ฃจ์…˜์€ TensorFloat32(TF32) ๋ชจ๋“œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋” ๋น ๋ฅด์ง€๋งŒ ์•ฝ๊ฐ„ ๋œ ์ •ํ™•ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +๊ธฐ๋ณธ์ ์œผ๋กœ PyTorch๋Š” ์ปจ๋ณผ๋ฃจ์…˜์— ๋Œ€ํ•ด TF32 ๋ชจ๋“œ๋ฅผ ํ™œ์„ฑํ™”ํ•˜์ง€๋งŒ ํ–‰๋ ฌ ๊ณฑ์…ˆ์€ ํ™œ์„ฑํ™”ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. +๋„คํŠธ์›Œํฌ์— ์™„์ „ํ•œ float32 ์ •๋ฐ€๋„๊ฐ€ ํ•„์š”ํ•œ ๊ฒฝ์šฐ๊ฐ€ ์•„๋‹ˆ๋ฉด ํ–‰๋ ฌ ๊ณฑ์…ˆ์— ๋Œ€ํ•ด์„œ๋„ ์ด ์„ค์ •์„ ํ™œ์„ฑํ™”ํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. +์ด๋Š” ์ผ๋ฐ˜์ ์œผ๋กœ ๋ฌด์‹œํ•  ์ˆ˜ ์žˆ๋Š” ์ˆ˜์น˜์˜ ์ •ํ™•๋„ ์†์‹ค์ด ์žˆ์ง€๋งŒ, ๊ณ„์‚ฐ ์†๋„๋ฅผ ํฌ๊ฒŒ ๋†’์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +๊ทธ๊ฒƒ์— ๋Œ€ํ•ด [์—ฌ๊ธฐ](https://huggingface.co/docs/transformers/v4.18.0/en/performance#tf32)์„œ ๋” ์ฝ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +์ถ”๋ก ํ•˜๊ธฐ ์ „์— ๋‹ค์Œ์„ ์ถ”๊ฐ€ํ•˜๊ธฐ๋งŒ ํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค: + +```python +import torch + +torch.backends.cuda.matmul.allow_tf32 = True +``` + +## ๋ฐ˜์ •๋ฐ€๋„ ๊ฐ€์ค‘์น˜ + +๋” ๋งŽ์€ GPU ๋ฉ”๋ชจ๋ฆฌ๋ฅผ ์ ˆ์•ฝํ•˜๊ณ  ๋” ๋น ๋ฅธ ์†๋„๋ฅผ ์–ป๊ธฐ ์œ„ํ•ด ๋ชจ๋ธ ๊ฐ€์ค‘์น˜๋ฅผ ๋ฐ˜์ •๋ฐ€๋„(half precision)๋กœ ์ง์ ‘ ๋กœ๋“œํ•˜๊ณ  ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +์—ฌ๊ธฐ์—๋Š” `fp16`์ด๋ผ๋Š” ๋ธŒ๋žœ์น˜์— ์ €์žฅ๋œ float16 ๋ฒ„์ „์˜ ๊ฐ€์ค‘์น˜๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๊ณ , ๊ทธ ๋•Œ `float16` ์œ ํ˜•์„ ์‚ฌ์šฉํ•˜๋„๋ก PyTorch์— ์ง€์‹œํ•˜๋Š” ์ž‘์—…์ด ํฌํ•จ๋ฉ๋‹ˆ๋‹ค. 
+ +```Python +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + + torch_dtype=torch.float16, +) +pipe = pipe.to("cuda") + +prompt = "a photo of an astronaut riding a horse on mars" +image = pipe(prompt).images[0] +``` + + + ์–ด๋–ค ํŒŒ์ดํ”„๋ผ์ธ์—์„œ๋„ [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) ๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์€ ๊ฒ€์€์ƒ‰ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์žˆ๊ณ , ์ˆœ์ˆ˜ํ•œ float16 ์ •๋ฐ€๋„๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ๋ณด๋‹ค ํ•ญ์ƒ ๋Š๋ฆฌ๊ธฐ ๋•Œ๋ฌธ์— ์‚ฌ์šฉํ•˜์ง€ ์•Š๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + +## ์ถ”๊ฐ€ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•œ ์Šฌ๋ผ์ด์Šค ์–ดํ…์…˜ + +์ถ”๊ฐ€ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•ด, ํ•œ ๋ฒˆ์— ๋ชจ๋‘ ๊ณ„์‚ฐํ•˜๋Š” ๋Œ€์‹  ๋‹จ๊ณ„์ ์œผ๋กœ ๊ณ„์‚ฐ์„ ์ˆ˜ํ–‰ํ•˜๋Š” ์Šฌ๋ผ์ด์Šค ๋ฒ„์ „์˜ ์–ดํ…์…˜(attention)์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + + Attention slicing์€ ๋ชจ๋ธ์ด ํ•˜๋‚˜ ์ด์ƒ์˜ ์–ดํ…์…˜ ํ—ค๋“œ๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ํ•œ, ๋ฐฐ์น˜ ํฌ๊ธฐ๊ฐ€ 1์ธ ๊ฒฝ์šฐ์—๋„ ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค. + ํ•˜๋‚˜ ์ด์ƒ์˜ ์–ดํ…์…˜ ํ—ค๋“œ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ *QK^T* ์–ดํ…์…˜ ๋งคํŠธ๋ฆญ์Šค๋Š” ์ƒ๋‹นํ•œ ์–‘์˜ ๋ฉ”๋ชจ๋ฆฌ๋ฅผ ์ ˆ์•ฝํ•  ์ˆ˜ ์žˆ๋Š” ๊ฐ ํ—ค๋“œ์— ๋Œ€ํ•ด ์ˆœ์ฐจ์ ์œผ๋กœ ๊ณ„์‚ฐ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + +๊ฐ ํ—ค๋“œ์— ๋Œ€ํ•ด ์ˆœ์ฐจ์ ์œผ๋กœ ์–ดํ…์…˜ ๊ณ„์‚ฐ์„ ์ˆ˜ํ–‰ํ•˜๋ ค๋ฉด, ๋‹ค์Œ๊ณผ ๊ฐ™์ด ์ถ”๋ก  ์ „์— ํŒŒ์ดํ”„๋ผ์ธ์—์„œ [`~StableDiffusionPipeline.enable_attention_slicing`]๋ฅผ ํ˜ธ์ถœํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค: + +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + + torch_dtype=torch.float16, +) +pipe = pipe.to("cuda") + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_attention_slicing() +image = pipe(prompt).images[0] +``` + +์ถ”๋ก  ์‹œ๊ฐ„์ด ์•ฝ 10% ๋Š๋ ค์ง€๋Š” ์•ฝ๊ฐ„์˜ ์„ฑ๋Šฅ ์ €ํ•˜๊ฐ€ ์žˆ์ง€๋งŒ ์ด ๋ฐฉ๋ฒ•์„ ์‚ฌ์šฉํ•˜๋ฉด 3.2GB ์ •๋„์˜ ์ž‘์€ VRAM์œผ๋กœ๋„ Stable Diffusion์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค! 
+ + +## ๋” ํฐ ๋ฐฐ์น˜๋ฅผ ์œ„ํ•œ sliced VAE ๋””์ฝ”๋“œ + +์ œํ•œ๋œ VRAM์—์„œ ๋Œ€๊ทœ๋ชจ ์ด๋ฏธ์ง€ ๋ฐฐ์น˜๋ฅผ ๋””์ฝ”๋”ฉํ•˜๊ฑฐ๋‚˜ 32๊ฐœ ์ด์ƒ์˜ ์ด๋ฏธ์ง€๊ฐ€ ํฌํ•จ๋œ ๋ฐฐ์น˜๋ฅผ ํ™œ์„ฑํ™”ํ•˜๊ธฐ ์œ„ํ•ด, ๋ฐฐ์น˜์˜ latent ์ด๋ฏธ์ง€๋ฅผ ํ•œ ๋ฒˆ์— ํ•˜๋‚˜์”ฉ ๋””์ฝ”๋”ฉํ•˜๋Š” ์Šฌ๋ผ์ด์Šค VAE ๋””์ฝ”๋“œ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +์ด๋ฅผ [`~StableDiffusionPipeline.enable_attention_slicing`] ๋˜๋Š” [`~StableDiffusionPipeline.enable_xformers_memory_efficient_attention`]๊ณผ ๊ฒฐํ•ฉํ•˜์—ฌ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ์„ ์ถ”๊ฐ€๋กœ ์ตœ์†Œํ™”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +VAE ๋””์ฝ”๋“œ๋ฅผ ํ•œ ๋ฒˆ์— ํ•˜๋‚˜์”ฉ ์ˆ˜ํ–‰ํ•˜๋ ค๋ฉด ์ถ”๋ก  ์ „์— ํŒŒ์ดํ”„๋ผ์ธ์—์„œ [`~StableDiffusionPipeline.enable_vae_slicing`]์„ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ๋ฅผ ๋“ค์–ด: + +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + + torch_dtype=torch.float16, +) +pipe = pipe.to("cuda") + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_vae_slicing() +images = pipe([prompt] * 32).images +``` + +๋‹ค์ค‘ ์ด๋ฏธ์ง€ ๋ฐฐ์น˜์—์„œ VAE ๋””์ฝ”๋“œ๊ฐ€ ์•ฝ๊ฐ„์˜ ์„ฑ๋Šฅ ํ–ฅ์ƒ์ด ์ด๋ฃจ์–ด์ง‘๋‹ˆ๋‹ค. ๋‹จ์ผ ์ด๋ฏธ์ง€ ๋ฐฐ์น˜์—์„œ๋Š” ์„ฑ๋Šฅ ์˜ํ–ฅ์€ ์—†์Šต๋‹ˆ๋‹ค. + + + +## ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•ด ๊ฐ€์† ๊ธฐ๋Šฅ์„ ์‚ฌ์šฉํ•˜์—ฌ CPU๋กœ ์˜คํ”„๋กœ๋”ฉ + +์ถ”๊ฐ€ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•ด ๊ฐ€์ค‘์น˜๋ฅผ CPU๋กœ ์˜คํ”„๋กœ๋“œํ•˜๊ณ  ์ˆœ๋ฐฉํ–ฅ ์ „๋‹ฌ์„ ์ˆ˜ํ–‰ํ•  ๋•Œ๋งŒ GPU๋กœ ๋กœ๋“œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ +CPU ์˜คํ”„๋กœ๋”ฉ์„ ์ˆ˜ํ–‰ํ•˜๋ ค๋ฉด [`~StableDiffusionPipeline.enable_sequential_cpu_offload`]๋ฅผ ํ˜ธ์ถœํ•˜๊ธฐ๋งŒ ํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค: + +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + + torch_dtype=torch.float16, +) + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_sequential_cpu_offload() +image = pipe(prompt).images[0] +``` + +๊ทธ๋Ÿฌ๋ฉด ๋ฉ”๋ชจ๋ฆฌ ์†Œ๋น„๋ฅผ 3GB ๋ฏธ๋งŒ์œผ๋กœ ์ค„์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +์ฐธ๊ณ ๋กœ ์ด ๋ฐฉ๋ฒ•์€ ์ „์ฒด ๋ชจ๋ธ์ด ์•„๋‹Œ ์„œ๋ธŒ๋ชจ๋“ˆ ์ˆ˜์ค€์—์„œ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค. ์ด๋Š” ๋ฉ”๋ชจ๋ฆฌ ์†Œ๋น„๋ฅผ ์ตœ์†Œํ™”ํ•˜๋Š” ๊ฐ€์žฅ ์ข‹์€ ๋ฐฉ๋ฒ•์ด์ง€๋งŒ ํ”„๋กœ์„ธ์Šค์˜ ๋ฐ˜๋ณต์  ํŠน์„ฑ์œผ๋กœ ์ธํ•ด ์ถ”๋ก  ์†๋„๊ฐ€ ํ›จ์”ฌ ๋Š๋ฆฝ๋‹ˆ๋‹ค. ํŒŒ์ดํ”„๋ผ์ธ์˜ UNet ๊ตฌ์„ฑ ์š”์†Œ๋Š” ์—ฌ๋Ÿฌ ๋ฒˆ ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค('num_inference_steps' ๋งŒํผ). ๋งค๋ฒˆ UNet์˜ ์„œ๋กœ ๋‹ค๋ฅธ ์„œ๋ธŒ๋ชจ๋“ˆ์ด ์ˆœ์ฐจ์ ์œผ๋กœ ์˜จ๋กœ๋“œ๋œ ๋‹ค์Œ ํ•„์š”์— ๋”ฐ๋ผ ์˜คํ”„๋กœ๋“œ๋˜๋ฏ€๋กœ ๋ฉ”๋ชจ๋ฆฌ ์ด๋™ ํšŸ์ˆ˜๊ฐ€ ๋งŽ์Šต๋‹ˆ๋‹ค. + + +๋˜ ๋‹ค๋ฅธ ์ตœ์ ํ™” ๋ฐฉ๋ฒ•์ธ ๋ชจ๋ธ ์˜คํ”„๋กœ๋”ฉ์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์„ ๊ณ ๋ คํ•˜์‹ญ์‹œ์˜ค. ์ด๋Š” ํ›จ์”ฌ ๋น ๋ฅด์ง€๋งŒ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์ด ํฌ์ง€๋Š” ์•Š์Šต๋‹ˆ๋‹ค. + + +๋˜ํ•œ ttention slicing๊ณผ ์—ฐ๊ฒฐํ•ด์„œ ์ตœ์†Œ ๋ฉ”๋ชจ๋ฆฌ(< 2GB)๋กœ๋„ ๋™์ž‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + + torch_dtype=torch.float16, +) + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_sequential_cpu_offload() +pipe.enable_attention_slicing(1) + +image = pipe(prompt).images[0] +``` + +**์ฐธ๊ณ **: 'enable_sequential_cpu_offload()'๋ฅผ ์‚ฌ์šฉํ•  ๋•Œ, ๋ฏธ๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ์„ CUDA๋กœ ์ด๋™ํ•˜์ง€ **์•Š๋Š”** ๊ฒƒ์ด ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.๊ทธ๋ ‡์ง€ ์•Š์œผ๋ฉด ๋ฉ”๋ชจ๋ฆฌ ์†Œ๋น„์˜ ์ด๋“์ด ์ตœ์†Œํ™”๋ฉ๋‹ˆ๋‹ค. 
๋” ๋งŽ์€ ์ •๋ณด๋ฅผ ์œ„ํ•ด [์ด ์ด์Šˆ](https://github.com/huggingface/diffusers/issues/1934)๋ฅผ ๋ณด์„ธ์š”. + + +## ๋น ๋ฅธ ์ถ”๋ก ๊ณผ ๋ฉ”๋ชจ๋ฆฌ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•œ ๋ชจ๋ธ ์˜คํ”„๋กœ๋”ฉ + +[์ˆœ์ฐจ์  CPU ์˜คํ”„๋กœ๋”ฉ](#sequential_offloading)์€ ์ด์ „ ์„น์…˜์—์„œ ์„ค๋ช…ํ•œ ๊ฒƒ์ฒ˜๋Ÿผ ๋งŽ์€ ๋ฉ”๋ชจ๋ฆฌ๋ฅผ ๋ณด์กดํ•˜์ง€๋งŒ ํ•„์š”์— ๋”ฐ๋ผ ์„œ๋ธŒ๋ชจ๋“ˆ์„ GPU๋กœ ์ด๋™ํ•˜๊ณ  ์ƒˆ ๋ชจ๋“ˆ์ด ์‹คํ–‰๋  ๋•Œ ์ฆ‰์‹œ CPU๋กœ ๋ฐ˜ํ™˜๋˜๊ธฐ ๋•Œ๋ฌธ์— ์ถ”๋ก  ์†๋„๊ฐ€ ๋Š๋ ค์ง‘๋‹ˆ๋‹ค. + +์ „์ฒด ๋ชจ๋ธ ์˜คํ”„๋กœ๋”ฉ์€ ๊ฐ ๋ชจ๋ธ์˜ ๊ตฌ์„ฑ ์š”์†Œ์ธ _modules_์„ ์ฒ˜๋ฆฌํ•˜๋Š” ๋Œ€์‹ , ์ „์ฒด ๋ชจ๋ธ์„ GPU๋กœ ์ด๋™ํ•˜๋Š” ๋Œ€์•ˆ์ž…๋‹ˆ๋‹ค. ์ด๋กœ ์ธํ•ด ์ถ”๋ก  ์‹œ๊ฐ„์— ๋ฏธ์น˜๋Š” ์˜ํ–ฅ์€ ๋ฏธ๋ฏธํ•˜์ง€๋งŒ(ํŒŒ์ดํ”„๋ผ์ธ์„ 'cuda'๋กœ ์ด๋™ํ•˜๋Š” ๊ฒƒ๊ณผ ๋น„๊ตํ•˜์—ฌ) ์—ฌ์ „ํžˆ ์•ฝ๊ฐ„์˜ ๋ฉ”๋ชจ๋ฆฌ๋ฅผ ์ ˆ์•ฝํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +์ด ์‹œ๋‚˜๋ฆฌ์˜ค์—์„œ๋Š” ํŒŒ์ดํ”„๋ผ์ธ์˜ ์ฃผ์š” ๊ตฌ์„ฑ ์š”์†Œ ์ค‘ ํ•˜๋‚˜๋งŒ(์ผ๋ฐ˜์ ์œผ๋กœ ํ…์ŠคํŠธ ์ธ์ฝ”๋”, unet ๋ฐ vae) GPU์— ์žˆ๊ณ , ๋‚˜๋จธ์ง€๋Š” CPU์—์„œ ๋Œ€๊ธฐํ•  ๊ฒƒ์ž…๋‹ˆ๋‹ค. +์—ฌ๋Ÿฌ ๋ฐ˜๋ณต์„ ์œ„ํ•ด ์‹คํ–‰๋˜๋Š” UNet๊ณผ ๊ฐ™์€ ๊ตฌ์„ฑ ์š”์†Œ๋Š” ๋” ์ด์ƒ ํ•„์š”ํ•˜์ง€ ์•Š์„ ๋•Œ๊นŒ์ง€ GPU์— ๋‚จ์•„ ์žˆ์Šต๋‹ˆ๋‹ค. + +์ด ๊ธฐ๋Šฅ์€ ์•„๋ž˜์™€ ๊ฐ™์ด ํŒŒ์ดํ”„๋ผ์ธ์—์„œ `enable_model_cpu_offload()`๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ํ™œ์„ฑํ™”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16, +) + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_model_cpu_offload() +image = pipe(prompt).images[0] +``` + +์ด๋Š” ์ถ”๊ฐ€์ ์ธ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ์„ ์œ„ํ•œ attention slicing๊ณผ๋„ ํ˜ธํ™˜๋ฉ๋‹ˆ๋‹ค. 
+ +```Python +import torch +from diffusers import StableDiffusionPipeline + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16, +) + +prompt = "a photo of an astronaut riding a horse on mars" +pipe.enable_model_cpu_offload() +pipe.enable_attention_slicing(1) + +image = pipe(prompt).images[0] +``` + + +์ด ๊ธฐ๋Šฅ์„ ์‚ฌ์šฉํ•˜๋ ค๋ฉด 'accelerate' ๋ฒ„์ „ 0.17.0 ์ด์ƒ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. + + +## Channels Last ๋ฉ”๋ชจ๋ฆฌ ํ˜•์‹ ์‚ฌ์šฉํ•˜๊ธฐ + +Channels Last ๋ฉ”๋ชจ๋ฆฌ ํ˜•์‹์€ ์ฐจ์› ์ˆœ์„œ๋ฅผ ๋ณด์กดํ•˜๋Š” ๋ฉ”๋ชจ๋ฆฌ์—์„œ NCHW ํ…์„œ ๋ฐฐ์—ด์„ ๋Œ€์ฒดํ•˜๋Š” ๋ฐฉ๋ฒ•์ž…๋‹ˆ๋‹ค. +Channels Last ํ…์„œ๋Š” ์ฑ„๋„์ด ๊ฐ€์žฅ ์กฐ๋ฐ€ํ•œ ์ฐจ์›์ด ๋˜๋Š” ๋ฐฉ์‹์œผ๋กœ ์ •๋ ฌ๋ฉ๋‹ˆ๋‹ค(์ผ๋ช… ํ”ฝ์…€๋‹น ์ด๋ฏธ์ง€๋ฅผ ์ €์žฅ). +ํ˜„์žฌ ๋ชจ๋“  ์—ฐ์‚ฐ์ž Channels Last ํ˜•์‹์„ ์ง€์›ํ•˜๋Š” ๊ฒƒ์€ ์•„๋‹ˆ๋ผ ์„ฑ๋Šฅ์ด ์ €ํ•˜๋  ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ, ์‚ฌ์šฉํ•ด๋ณด๊ณ  ๋ชจ๋ธ์— ์ž˜ ์ž‘๋™ํ•˜๋Š”์ง€ ํ™•์ธํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + +์˜ˆ๋ฅผ ๋“ค์–ด ํŒŒ์ดํ”„๋ผ์ธ์˜ UNet ๋ชจ๋ธ์ด channels Last ํ˜•์‹์„ ์‚ฌ์šฉํ•˜๋„๋ก ์„ค์ •ํ•˜๋ ค๋ฉด ๋‹ค์Œ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```python +print(pipe.unet.conv_out.state_dict()["weight"].stride()) # (2880, 9, 3, 1) +pipe.unet.to(memory_format=torch.channels_last) # in-place ์—ฐ์‚ฐ +# 2๋ฒˆ์งธ ์ฐจ์›์—์„œ ์ŠคํŠธ๋ผ์ด๋“œ 1์„ ๊ฐ€์ง€๋Š” (2880, 1, 960, 320)๋กœ, ์—ฐ์‚ฐ์ด ์ž‘๋™ํ•จ์„ ์ฆ๋ช…ํ•ฉ๋‹ˆ๋‹ค. +print(pipe.unet.conv_out.state_dict()["weight"].stride()) +``` + +## ์ถ”์ (tracing) + +์ถ”์ ์€ ๋ชจ๋ธ์„ ํ†ตํ•ด ์˜ˆ์ œ ์ž…๋ ฅ ํ…์„œ๋ฅผ ํ†ตํ•ด ์‹คํ–‰๋˜๋Š”๋ฐ, ํ•ด๋‹น ์ž…๋ ฅ์ด ๋ชจ๋ธ์˜ ๋ ˆ์ด์–ด๋ฅผ ํ†ต๊ณผํ•  ๋•Œ ํ˜ธ์ถœ๋˜๋Š” ์ž‘์—…์„ ์บก์ฒ˜ํ•˜์—ฌ ์‹คํ–‰ ํŒŒ์ผ ๋˜๋Š” 'ScriptFunction'์ด ๋ฐ˜ํ™˜๋˜๋„๋ก ํ•˜๊ณ , ์ด๋Š” just-in-time ์ปดํŒŒ์ผ๋กœ ์ตœ์ ํ™”๋ฉ๋‹ˆ๋‹ค. 
+ +UNet ๋ชจ๋ธ์„ ์ถ”์ ํ•˜๊ธฐ ์œ„ํ•ด ๋‹ค์Œ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```python +import time +import torch +from diffusers import StableDiffusionPipeline +import functools + +# torch ๊ธฐ์šธ๊ธฐ ๋น„ํ™œ์„ฑํ™” +torch.set_grad_enabled(False) + +# ๋ณ€์ˆ˜ ์„ค์ • +n_experiments = 2 +unet_runs_per_experiment = 50 + + +# ์ž…๋ ฅ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ +def generate_inputs(): + sample = torch.randn(2, 4, 64, 64).half().cuda() + timestep = torch.rand(1).half().cuda() * 999 + encoder_hidden_states = torch.randn(2, 77, 768).half().cuda() + return sample, timestep, encoder_hidden_states + + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16, +).to("cuda") +unet = pipe.unet +unet.eval() +unet.to(memory_format=torch.channels_last) # Channels Last ๋ฉ”๋ชจ๋ฆฌ ํ˜•์‹ ์‚ฌ์šฉ +unet.forward = functools.partial(unet.forward, return_dict=False) # return_dict=False์„ ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์„ค์ • + +# ์›Œ๋ฐ์—… +for _ in range(3): + with torch.inference_mode(): + inputs = generate_inputs() + orig_output = unet(*inputs) + +# ์ถ”์  +print("tracing..") +unet_traced = torch.jit.trace(unet, inputs) +unet_traced.eval() +print("done tracing") + + +# ์›Œ๋ฐ์—… ๋ฐ ๊ทธ๋ž˜ํ”„ ์ตœ์ ํ™” +for _ in range(5): + with torch.inference_mode(): + inputs = generate_inputs() + orig_output = unet_traced(*inputs) + + +# ๋ฒค์น˜๋งˆํ‚น +with torch.inference_mode(): + for _ in range(n_experiments): + torch.cuda.synchronize() + start_time = time.time() + for _ in range(unet_runs_per_experiment): + orig_output = unet_traced(*inputs) + torch.cuda.synchronize() + print(f"unet traced inference took {time.time() - start_time:.2f} seconds") + for _ in range(n_experiments): + torch.cuda.synchronize() + start_time = time.time() + for _ in range(unet_runs_per_experiment): + orig_output = unet(*inputs) + torch.cuda.synchronize() + print(f"unet inference took {time.time() - start_time:.2f} seconds") + +# ๋ชจ๋ธ ์ €์žฅ +unet_traced.save("unet_traced.pt") +``` + +๊ทธ ๋‹ค์Œ, 
ํŒŒ์ดํ”„๋ผ์ธ์˜ `unet` ํŠน์„ฑ์„ ๋‹ค์Œ๊ณผ ๊ฐ™์ด ์ถ”์ ๋œ ๋ชจ๋ธ๋กœ ๋ฐ”๊ฟ€ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +```python +from diffusers import StableDiffusionPipeline +import torch +from dataclasses import dataclass + + +@dataclass +class UNet2DConditionOutput: + sample: torch.FloatTensor + + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16, +).to("cuda") + +# jitted unet ์‚ฌ์šฉ +unet_traced = torch.jit.load("unet_traced.pt") + + +# pipe.unet ์‚ญ์ œ +class TracedUNet(torch.nn.Module): + def __init__(self): + super().__init__() + self.in_channels = pipe.unet.in_channels + self.device = pipe.unet.device + + def forward(self, latent_model_input, t, encoder_hidden_states): + sample = unet_traced(latent_model_input, t, encoder_hidden_states)[0] + return UNet2DConditionOutput(sample=sample) + + +pipe.unet = TracedUNet() + +with torch.inference_mode(): + image = pipe([prompt] * 1, num_inference_steps=50).images[0] +``` + + +## Memory-efficient attention + +์–ดํ…์…˜ ๋ธ”๋ก์˜ ๋Œ€์—ญํญ์„ ์ตœ์ ํ™”ํ•˜๋Š” ์ตœ๊ทผ ์ž‘์—…์œผ๋กœ GPU ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์ด ํฌ๊ฒŒ ํ–ฅ์ƒ๋˜๊ณ  ํ–ฅ์ƒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. +@tridao์˜ ๊ฐ€์žฅ ์ตœ๊ทผ์˜ ํ”Œ๋ž˜์‹œ ์–ดํ…์…˜: [code](https://github.com/HazyResearch/flash-attention), [paper](https://arxiv.org/pdf/2205.14135.pdf). 
+ +๋ฐฐ์น˜ ํฌ๊ธฐ 1(ํ”„๋กฌํ”„ํŠธ 1๊ฐœ)์˜ 512x512 ํฌ๊ธฐ๋กœ ์ถ”๋ก ์„ ์‹คํ–‰ํ•  ๋•Œ ๋ช‡ ๊ฐ€์ง€ Nvidia GPU์—์„œ ์–ป์€ ์†๋„ ํ–ฅ์ƒ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: + +| GPU | ๊ธฐ์ค€ ์–ดํ…์…˜ FP16 | ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์ ์ธ ์–ดํ…์…˜ FP16 | +|------------------ |--------------------- |--------------------------------- | +| NVIDIA Tesla T4 | 3.5it/s | 5.5it/s | +| NVIDIA 3060 RTX | 4.6it/s | 7.8it/s | +| NVIDIA A10G | 8.88it/s | 15.6it/s | +| NVIDIA RTX A6000 | 11.7it/s | 21.09it/s | +| NVIDIA TITAN RTX | 12.51it/s | 18.22it/s | +| A100-SXM4-40GB | 18.6it/s | 29.it/s | +| A100-SXM-80GB | 18.7it/s | 29.5it/s | + +์ด๋ฅผ ํ™œ์šฉํ•˜๋ ค๋ฉด ๋‹ค์Œ์„ ๋งŒ์กฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค: + - PyTorch > 1.12 + - Cuda ์‚ฌ์šฉ ๊ฐ€๋Šฅ + - [xformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์„ค์น˜ํ•จ](xformers) +```python +from diffusers import StableDiffusionPipeline +import torch + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16, +).to("cuda") + +pipe.enable_xformers_memory_efficient_attention() + +with torch.inference_mode(): + sample = pipe("a small cat") + +# ์„ ํƒ: ์ด๋ฅผ ๋น„ํ™œ์„ฑํ™” ํ•˜๊ธฐ ์œ„ํ•ด ๋‹ค์Œ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +# pipe.disable_xformers_memory_efficient_attention() +``` diff --git a/docs/source/ko/optimization/habana.mdx b/docs/source/ko/optimization/habana.mdx new file mode 100644 index 000000000000..0f076245fb1c --- /dev/null +++ b/docs/source/ko/optimization/habana.mdx @@ -0,0 +1,71 @@ + + +# Habana Gaudi์—์„œ Stable Diffusion์„ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ• + +๐Ÿค— Diffusers๋Š” ๐Ÿค— [Optimum Habana](https://huggingface.co/docs/optimum/habana/usage_guides/stable_diffusion)๋ฅผ ํ†ตํ•ด์„œ Habana Gaudi์™€ ํ˜ธํ™˜๋ฉ๋‹ˆ๋‹ค. + +## ์š”๊ตฌ ์‚ฌํ•ญ + +- Optimum Habana 1.4 ๋˜๋Š” ์ดํ›„, [์—ฌ๊ธฐ](https://huggingface.co/docs/optimum/habana/installation)์— ์„ค์น˜ํ•˜๋Š” ๋ฐฉ๋ฒ•์ด ์žˆ์Šต๋‹ˆ๋‹ค. +- SynapseAI 1.8. 
+ + +## ์ถ”๋ก  ํŒŒ์ดํ”„๋ผ์ธ + +Gaudi์—์„œ Stable Diffusion 1 ๋ฐ 2๋กœ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•˜๋ ค๋ฉด ๋‘ ์ธ์Šคํ„ด์Šค๋ฅผ ์ธ์Šคํ„ด์Šคํ™”ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค: +- [`GaudiStableDiffusionPipeline`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline)์ด ํฌํ•จ๋œ ํŒŒ์ดํ”„๋ผ์ธ. ์ด ํŒŒ์ดํ”„๋ผ์ธ์€ *ํ…์ŠคํŠธ-์ด๋ฏธ์ง€ ์ƒ์„ฑ*์„ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. +- [`GaudiDDIMScheduler`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline#optimum.habana.diffusers.GaudiDDIMScheduler)์ด ํฌํ•จ๋œ ์Šค์ผ€์ค„๋Ÿฌ. ์ด ์Šค์ผ€์ค„๋Ÿฌ๋Š” Habana Gaudi์— ์ตœ์ ํ™”๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค. + +ํŒŒ์ดํ”„๋ผ์ธ์„ ์ดˆ๊ธฐํ™”ํ•  ๋•Œ, HPU์— ๋ฐฐํฌํ•˜๊ธฐ ์œ„ํ•ด `use_habana=True`๋ฅผ ์ง€์ •ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. +๋˜ํ•œ ๊ฐ€๋Šฅํ•œ ๊ฐ€์žฅ ๋น ๋ฅธ ์ƒ์„ฑ์„ ์œ„ํ•ด `use_hpu_graphs=True`๋กœ **HPU ๊ทธ๋ž˜ํ”„**๋ฅผ ํ™œ์„ฑํ™”ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. +๋งˆ์ง€๋ง‰์œผ๋กœ, [Hugging Face Hub](https://huggingface.co/Habana)์—์„œ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์žˆ๋Š” [Gaudi configuration](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config)์„ ์ง€์ •ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. + +```python +from optimum.habana import GaudiConfig +from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionPipeline + +model_name = "stabilityai/stable-diffusion-2-base" +scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") +pipeline = GaudiStableDiffusionPipeline.from_pretrained( + model_name, + scheduler=scheduler, + use_habana=True, + use_hpu_graphs=True, + gaudi_config="Habana/stable-diffusion", +) +``` + +ํŒŒ์ดํ”„๋ผ์ธ์„ ํ˜ธ์ถœํ•˜์—ฌ ํ•˜๋‚˜ ์ด์ƒ์˜ ํ”„๋กฌํ”„ํŠธ์—์„œ ๋ฐฐ์น˜๋ณ„๋กœ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ +```python +outputs = pipeline( + prompt=[ + "High quality photo of an astronaut riding a horse in space", + "Face of a yellow cat, high resolution, sitting on a park bench", + ], + num_images_per_prompt=10, + batch_size=4, +) +``` + +๋” ๋งŽ์€ ์ •๋ณด๋ฅผ ์–ป๊ธฐ ์œ„ํ•ด, Optimum Habana์˜ [๋ฌธ์„œ](https://huggingface.co/docs/optimum/habana/usage_guides/stable_diffusion)์™€ ๊ณต์‹ Github ์ €์žฅ์†Œ์— ์ œ๊ณต๋œ [์˜ˆ์‹œ](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)๋ฅผ ํ™•์ธํ•˜์„ธ์š”. + + +## ๋ฒค์น˜๋งˆํฌ + +๋‹ค์Œ์€ [Habana/stable-diffusion](https://huggingface.co/Habana/stable-diffusion) Gaudi ๊ตฌ์„ฑ(ํ˜ผํ•ฉ ์ •๋ฐ€๋„ bf16/fp32)์„ ์‚ฌ์šฉํ•˜๋Š” Habana first-generation Gaudi ๋ฐ Gaudi2์˜ ์ง€์—ฐ ์‹œ๊ฐ„์ž…๋‹ˆ๋‹ค: + +| | Latency (๋ฐฐ์น˜ ํฌ๊ธฐ = 1) | Throughput (๋ฐฐ์น˜ ํฌ๊ธฐ = 8) | +| ---------------------- |:------------------------:|:---------------------------:| +| first-generation Gaudi | 4.29s | 0.283 images/s | +| Gaudi2 | 1.54s | 0.904 images/s | diff --git a/docs/source/ko/optimization/mps.mdx b/docs/source/ko/optimization/mps.mdx new file mode 100644 index 000000000000..cd04d6d1103d --- /dev/null +++ b/docs/source/ko/optimization/mps.mdx @@ -0,0 +1,71 @@ + + +# Apple Silicon (M1/M2)์—์„œ Stable Diffusion์„ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ• + +Diffusers๋Š” Stable Diffusion ์ถ”๋ก ์„ ์œ„ํ•ด PyTorch `mps`๋ฅผ ์‚ฌ์šฉํ•ด Apple ์‹ค๋ฆฌ์ฝ˜๊ณผ ํ˜ธํ™˜๋ฉ๋‹ˆ๋‹ค. ๋‹ค์Œ์€ Stable Diffusion์ด ์žˆ๋Š” M1 ๋˜๋Š” M2 ์ปดํ“จํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜๊ธฐ ์œ„ํ•ด ๋”ฐ๋ผ์•ผ ํ•˜๋Š” ๋‹จ๊ณ„์ž…๋‹ˆ๋‹ค. + +## ์š”๊ตฌ ์‚ฌํ•ญ + +- Apple silicon (M1/M2) ํ•˜๋“œ์›จ์–ด์˜ Mac ์ปดํ“จํ„ฐ. +- macOS 12.6 ๋˜๋Š” ์ดํ›„ (13.0 ๋˜๋Š” ์ดํ›„ ์ถ”์ฒœ). +- Python arm64 ๋ฒ„์ „ +- PyTorch 2.0(์ถ”์ฒœ) ๋˜๋Š” 1.13(`mps`๋ฅผ ์ง€์›ํ•˜๋Š” ์ตœ์†Œ ๋ฒ„์ „). Yhttps://pytorch.org/get-started/locally/์˜ ์ง€์นจ์— ๋”ฐ๋ผ `pip` ๋˜๋Š” `conda`๋กœ ์„ค์น˜ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ + +## ์ถ”๋ก  ํŒŒ์ดํ”„๋ผ์ธ + +์•„๋ž˜ ์ฝ”๋„๋Š” ์ต์ˆ™ํ•œ `to()` ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ `mps` ๋ฐฑ์—”๋“œ๋กœ Stable Diffusion ํŒŒ์ดํ”„๋ผ์ธ์„ M1 ๋˜๋Š” M2 ์žฅ์น˜๋กœ ์ด๋™ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค. + + + + +**PyTorch 1.13์„ ์‚ฌ์šฉ ์ค‘์ผ ๋•Œ ** ์ถ”๊ฐ€ ์ผํšŒ์„ฑ ์ „๋‹ฌ์„ ์‚ฌ์šฉํ•˜์—ฌ ํŒŒ์ดํ”„๋ผ์ธ์„ "ํ”„๋ผ์ด๋ฐ"ํ•˜๋Š” ๊ฒƒ์„ ์ถ”์ฒœํ•ฉ๋‹ˆ๋‹ค. ์ด๊ฒƒ์€ ๋ฐœ๊ฒฌํ•œ ์ด์ƒํ•œ ๋ฌธ์ œ์— ๋Œ€ํ•œ ์ž„์‹œ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•์ž…๋‹ˆ๋‹ค. ์ฒซ ๋ฒˆ์งธ ์ถ”๋ก  ์ „๋‹ฌ์€ ํ›„์† ์ „๋‹ฌ์™€ ์•ฝ๊ฐ„ ๋‹ค๋ฅธ ๊ฒฐ๊ณผ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. ์ด ์ „๋‹ฌ์€ ํ•œ ๋ฒˆ๋งŒ ์ˆ˜ํ–‰ํ•˜๋ฉด ๋˜๋ฉฐ ์ถ”๋ก  ๋‹จ๊ณ„๋ฅผ ํ•œ ๋ฒˆ๋งŒ ์‚ฌ์šฉํ•˜๊ณ  ๊ฒฐ๊ณผ๋ฅผ ํ๊ธฐํ•ด๋„ ๋ฉ๋‹ˆ๋‹ค. + + + +์ด์ „ ํŒ์—์„œ ์„ค๋ช…ํ•œ ๊ฒƒ๋“ค์„ ํฌํ•จํ•œ ์—ฌ๋Ÿฌ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋ฏ€๋กœ PyTorch 2 ์ด์ƒ์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + +```python +# `huggingface-cli login`์— ๋กœ๊ทธ์ธ๋˜์–ด ์žˆ์Œ์„ ํ™•์ธ +from diffusers import DiffusionPipeline + +pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") +pipe = pipe.to("mps") + +# ์ปดํ“จํ„ฐ๊ฐ€ 64GB ์ดํ•˜์˜ RAM ๋žจ์ผ ๋•Œ ์ถ”์ฒœ +pipe.enable_attention_slicing() + +prompt = "a photo of an astronaut riding a horse on mars" + +# ์ฒ˜์Œ "์›Œ๋ฐ์—…" ์ „๋‹ฌ (์œ„ ์„ค๋ช…์„ ๋ณด์„ธ์š”) +_ = pipe(prompt, num_inference_steps=1) + +# ๊ฒฐ๊ณผ๋Š” ์›Œ๋ฐ์—… ์ „๋‹ฌ ํ›„์˜ CPU ์žฅ์น˜์˜ ๊ฒฐ๊ณผ์™€ ์ผ์น˜ํ•ฉ๋‹ˆ๋‹ค. +image = pipe(prompt).images[0] +``` + +## ์„ฑ๋Šฅ ์ถ”์ฒœ + +M1/M2 ์„ฑ๋Šฅ์€ ๋ฉ”๋ชจ๋ฆฌ ์••๋ ฅ์— ๋งค์šฐ ๋ฏผ๊ฐํ•ฉ๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ์€ ํ•„์š”ํ•œ ๊ฒฝ์šฐ ์ž๋™์œผ๋กœ ์Šค์™‘๋˜์ง€๋งŒ ์Šค์™‘ํ•  ๋•Œ ์„ฑ๋Šฅ์ด ํฌ๊ฒŒ ์ €ํ•˜๋ฉ๋‹ˆ๋‹ค. + + +ํŠนํžˆ ์ปดํ“จํ„ฐ์˜ ์‹œ์Šคํ…œ RAM์ด 64GB ๋ฏธ๋งŒ์ด๊ฑฐ๋‚˜ 512 ร— 512ํ”ฝ์…€๋ณด๋‹ค ํฐ ๋น„ํ‘œ์ค€ ํ•ด์ƒ๋„์—์„œ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•˜๋Š” ๊ฒฝ์šฐ, ์ถ”๋ก  ์ค‘์— ๋ฉ”๋ชจ๋ฆฌ ์••๋ ฅ์„ ์ค„์ด๊ณ  ์Šค์™€ํ•‘์„ ๋ฐฉ์ง€ํ•˜๊ธฐ ์œ„ํ•ด *์–ดํ…์…˜ ์Šฌ๋ผ์ด์‹ฑ*์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. ์–ดํ…์…˜ ์Šฌ๋ผ์ด์‹ฑ์€ ๋น„์šฉ์ด ๋งŽ์ด ๋“œ๋Š” ์–ดํ…์…˜ ์ž‘์—…์„ ํ•œ ๋ฒˆ์— ๋ชจ๋‘ ์ˆ˜ํ–‰ํ•˜๋Š” ๋Œ€์‹  ์—ฌ๋Ÿฌ ๋‹จ๊ณ„๋กœ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค. 
์ผ๋ฐ˜์ ์œผ๋กœ ๋ฒ”์šฉ ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ ์—†๋Š” ์ปดํ“จํ„ฐ์—์„œ ~20%์˜ ์„ฑ๋Šฅ ์˜ํ–ฅ์„ ๋ฏธ์น˜์ง€๋งŒ 64GB ์ด์ƒ์ด ์•„๋‹Œ ๊ฒฝ์šฐ ๋Œ€๋ถ€๋ถ„์˜ Apple Silicon ์ปดํ“จํ„ฐ์—์„œ *๋” ๋‚˜์€ ์„ฑ๋Šฅ*์ด ๊ด€์ฐฐ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. + +```python +pipeline.enable_attention_slicing() +``` + +## Known Issues + +- ์—ฌ๋Ÿฌ ํ”„๋กฌํ”„ํŠธ๋ฅผ ๋ฐฐ์น˜๋กœ ์ƒ์„ฑํ•˜๋Š” ๊ฒƒ์€ [์ถฉ๋Œ์ด ๋ฐœ์ƒํ•˜๊ฑฐ๋‚˜ ์•ˆ์ •์ ์œผ๋กœ ์ž‘๋™ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค](https://github.com/huggingface/diffusers/issues/363). ์šฐ๋ฆฌ๋Š” ์ด๊ฒƒ์ด [PyTorch์˜ `mps` ๋ฐฑ์—”๋“œ](https://github.com/pytorch/pytorch/issues/84039)์™€ ๊ด€๋ จ์ด ์žˆ๋‹ค๊ณ  ์ƒ๊ฐํ•ฉ๋‹ˆ๋‹ค. ์ด ๋ฌธ์ œ๋Š” ํ•ด๊ฒฐ๋˜๊ณ  ์žˆ์ง€๋งŒ ์ง€๊ธˆ์€ ๋ฐฐ์น˜ ๋Œ€์‹  ๋ฐ˜๋ณต ๋ฐฉ๋ฒ•์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. \ No newline at end of file diff --git a/docs/source/ko/optimization/onnx.mdx b/docs/source/ko/optimization/onnx.mdx new file mode 100644 index 000000000000..d52110b8c1fb --- /dev/null +++ b/docs/source/ko/optimization/onnx.mdx @@ -0,0 +1,65 @@ + + + +# ์ถ”๋ก ์„ ์œ„ํ•ด ONNX ๋Ÿฐํƒ€์ž„์„ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ• + +๐Ÿค— Diffusers๋Š” ONNX Runtime๊ณผ ํ˜ธํ™˜๋˜๋Š” Stable Diffusion ํŒŒ์ดํ”„๋ผ์ธ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์ด๋ฅผ ํ†ตํ•ด ONNX(CPU ํฌํ•จ)๋ฅผ ์ง€์›ํ•˜๊ณ  PyTorch์˜ ๊ฐ€์† ๋ฒ„์ „์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†๋Š” ๋ชจ๋“  ํ•˜๋“œ์›จ์–ด์—์„œ Stable Diffusion์„ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +## ์„ค์น˜ + +๋‹ค์Œ ๋ช…๋ น์–ด๋กœ ONNX Runtime๋ฅผ ์ง€์›ํ•˜๋Š” ๐Ÿค— Optimum๋ฅผ ์„ค์น˜ํ•ฉ๋‹ˆ๋‹ค: + +``` +pip install optimum["onnxruntime"] +``` + +## Stable Diffusion ์ถ”๋ก  + +์•„๋ž˜ ์ฝ”๋“œ๋Š” ONNX ๋Ÿฐํƒ€์ž„์„ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค. `StableDiffusionPipeline` ๋Œ€์‹  `OnnxStableDiffusionPipeline`์„ ์‚ฌ์šฉํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. +PyTorch ๋ชจ๋ธ์„ ๋ถˆ๋Ÿฌ์˜ค๊ณ  ์ฆ‰์‹œ ONNX ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋Š” ๊ฒฝ์šฐ `export=True`๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. 
+ +```python +from optimum.onnxruntime import ORTStableDiffusionPipeline + +model_id = "runwayml/stable-diffusion-v1-5" +pipe = ORTStableDiffusionPipeline.from_pretrained(model_id, export=True) +prompt = "a photo of an astronaut riding a horse on mars" +images = pipe(prompt).images[0] +pipe.save_pretrained("./onnx-stable-diffusion-v1-5") +``` + +ํŒŒ์ดํ”„๋ผ์ธ์„ ONNX ํ˜•์‹์œผ๋กœ ์˜คํ”„๋ผ์ธ์œผ๋กœ ๋‚ด๋ณด๋‚ด๊ณ  ๋‚˜์ค‘์— ์ถ”๋ก ์— ์‚ฌ์šฉํ•˜๋ ค๋Š” ๊ฒฝ์šฐ, +[`optimum-cli export`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli) ๋ช…๋ น์–ด๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```bash +optimum-cli export onnx --model runwayml/stable-diffusion-v1-5 sd_v15_onnx/ +``` + +๊ทธ ๋‹ค์Œ ์ถ”๋ก ์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค: + +```python +from optimum.onnxruntime import ORTStableDiffusionPipeline + +model_id = "sd_v15_onnx" +pipe = ORTStableDiffusionPipeline.from_pretrained(model_id) +prompt = "a photo of an astronaut riding a horse on mars" +images = pipe(prompt).images[0] +``` + +Notice that we didn't have to specify `export=True` above. + +[Optimum ๋ฌธ์„œ](https://huggingface.co/docs/optimum/)์—์„œ ๋” ๋งŽ์€ ์˜ˆ์‹œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +## ์•Œ๋ ค์ง„ ์ด์Šˆ๋“ค + +- ์—ฌ๋Ÿฌ ํ”„๋กฌํ”„ํŠธ๋ฅผ ๋ฐฐ์น˜๋กœ ์ƒ์„ฑํ•˜๋ฉด ๋„ˆ๋ฌด ๋งŽ์€ ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ๋˜๋Š” ๊ฒƒ ๊ฐ™์Šต๋‹ˆ๋‹ค. ์ด๋ฅผ ์กฐ์‚ฌํ•˜๋Š” ๋™์•ˆ, ๋ฐฐ์น˜ ๋Œ€์‹  ๋ฐ˜๋ณต ๋ฐฉ๋ฒ•์ด ํ•„์š”ํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. diff --git a/docs/source/ko/optimization/open_vino.mdx b/docs/source/ko/optimization/open_vino.mdx new file mode 100644 index 000000000000..cb279909f618 --- /dev/null +++ b/docs/source/ko/optimization/open_vino.mdx @@ -0,0 +1,39 @@ + + +# ์ถ”๋ก ์„ ์œ„ํ•œ OpenVINO ์‚ฌ์šฉ ๋ฐฉ๋ฒ• + +๐Ÿค— [Optimum](https://github.com/huggingface/optimum-intel)์€ OpenVINO์™€ ํ˜ธํ™˜๋˜๋Š” Stable Diffusion ํŒŒ์ดํ”„๋ผ์ธ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. +์ด์ œ ๋‹ค์–‘ํ•œ Intel ํ”„๋กœ์„ธ์„œ์—์„œ OpenVINO Runtime์œผ๋กœ ์‰ฝ๊ฒŒ ์ถ”๋ก ์„ ์ˆ˜ํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
([์—ฌ๊ธฐ](https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html)์„œ ์ง€์›๋˜๋Š” ์ „ ๊ธฐ๊ธฐ ๋ชฉ๋ก์„ ํ™•์ธํ•˜์„ธ์š”). + +## ์„ค์น˜ + +๋‹ค์Œ ๋ช…๋ น์–ด๋กœ ๐Ÿค— Optimum์„ ์„ค์น˜ํ•ฉ๋‹ˆ๋‹ค: + +``` +pip install optimum["openvino"] +``` + +## Stable Diffusion ์ถ”๋ก  + +OpenVINO ๋ชจ๋ธ์„ ๋ถˆ๋Ÿฌ์˜ค๊ณ  OpenVINO ๋Ÿฐํƒ€์ž„์œผ๋กœ ์ถ”๋ก ์„ ์‹คํ–‰ํ•˜๋ ค๋ฉด `StableDiffusionPipeline`์„ `OVStableDiffusionPipeline`์œผ๋กœ ๊ต์ฒดํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. PyTorch ๋ชจ๋ธ์„ ๋ถˆ๋Ÿฌ์˜ค๊ณ  ์ฆ‰์‹œ OpenVINO ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋Š” ๊ฒฝ์šฐ `export=True`๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. + +```python +from optimum.intel.openvino import OVStableDiffusionPipeline + +model_id = "runwayml/stable-diffusion-v1-5" +pipe = OVStableDiffusionPipeline.from_pretrained(model_id, export=True) +prompt = "a photo of an astronaut riding a horse on mars" +images = pipe(prompt).images[0] +``` + +[Optimum ๋ฌธ์„œ](https://huggingface.co/docs/optimum/intel/inference#export-and-inference-of-stable-diffusion-models)์—์„œ (์ •์  reshaping๊ณผ ๋ชจ๋ธ ์ปดํŒŒ์ผ ๋“ฑ์˜) ๋” ๋งŽ์€ ์˜ˆ์‹œ๋“ค์„ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. diff --git a/docs/source/ko/optimization/xformers.mdx b/docs/source/ko/optimization/xformers.mdx new file mode 100644 index 000000000000..a8b9408fbe50 --- /dev/null +++ b/docs/source/ko/optimization/xformers.mdx @@ -0,0 +1,36 @@ + + +# xFormers ์„ค์น˜ํ•˜๊ธฐ + +์ถ”๋ก ๊ณผ ํ•™์Šต ๋ชจ๋‘์— [xFormers](https://github.com/facebookresearch/xformers)๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. +์ž์ฒด ํ…Œ์ŠคํŠธ๋กœ ์–ดํ…์…˜ ๋ธ”๋ก์—์„œ ์ˆ˜ํ–‰๋œ ์ตœ์ ํ™”๊ฐ€ ๋” ๋น ๋ฅธ ์†๋„์™€ ์ ์€ ๋ฉ”๋ชจ๋ฆฌ ์†Œ๋น„๋ฅผ ํ™•์ธํ–ˆ์Šต๋‹ˆ๋‹ค. + +2023๋…„ 1์›”์— ์ถœ์‹œ๋œ xFormers ๋ฒ„์ „ '0.0.16'๋ถ€ํ„ฐ ์‚ฌ์ „ ๋นŒ๋“œ๋œ pip wheel์„ ์‚ฌ์šฉํ•˜์—ฌ ์‰ฝ๊ฒŒ ์„ค์น˜ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```bash +pip install xformers +``` + + + +xFormers PIP ํŒจํ‚ค์ง€์—๋Š” ์ตœ์‹  ๋ฒ„์ „์˜ PyTorch(xFormers 0.0.16์— 1.13.1)๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. 
์ด์ „ ๋ฒ„์ „์˜ PyTorch๋ฅผ ์‚ฌ์šฉํ•ด์•ผ ํ•˜๋Š” ๊ฒฝ์šฐ [ํ”„๋กœ์ ํŠธ ์ง€์นจ](https://github.com/facebookresearch/xformers#installing-xformers)์˜ ์†Œ์Šค๋ฅผ ์‚ฌ์šฉํ•ด xFormers๋ฅผ ์„ค์น˜ํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + + +xFormers๋ฅผ ์„ค์น˜ํ•˜๋ฉด, [์—ฌ๊ธฐ](fp16#memory-efficient-attention)์„œ ์„ค๋ช…ํ•œ ๊ฒƒ์ฒ˜๋Ÿผ 'enable_xformers_memory_efficient_attention()'์„ ์‚ฌ์šฉํ•˜์—ฌ ์ถ”๋ก  ์†๋„๋ฅผ ๋†’์ด๊ณ  ๋ฉ”๋ชจ๋ฆฌ ์†Œ๋น„๋ฅผ ์ค„์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + + +[์ด ์ด์Šˆ](https://github.com/huggingface/diffusers/issues/2234#issuecomment-1416931212)์— ๋”ฐ๋ฅด๋ฉด xFormers `v0.0.16`์—์„œ GPU๋ฅผ ์‚ฌ์šฉํ•œ ํ•™์Šต(ํŒŒ์ธ ํŠœ๋‹ ๋˜๋Š” Dreambooth)์„ ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ํ•ด๋‹น ๋ฌธ์ œ๊ฐ€ ๋ฐœ๊ฒฌ๋˜๋ฉด. ํ•ด๋‹น ์ฝ”๋ฉ˜ํŠธ๋ฅผ ์ฐธ๊ณ ํ•ด development ๋ฒ„์ „์„ ์„ค์น˜ํ•˜์„ธ์š”. + + diff --git a/docs/source/ko/training/dreambooth.mdx b/docs/source/ko/training/dreambooth.mdx new file mode 100644 index 000000000000..cc282d9d24f8 --- /dev/null +++ b/docs/source/ko/training/dreambooth.mdx @@ -0,0 +1,475 @@ + + +# DreamBooth + +[DreamBooth](https://arxiv.org/abs/2208.12242)๋Š” ํ•œ ์ฃผ์ œ์— ๋Œ€ํ•œ ์ ์€ ์ด๋ฏธ์ง€(3~5๊ฐœ)๋งŒ์œผ๋กœ๋„ stable diffusion๊ณผ ๊ฐ™์ด text-to-image ๋ชจ๋ธ์„ ๊ฐœ์ธํ™”ํ•  ์ˆ˜ ์žˆ๋Š” ๋ฐฉ๋ฒ•์ž…๋‹ˆ๋‹ค. ์ด๋ฅผ ํ†ตํ•ด ๋ชจ๋ธ์€ ๋‹ค์–‘ํ•œ ์žฅ๋ฉด, ํฌ์ฆˆ ๋ฐ ์žฅ๋ฉด(๋ทฐ)์—์„œ ํ”ผ์‚ฌ์ฒด์— ๋Œ€ํ•ด ๋งฅ๋ฝํ™”(contextualized)๋œ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +![ํ”„๋กœ์ ํŠธ ๋ธ”๋กœ๊ทธ์—์„œ์˜ DreamBooth ์˜ˆ์‹œ](https://dreambooth.github.io/DreamBooth_files/teaser_static.jpg) +project's blog. +ํ”„๋กœ์ ํŠธ ๋ธ”๋กœ๊ทธ์—์„œ์˜ Dreambooth ์˜ˆ์‹œ + + +์ด ๊ฐ€์ด๋“œ๋Š” ๋‹ค์–‘ํ•œ GPU, Flax ์‚ฌ์–‘์— ๋Œ€ํ•ด [`CompVis/stable-diffusion-v1-4`](https://huggingface.co/CompVis/stable-diffusion-v1-4) ๋ชจ๋ธ๋กœ DreamBooth๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค. 
๋” ๊นŠ์ด ํŒŒ๊ณ ๋“ค์–ด ์ž‘๋™ ๋ฐฉ์‹์„ ํ™•์ธํ•˜๋Š” ๋ฐ ๊ด€์‹ฌ์ด ์žˆ๋Š” ๊ฒฝ์šฐ, ์ด ๊ฐ€์ด๋“œ์— ์‚ฌ์šฉ๋œ DreamBooth์˜ ๋ชจ๋“  ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋ฅผ [์—ฌ๊ธฐ](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth)์—์„œ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰ํ•˜๊ธฐ ์ „์— ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ์˜ ํ•™์Šต์— ํ•„์š”ํ•œ dependencies๋ฅผ ์„ค์น˜ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. ๋˜ํ•œ `main` GitHub ๋ธŒ๋žœ์น˜์—์„œ ๐Ÿงจ Diffusers๋ฅผ ์„ค์น˜ํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + +```bash +pip install git+https://github.com/huggingface/diffusers +pip install -U -r diffusers/examples/dreambooth/requirements.txt +``` + +xFormers๋Š” ํ•™์Šต์— ํ•„์š”ํ•œ ์š”๊ตฌ ์‚ฌํ•ญ์€ ์•„๋‹ˆ์ง€๋งŒ, ๊ฐ€๋Šฅํ•˜๋ฉด [์„ค์น˜](../optimization/xformers)ํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. ํ•™์Šต ์†๋„๋ฅผ ๋†’์ด๊ณ  ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์„ ์ค„์ผ ์ˆ˜ ์žˆ๊ธฐ ๋•Œ๋ฌธ์ž…๋‹ˆ๋‹ค. + +๋ชจ๋“  dependencies์„ ์„ค์ •ํ•œ ํ›„ ๋‹ค์Œ์„ ์‚ฌ์šฉํ•˜์—ฌ [๐Ÿค— Accelerate](https://github.com/huggingface/accelerate/) ํ™˜๊ฒฝ์„ ๋‹ค์Œ๊ณผ ๊ฐ™์ด ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค: + +```bash +accelerate config +``` + +๋ณ„๋„ ์„ค์ • ์—†์ด ๊ธฐ๋ณธ ๐Ÿค— Accelerate ํ™˜๊ฒฝ์„ ์„ค์น˜ํ•˜๋ ค๋ฉด ๋‹ค์Œ์„ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค: + +```bash +accelerate config default +``` + +๋˜๋Š” ํ˜„์žฌ ํ™˜๊ฒฝ์ด ๋…ธํŠธ๋ถ๊ณผ ๊ฐ™์€ ๋Œ€ํ™”ํ˜• ์…ธ์„ ์ง€์›ํ•˜์ง€ ์•Š๋Š” ๊ฒฝ์šฐ ๋‹ค์Œ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```py +from accelerate.utils import write_basic_config + +write_basic_config() +``` + +## ํŒŒ์ธํŠœ๋‹ + + + +DreamBooth ํŒŒ์ธํŠœ๋‹์€ ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ์— ๋งค์šฐ ๋ฏผ๊ฐํ•˜๊ณ  ๊ณผ์ ํ•ฉ๋˜๊ธฐ ์‰ฝ์Šต๋‹ˆ๋‹ค. ์ ์ ˆํ•œ ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์„ ํƒํ•˜๋Š” ๋ฐ ๋„์›€์ด ๋˜๋„๋ก ๋‹ค์–‘ํ•œ ๊ถŒ์žฅ ์„ค์ •์ด ํฌํ•จ๋œ [์‹ฌ์ธต ๋ถ„์„](https://huggingface.co/blog/dreambooth)์„ ์‚ดํŽด๋ณด๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + + + + +[๋ช‡ ์žฅ์˜ ๊ฐ•์•„์ง€ ์ด๋ฏธ์ง€๋“ค](https://drive.google.com/drive/folders/1BO_dyz-p65qhBRRMRA4TbZ8qW4rB99JZ)๋กœ DreamBooth๋ฅผ ์‹œ๋„ํ•ด๋ด…์‹œ๋‹ค. 
+์ด๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ด ๋””๋ ‰ํ„ฐ๋ฆฌ์— ์ €์žฅํ•œ ๋‹ค์Œ `INSTANCE_DIR` ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ํ•ด๋‹น ๊ฒฝ๋กœ๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค: + + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path_to_training_images" +export OUTPUT_DIR="path_to_saved_model" +``` + +๊ทธ๋Ÿฐ ๋‹ค์Œ, ๋‹ค์Œ ๋ช…๋ น์„ ์‚ฌ์šฉํ•˜์—ฌ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค (์ „์ฒด ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋Š” [์—ฌ๊ธฐ](https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py)์—์„œ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค): + +```bash +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --output_dir=$OUTPUT_DIR \ + --instance_prompt="a photo of sks dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=1 \ + --learning_rate=5e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=400 +``` + + + +TPU์— ์•ก์„ธ์Šคํ•  ์ˆ˜ ์žˆ๊ฑฐ๋‚˜ ๋” ๋น ๋ฅด๊ฒŒ ํ›ˆ๋ จํ•˜๊ณ  ์‹ถ๋‹ค๋ฉด [Flax ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ](https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_flax.py)๋ฅผ ์‚ฌ์šฉํ•ด ๋ณผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. Flax ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋Š” gradient checkpointing ๋˜๋Š” gradient accumulation์„ ์ง€์›ํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ, ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ 30GB ์ด์ƒ์ธ GPU๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. + +์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰ํ•˜๊ธฐ ์ „์— ์š”๊ตฌ ์‚ฌํ•ญ์ด ์„ค์น˜๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์‹ญ์‹œ์˜ค. 
+ +```bash +pip install -U -r requirements.txt +``` + +๊ทธ๋Ÿฌ๋ฉด ๋‹ค์Œ ๋ช…๋ น์–ด๋กœ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰์‹œํ‚ฌ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```bash +export MODEL_NAME="duongna/stable-diffusion-v1-4-flax" +export INSTANCE_DIR="path-to-instance-images" +export OUTPUT_DIR="path-to-save-model" + +python train_dreambooth_flax.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --output_dir=$OUTPUT_DIR \ + --instance_prompt="a photo of sks dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --learning_rate=5e-6 \ + --max_train_steps=400 +``` + + + +### Prior-preserving(์‚ฌ์ „ ๋ณด์กด) loss๋ฅผ ์‚ฌ์šฉํ•œ ํŒŒ์ธํŠœ๋‹ + +๊ณผ์ ํ•ฉ๊ณผ language drift๋ฅผ ๋ฐฉ์ง€ํ•˜๊ธฐ ์œ„ํ•ด ์‚ฌ์ „ ๋ณด์กด์ด ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค(๊ด€์‹ฌ์ด ์žˆ๋Š” ๊ฒฝ์šฐ [๋…ผ๋ฌธ](https://arxiv.org/abs/2208.12242)์„ ์ฐธ์กฐํ•˜์„ธ์š”). ์‚ฌ์ „ ๋ณด์กด์„ ์œ„ํ•ด ๋™์ผํ•œ ํด๋ž˜์Šค์˜ ๋‹ค๋ฅธ ์ด๋ฏธ์ง€๋ฅผ ํ•™์Šต ํ”„๋กœ์„ธ์Šค์˜ ์ผ๋ถ€๋กœ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. ์ข‹์€ ์ ์€ Stable Diffusion ๋ชจ๋ธ ์ž์ฒด๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ด๋Ÿฌํ•œ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์žˆ๋‹ค๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค! ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋Š” ์ƒ์„ฑ๋œ ์ด๋ฏธ์ง€๋ฅผ ์šฐ๋ฆฌ๊ฐ€ ์ง€์ •ํ•œ ๋กœ์ปฌ ๊ฒฝ๋กœ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค. + +์ €์ž๋“ค์— ๋”ฐ๋ฅด๋ฉด ์‚ฌ์ „ ๋ณด์กด์„ ์œ„ํ•ด `num_epochs * num_samples`๊ฐœ์˜ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. 200-300๊ฐœ์—์„œ ๋Œ€๋ถ€๋ถ„ ์ž˜ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค. 
+ + + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path_to_training_images" +export CLASS_DIR="path_to_class_images" +export OUTPUT_DIR="path_to_saved_model" + +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=1 \ + --learning_rate=5e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + + +```bash +export MODEL_NAME="duongna/stable-diffusion-v1-4-flax" +export INSTANCE_DIR="path-to-instance-images" +export CLASS_DIR="path-to-class-images" +export OUTPUT_DIR="path-to-save-model" + +python train_dreambooth_flax.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --learning_rate=5e-6 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + + + +## ํ…์ŠคํŠธ ์ธ์ฝ”๋”์™€ UNet๋กœ ํŒŒ์ธํŠœ๋‹ํ•˜๊ธฐ + +ํ•ด๋‹น ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‚ฌ์šฉํ•˜๋ฉด `unet`๊ณผ ํ•จ๊ป˜ `text_encoder`๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์‹คํ—˜์—์„œ(์ž์„ธํ•œ ๋‚ด์šฉ์€ [๐Ÿงจ Diffusers๋ฅผ ์‚ฌ์šฉํ•ด DreamBooth๋กœ Stable Diffusion ํ•™์Šตํ•˜๊ธฐ](https://huggingface.co/blog/dreambooth) ๊ฒŒ์‹œ๋ฌผ์„ ํ™•์ธํ•˜์„ธ์š”), ํŠนํžˆ ์–ผ๊ตด ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•  ๋•Œ ํ›จ์”ฌ ๋” ๋‚˜์€ ๊ฒฐ๊ณผ๋ฅผ ์–ป์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + + +ํ…์ŠคํŠธ ์ธ์ฝ”๋”๋ฅผ ํ•™์Šต์‹œํ‚ค๋ ค๋ฉด ์ถ”๊ฐ€ ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ด 16GB GPU๋กœ๋Š” ๋™์ž‘ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.
์ด ์˜ต์…˜์„ ์‚ฌ์šฉํ•˜๋ ค๋ฉด ์ตœ์†Œ 24GB VRAM์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. + + + +`--train_text_encoder` ์ธ์ˆ˜๋ฅผ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ์ „๋‹ฌํ•˜์—ฌ `text_encoder` ๋ฐ `unet`์„ ํŒŒ์ธํŠœ๋‹ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + + + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path_to_training_images" +export CLASS_DIR="path_to_class_images" +export OUTPUT_DIR="path_to_saved_model" + +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --train_text_encoder \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --use_8bit_adam + --gradient_checkpointing \ + --learning_rate=2e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + + +```bash +export MODEL_NAME="duongna/stable-diffusion-v1-4-flax" +export INSTANCE_DIR="path-to-instance-images" +export CLASS_DIR="path-to-class-images" +export OUTPUT_DIR="path-to-save-model" + +python train_dreambooth_flax.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --train_text_encoder \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --learning_rate=2e-6 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + + + +## LoRA๋กœ ํŒŒ์ธํŠœ๋‹ํ•˜๊ธฐ + +DreamBooth์—์„œ ๋Œ€๊ทœ๋ชจ ๋ชจ๋ธ์˜ ํ•™์Šต์„ ๊ฐ€์†ํ™”ํ•˜๊ธฐ ์œ„ํ•œ ํŒŒ์ธํŠœ๋‹ ๊ธฐ์ˆ ์ธ LoRA(Low-Rank Adaptation of Large Language Models)๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ž์„ธํ•œ ๋‚ด์šฉ์€ [LoRA ํ•™์Šต](training/lora#dreambooth) ๊ฐ€์ด๋“œ๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. 
+ +### ํ•™์Šต ์ค‘ ์ฒดํฌํฌ์ธํŠธ ์ €์žฅํ•˜๊ธฐ + +Dreambooth๋กœ ํ›ˆ๋ จํ•˜๋Š” ๋™์•ˆ ๊ณผ์ ํ•ฉํ•˜๊ธฐ ์‰ฌ์šฐ๋ฏ€๋กœ, ๋•Œ๋•Œ๋กœ ํ•™์Šต ์ค‘์— ์ •๊ธฐ์ ์ธ ์ฒดํฌํฌ์ธํŠธ๋ฅผ ์ €์žฅํ•˜๋Š” ๊ฒƒ์ด ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค. ์ค‘๊ฐ„ ์ฒดํฌํฌ์ธํŠธ ์ค‘ ํ•˜๋‚˜๊ฐ€ ์ตœ์ข… ๋ชจ๋ธ๋ณด๋‹ค ๋” ์ž˜ ์ž‘๋™ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค! ์ฒดํฌํฌ์ธํŠธ ์ €์žฅ ๊ธฐ๋Šฅ์„ ํ™œ์„ฑํ™”ํ•˜๋ ค๋ฉด ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋‹ค์Œ ์ธ์ˆ˜๋ฅผ ์ „๋‹ฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค: + +```bash + --checkpointing_steps=500 +``` + +์ด๋ ‡๊ฒŒ ํ•˜๋ฉด `output_dir`์˜ ํ•˜์œ„ ํด๋”์— ์ „์ฒด ํ•™์Šต ์ƒํƒœ๊ฐ€ ์ €์žฅ๋ฉ๋‹ˆ๋‹ค. ํ•˜์œ„ ํด๋” ์ด๋ฆ„์€ ์ ‘๋‘์‚ฌ `checkpoint-`๋กœ ์‹œ์ž‘ํ•˜๊ณ  ์ง€๊ธˆ๊นŒ์ง€ ์ˆ˜ํ–‰๋œ step ์ˆ˜์ž…๋‹ˆ๋‹ค. ์˜ˆ์‹œ๋กœ `checkpoint-1500`์€ 1500 ํ•™์Šต step ํ›„์— ์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ์ž…๋‹ˆ๋‹ค. + +#### ์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ์—์„œ ํ›ˆ๋ จ ์žฌ๊ฐœํ•˜๊ธฐ + +์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ์—์„œ ํ›ˆ๋ จ์„ ์žฌ๊ฐœํ•˜๋ ค๋ฉด, `--resume_from_checkpoint` ์ธ์ˆ˜๋ฅผ ์ „๋‹ฌํ•œ ๋‹ค์Œ ์‚ฌ์šฉํ•  ์ฒดํฌํฌ์ธํŠธ์˜ ์ด๋ฆ„์„ ์ง€์ •ํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค. ํŠน์ˆ˜ ๋ฌธ์ž์—ด `"latest"`๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ €์žฅ๋œ ๋งˆ์ง€๋ง‰ ์ฒดํฌํฌ์ธํŠธ(์ฆ‰, step ์ˆ˜๊ฐ€ ๊ฐ€์žฅ ๋งŽ์€ ์ฒดํฌํฌ์ธํŠธ)์—์„œ ์žฌ๊ฐœํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. ์˜ˆ๋ฅผ ๋“ค์–ด ๋‹ค์Œ์€ 1500 step ํ›„์— ์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ์—์„œ๋ถ€ํ„ฐ ํ•™์Šต์„ ์žฌ๊ฐœํ•ฉ๋‹ˆ๋‹ค: + +```bash + --resume_from_checkpoint="checkpoint-1500" +``` + +์›ํ•˜๋Š” ๊ฒฝ์šฐ ์ผ๋ถ€ ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์กฐ์ •ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +#### ์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ถ”๋ก  ์ˆ˜ํ–‰ํ•˜๊ธฐ + +์ €์žฅ๋œ ์ฒดํฌํฌ์ธํŠธ๋Š” ํ›ˆ๋ จ ์žฌ๊ฐœ์— ์ ํ•ฉํ•œ ํ˜•์‹์œผ๋กœ ์ €์žฅ๋ฉ๋‹ˆ๋‹ค. ์—ฌ๊ธฐ์—๋Š” ๋ชจ๋ธ ๊ฐ€์ค‘์น˜๋ฟ๋งŒ ์•„๋‹ˆ๋ผ ์˜ตํ‹ฐ๋งˆ์ด์ €, ๋ฐ์ดํ„ฐ ๋กœ๋” ๋ฐ ํ•™์Šต๋ฅ ์˜ ์ƒํƒœ๋„ ํฌํ•จ๋ฉ๋‹ˆ๋‹ค. + +**`"accelerate>=0.16.0"`**์ด ์„ค์น˜๋œ ๊ฒฝ์šฐ ๋‹ค์Œ ์ฝ”๋“œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ค‘๊ฐ„ ์ฒดํฌํฌ์ธํŠธ์—์„œ ์ถ”๋ก ์„ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค. 
+ +```python +from diffusers import DiffusionPipeline, UNet2DConditionModel +from transformers import CLIPTextModel +import torch + +# ํ•™์Šต์— ์‚ฌ์šฉ๋œ ๊ฒƒ๊ณผ ๋™์ผํ•œ ์ธ์ˆ˜(model, revision)๋กœ ํŒŒ์ดํ”„๋ผ์ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค. +model_id = "CompVis/stable-diffusion-v1-4" + +unet = UNet2DConditionModel.from_pretrained("/sddata/dreambooth/daruma-v2-1/checkpoint-100/unet") + +# `args.train_text_encoder`๋กœ ํ•™์Šตํ•œ ๊ฒฝ์šฐ๋ฉด ํ…์ŠคํŠธ ์ธ์ฝ”๋”๋ฅผ ๊ผญ ๋ถˆ๋Ÿฌ์˜ค์„ธ์š” +text_encoder = CLIPTextModel.from_pretrained("/sddata/dreambooth/daruma-v2-1/checkpoint-100/text_encoder") + +pipeline = DiffusionPipeline.from_pretrained(model_id, unet=unet, text_encoder=text_encoder, torch_dtype=torch.float16) +pipeline.to("cuda") + +# ์ถ”๋ก ์„ ์ˆ˜ํ–‰ํ•˜๊ฑฐ๋‚˜ ์ €์žฅํ•˜๊ฑฐ๋‚˜, ํ—ˆ๋ธŒ์— ํ‘ธ์‹œํ•ฉ๋‹ˆ๋‹ค. +pipeline.save_pretrained("dreambooth-pipeline") +``` + +If you have **`"accelerate<0.16.0"`** installed, you need to convert it to an inference pipeline first: + +```python +from accelerate import Accelerator +from diffusers import DiffusionPipeline + +# ํ•™์Šต์— ์‚ฌ์šฉ๋œ ๊ฒƒ๊ณผ ๋™์ผํ•œ ์ธ์ˆ˜(model, revision)๋กœ ํŒŒ์ดํ”„๋ผ์ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค. +model_id = "CompVis/stable-diffusion-v1-4" +pipeline = DiffusionPipeline.from_pretrained(model_id) + +accelerator = Accelerator() + +# ์ดˆ๊ธฐ ํ•™์Šต์— `--train_text_encoder`๊ฐ€ ์‚ฌ์šฉ๋œ ๊ฒฝ์šฐ text_encoder๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. +unet, text_encoder = accelerator.prepare(pipeline.unet, pipeline.text_encoder) + +# ์ฒดํฌํฌ์ธํŠธ ๊ฒฝ๋กœ๋กœ๋ถ€ํ„ฐ ์ƒํƒœ๋ฅผ ๋ณต์›ํ•ฉ๋‹ˆ๋‹ค. ์—ฌ๊ธฐ์„œ๋Š” ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
+accelerator.load_state("/sddata/dreambooth/daruma-v2-1/checkpoint-100") + +# unwrapped ๋ชจ๋ธ๋กœ ํŒŒ์ดํ”„๋ผ์ธ์„ ๋‹ค์‹œ ๋นŒ๋“œํ•ฉ๋‹ˆ๋‹ค.(.unet and .text_encoder๋กœ์˜ ํ• ๋‹น๋„ ์ž‘๋™ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค) +pipeline = DiffusionPipeline.from_pretrained( + model_id, + unet=accelerator.unwrap_model(unet), + text_encoder=accelerator.unwrap_model(text_encoder), +) + +# ์ถ”๋ก ์„ ์ˆ˜ํ–‰ํ•˜๊ฑฐ๋‚˜ ์ €์žฅํ•˜๊ฑฐ๋‚˜, ํ—ˆ๋ธŒ์— ํ‘ธ์‹œํ•ฉ๋‹ˆ๋‹ค. +pipeline.save_pretrained("dreambooth-pipeline") +``` + +## ๊ฐ GPU ์šฉ๋Ÿ‰์—์„œ์˜ ์ตœ์ ํ™” + +ํ•˜๋“œ์›จ์–ด์— ๋”ฐ๋ผ 16GB์—์„œ 8GB๊นŒ์ง€ GPU์—์„œ DreamBooth๋ฅผ ์ตœ์ ํ™”ํ•˜๋Š” ๋ช‡ ๊ฐ€์ง€ ๋ฐฉ๋ฒ•์ด ์žˆ์Šต๋‹ˆ๋‹ค! + +### xFormers + +[xFormers](https://github.com/facebookresearch/xformers)๋Š” Transformers๋ฅผ ์ตœ์ ํ™”ํ•˜๊ธฐ ์œ„ํ•œ toolbox์ด๋ฉฐ, ๐Ÿงจ Diffusers์—์„œ ์‚ฌ์šฉ๋˜๋Š”[memory-efficient attention](https://facebookresearch.github.io/xformers/components/ops.html#module-xformers.ops) ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ํฌํ•จํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. [xFormers๋ฅผ ์„ค์น˜](./optimization/xformers)ํ•œ ๋‹ค์Œ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋‹ค์Œ ์ธ์ˆ˜๋ฅผ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค: + +```bash + --enable_xformers_memory_efficient_attention +``` + +xFormers๋Š” Flax์—์„œ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. + +### ๊ทธ๋ž˜๋””์–ธํŠธ ์—†์Œ์œผ๋กœ ์„ค์ • + +๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์„ ์ค„์ผ ์ˆ˜ ์žˆ๋Š” ๋˜ ๋‹ค๋ฅธ ๋ฐฉ๋ฒ•์€ [๊ธฐ์šธ๊ธฐ ์„ค์ •](https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html)์„ 0 ๋Œ€์‹  `None`์œผ๋กœ ํ•˜๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค. ๊ทธ๋Ÿฌ๋‚˜ ์ด๋กœ ์ธํ•ด ํŠน์ • ๋™์ž‘์ด ๋ณ€๊ฒฝ๋  ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด ์ด ์ธ์ˆ˜๋ฅผ ์ œ๊ฑฐํ•ด ๋ณด์‹ญ์‹œ์˜ค. ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋‹ค์Œ ์ธ์ˆ˜๋ฅผ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ทธ๋ž˜๋””์–ธํŠธ๋ฅผ `None`์œผ๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. + +```bash + --set_grads_to_none +``` + +### 16GB GPU + +Gradient checkpointing๊ณผ [bitsandbytes](https://github.com/TimDettmers/bitsandbytes)์˜ 8๋น„ํŠธ ์˜ตํ‹ฐ๋งˆ์ด์ €์˜ ๋„์›€์œผ๋กœ, 16GB GPU์—์„œ dreambooth๋ฅผ ํ›ˆ๋ จํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
bitsandbytes๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”: + +```bash +pip install bitsandbytes +``` + +๊ทธ ๋‹ค์Œ, ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— `--use_8bit_adam` ์˜ต์…˜์„ ๋ช…์‹œํ•ฉ๋‹ˆ๋‹ค: + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path_to_training_images" +export CLASS_DIR="path_to_class_images" +export OUTPUT_DIR="path_to_saved_model" + +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=2 --gradient_checkpointing \ + --use_8bit_adam \ + --learning_rate=5e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + +### 12GB GPU + +12GB GPU์—์„œ DreamBooth๋ฅผ ์‹คํ–‰ํ•˜๋ ค๋ฉด gradient checkpointing, 8๋น„ํŠธ ์˜ตํ‹ฐ๋งˆ์ด์ €, xFormers๋ฅผ ํ™œ์„ฑํ™”ํ•˜๊ณ  ๊ทธ๋ž˜๋””์–ธํŠธ๋ฅผ `None`์œผ๋กœ ์„ค์ •ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. 
+ +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path-to-instance-images" +export CLASS_DIR="path-to-class-images" +export OUTPUT_DIR="path-to-save-model" + +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=1 --gradient_checkpointing \ + --use_8bit_adam \ + --enable_xformers_memory_efficient_attention \ + --set_grads_to_none \ + --learning_rate=2e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --num_class_images=200 \ + --max_train_steps=800 +``` + +### 8GB GPU์—์„œ ํ•™์Šตํ•˜๊ธฐ + +8GB GPU์— ๋Œ€ํ•ด์„œ๋Š” [DeepSpeed](https://www.deepspeed.ai/)๋ฅผ ์‚ฌ์šฉํ•ด ์ผ๋ถ€ ํ…์„œ๋ฅผ VRAM์—์„œ CPU ๋˜๋Š” NVME๋กœ ์˜คํ”„๋กœ๋“œํ•˜์—ฌ ๋” ์ ์€ GPU ๋ฉ”๋ชจ๋ฆฌ๋กœ ํ•™์Šตํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. + +๐Ÿค— Accelerate ํ™˜๊ฒฝ์„ ๊ตฌ์„ฑํ•˜๋ ค๋ฉด ๋‹ค์Œ ๋ช…๋ น์„ ์‹คํ–‰ํ•˜์„ธ์š”: + +```bash +accelerate config +``` + +ํ™˜๊ฒฝ ๊ตฌ์„ฑ ์ค‘์— DeepSpeed๋ฅผ ์‚ฌ์šฉํ•  ๊ฒƒ์„ ํ™•์ธํ•˜์„ธ์š”. +๊ทธ๋Ÿฌ๋ฉด DeepSpeed stage 2, fp16 ํ˜ผํ•ฉ ์ •๋ฐ€๋„๋ฅผ ๊ฒฐํ•ฉํ•˜๊ณ  ๋ชจ๋ธ ๋งค๊ฐœ๋ณ€์ˆ˜์™€ ์˜ตํ‹ฐ๋งˆ์ด์ € ์ƒํƒœ๋ฅผ ๋ชจ๋‘ CPU๋กœ ์˜คํ”„๋กœ๋“œํ•˜๋ฉด 8GB VRAM ๋ฏธ๋งŒ์—์„œ ํ•™์Šตํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +๋‹จ์ ์€ ๋” ๋งŽ์€ ์‹œ์Šคํ…œ RAM(์•ฝ 25GB)์ด ํ•„์š”ํ•˜๋‹ค๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค. ์ถ”๊ฐ€ ๊ตฌ์„ฑ ์˜ต์…˜์€ [DeepSpeed ๋ฌธ์„œ](https://huggingface.co/docs/accelerate/usage_guides/deepspeed)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. + +๋˜ํ•œ ๊ธฐ๋ณธ Adam ์˜ตํ‹ฐ๋งˆ์ด์ €๋ฅผ DeepSpeed์˜ ์ตœ์ ํ™”๋œ Adam ๋ฒ„์ „์œผ๋กœ ๋ณ€๊ฒฝํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. +์ด๋Š” ์ƒ๋‹นํ•œ ์†๋„ ํ–ฅ์ƒ์„ ์œ„ํ•œ Adam์ธ [`deepspeed.ops.adam.DeepSpeedCPUAdam`](https://deepspeed.readthedocs.io/en/latest/optimizers.html#adam-cpu)์ž…๋‹ˆ๋‹ค. 
+`DeepSpeedCPUAdam`์„ ํ™œ์„ฑํ™”ํ•˜๋ ค๋ฉด ์‹œ์Šคํ…œ์˜ CUDA toolchain ๋ฒ„์ „์ด PyTorch์™€ ํ•จ๊ป˜ ์„ค์น˜๋œ ๊ฒƒ๊ณผ ๋™์ผํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. + +8๋น„ํŠธ ์˜ตํ‹ฐ๋งˆ์ด์ €๋Š” ํ˜„์žฌ DeepSpeed์™€ ํ˜ธํ™˜๋˜์ง€ ์•Š๋Š” ๊ฒƒ ๊ฐ™์Šต๋‹ˆ๋‹ค. + +๋‹ค์Œ ๋ช…๋ น์œผ๋กœ ํ•™์Šต์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค: + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export INSTANCE_DIR="path_to_training_images" +export CLASS_DIR="path_to_class_images" +export OUTPUT_DIR="path_to_saved_model" + +accelerate launch train_dreambooth.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --class_data_dir=$CLASS_DIR \ + --output_dir=$OUTPUT_DIR \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --sample_batch_size=1 \ + --gradient_accumulation_steps=1 --gradient_checkpointing \ + --learning_rate=5e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --num_class_images=200 \ + --max_train_steps=800 \ + --mixed_precision=fp16 +``` + +## ์ถ”๋ก  + +๋ชจ๋ธ์„ ํ•™์Šตํ•œ ํ›„์—๋Š”, ๋ชจ๋ธ์ด ์ €์žฅ๋œ ๊ฒฝ๋กœ๋ฅผ ์ง€์ •ํ•ด [`StableDiffusionPipeline`]๋กœ ์ถ”๋ก ์„ ์ˆ˜ํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ํ”„๋กฌํ”„ํŠธ์— ํ•™์Šต์— ์‚ฌ์šฉ๋œ ํŠน์ˆ˜ `์‹๋ณ„์ž`(์ด์ „ ์˜ˆ์‹œ์˜ `sks`)๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”. 
+ +**`"accelerate>=0.16.0"`**์ด ์„ค์น˜๋˜์–ด ์žˆ๋Š” ๊ฒฝ์šฐ ๋‹ค์Œ ์ฝ”๋“œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ค‘๊ฐ„ ์ฒดํฌํฌ์ธํŠธ์—์„œ ์ถ”๋ก ์„ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```python +from diffusers import StableDiffusionPipeline +import torch + +model_id = "path_to_saved_model" +pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + +prompt = "A photo of sks dog in a bucket" +image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] + +image.save("dog-bucket.png") +``` + +[์ €์žฅ๋œ ํ•™์Šต ์ฒดํฌํฌ์ธํŠธ](#inference-from-a-saved-checkpoint)์—์„œ๋„ ์ถ”๋ก ์„ ์‹คํ–‰ํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. \ No newline at end of file diff --git a/docs/source/ko/training/lora.mdx b/docs/source/ko/training/lora.mdx new file mode 100644 index 000000000000..9aebb0fa3109 --- /dev/null +++ b/docs/source/ko/training/lora.mdx @@ -0,0 +1,128 @@ + + +# Low-Rank Adaptation of Large Language Models (LoRA) + +[[open-in-colab]] + + + +ํ˜„์žฌ LoRA๋Š” [`UNet2DConditionalModel`]์˜ ์–ดํ…์…˜ ๋ ˆ์ด์–ด์—์„œ๋งŒ ์ง€์›๋ฉ๋‹ˆ๋‹ค. + + + +[LoRA(Low-Rank Adaptation of Large Language Models)](https://arxiv.org/abs/2106.09685)๋Š” ๋ฉ”๋ชจ๋ฆฌ๋ฅผ ์ ๊ฒŒ ์‚ฌ์šฉํ•˜๋ฉด์„œ ๋Œ€๊ทœ๋ชจ ๋ชจ๋ธ์˜ ํ•™์Šต์„ ๊ฐ€์†ํ™”ํ•˜๋Š” ํ•™์Šต ๋ฐฉ๋ฒ•์ž…๋‹ˆ๋‹ค. ์ด๋Š” rank-decomposition weight ํ–‰๋ ฌ ์Œ(**์—…๋ฐ์ดํŠธ ํ–‰๋ ฌ**์ด๋ผ๊ณ  ํ•จ)์„ ์ถ”๊ฐ€ํ•˜๊ณ  ์ƒˆ๋กœ ์ถ”๊ฐ€๋œ ๊ฐ€์ค‘์น˜**๋งŒ** ํ•™์Šตํ•ฉ๋‹ˆ๋‹ค. ์—ฌ๊ธฐ์—๋Š” ๋ช‡ ๊ฐ€์ง€ ์žฅ์ ์ด ์žˆ์Šต๋‹ˆ๋‹ค. + +- ์ด์ „์— ๋ฏธ๋ฆฌ ํ•™์Šต๋œ ๊ฐ€์ค‘์น˜๋Š” ๊ณ ์ •๋œ ์ƒํƒœ๋กœ ์œ ์ง€๋˜๋ฏ€๋กœ ๋ชจ๋ธ์ด [์น˜๋ช…์ ์ธ ๋ง๊ฐ](https://www.pnas.org/doi/10.1073/pnas.1611835114) ๊ฒฝํ–ฅ์ด ์—†์Šต๋‹ˆ๋‹ค. +- Rank-decomposition ํ–‰๋ ฌ์€ ์›๋ž˜ ๋ชจ๋ธ๋ณด๋‹ค ํŒŒ๋ผ๋ฉ”ํ„ฐ ์ˆ˜๊ฐ€ ํ›จ์”ฌ ์ ์œผ๋ฏ€๋กœ ํ•™์Šต๋œ LoRA ๊ฐ€์ค‘์น˜๋ฅผ ์‰ฝ๊ฒŒ ๋ผ์›Œ๋„ฃ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +- LoRA ๋งคํŠธ๋ฆญ์Šค๋Š” ์ผ๋ฐ˜์ ์œผ๋กœ ์›๋ณธ ๋ชจ๋ธ์˜ ์–ดํ…์…˜ ๋ ˆ์ด์–ด์— ์ถ”๊ฐ€๋ฉ๋‹ˆ๋‹ค. 
๐Ÿงจ Diffusers๋Š” [`~diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs`] ๋ฉ”์„œ๋“œ๋ฅผ ์ œ๊ณตํ•˜์—ฌ LoRA ๊ฐ€์ค‘์น˜๋ฅผ ๋ชจ๋ธ์˜ ์–ดํ…์…˜ ๋ ˆ์ด์–ด๋กœ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค. `scale` ๋งค๊ฐœ๋ณ€์ˆ˜๋ฅผ ํ†ตํ•ด ๋ชจ๋ธ์ด ์ƒˆ๋กœ์šด ํ•™์Šต ์ด๋ฏธ์ง€์— ๋งž๊ฒŒ ์กฐ์ •๋˜๋Š” ๋ฒ”์œ„๋ฅผ ์ œ์–ดํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. +- ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์„ฑ์ด ํ–ฅ์ƒ๋˜์–ด Tesla T4, RTX 3080 ๋˜๋Š” RTX 2080 Ti์™€ ๊ฐ™์€ ์†Œ๋น„์ž์šฉ GPU์—์„œ ํŒŒ์ธํŠœ๋‹์„ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค! T4์™€ ๊ฐ™์€ GPU๋Š” ๋ฌด๋ฃŒ์ด๋ฉฐ Kaggle ๋˜๋Š” Google Colab ๋…ธํŠธ๋ถ์—์„œ ์‰ฝ๊ฒŒ ์•ก์„ธ์Šคํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + + + +๐Ÿ’ก LoRA๋Š” ์–ดํ…์…˜ ๋ ˆ์ด์–ด์—๋งŒ ํ•œ์ •๋˜์ง€๋Š” ์•Š์Šต๋‹ˆ๋‹ค. ์ €์ž๋Š” ์–ธ์–ด ๋ชจ๋ธ์˜ ์–ดํ…์…˜ ๋ ˆ์ด์–ด๋ฅผ ์ˆ˜์ •ํ•˜๋Š” ๊ฒƒ์ด ๋งค์šฐ ํšจ์œจ์ ์œผ๋กœ ์ฃป์€ ์„ฑ๋Šฅ์„ ์–ป๊ธฐ์— ์ถฉ๋ถ„ํ•˜๋‹ค๋Š” ๊ฒƒ์„ ๋ฐœ๊ฒฌํ–ˆ์Šต๋‹ˆ๋‹ค. ์ด๊ฒƒ์ด LoRA ๊ฐ€์ค‘์น˜๋ฅผ ๋ชจ๋ธ์˜ ์–ดํ…์…˜ ๋ ˆ์ด์–ด์— ์ถ”๊ฐ€ํ•˜๋Š” ๊ฒƒ์ด ์ผ๋ฐ˜์ ์ธ ์ด์œ ์ž…๋‹ˆ๋‹ค. LoRA ์ž‘๋™ ๋ฐฉ์‹์— ๋Œ€ํ•œ ์ž์„ธํ•œ ๋‚ด์šฉ์€ [Using LoRA for effective Stable Diffusion fine-tuning](https://huggingface.co/blog/lora) ๋ธ”๋กœ๊ทธ๋ฅผ ํ™•์ธํ•˜์„ธ์š”! + + + +[cloneofsimo](https://github.com/cloneofsimo)๋Š” ์ธ๊ธฐ ์žˆ๋Š” [lora](https://github.com/cloneofsimo/lora) GitHub ๋ฆฌํฌ์ง€ํ† ๋ฆฌ์—์„œ Stable Diffusion์„ ์œ„ํ•œ LoRA ํ•™์Šต์„ ์ตœ์ดˆ๋กœ ์‹œ๋„ํ–ˆ์Šต๋‹ˆ๋‹ค. ๐Ÿงจ Diffusers๋Š” [text-to-image ์ƒ์„ฑ](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image#training-with-lora) ๋ฐ [DreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth#training-with-low-rank-adaptation-of-large-language-models-lora)์„ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. ์ด ๊ฐ€์ด๋“œ๋Š” ๋‘ ๊ฐ€์ง€๋ฅผ ๋ชจ๋‘ ์ˆ˜ํ–‰ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค. 
+ +๋ชจ๋ธ์„ ์ €์žฅํ•˜๊ฑฐ๋‚˜ ์ปค๋ฎค๋‹ˆํ‹ฐ์™€ ๊ณต์œ ํ•˜๋ ค๋ฉด Hugging Face ๊ณ„์ •์— ๋กœ๊ทธ์ธํ•˜์„ธ์š”(์•„์ง ๊ณ„์ •์ด ์—†๋Š” ๊ฒฝ์šฐ [์ƒ์„ฑ](hf.co/join)ํ•˜์„ธ์š”): + +```bash +huggingface-cli login +``` + +## Text-to-image + +์ˆ˜์‹ญ์–ต ๊ฐœ์˜ ํŒŒ๋ผ๋ฉ”ํ„ฐ๋“ค์ด ์žˆ๋Š” Stable Diffusion๊ณผ ๊ฐ™์€ ๋ชจ๋ธ์„ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๊ฒƒ์€ ๋Š๋ฆฌ๊ณ  ์–ด๋ ค์šธ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. LoRA๋ฅผ ์‚ฌ์šฉํ•˜๋ฉด diffusion ๋ชจ๋ธ์„ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๊ฒƒ์ด ํ›จ์”ฌ ์‰ฝ๊ณ  ๋น ๋ฆ…๋‹ˆ๋‹ค. 8๋น„ํŠธ ์˜ตํ‹ฐ๋งˆ์ด์ €์™€ ๊ฐ™์€ ํŠธ๋ฆญ์— ์˜์กดํ•˜์ง€ ์•Š๊ณ ๋„ 11GB์˜ GPU RAM์œผ๋กœ ํ•˜๋“œ์›จ์–ด์—์„œ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + + +### ํ•™์Šต [[text-to-image ํ•™์Šต]] + +[Pokรฉmon BLIP ์บก์…˜](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions) ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ [`stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5)๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•ด ๋‚˜๋งŒ์˜ ํฌ์ผ“๋ชฌ์„ ์ƒ์„ฑํ•ด ๋ณด๊ฒ ์Šต๋‹ˆ๋‹ค. + +์‹œ์ž‘ํ•˜๋ ค๋ฉด `MODEL_NAME` ๋ฐ `DATASET_NAME` ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์‹ญ์‹œ์˜ค. `OUTPUT_DIR` ๋ฐ `HUB_MODEL_ID` ๋ณ€์ˆ˜๋Š” ์„ ํƒ ์‚ฌํ•ญ์ด๋ฉฐ ํ—ˆ๋ธŒ์—์„œ ๋ชจ๋ธ์„ ์ €์žฅํ•  ์œ„์น˜๋ฅผ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค. + +```bash +export MODEL_NAME="runwayml/stable-diffusion-v1-5" +export OUTPUT_DIR="/sddata/finetune/lora/pokemon" +export HUB_MODEL_ID="pokemon-lora" +export DATASET_NAME="lambdalabs/pokemon-blip-captions" +``` + +ํ•™์Šต์„ ์‹œ์ž‘ํ•˜๊ธฐ ์ „์— ์•Œ์•„์•ผ ํ•  ๋ช‡ ๊ฐ€์ง€ ํ”Œ๋ž˜๊ทธ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. + +* `--push_to_hub`๋ฅผ ๋ช…์‹œํ•˜๋ฉด ํ•™์Šต๋œ LoRA ์ž„๋ฒ ๋”ฉ์„ ํ—ˆ๋ธŒ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค. +* `--report_to=wandb`๋Š” ํ•™์Šต ๊ฒฐ๊ณผ๋ฅผ ๊ฐ€์ค‘์น˜ ๋ฐ ํŽธํ–ฅ ๋Œ€์‹œ๋ณด๋“œ์— ๋ณด๊ณ ํ•˜๊ณ  ๊ธฐ๋กํ•ฉ๋‹ˆ๋‹ค(์˜ˆ๋ฅผ ๋“ค์–ด, ์ด [๋ณด๊ณ ์„œ](https://wandb.ai/pcuenq/text2image-fine-tune/run/b4k1w0tn?workspace=user-pcuenq)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”). +* `--learning_rate=1e-04`, ์ผ๋ฐ˜์ ์œผ๋กœ LoRA์—์„œ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ๋ณด๋‹ค ๋” ๋†’์€ ํ•™์Šต๋ฅ ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ +์ด์ œ ํ•™์Šต์„ ์‹œ์ž‘ํ•  ์ค€๋น„๊ฐ€ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค (์ „์ฒด ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋Š” [์—ฌ๊ธฐ](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_lora.py)์—์„œ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค). + +```bash +accelerate launch train_dreambooth_lora.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --instance_data_dir=$INSTANCE_DIR \ + --output_dir=$OUTPUT_DIR \ + --instance_prompt="a photo of sks dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=1 \ + --checkpointing_steps=100 \ + --learning_rate=1e-4 \ + --report_to="wandb" \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=500 \ + --validation_prompt="A photo of sks dog in a bucket" \ + --validation_epochs=50 \ + --seed="0" \ + --push_to_hub +``` + +### ์ถ”๋ก  [[dreambooth ์ถ”๋ก ]] + +์ด์ œ [`StableDiffusionPipeline`]์—์„œ ๊ธฐ๋ณธ ๋ชจ๋ธ์„ ๋ถˆ๋Ÿฌ์™€ ์ถ”๋ก ์„ ์œ„ํ•ด ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + +```py +>>> import torch +>>> from diffusers import StableDiffusionPipeline + +>>> model_base = "runwayml/stable-diffusion-v1-5" + +>>> pipe = StableDiffusionPipeline.from_pretrained(model_base, torch_dtype=torch.float16) +``` + +*๊ธฐ๋ณธ ๋ชจ๋ธ์˜ ๊ฐ€์ค‘์น˜ ์œ„์—* ํŒŒ์ธํŠœ๋‹๋œ DreamBooth ๋ชจ๋ธ์—์„œ LoRA ๊ฐ€์ค‘์น˜๋ฅผ ๋กœ๋“œํ•œ ๋‹ค์Œ, ๋” ๋น ๋ฅธ ์ถ”๋ก ์„ ์œ„ํ•ด ํŒŒ์ดํ”„๋ผ์ธ์„ GPU๋กœ ์ด๋™ํ•ฉ๋‹ˆ๋‹ค. LoRA ๊ฐ€์ค‘์น˜๋ฅผ ํ”„๋ฆฌ์ง•๋œ ์‚ฌ์ „ ํ›ˆ๋ จ๋œ ๋ชจ๋ธ ๊ฐ€์ค‘์น˜์™€ ๋ณ‘ํ•ฉํ•  ๋•Œ, ์„ ํƒ์ ์œผ๋กœ 'scale' ๋งค๊ฐœ๋ณ€์ˆ˜๋กœ ์–ด๋Š ์ •๋„์˜ ๊ฐ€์ค‘์น˜๋ฅผ ๋ณ‘ํ•ฉํ•  ์ง€ ์กฐ์ ˆํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: + + + +๐Ÿ’ก `0`์˜ `scale` ๊ฐ’์€ LoRA ๊ฐ€์ค‘์น˜๋ฅผ ์‚ฌ์šฉํ•˜์ง€ ์•Š์•„ ์›๋ž˜ ๋ชจ๋ธ์˜ ๊ฐ€์ค‘์น˜๋งŒ ์‚ฌ์šฉํ•œ ๊ฒƒ๊ณผ ๊ฐ™๊ณ , `1`์˜ `scale` ๊ฐ’์€ ํŒŒ์ธํŠœ๋‹๋œ LoRA ๊ฐ€์ค‘์น˜๋งŒ ์‚ฌ์šฉํ•จ์„ ์˜๋ฏธํ•ฉ๋‹ˆ๋‹ค. 0๊ณผ 1 ์‚ฌ์ด์˜ ๊ฐ’๋“ค์€ ๋‘ ๊ฒฐ๊ณผ๋“ค ์‚ฌ์ด๋กœ ๋ณด๊ฐ„๋ฉ๋‹ˆ๋‹ค. 
+ + + +```py +>>> pipe.unet.load_attn_procs(model_path) +>>> pipe.to("cuda") +# LoRA ํŒŒ์ธํŠœ๋‹๋œ ๋ชจ๋ธ์˜ ๊ฐ€์ค‘์น˜ ์ ˆ๋ฐ˜๊ณผ ๊ธฐ๋ณธ ๋ชจ๋ธ์˜ ๊ฐ€์ค‘์น˜ ์ ˆ๋ฐ˜ ์‚ฌ์šฉ + +>>> image = pipe( +... "A picture of a sks dog in a bucket.", +... num_inference_steps=25, +... guidance_scale=7.5, +... cross_attention_kwargs={"scale": 0.5}, +... ).images[0] +# ์™„์ „ํžˆ ํŒŒ์ธํŠœ๋‹๋œ LoRA ๋ชจ๋ธ์˜ ๊ฐ€์ค‘์น˜ ์‚ฌ์šฉ + +>>> image = pipe("A picture of a sks dog in a bucket.", num_inference_steps=25, guidance_scale=7.5).images[0] +>>> image.save("bucket-dog.png") +``` \ No newline at end of file diff --git a/docs/source/ko/training/text2image.mdx b/docs/source/ko/training/text2image.mdx new file mode 100644 index 000000000000..069388603124 --- /dev/null +++ b/docs/source/ko/training/text2image.mdx @@ -0,0 +1,224 @@ + + + +# Text-to-image + + + +text-to-image ํŒŒ์ธํŠœ๋‹ ์Šคํฌ๋ฆฝํŠธ๋Š” experimental ์ƒํƒœ์ž…๋‹ˆ๋‹ค. ๊ณผ์ ํ•ฉํ•˜๊ธฐ ์‰ฝ๊ณ  ์น˜๋ช…์ ์ธ ๋ง๊ฐ๊ณผ ๊ฐ™์€ ๋ฌธ์ œ์— ๋ถ€๋”ชํžˆ๊ธฐ ์‰ฝ์Šต๋‹ˆ๋‹ค. ์ž์ฒด ๋ฐ์ดํ„ฐ์…‹์—์„œ ์ตœ์ƒ์˜ ๊ฒฐ๊ณผ๋ฅผ ์–ป์œผ๋ ค๋ฉด ๋‹ค์–‘ํ•œ ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ํƒ์ƒ‰ํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. + + + +Stable Diffusion๊ณผ ๊ฐ™์€ text-to-image ๋ชจ๋ธ์€ ํ…์ŠคํŠธ ํ”„๋กฌํ”„ํŠธ์—์„œ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. ์ด ๊ฐ€์ด๋“œ๋Š” PyTorch ๋ฐ Flax๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ž์ฒด ๋ฐ์ดํ„ฐ์…‹์—์„œ [`CompVis/stable-diffusion-v1-4`](https://huggingface.co/CompVis/stable-diffusion-v1-4) ๋ชจ๋ธ๋กœ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค. ์ด ๊ฐ€์ด๋“œ์— ์‚ฌ์šฉ๋œ text-to-image ํŒŒ์ธํŠœ๋‹์„ ์œ„ํ•œ ๋ชจ๋“  ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๊ด€์‹ฌ์ด ์žˆ๋Š” ๊ฒฝ์šฐ ์ด [๋ฆฌํฌ์ง€ํ† ๋ฆฌ](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image)์—์„œ ์ž์„ธํžˆ ์ฐพ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ +์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰ํ•˜๊ธฐ ์ „์—, ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ์˜ ํ•™์Šต dependency๋“ค์„ ์„ค์น˜ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค: + +```bash +pip install git+https://github.com/huggingface/diffusers.git +pip install -U -r requirements.txt +``` + +๊ทธ๋ฆฌ๊ณ  [๐Ÿค—Accelerate](https://github.com/huggingface/accelerate/) ํ™˜๊ฒฝ์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค: + +```bash +accelerate config +``` + +๋ฆฌํฌ์ง€ํ† ๋ฆฌ๋ฅผ ์ด๋ฏธ ๋ณต์ œํ•œ ๊ฒฝ์šฐ, ์ด ๋‹จ๊ณ„๋ฅผ ์ˆ˜ํ–‰ํ•  ํ•„์š”๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ๋Œ€์‹ , ๋กœ์ปฌ ์ฒดํฌ์•„์›ƒ ๊ฒฝ๋กœ๋ฅผ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋ช…์‹œํ•  ์ˆ˜ ์žˆ์œผ๋ฉฐ ๊ฑฐ๊ธฐ์—์„œ ๋กœ๋“œ๋ฉ๋‹ˆ๋‹ค. + +### ํ•˜๋“œ์›จ์–ด ์š”๊ตฌ ์‚ฌํ•ญ + +`gradient_checkpointing` ๋ฐ `mixed_precision`์„ ์‚ฌ์šฉํ•˜๋ฉด ๋‹จ์ผ 24GB GPU์—์„œ ๋ชจ๋ธ์„ ํŒŒ์ธํŠœ๋‹ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋” ๋†’์€ `batch_size`์™€ ๋” ๋น ๋ฅธ ํ›ˆ๋ จ์„ ์œ„ํ•ด์„œ๋Š” GPU ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ 30GB ์ด์ƒ์ธ GPU๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. TPU ๋˜๋Š” GPU์—์„œ ํŒŒ์ธํŠœ๋‹์„ ์œ„ํ•ด JAX๋‚˜ Flax๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. ์ž์„ธํ•œ ๋‚ด์šฉ์€ [์•„๋ž˜](#flax-jax-finetuning)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. + +xFormers๋กœ memory efficient attention์„ ํ™œ์„ฑํ™”ํ•˜์—ฌ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ํ›จ์”ฌ ๋” ์ค„์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. [xFormers๊ฐ€ ์„ค์น˜](./optimization/xformers)๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜๊ณ  `--enable_xformers_memory_efficient_attention`๋ฅผ ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋ช…์‹œํ•ฉ๋‹ˆ๋‹ค. + +xFormers๋Š” Flax์— ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. + +## Hub์— ๋ชจ๋ธ ์—…๋กœ๋“œํ•˜๊ธฐ + +ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋‹ค์Œ ์ธ์ˆ˜๋ฅผ ์ถ”๊ฐ€ํ•˜์—ฌ ๋ชจ๋ธ์„ ํ—ˆ๋ธŒ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค: + +```bash + --push_to_hub +``` + + +## ์ฒดํฌํฌ์ธํŠธ ์ €์žฅ ๋ฐ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ + +ํ•™์Šต ์ค‘ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ๋Š” ์ผ์— ๋Œ€๋น„ํ•˜์—ฌ ์ •๊ธฐ์ ์œผ๋กœ ์ฒดํฌํฌ์ธํŠธ๋ฅผ ์ €์žฅํ•ด ๋‘๋Š” ๊ฒƒ์ด ์ข‹์Šต๋‹ˆ๋‹ค. ์ฒดํฌํฌ์ธํŠธ๋ฅผ ์ €์žฅํ•˜๋ ค๋ฉด ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ์— ๋‹ค์Œ ์ธ์ˆ˜๋ฅผ ๋ช…์‹œํ•ฉ๋‹ˆ๋‹ค. + +```bash + --checkpointing_steps=500 +``` + +500์Šคํ…๋งˆ๋‹ค ์ „์ฒด ํ•™์Šต state๊ฐ€ 'output_dir'์˜ ํ•˜์œ„ ํด๋”์— ์ €์žฅ๋ฉ๋‹ˆ๋‹ค. 
체크포인트 이름은 'checkpoint-' 뒤에 지금까지 학습된 step 수가 붙은 형태입니다. 예를 들어 'checkpoint-1500'은 1500 학습 step 후에 저장된 체크포인트입니다. + +학습을 재개하기 위해 체크포인트를 불러오려면 '--resume_from_checkpoint' 인수를 학습 스크립트에 명시하고 재개할 체크포인트를 지정하십시오. 예를 들어 다음 인수는 1500개의 학습 step 후에 저장된 체크포인트에서부터 훈련을 재개합니다. + +```bash + --resume_from_checkpoint="checkpoint-1500" +``` + +## 파인튜닝 + + + +다음과 같이 [Pokémon BLIP 캡션](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions) 데이터셋에서 파인튜닝 실행을 위해 [PyTorch 학습 스크립트](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image.py)를 실행합니다: + + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export dataset_name="lambdalabs/pokemon-blip-captions" + +accelerate launch train_text_to_image.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --dataset_name=$dataset_name \ + --use_ema \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --mixed_precision="fp16" \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --lr_scheduler="constant" --lr_warmup_steps=0 \ + --output_dir="sd-pokemon-model" +``` + +자체 데이터셋으로 파인튜닝하려면 🤗 [Datasets](https://huggingface.co/docs/datasets/index)에서 요구하는 형식에 따라 데이터셋을 준비하세요. [데이터셋을 허브에 업로드](https://huggingface.co/docs/datasets/image_dataset#upload-dataset-to-the-hub)하거나 [파일들이 있는 로컬 폴더를 준비](https://huggingface.co/docs/datasets/image_dataset#imagefolder)할 수 있습니다.
+ +์‚ฌ์šฉ์ž ์ปค์Šคํ…€ loading logic์„ ์‚ฌ์šฉํ•˜๋ ค๋ฉด ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์ˆ˜์ •ํ•˜์‹ญ์‹œ์˜ค. ๋„์›€์ด ๋˜๋„๋ก ์ฝ”๋“œ์˜ ์ ์ ˆํ•œ ์œ„์น˜์— ํฌ์ธํ„ฐ๋ฅผ ๋‚จ๊ฒผ์Šต๋‹ˆ๋‹ค. ๐Ÿค— ์•„๋ž˜ ์˜ˆ์ œ ์Šคํฌ๋ฆฝํŠธ๋Š” `TRAIN_DIR`์˜ ๋กœ์ปฌ ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๋ฐฉ๋ฒ•๊ณผ `OUTPUT_DIR`์—์„œ ๋ชจ๋ธ์„ ์ €์žฅํ•  ์œ„์น˜๋ฅผ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค: + + +```bash +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export TRAIN_DIR="path_to_your_dataset" +export OUTPUT_DIR="path_to_save_model" + +accelerate launch train_text_to_image.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --train_data_dir=$TRAIN_DIR \ + --use_ema \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --mixed_precision="fp16" \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --lr_scheduler="constant" --lr_warmup_steps=0 \ + --output_dir=${OUTPUT_DIR} +``` + + + +[@duongna211](https://github.com/duongna21)์˜ ๊ธฐ์—ฌ๋กœ, Flax๋ฅผ ์‚ฌ์šฉํ•ด TPU ๋ฐ GPU์—์„œ Stable Diffusion ๋ชจ๋ธ์„ ๋” ๋น ๋ฅด๊ฒŒ ํ•™์Šตํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ด๋Š” TPU ํ•˜๋“œ์›จ์–ด์—์„œ ๋งค์šฐ ํšจ์œจ์ ์ด์ง€๋งŒ GPU์—์„œ๋„ ํ›Œ๋ฅญํ•˜๊ฒŒ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค. Flax ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ๋Š” gradient checkpointing๋‚˜ gradient accumulation๊ณผ ๊ฐ™์€ ๊ธฐ๋Šฅ์„ ์•„์ง ์ง€์›ํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ 30GB ์ด์ƒ์ธ GPU ๋˜๋Š” TPU v3๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. + +์Šคํฌ๋ฆฝํŠธ๋ฅผ ์‹คํ–‰ํ•˜๊ธฐ ์ „์— ์š”๊ตฌ ์‚ฌํ•ญ์ด ์„ค์น˜๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์‹ญ์‹œ์˜ค: + +```bash +pip install -U -r requirements_flax.txt +``` + +๊ทธ๋Ÿฌ๋ฉด ๋‹ค์Œ๊ณผ ๊ฐ™์ด [Flax ํ•™์Šต ์Šคํฌ๋ฆฝํŠธ](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_flax.py)๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
+ +```bash +export MODEL_NAME="runwayml/stable-diffusion-v1-5" +export dataset_name="lambdalabs/pokemon-blip-captions" + +python train_text_to_image_flax.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --dataset_name=$dataset_name \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --output_dir="sd-pokemon-model" +``` + +์ž์ฒด ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ ํŒŒ์ธํŠœ๋‹ํ•˜๋ ค๋ฉด ๐Ÿค— [Datasets](https://huggingface.co/docs/datasets/index)์—์„œ ์š”๊ตฌํ•˜๋Š” ํ˜•์‹์— ๋”ฐ๋ผ ๋ฐ์ดํ„ฐ์…‹์„ ์ค€๋น„ํ•˜์„ธ์š”. [๋ฐ์ดํ„ฐ์…‹์„ ํ—ˆ๋ธŒ์— ์—…๋กœ๋“œ](https://huggingface.co/docs/datasets/image_dataset#upload-dataset-to-the-hub)ํ•˜๊ฑฐ๋‚˜ [ํŒŒ์ผ๋“ค์ด ์žˆ๋Š” ๋กœ์ปฌ ํด๋”๋ฅผ ์ค€๋น„](https ://huggingface.co/docs/datasets/image_dataset#imagefolder)ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +์‚ฌ์šฉ์ž ์ปค์Šคํ…€ loading logic์„ ์‚ฌ์šฉํ•˜๋ ค๋ฉด ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์ˆ˜์ •ํ•˜์‹ญ์‹œ์˜ค. ๋„์›€์ด ๋˜๋„๋ก ์ฝ”๋“œ์˜ ์ ์ ˆํ•œ ์œ„์น˜์— ํฌ์ธํ„ฐ๋ฅผ ๋‚จ๊ฒผ์Šต๋‹ˆ๋‹ค. ๐Ÿค— ์•„๋ž˜ ์˜ˆ์ œ ์Šคํฌ๋ฆฝํŠธ๋Š” `TRAIN_DIR`์˜ ๋กœ์ปฌ ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค: + +```bash +export MODEL_NAME="duongna/stable-diffusion-v1-4-flax" +export TRAIN_DIR="path_to_your_dataset" + +python train_text_to_image_flax.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --train_data_dir=$TRAIN_DIR \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --mixed_precision="fp16" \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --output_dir="sd-pokemon-model" +``` + + + +## LoRA + +Text-to-image ๋ชจ๋ธ ํŒŒ์ธํŠœ๋‹์„ ์œ„ํ•ด, ๋Œ€๊ทœ๋ชจ ๋ชจ๋ธ ํ•™์Šต์„ ๊ฐ€์†ํ™”ํ•˜๊ธฐ ์œ„ํ•œ ํŒŒ์ธํŠœ๋‹ ๊ธฐ์ˆ ์ธ LoRA(Low-Rank Adaptation of Large Language Models)๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ž์„ธํ•œ ๋‚ด์šฉ์€ [LoRA ํ•™์Šต](lora#text-to-image) ๊ฐ€์ด๋“œ๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. 
+ +## 추론 + +허브의 모델 경로 또는 모델 이름을 [`StableDiffusionPipeline`]에 전달하여 추론을 위해 파인튜닝된 모델을 불러올 수 있습니다: + + + +```python +from diffusers import StableDiffusionPipeline + +model_path = "path_to_saved_model" +pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16) +pipe.to("cuda") + +image = pipe(prompt="yoda").images[0] +image.save("yoda-pokemon.png") +``` + + +```python +import jax +import numpy as np +from flax.jax_utils import replicate +from flax.training.common_utils import shard +from diffusers import FlaxStableDiffusionPipeline + +model_path = "path_to_saved_model" +pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(model_path, dtype=jax.numpy.bfloat16) + +prompt = "yoda pokemon" +prng_seed = jax.random.PRNGKey(0) +num_inference_steps = 50 + +num_samples = jax.device_count() +prompt = num_samples * [prompt] +prompt_ids = pipeline.prepare_inputs(prompt) + +# shard inputs and rng +params = replicate(params) +prng_seed = jax.random.split(prng_seed, jax.device_count()) +prompt_ids = shard(prompt_ids) + +images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images +images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:]))) +images[0].save("yoda-pokemon.png") +``` + + \ No newline at end of file