In [2]:
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
import torch
from PIL import Image
from io import BytesIO
import requests

class StableDiffusionModel:
    """Stable Diffusion XL wrapper offering text-to-image and image-to-image generation.

    Both pipelines are built from a single set of model components, so the
    SDXL weights are held in memory once rather than once per pipeline.
    """

    def __init__(self, use_lora=False, lora_weights_path=None, base_model="stabilityai/stable-diffusion-xl-base-1.0"):
        """
        Initialize the Stable Diffusion XL model with optional LoRA weights.
        If no LoRA is provided, it uses the base SDXL model only.

        Args:
            use_lora (bool): Whether to load a LoRA model on top of the base model.
            lora_weights_path (str): Path to the LoRA weights file. LoRA loading is
                skipped when this is empty, even if ``use_lora`` is True.
            base_model (str): Path or hub id of the base SDXL model, default is SDXL 1.0.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.base_model = base_model
        self.use_lora = use_lora
        self.lora_weights_path = lora_weights_path

        # Half precision is only used on the GPU; the CPU fallback runs in float32.
        dtype = torch.float16 if self.device == "cuda" else torch.float32

        # Load the base SDXL checkpoint once for Text-to-Image ...
        self.text2img_pipe = StableDiffusionXLPipeline.from_pretrained(
            self.base_model, torch_dtype=dtype
        ).to(self.device)
        # ... and build the Image-to-Image pipeline from the very same components
        # instead of a second from_pretrained(), which would put a second full
        # copy of the weights on the device.
        self.img2img_pipe = StableDiffusionXLImg2ImgPipeline(**self.text2img_pipe.components)

        # Apply LoRA weights if specified
        if self.use_lora and self.lora_weights_path:
            self.load_lora_weights()

    def load_lora_weights(self):
        """Load the LoRA weights at ``self.lora_weights_path`` onto the SDXL pipelines.

        The weights are loaded through the text-to-image pipeline only: the
        image-to-image pipeline is built from the same component objects in
        ``__init__``, so loading a second time would target the same modules.
        """
        print(f"Loading LoRA weights from {self.lora_weights_path}")
        self.text2img_pipe.load_lora_weights(self.lora_weights_path)

    def _make_generator(self, seed):
        """Return a ``torch.Generator`` seeded with ``seed``, or None when no seed is given."""
        if seed is None:
            return None
        print(f"Using seed: {seed}")
        # A per-call generator keeps the run reproducible without reseeding
        # torch's global RNG for the rest of the notebook.
        return torch.Generator(device=self.device).manual_seed(seed)

    def generate_text_to_image(self, prompt, negative_prompt=None, guidance_scale=7, num_inference_steps=31, seed=None):
        """
        Generate an image from a text prompt using the Text-to-Image pipeline.

        Args:
            prompt (str): The text prompt for image generation.
            negative_prompt (str): The negative prompt for avoiding certain attributes in the image.
            guidance_scale (float): The strength of the prompt influence. Default is 7.
            num_inference_steps (int): The number of diffusion steps. Default is 31.
            seed (int): Optional seed for reproducibility. If None, random seed is used.

        Returns:
            The first entry of the pipeline output's ``images`` list.
        """
        result = self.text2img_pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=self._make_generator(seed),
        )
        return result.images[0]

    def generate_image_to_image(self, prompt, init_image, negative_prompt=None, guidance_scale=7, num_inference_steps=31, strength=0.8, seed=None):
        """
        Generate an image based on an initial image using Image-to-Image pipeline.

        Args:
            prompt (str): The text prompt to guide the image generation.
            init_image (PIL.Image): The initial image to use for Image-to-Image generation.
            negative_prompt (str): The negative prompt to avoid unwanted elements.
            guidance_scale (float): The strength of prompt influence. Default is 7.
            num_inference_steps (int): The number of diffusion steps. Default is 31.
            strength (float): How strongly the initial image is transformed, between 0 and 1.
                Higher values add more noise, so less of the initial image survives. Default is 0.8.
            seed (int): Optional seed for reproducibility. If None, a random seed is used.

        Returns:
            The first entry of the pipeline output's ``images`` list.
        """
        # Normalise the input: three colour channels, and always resized to
        # 1024x1024 for SDXL (the aspect ratio is not preserved).
        init_image = init_image.convert("RGB").resize((1024, 1024))

        # The SDXL img2img pipeline takes the source picture as `image`;
        # passing it as `init_image` does not hand it to the model.
        result = self.img2img_pipe(
            prompt=prompt,
            image=init_image,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            strength=strength,
            generator=self._make_generator(seed),
        )
        return result.images[0]


In [4]:
# Location of the downloaded LoRA weights file
lora_weights_path = "Helldivers_V2.safetensors"

# Build the SDXL wrapper with the LoRA applied on top of the base checkpoint
model_sdxl = StableDiffusionModel(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    use_lora=True,
    lora_weights_path=lora_weights_path,
)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Define the prompt and negative prompt
prompt = "black armor, upper body, profile picture, cape, yellow accents, explosion, fire, laser, on a pile of fragmented automatons in a battlefield where the sky is red and black, blood and oil spilled armor"
negative_prompt = "bad anatomy, bad hands, poorly drawn face, poorly drawn hands, missing limb, out of focus, monochrome, symbol, text, logo, lowres, censored, signature"

# Set parameters
guidance_scale = 7
num_inference_steps = 31
seed = None  # set to an int to make the run reproducible

# Generate a Text-to-Image output with the specified prompt and negative prompt.
# The wrapper instance created in the setup cell is named `model_sdxl`;
# the bare name `model` is never defined in this notebook.
image = model_sdxl.generate_text_to_image(
    prompt,
    negative_prompt=negative_prompt,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    seed=seed,
)

# Leave the image as the cell's last expression so the notebook renders it
# inline instead of launching an external viewer via image.show().
image