In [2]:
# Imports
import os
import sys
import torch
import warnings
import diffusers
import accelerate
import transformers

from PIL import Image
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
#pip install torch==2.3.1 diffusers==0.29.0 transformers==4.41.2 accelerate==0.30.1

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

  from .autonotebook import tqdm as notebook_tqdm
  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


In [3]:
# --- Step 1: Verify the environment from within the script ---
print("--- Diagnosing Environment ---")
print(f"Python Executable: {sys.executable}")
print(f"PyTorch version: {torch.__version__}")
print(f"Diffusers version: {diffusers.__version__}")
print(f"Transformers version: {transformers.__version__}")
print(f"Accelerate version: {accelerate.__version__}")
print("----------------------------\n")

--- Diagnosing Environment ---
Python Executable: /Library/Developer/CommandLineTools/usr/bin/python3
PyTorch version: 2.3.1
Diffusers version: 0.29.0
Transformers version: 4.41.2
Accelerate version: 0.30.1
----------------------------



In [4]:
# Check if a GPU is available and set the device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cpu


# Track A

## Load the Stable Diffusion Model

In [5]:
# --- Step 2: Attempt to load the model with a basic configuration ---
pipe = diffusers.StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
)
pipe = pipe.to(device)

print("\n✅ Model loaded successfully!")

Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 11.04it/s]


✅ Model loaded successfully!





### Generate 3 Images

In [6]:
# Create output directory
OUTPUT_DIR = "output_images"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [10]:
# --- Step 3: Generate an image ---
prompts = [
"a two dimensional renaisaunce era crown in a pop-art graphic style",
"a destroyed medievil castle bombarded by artillery",
"a wooden chess board"
]

for i, prompt in enumerate(prompts):
    print(f"Generating image for prompt: '{prompt}'")
    
    # Generate the image
    image = pipe(prompt).images[0]
    
    # Save the image to the output directory
    output_path = os.path.join(OUTPUT_DIR, f"output_{i+1}.png")
    image.save(output_path)
    print(f"Saved image as {output_path}")

print("Image generation complete.")

Generating image for prompt: 'a two dimensional renaisaunce era crown in a pop-art graphic style'


100%|██████████| 50/50 [03:38<00:00,  4.38s/it]


Saved image as output_images/output_1.png
Generating image for prompt: 'a destroyed medievil castle bombarded by artillery'


100%|██████████| 50/50 [04:01<00:00,  4.84s/it]


Saved image as output_images/output_2.png
Generating image for prompt: 'a wooden chess board'


100%|██████████| 50/50 [04:03<00:00,  4.87s/it]


Saved image as output_images/output_3.png
Image generation complete.


## Apply One Edit

In [12]:
try:
    init_image = Image.open("output_images/output_3.png").convert("RGB")
except FileNotFoundError:
    print("Error: 'test_output.png' not found. Please generate it first.")
    exit()

# Resize for consistency
init_image = init_image.resize((768, 512))
init_image.show()


# --- 3. Define the Edit ---
prompt = "The chess board should have a grid of alternating light and dark squares"

# --- 4. Generate the Edited Image ---
print("Generating edited image...")
# 'strength' controls how much the new image differs from the original (0.0 to 1.0)
# A higher strength allows for more creative changes.
edited_image = pipe(prompt=prompt, image=init_image, strength=0.10).images[0]
edited_image.show()

# --- 5. Save the Result ---
output_path = os.path.join(OUTPUT_DIR, f"edited_output_3.png")
edited_image.save(output_path)
print(f"Saved image as {output_path}")

Generating edited image...


100%|██████████| 50/50 [03:42<00:00,  4.45s/it]


Saved image as output_images/edited_output_3.png


## Train LoRA

I am currently going to skip over this section of the assignment. Having to clone the full repository to do the LoRA training is not something that will advance our project along forward. It is a nice excersize in learning about image generation model training, but for the scope of this project and the decreasing timeline, I will be skipping this part. I have the code below, but will not be utilizing it further.

https://github.com/huggingface/diffusers.git

In [16]:
from pathlib import Path
DATA_DIR = Path("lora_data")      # put 10–20 images of your SAFE concept here
LORA_OUT = Path("lora_weights")   # will contain your trained LoRA
LORA_OUT.mkdir(exist_ok=True, parents=True)
MODEL_ID = "runwayml/stable-diffusion-v1-5"

# Example CLI command (uncomment and adapt):
# !accelerate launch \
#   diffusers/examples/text_to_image/train_text_to_image_lora.py \
#   --pretrained_model_name_or_path={MODEL_ID} \
#   --instance_data_dir={DATA_DIR} \
#   --output_dir={LORA_OUT} \
#   --train_batch_size=1 --gradient_accumulation_steps=4 \
#   --learning_rate=1e-4 --lr_warmup_steps=0 \
#   --max_train_steps=1000 --mixed_precision="fp16"

print("➡️ After training, place your LoRA weights in:", LORA_OUT.resolve())

➡️ After training, place your LoRA weights in: /Users/jam/Documents/git/Capstone/llm-games-project/TrackA/lora_weights


In [17]:
# Example of loading LoRA:
# pipe.load_lora_weights(str(LORA_OUT))
# lora_prompts = [
#     "your concept in a modern flat illustration, teal accents",
#     "your concept in a photorealistic lab interior, soft light"
# ]
# for i,p in enumerate(lora_prompts):
#     img = pipe(prompt=p, num_inference_steps=25, guidance_scale=7.5).images[0]

#     output_path = os.path.join(OUTPUT_DIR, f"lora_out_{i}.png")
#     img.save(output_path)
#     print(f"Saved image as {output_path}")

# print("Done. Exported images are in:", os.path.abspath(OUTPUT_DIR))