<a href="https://colab.research.google.com/github/amien1410/stable-diffusion-scripts/blob/main/stable_diffusion_basic_settings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stable Diffusion - Basic Settings

In [None]:
#@title Installing dependencies { display-mode: "form" }
# Install the notebook's dependencies; `-q` keeps pip's output quiet.
# Only ipywidgets is pinned (7.7.1); every other package resolves to whatever
# version pip selects at install time.
!pip -q install torch diffusers transformers accelerate scipy safetensors xformers mediapy ipywidgets==7.7.1

In [None]:
#@title Importing Model, Sampler and Checker { form-width: "20%", display-mode: "form" }

from diffusers import StableDiffusionPipeline, EulerAncestralDiscreteScheduler, DDIMScheduler, EulerDiscreteScheduler, UniPCMultistepScheduler, PNDMScheduler, DPMSolverMultistepScheduler
from diffusers.models import AutoencoderKL
from diffusers.utils import make_image_grid
import torch

In [None]:
#@title Generating Images { form-width: "20%", display-mode: "form" }
# Load the Stable Diffusion v1.5 checkpoint with float16 weights, then move
# the whole pipeline onto the first CUDA device.
text2img_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
text2img_pipe = text2img_pipe.to("cuda:0")

# Run a single text-to-image pass (no generator supplied) and show the first
# image of the returned batch as the cell output.
prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(prompt=prompt).images[0]
image

In [None]:
# Same prompt as before, this time passing a CUDA generator seeded with 1234
# to the pipeline call.
prompt = "high resolution, a photograph of an astronaut riding a horse"
generator = torch.Generator("cuda:0").manual_seed(1234)
image = text2img_pipe(prompt=prompt, generator=generator).images[0]
image

In [None]:
# Check out the current scheduler (displayed as the cell output)
text2img_pipe.scheduler

In [None]:
# Swap in EulerDiscreteScheduler, built from the config of the scheduler the
# pipeline currently holds, then generate with a generator seeded with 1234.
prompt = "high resolution, a photograph of an astronaut riding a horse"
generator = torch.Generator("cuda:0").manual_seed(1234)
text2img_pipe.scheduler = EulerDiscreteScheduler.from_config(
    text2img_pipe.scheduler.config
)
image = text2img_pipe(prompt=prompt, generator=generator).images[0]
image

In [None]:
# Euler scheduler with 20 steps
# Re-create the seeded generator here: the `generator` left over from the
# previous cell has already been drawn from by that run, so reusing it would
# start from a different random state each time this cell is executed.
generator = torch.Generator("cuda:0").manual_seed(1234)
prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=20,
).images[0]
image

In [None]:
# PNDMScheduler, 20 inference steps, generator seeded with 1234.
# The new scheduler is built from the config of the one currently attached.
current_config = text2img_pipe.scheduler.config
text2img_pipe.scheduler = PNDMScheduler.from_config(current_config)

prompt = "high resolution, a photograph of an astronaut riding a horse"
generator = torch.Generator("cuda:0").manual_seed(1234)
image = text2img_pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=20,
).images[0]
image

In [None]:
# DPMSolverMultistepScheduler with the pipeline's default number of steps
text2img_pipe.scheduler = DPMSolverMultistepScheduler.from_config(text2img_pipe.scheduler.config)
# Re-create the seeded generator: the one left over from earlier cells has
# already been consumed, so reusing it would not reproduce the same image
# when this cell is run again.
generator = torch.Generator("cuda:0").manual_seed(1234)
prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(
    prompt=prompt,
    generator=generator,
).images[0]
image

In [None]:
# Use deliberate-v2
text2img_pipe = StableDiffusionPipeline.from_pretrained(
    "stablediffusionapi/deliberate-v2",
    torch_dtype=torch.float16,
).to("cuda:0")
# Attach the DPM-Solver scheduler AFTER loading. Assigning it before the
# `from_pretrained` call only modified the old pipeline object, which was then
# replaced, so the setting never reached the deliberate-v2 pipeline.
text2img_pipe.scheduler = DPMSolverMultistepScheduler.from_config(text2img_pipe.scheduler.config)

# Unseeded generation with the newly loaded checkpoint.
prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(
    prompt=prompt,
).images[0]
image

In [None]:
# Deterministic generation: drive the pipeline with a CUDA generator whose
# seed is fixed to 1.
prompt = "high resolution, a photograph of an astronaut riding a horse"
generator = torch.Generator("cuda:0").manual_seed(1)
image = text2img_pipe(prompt=prompt, generator=generator).images[0]
image

In [None]:
# Random seed: a new seed in [1, 999999] is drawn on every run of this cell,
# so each execution produces a different image.
import random

prompt = "high resolution, a photograph of an astronaut riding a horse"
seed = random.randint(1, 999999)
generator = torch.Generator("cuda:0").manual_seed(seed)
image = text2img_pipe(prompt=prompt, generator=generator).images[0]
image

In [None]:
#@title Specify inference steps
# Generate with an explicit step count (30) instead of the pipeline default.
# No generator is passed to this call.
prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(prompt=prompt, num_inference_steps=30).images[0]
image

In [None]:
#@title Guidance Scale
# Compare guidance scales side by side. Each run gets its OWN generator seeded
# with the same value: sharing one generator across the calls would advance
# its state between runs, so the images would differ in starting noise as well
# as in guidance scale and the comparison would not isolate the parameter.
seed = 123
guidance_scales = [3, 7, 10]

prompt = "high resolution, a photograph of an astronaut riding a horse on mars"

images = [
    text2img_pipe(
        prompt=prompt,
        num_inference_steps=30,
        guidance_scale=guidance_scale,
        generator=torch.Generator("cuda:0").manual_seed(seed),
    ).images[0]
    for guidance_scale in guidance_scales
]

# Keep the individual names available for any later use.
image_3_gs, image_7_gs, image_10_gs = images

# One row, one column per guidance scale.
make_image_grid(images, rows=1, cols=len(images))

In [None]:
#@title Specify the image size
# Switch to the Euler scheduler and render a 768x512 (width x height) image
# using 40 steps, guidance scale 7.5 and a generator seeded with 2.
text2img_pipe.scheduler = EulerDiscreteScheduler.from_config(
    text2img_pipe.scheduler.config
)

prompt = "high resolution, a photograph of an astronaut riding a horse"
image = text2img_pipe(
    prompt=prompt,
    num_inference_steps=40,
    guidance_scale=7.5,
    width=768,
    height=512,
    generator=torch.Generator("cuda:0").manual_seed(2),
).images[0]
image

In [None]:
#@title Text Guided Image Inpainting
# load CLIPSeg (processor and segmentation model come from the same checkpoint)
from transformers import CLIPSegForImageSegmentation, CLIPSegProcessor
import matplotlib.pyplot as plt

clipseg_checkpoint = "CIDAS/clipseg-rd64-refined"
processor = CLIPSegProcessor.from_pretrained(clipseg_checkpoint)
model = CLIPSegForImageSegmentation.from_pretrained(clipseg_checkpoint)

# generate mask data: pair every text prompt with a copy of the current
# `image` and let the processor build PyTorch tensors for the model.
prompts = ['the background']
inputs = processor(
    text=prompts,
    images=[image] * len(prompts),
    padding=True,
    return_tensors="pt",
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)
preds = outputs.logits

# Squash the logits with a sigmoid, take the first entry, then print its
# shape and plot it.
mask_data = torch.sigmoid(preds)[0]
print(mask_data.shape)
plt.imshow(mask_data)

In [None]:
# generate mask binary image
import cv2
from PIL import Image

# Write the predicted mask to disk with matplotlib, then read it back with
# OpenCV so it can be processed as an image array.
mask_file_name = "bg_mask.png"  # plain literal: there is nothing to interpolate
plt.imsave(mask_file_name, mask_data)
mask_data_cv = cv2.imread(mask_file_name) # -> (352, 352, 3)

def get_mask_img(mask_data):
    """Convert a BGR mask array into a black-and-white PIL image.

    Args:
        mask_data: Image array in OpenCV BGR channel order (here, the result
            of ``cv2.imread``).

    Returns:
        A ``PIL.Image`` built from the thresholded grayscale array, in which
        pixels brighter than 100 become 255 and all others become 0.
    """
    gray_image = cv2.cvtColor(mask_data, cv2.COLOR_BGR2GRAY)
    # Binary threshold; the first return value (the threshold used) is not needed.
    _, bw_image = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY)
    # The former `cv2.cvtColor(bw_image, cv2.COLOR_BGR2RGB)` call was removed:
    # its result was discarded, so it never affected the returned image.
    return Image.fromarray(bw_image)

bw_image = get_mask_img(mask_data=mask_data_cv)
# Resize the mask to the size of the image it will be applied to. PIL's
# `Image.size` is (width, height), which is the order `resize` expects, so the
# mask stays aligned if the source image size is changed.
bw_image = bw_image.resize(image.size)
bw_image

In [None]:
from diffusers import StableDiffusionInpaintPipeline, EulerDiscreteScheduler

# Build the inpainting pipeline from the SD v1.4 checkpoint with float16
# weights and the safety checker disabled, then move it to the first GPU.
inpaint_pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda:0")
inpaint_pipe.scheduler = EulerDiscreteScheduler.from_config(
    inpaint_pipe.scheduler.config
)

# Repaint the region selected by `bw_image` according to the prompt.
# width/height are not passed (explicit 768x512 overrides were left disabled
# in this cell), so the pipeline's own defaults apply.
prompt = "blue sky, clouds"
inpaint_image = inpaint_pipe(
    prompt=prompt,
    image=image,
    mask_image=bw_image,
    num_inference_steps=50,
    guidance_scale=9,
    strength=1,
    generator=torch.Generator("cuda:0").manual_seed(1),
).images[0]
inpaint_image

In [None]:
#@title Add detail to image using img2img
# Produce the 768x512 starting sketch with the text-to-image pipeline, using
# a generator seeded with 1234. The result is stored in `image`.
prompt = "child sketch of a wonderland"
image = text2img_pipe(
    prompt=prompt,
    generator=torch.Generator("cuda:0").manual_seed(1234),
    width=768,
    height=512,
).images[0]
image

In [None]:

import torch
from diffusers import StableDiffusionImg2ImgPipeline

# Load the img2img pipeline ONCE. It was previously built twice in a row with
# identical arguments, which repeated the load and GPU transfer for no effect
# (the second object simply replaced the first).
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "stablediffusionapi/deliberate-v2",
    torch_dtype=torch.float16,
).to("cuda:0")

enrich_prompt = "a fantasy wonderland with castles, colorful, two kids on the road"
neg_prompt = "black white sketch, gray scale"

# Refine the existing `image` with the richer prompt; the negative prompt
# names what the result should steer away from.
img2img_w_details = img2img_pipe(
    prompt=enrich_prompt,
    negative_prompt=neg_prompt,
    image=image,
    num_inference_steps=120,
    guidance_scale=16,
    strength=0.3,
    generator=torch.Generator("cuda:0").manual_seed(1),
).images[0]
img2img_w_details