I'm using stable-fast == 0.0.13.post3 to test an LCM LoRA, and the result looks like this:
Using the same LCM LoRA in pure diffusers works fine, though.
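For comparison, this is roughly what I mean by "pure diffusers" — a minimal sketch with the same base model, scheduler, and LoRA, just without the stable-fast compile step (paths and parameters taken from my repro below):

import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    safety_checker=None,
    use_safetensors=True,
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
pipe.fuse_lora()

# With this setup the LCM LoRA output looks correct.
image = pipe(
    prompt="a rendering of a living room with a couch and a tv",
    num_inference_steps=7,
    guidance_scale=1.5,
).images[0]
image.save("./output_lcm_pure_diffusers.png")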
My stable-fast code is as follows:
import torch
from diffusers import LCMScheduler, AutoPipelineForText2Image, DiffusionPipeline
from sfast.compilers.stable_diffusion_pipeline_compiler import (
    compile, CompilationConfig)

import numpy as np
from PIL import Image

base_model_path = "runwayml/stable-diffusion-v1-5"
lcm_path = "latent-consistency/lcm-lora-sdv1-5"


def load_model():
    model = DiffusionPipeline.from_pretrained(base_model_path,
                                              torch_dtype=torch.float16,
                                              safety_checker=None,
                                              use_safetensors=True)
    model.scheduler = LCMScheduler.from_config(model.scheduler.config)
    model.safety_checker = None
    model.to(torch.device('cuda'))
    # model.unet.load_attn_procs(lcm_path)
    model.load_lora_weights(lcm_path)
    model.fuse_lora()
    return model


def compile_model(model):
    config = CompilationConfig.Default()
    # xformers and Triton are suggested for achieving best performance.
    # It might be slow for Triton to generate, compile and fine-tune kernels.
    try:
        import xformers
        config.enable_xformers = True
    except ImportError:
        print('xformers not installed, skip')
    # NOTE:
    # When GPU VRAM is insufficient or the architecture is too old, Triton might be slow.
    # Disable Triton if you encounter this problem.
    try:
        import triton
        config.enable_triton = True
    except ImportError:
        print('Triton not installed, skip')
    # NOTE:
    # CUDA Graph is suggested for small batch sizes and small resolutions to reduce CPU overhead.
    # My implementation can handle dynamic shape with increased need for GPU memory.
    # But when your GPU VRAM is insufficient or the image resolution is high,
    # CUDA Graph could cause less efficient VRAM utilization and slow down the inference,
    # especially when on Windows or WSL which has the "shared VRAM" mechanism.
    # If you meet problems related to it, you should disable it.
    config.enable_cuda_graph = True
    model = compile(model, config)
    return model


def main():
    prompt = "a rendering of a living room with a couch and a tv"
    negative_prompt = "ugly,logo,pixelated,lowres,text,word,cropped,low quality,normal quality,username,watermark,signature,blurry,soft,NSFW,painting,cartoon,hang,occluded objects,Fisheye View"
    model = load_model()
    model = compile_model(model)
    kwarg_inputs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=768,
        height=512,
        num_inference_steps=7,
        num_images_per_prompt=1,
        guidance_scale=1.5,
    )
    # NOTE: Warm it up.
    # The initial calls will trigger compilation and might be very slow.
    # After that, it should be very fast.
    for _ in range(3):
        output_image = model(**kwarg_inputs).images[0]
    # Let's see it!
    # Note: Progress bar might work incorrectly due to the async nature of CUDA.
    img_total = []
    for i in range(2):
        output_image = model(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=768,
            height=512,
            num_inference_steps=7,
            num_images_per_prompt=6,
            # generator=generators
        ).images
        img_row = []
        for img in output_image:
            img_row.append(np.asarray(img))
        img = np.hstack(img_row)
        img_total.append(img)
    image = np.vstack(img_total)
    # cv2.putText(image,prompt,(40,50),cv2.FONT_HERSHEY_SIMPLEX,2,(0,0,255),3)
    image = Image.fromarray(image)
    image.save("./output_lcm.png")


if __name__ == '__main__':
    main()
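In case it helps with triage, this is how I plan to narrow it down: recompile with one optimization disabled at a time and re-run the same prompt. The helper below is a hypothetical sketch reusing load_model() from above, not something I have verified:

def compile_model_with(enable_xformers, enable_triton, enable_cuda_graph):
    # Hypothetical helper: same as compile_model(), but with each stable-fast
    # optimization controlled explicitly so they can be toggled one at a time.
    model = load_model()
    config = CompilationConfig.Default()
    config.enable_xformers = enable_xformers
    config.enable_triton = enable_triton
    config.enable_cuda_graph = enable_cuda_graph
    return compile(model, config)

# For example, start with CUDA Graph off, since the NOTE above says it can
# misbehave when VRAM is tight:
# model = compile_model_with(enable_xformers=True, enable_triton=True,
#                            enable_cuda_graph=False)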