-
Notifications
You must be signed in to change notification settings - Fork 6.2k
Closed
Labels
bug (Something isn't working), stale (Issues that haven't received updates)
Description
Describe the bug
When I load FLUX Fill in fp16 (`torch.float16`), the generated image is completely black.
Reproduction
My inference code:
import torch
from diffusers import FluxFillPipeline
from PIL import Image
import cv2
import numpy as np
def read_and_check_input_parameters(image_path, mask_path=None):
    """Load an image and, optionally, its mask from disk with OpenCV.

    Args:
        image_path: Path of the image file. It is read with ``cv2.imread``
            and its last axis is reversed, turning OpenCV's BGR channel
            order into RGB.
        mask_path: Path of the mask file, read with flag ``0`` (grayscale).
            When ``None``, no mask is read.

    Returns:
        A tuple ``(image, mask)``. ``mask`` is ``None`` when ``mask_path``
        is ``None``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for either
            path (missing or unreadable file).
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising,
        # so fail here with a clear message rather than on the slice below.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    image = image[..., ::-1]  # RGB

    if mask_path is None:
        return image, None

    mask = cv2.imread(mask_path, 0)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path!r}")
    return image, mask
def crop_around_mask(mask, square_to_mask_ratio=2, r=2.5):
    """Crop a square window of ``mask`` centred on its non-zero region.

    Naming note: ``x`` indexes axis 0 (rows) and ``y`` indexes axis 1
    (columns), following the order in which ``np.where`` returns indices.

    Args:
        mask: 2-D array; non-zero entries mark the masked region.
        square_to_mask_ratio: Requested side of the crop relative to the
            larger extent of the mask's bounding box.
        r: The mask is left uncropped when the larger bounding-box extent
            exceeds ``min(h, w) / r``.

    Returns:
        A tuple ``(cropped_mask, crop_image_points, mask_points_in_cropped)``:

        * ``cropped_mask`` is ``mask[x1:x2, y1:y2]``.
        * ``crop_image_points`` is ``(x1, y1, x2, y2)``.
        * ``mask_points_in_cropped`` is the bounding box
          ``(xmin, ymin, xmax, ymax)`` expressed relative to ``(x1, y1)``.

        When the mask has no non-zero entries, or its bounding box is too
        large (see ``r``), the result is ``(mask, None, None)``.
    """
    h, w = mask.shape
    xs, ys = np.where(mask)

    # Nothing is masked: there is no region to centre a crop on.
    if xs.size == 0:
        return mask, None, None

    # Bounding box of the masked region (inclusive indices).
    xmin, xmax = int(xs.min()), int(xs.max())
    ymin, ymax = int(ys.min()), int(ys.max())

    mask_size = max(xmax - xmin, ymax - ymin)
    if mask_size > min(h, w) / r:
        return mask, None, None

    # Half-side of the requested square around the mask centre.
    center_distance = int(square_to_mask_ratio * mask_size // 2) + 1

    # Cap the side so the square always fits inside the mask; otherwise
    # shifting the window back in bounds would yield negative start indices
    # and the final slice would silently wrap around.
    side = min(2 * center_distance, h, w)
    half = side // 2

    center_x = xmin + (xmax - xmin) // 2
    center_y = ymin + (ymax - ymin) // 2

    # Slide the window so it lies within [0, h] x [0, w]. Clamping the
    # start keeps both sides equal to ``side``, so the crop stays square.
    x1 = min(max(center_x - half, 0), h - side)
    y1 = min(max(center_y - half, 0), w - side)
    x2 = x1 + side
    y2 = y1 + side

    crop_image_points = (x1, y1, x2, y2)
    mask_points_in_cropped = (xmin - x1, ymin - y1, xmax - x1, ymax - y1)
    return mask[x1:x2, y1:y2], crop_image_points, mask_points_in_cropped
def resize_and_pad(image: np.ndarray, mask=None, target_size=(1024, 1024)):
    """Resize ``image`` (and optionally ``mask``) and zero-pad to ``target_size``.

    The image is scaled by ``min(target_size) / max(height, width)`` with
    linear interpolation, then padded with zeros so that the result has
    ``target_size[0]`` rows and ``target_size[1]`` columns. The padding is
    split between the two sides of each axis, with any odd pixel going to
    the bottom/right.

    Args:
        image: Array whose shape unpacks as ``(height, width, channels)``.
        mask: Optional 2-D array; it is cast to ``uint8`` and resized and
            padded with the same geometry as ``image``.
        target_size: ``(rows, cols)`` of the padded output.

    Returns:
        ``(new_image, paddings)`` when ``mask`` is ``None``, otherwise
        ``(new_image, new_mask, paddings)``, where ``paddings`` is
        ``(top_pad, bottom_pad, left_pad, right_pad)``.
    """
    height, width, _ = image.shape
    scale = min(target_size) / max(height, width)
    # int() truncates, so an extreme aspect ratio could produce a 0-pixel
    # side, which cv2.resize rejects; keep at least one pixel per axis.
    height = max(1, int(height * scale))
    width = max(1, int(width * scale))
    new_image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR)

    pad_height = target_size[0] - height
    pad_width = target_size[1] - width
    top_pad = pad_height // 2
    bottom_pad = pad_height - top_pad
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad

    paddings = (top_pad, bottom_pad, left_pad, right_pad)
    spatial_pad = ((top_pad, bottom_pad), (left_pad, right_pad))

    # The channel axis gets no padding.
    new_image = np.pad(new_image, spatial_pad + ((0, 0),), mode="constant")

    if mask is None:
        return new_image, paddings

    new_mask = cv2.resize(
        mask.astype(np.uint8),
        (width, height),
        interpolation=cv2.INTER_LINEAR,
    )
    new_mask = np.pad(new_mask, spatial_pad, mode="constant")
    return new_image, new_mask, paddings
# Reproduction script: crop the input around the brush mask, pad it to
# 1024x1024 and run FLUX Fill inpainting on it.
model_path = "black-forest-labs/FLUX.1-Fill-dev"
# NOTE(review): float16 is kept on purpose because it is the condition this
# report is about. The "invalid value encountered in cast" warning in the
# logs suggests non-finite values in the decoded image; the diffusers FLUX
# examples load the pipeline in torch.bfloat16 -- worth trying to confirm.
pipe = FluxFillPipeline.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
).to("cuda")

img_path = "image05.jpg"
brush_path = "brush05.jpg"
image, brush = read_and_check_input_parameters(img_path, brush_path)

# Binarise the brush: pixels brighter than 100 become 255, the rest 0.
brush = ((brush > 100) * 255).astype("uint8")
original_image = image.copy()

brush, pts, mask_pts = crop_around_mask(brush)
if pts is not None:  # Check whether it is cropped
    # pts is (x1, y1, x2, y2) with x on axis 0, matching the mask crop.
    image = image[pts[0] : pts[2], pts[1] : pts[3]]

image_padded, brush_padded, padding_factors = resize_and_pad(
    image, brush, [1024, 1024]
)

# The image was already reversed to RGB when it was read, so it is passed to
# PIL as-is; reversing the channels a second time here would hand BGR data
# to the pipeline.
image_padded = Image.fromarray(image_padded)
brush_padded = Image.fromarray(brush_padded)

flux_output_img = pipe(
    prompt="glass",
    image=image_padded,
    mask_image=brush_padded,
    height=1024,
    width=1024,
    guidance_scale=30,
    num_inference_steps=8,
    max_sequence_length=512,
    generator=torch.Generator("cpu").manual_seed(0),
).images[0]
flux_output_img.save('result.png')
Logs
Loading pipeline components...: 0%| | 0/7 [00:00<?, ?it/s]You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00, 1.12s/it]
Loading pipeline components...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00, 1.12s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00, 1.48s/it]
/diffusers/src/diffusers/image_processor.py:147: RuntimeWarning: invalid value encountered in cast
images = (images * 255).round().astype("uint8")
System Info
cuda 12.1
diffusers==0.33.0.dev0 (built from source)
torch==2.4.1
torchvision==0.19.1
pillow==11.1.0
opencv-python==4.10.0.84
Who can help?
No response
Metadata
Metadata
Assignees
Labels
bug (Something isn't working), stale (Issues that haven't received updates)