Skip to content

generated black image in flux fill fp16 #10533

@saeedkhanehgir

Description

@saeedkhanehgir

Describe the bug

when I load flux fill in fp16. I get the black image as generated image.

Reproduction

image05
brush05

my inference code



import torch
from diffusers import FluxFillPipeline
from PIL import Image
import cv2
import numpy as np
def read_and_check_input_parameters(image_path, mask_path=None):
   
    image = cv2.imread(image_path)
    image = image[..., ::-1]  # RGB
    mask = cv2.imread(mask_path, 0)
    return image, mask

def crop_around_mask(mask, square_to_mask_ratio=2, r=2.5):
    
    # Get the coordinates of non-zero elements in the mask
    h, w = mask.shape
    xs, ys = np.where(mask)
    # Calculate the bounding box for the mask
    xmin, xmax = xs.min(), xs.max()
    ymin, ymax = ys.min(), ys.max()
    # Calculate the size of the bounding square for the mask
    mask_size = max(xmax - xmin, ymax - ymin)
    # Check the size
    if max(xmax - xmin, ymax - ymin) > min(h, w) / r:
        return mask, None, None
    # Calculate the distance of each side of the square from the center
    center_distance = int(square_to_mask_ratio * mask_size // 2) + 1

    # Calculate the center of the mask
    center_x = xmin + (xmax - xmin) // 2
    center_y = ymin + (ymax - ymin) // 2

    # Determine the coordinates of the cropping rectangle
    y1, x1 = center_y - center_distance, center_x - center_distance
    y2, x2 = center_y + center_distance, center_x + center_distance

    # Ensure coordinates are within bounds of the mask
    if y1 < 0:
        y2 -= y1
        y1 = 0
    if x1 < 0:
        x2 -= x1
        x1 = 0
    if y2 > w:
        y1 -= y2 - w
        y2 = w
    if x2 > h:
        x1 -= x2 - h
        x2 = h

    # Ensure crop is square
    if x2 - x1 != y2 - y1:
        if (x2 - x1) < (y2 - y1):
            x1 += (y2 - y1) - (x2 - x1)
        else:
            y1 += (x2 - x1) - (y2 - x1)

    crop_image_points = (x1, y1, x2, y2)
    mask_points_in_cropped = (xmin - x1, ymin - y1, xmax - x1, ymax - y1)
    return mask[x1:x2, y1:y2], crop_image_points, mask_points_in_cropped


def resize_and_pad(image: np.ndarray, mask=None, target_size=(1024, 1024)):
    height, width, _ = image.shape
    scale = min(target_size) / max(height, width)
    height = int(height * scale)
    width = int(width * scale)
    new_image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR)
    pad_height = target_size[0] - height
    pad_width = target_size[1] - width
    top_pad = pad_height // 2
    bottom_pad = pad_height - top_pad
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    new_image = np.pad(
        new_image,
        ((top_pad, bottom_pad), (left_pad, right_pad), (0, 0)),
        mode="constant",
    )
    if mask is not None:
        new_mask = cv2.resize(
            mask.astype(np.uint8),
            (width, height),
            interpolation=cv2.INTER_LINEAR,
        )
        new_mask = np.pad(
            new_mask,
            ((top_pad, bottom_pad), (left_pad, right_pad)),
            mode="constant",
        )
        return new_image, new_mask, (top_pad, bottom_pad, left_pad, right_pad)

    return new_image, (top_pad, bottom_pad, left_pad, right_pad)


model_path = "black-forest-labs/FLUX.1-Fill-dev"
pipe = FluxFillPipeline.from_pretrained(model_path,
                                        torch_dtype=torch.float16).to("cuda")
img_path = "image05.jpg"
brush_path = "brush05.jpg"

image, brush = read_and_check_input_parameters(
                img_path, brush_path
            )
brush = ((brush > 100) * 255).astype("uint8")
original_image = image.copy()
brush, pts, mask_pts = crop_around_mask(brush)

if pts is not None:  # Check whether it is cropped
    image = image[pts[0] : pts[2], pts[1] : pts[3]]

image_padded, brush_padded, padding_factors = resize_and_pad(
            image, brush, [1024, 1024]
        )
image_padded = image_padded[:,:,::-1]
image_padded = Image.fromarray(image_padded)
brush_padded = Image.fromarray(brush_padded)
flux_output_img = pipe(
    prompt="glass",
    image=image_padded,
    mask_image=brush_padded,
    height=1024,
    width=1024,
    guidance_scale=30,
    num_inference_steps=8,
    max_sequence_length=512,
    generator=torch.Generator("cpu").manual_seed(0)
).images[0]

flux_output_img.save('result.png')

Logs

Loading pipeline components...:   0%|                                                                                                                                 | 0/7 [00:00<?, ?it/s]You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.12s/it]
Loading pipeline components...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.12s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.48s/it]
/diffusers/src/diffusers/image_processor.py:147: RuntimeWarning: invalid value encountered in cast
  images = (images * 255).round().astype("uint8")

System Info

cuda 12.1
diffusers==0.33.0.dev0 ( build from source)
torch==2.4.1
torchvision==0.19.1
pillow==11.1.0
opencv-python==4.10.0.84

Who can help?

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't workingstaleIssues that haven't received updates

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions