In [1]:
import os

from roboflow import Roboflow

# The API key is read from the environment so the secret is never stored in the
# notebook, its outputs, or version control. Any key that was previously
# committed in this cell should be revoked and regenerated.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not ROBOFLOW_API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

# Download version 10 of the dataset in YOLOv8 format.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("ai-blueberry-detection-analysis").project("blueberry-detection-gt")
version = project.version(10)
dataset = version.download("yolov8")

ModuleNotFoundError: No module named 'roboflow'

In [2]:
import warnings
import torch
import numpy as np
from PIL import Image, ImageChops
from diffusers import StableDiffusionXLImg2ImgPipeline
from torchvision.ops import box_convert
from huggingface_hub import hf_hub_download
from groundingdino.models import build_model
from groundingdino.util.slconfig import SLConfig
from groundingdino.util.utils import clean_state_dict
from groundingdino.util.inference import annotate, load_image, predict

warnings.filterwarnings("ignore")

# --------------------------- Model Loading ---------------------------

def load_model_hf(repo_id, filename, ckpt_config_filename, device='cuda'):
    """Build a Grounding DINO model from files hosted on the Hugging Face Hub.

    Args:
        repo_id: Hub repository that holds both the config and the checkpoint.
        filename: Name of the checkpoint file inside the repository.
        ckpt_config_filename: Name of the model config file inside the repository.
        device: Value stored on the config as ``args.device`` before the model
            is built. The model itself is not moved to this device here.

    Returns:
        The model in eval mode with the checkpoint's ``'model'`` state dict
        loaded (non-strict, so missing/unexpected keys are only reported).
    """
    cache_config_file = hf_hub_download(repo_id=repo_id, filename=ckpt_config_filename)
    args = SLConfig.fromfile(cache_config_file)
    # Set the device BEFORE building: previously it was assigned after
    # build_model(args), so the builder could never observe the requested value.
    args.device = device
    model = build_model(args)

    cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
    # Checkpoint tensors are deserialized onto the CPU regardless of `device`.
    # NOTE(review): torch.load unpickles the downloaded file; only use
    # checkpoints from a repository you trust.
    checkpoint = torch.load(cache_file, map_location='cpu')
    log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
    print(f"Model loaded from {cache_file}\n => {log}")
    model.eval()
    return model
# --------------------------- Mask Generation ---------------------------

def generate_mask_from_boxes(image, boxes):
    """Rasterize normalized detection boxes into a binary uint8 mask.

    Args:
        image: Array whose first two dimensions are (height, width); any
            trailing channel dimension is ignored.
        boxes: Tensor of rows ``(cx, cy, w, h)`` normalized to [0, 1] relative
            to the image width/height. May be empty.

    Returns:
        ``np.ndarray`` of shape (height, width), dtype uint8, with 255 inside
        every box and 0 elsewhere.
    """
    # Only the first two dims are needed, so 2-D (grayscale) arrays work too.
    h, w = image.shape[:2]
    mask = np.zeros((h, w), dtype=np.uint8)

    # Nothing detected: return the all-zero mask without touching the boxes.
    if len(boxes) == 0:
        return mask

    boxes_unnorm = boxes * torch.Tensor([w, h, w, h])
    boxes_xyxy = box_convert(boxes=boxes_unnorm, in_fmt="cxcywh", out_fmt="xyxy").numpy()

    for x0, y0, x1, y1 in boxes_xyxy:
        # Clamp to the image bounds. A negative start index would otherwise be
        # interpreted by NumPy as "count from the end", silently dropping or
        # misplacing boxes that overhang the top/left border.
        left, right = max(int(x0), 0), min(int(x1), w)
        top, bottom = max(int(y0), 0), min(int(y1), h)
        if right > left and bottom > top:
            mask[top:bottom, left:right] = 255
    return mask

ModuleNotFoundError: No module named 'torch'

In [None]:
# --------------------------- Grounding DINO Configuration ---------------------------
# Hub repository and file names handed to load_model_hf.
ckpt_repo_id = "ShilongLiu/GroundingDINO"
ckpt_filename = "groundingdino_swint_ogc.pth"
ckpt_config_filename = "GroundingDINO_SwinT_OGC.cfg.py"

model = load_model_hf(
    repo_id=ckpt_repo_id,
    filename=ckpt_filename,
    ckpt_config_filename=ckpt_config_filename,
)

# --------------------------- Inpainting Model ---------------------------
# Half-precision SDXL refiner weights, loaded from safetensors and placed on the GPU.
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
pipe = pipe.to("cuda")

In [None]:
# --------------------------- Image and Prompt ---------------------------
# Detection settings: caption to ground and the two score thresholds passed to predict().
TEXT_PROMPT = "blueberry"
BOX_THRESHOLD = 0.3
TEXT_THRESHOLD = 0.25

# Input image and the text prompt later given to the diffusion pipeline.
local_image_path = r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\autoannotate study\berries-bounding-box-1\train\images\IMG_9331_jpg.rf.20009327b80c55eec840b8b4f5cddf57.jpg"
prompt = "generate variation"

# --------------------------- Inference ---------------------------
image_source, image_tensor = load_image(local_image_path)

boxes, logits, phrases = predict(
    model=model,
    image=image_tensor,
    caption=TEXT_PROMPT,
    box_threshold=BOX_THRESHOLD,
    text_threshold=TEXT_THRESHOLD,
)

# Draw the detections, then reverse the channel axis (BGR to RGB) for display.
annotated_frame = annotate(
    image_source=image_source,
    boxes=boxes,
    logits=logits,
    phrases=phrases,
)[..., ::-1]

In [None]:
# `image_source` starts as an ndarray but is rebound to a PIL image at the end
# of this cell. Going through np.asarray first makes the cell safe to re-run:
# it yields the pixel array in both cases, so the `.shape` access inside
# generate_mask_from_boxes no longer fails on a second execution.
image_source_np = np.asarray(image_source)

binary_mask_np = generate_mask_from_boxes(image_source_np, boxes)
image_mask = Image.fromarray(binary_mask_np)
image_source = Image.fromarray(image_source_np)

# Last expression: show the annotated detections as the cell output.
Image.fromarray(annotated_frame)

In [None]:
# --------------------------- Resize for Inpainting ---------------------------
# The picture and its mask are both brought to the same 512x512 working size.
image_source_for_inpaint, image_mask_for_inpaint = (
    pil_image.resize((512, 512)) for pil_image in (image_source, image_mask)
)

In [None]:

# Invert the mask: 255 -> 0, 0 -> 255 (boxes become black, background white).
# The full-resolution version is used for compositing below.
image_mask_inverted = ImageChops.invert(image_mask.convert("L"))
# Same inversion at the 512x512 working size, so the mask handed to the
# pipeline has the same dimensions as the image handed to it. Previously the
# full-resolution mask was passed alongside the resized image.
image_mask_inverted_for_inpaint = ImageChops.invert(image_mask_for_inpaint.convert("L"))

# --------------------------- Inpainting ---------------------------
# NOTE(review): `pipe` is a StableDiffusionXLImg2ImgPipeline, which does not
# document a `mask_image` argument; depending on the installed diffusers
# version it may be rejected or silently ignored. Confirm, and consider
# StableDiffusionXLInpaintPipeline if masked generation is intended.
image_inpainting = pipe(
    prompt=prompt,
    image=image_source_for_inpaint,
    mask_image=image_mask_inverted_for_inpaint
).images[0]

# --------------------------- Resize Back ---------------------------

image_inpainting = image_inpainting.resize(image_source.size)

# --------------------------- Composite with Preserved Objects ---------------------------
# Convert to RGBA so the two layers can be alpha-composited.
image_source_rgba = image_source.convert("RGBA")
image_inpainting_rgba = image_inpainting.convert("RGBA")

# Prepare alpha mask (kept for inspection; not used by the composite below).
alpha_mask = Image.new("RGBA", image_source.size)
alpha_mask.putalpha(image_mask_inverted)

# Extract preserved object regions from original image: original pixels inside
# the boxes, fully transparent everywhere else.
cutout = Image.composite(
    image_source_rgba,
    Image.new("RGBA", image_source.size, (0, 0, 0, 0)),
    mask=image_mask.convert("L")
)

# Paste preserved objects onto inpainted image
final_composite = Image.alpha_composite(image_inpainting_rgba, cutout)

# --------------------------- Output ---------------------------

final_composite.show()
# final_composite.save("output_with_overlay.png")

In [2]:
import os
from PIL import Image, ImageChops
from diffusers import StableDiffusionXLImg2ImgPipeline
from torchvision.ops import box_convert
import torch
import numpy as np

# Configuration
# Input images and their YOLO label files.
source_folder = r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\GUI and Pipeline\blueberry-detection---GT-10\train\images"
yolo_labels_folder = r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\GUI and Pipeline\blueberry-detection---GT-10\train\labels"
# Where generated variations are written, and the prompt used to produce them.
output_folder = "./variations"
prompt = "generate accurate variation"

# Ensure output directory exists (no error if it is already there)
os.makedirs(output_folder, exist_ok=True)

# Load Stable Diffusion XL pipeline: fp16 refiner weights from safetensors, moved to the GPU
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

# Function to load YOLO bounding boxes from a text file
def load_yolo_boxes(label_path, img_width, img_height):
    """Read a YOLO label file and return its boxes as pixel-space corners.

    Each non-blank line must hold exactly five numbers:
    ``class x_center y_center width height`` with the four box values
    normalized to the image size. The class id is discarded.

    Args:
        label_path: Path of the YOLO ``.txt`` label file.
        img_width: Image width in pixels, used to scale x values.
        img_height: Image height in pixels, used to scale y values.

    Returns:
        ``np.ndarray`` of shape (N, 4), dtype float32, rows ``(x0, y0, x1, y1)``
        in pixels. ``N`` is 0 for a file with no boxes.

    Raises:
        ValueError: If a non-blank line does not contain exactly five numbers.
    """
    boxes = []
    with open(label_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue
            if len(parts) != 5:
                raise ValueError(
                    f"{label_path}:{line_number}: expected 5 values "
                    f"(class cx cy w h), got {len(parts)}"
                )
            _, x_center, y_center, width, height = map(float, parts)
            boxes.append([x_center, y_center, width, height])

    # A label file without boxes is valid; return an empty (0, 4) array instead
    # of failing on the shape mismatch an empty tensor would cause below.
    if not boxes:
        return np.zeros((0, 4), dtype=np.float32)

    boxes_tensor = torch.tensor(boxes)
    boxes_unnorm = boxes_tensor * torch.Tensor([img_width, img_height, img_width, img_height])
    boxes_xyxy = box_convert(boxes=boxes_unnorm, in_fmt="cxcywh", out_fmt="xyxy").numpy()

    return boxes_xyxy

# Loop through images in folder
for filename in os.listdir(source_folder):
    # Only image files are processed; anything else in the folder is ignored.
    if not filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
        continue

    input_image_path = os.path.join(source_folder, filename)
    label_filename = os.path.splitext(filename)[0] + ".txt"
    label_path = os.path.join(yolo_labels_folder, label_filename)
    output_image_path = os.path.join(output_folder, f"variation_{filename}")

    # An image without a label file used to abort the whole batch with
    # FileNotFoundError; report it and carry on with the remaining images.
    if not os.path.isfile(label_path):
        print(f"Skipping {filename}: no label file at {label_path}")
        continue

    # Load image; the context manager releases the file handle as soon as the
    # independent RGB copy has been made.
    with Image.open(input_image_path) as source_image:
        image = source_image.convert("RGB")
    img_width, img_height = image.size

    # Load YOLO boxes
    boxes_xyxy = load_yolo_boxes(label_path, img_width, img_height)

    # Generate mask from boxes: 255 inside every labelled box, 0 elsewhere
    mask = np.zeros((img_height, img_width), dtype=np.uint8)
    for x0, y0, x1, y1 in boxes_xyxy:
        # Clamp to the image so a negative start is not read by NumPy as an
        # index counted from the end.
        left, right = max(int(x0), 0), min(int(x1), img_width)
        top, bottom = max(int(y0), 0), min(int(y1), img_height)
        if right > left and bottom > top:
            mask[top:bottom, left:right] = 255

    image_mask = Image.fromarray(mask)

    # Resize images and masks for inpainting
    image_resized = image.resize((512, 512))
    mask_resized = image_mask.resize((512, 512))
    mask_inverted = ImageChops.invert(mask_resized)

    # Generate variation
    # NOTE(review): `pipe` is a StableDiffusionXLImg2ImgPipeline, which does not
    # document a `mask_image` argument; depending on the installed diffusers
    # version it may be rejected or silently ignored. Confirm, and consider
    # StableDiffusionXLInpaintPipeline if masked generation is intended.
    variation = pipe(prompt=prompt, image=image_resized, mask_image=mask_inverted).images[0]

    # Resize variation back to original size
    variation = variation.resize(image.size)

    # Overlay original box components onto the variation: original pixels
    # where the mask is 255, generated pixels elsewhere.
    original_boxes_image = Image.composite(image, variation, image_mask)

    # Save output
    original_boxes_image.save(output_image_path)


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

RuntimeError: Failed to import diffusers.pipelines.onnx_utils because of the following error (look up to see its traceback):
DLL load failed while importing onnxruntime_pybind11_state: A dynamic link library (DLL) initialization routine failed.

In [1]:
import os
from PIL import Image, ImageChops
from diffusers import StableDiffusionXLImg2ImgPipeline
from torchvision.ops import box_convert
import torch
import numpy as np
import shutil

# Configuration
# Input images and their YOLO label files.
source_folder = r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\GUI and Pipeline\blueberry-detection---GT-10\train\images"
yolo_labels_folder = r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\GUI and Pipeline\blueberry-detection---GT-10\train\labels"
# Output locations for generated images and the labels copied next to them.
output_folder = "./variations/images"
output_label_folder = "./variations/labels"
prompt = "generate accurate variation"

# Ensure output directories exist (no error if they are already there)
os.makedirs(output_folder, exist_ok=True)
os.makedirs(output_label_folder, exist_ok=True)

# Load Stable Diffusion XL pipeline: fp16 refiner weights from safetensors, moved to the GPU
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

# Function to load YOLO bounding boxes from a text file
def load_yolo_boxes(label_path, img_width, img_height):
    """Read a YOLO label file and return its boxes as pixel-space corners.

    Each non-blank line must hold exactly five numbers:
    ``class x_center y_center width height`` with the four box values
    normalized to the image size. The class id is discarded.

    Args:
        label_path: Path of the YOLO ``.txt`` label file.
        img_width: Image width in pixels, used to scale x values.
        img_height: Image height in pixels, used to scale y values.

    Returns:
        ``np.ndarray`` of shape (N, 4), dtype float32, rows ``(x0, y0, x1, y1)``
        in pixels. ``N`` is 0 for a file with no boxes.

    Raises:
        ValueError: If a non-blank line does not contain exactly five numbers.
    """
    boxes = []
    with open(label_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue
            if len(parts) != 5:
                raise ValueError(
                    f"{label_path}:{line_number}: expected 5 values "
                    f"(class cx cy w h), got {len(parts)}"
                )
            _, x_center, y_center, width, height = map(float, parts)
            boxes.append([x_center, y_center, width, height])

    # A label file without boxes is valid; return an empty (0, 4) array instead
    # of failing on the shape mismatch an empty tensor would cause below.
    if not boxes:
        return np.zeros((0, 4), dtype=np.float32)

    boxes_tensor = torch.tensor(boxes)
    boxes_unnorm = boxes_tensor * torch.Tensor([img_width, img_height, img_width, img_height])
    boxes_xyxy = box_convert(boxes=boxes_unnorm, in_fmt="cxcywh", out_fmt="xyxy").numpy()

    return boxes_xyxy

# Loop through images in folder
for filename in os.listdir(source_folder):
    # Only image files are processed; anything else in the folder is ignored.
    if not filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
        continue

    input_image_path = os.path.join(source_folder, filename)
    label_filename = os.path.splitext(filename)[0] + ".txt"
    label_path = os.path.join(yolo_labels_folder, label_filename)
    output_image_name = f"variation_{filename}"
    output_image_path = os.path.join(output_folder, output_image_name)
    output_label_path = os.path.join(output_label_folder, f"variation_{os.path.splitext(filename)[0]}.txt")

    # An image without a label file used to abort the whole batch with
    # FileNotFoundError; report it and carry on with the remaining images.
    if not os.path.isfile(label_path):
        print(f"Skipping {filename}: no label file at {label_path}")
        continue

    # Load image; the context manager releases the file handle as soon as the
    # independent RGB copy has been made.
    with Image.open(input_image_path) as source_image:
        image = source_image.convert("RGB")
    img_width, img_height = image.size

    # Load YOLO boxes
    boxes_xyxy = load_yolo_boxes(label_path, img_width, img_height)

    # Generate mask from boxes: 255 inside every labelled box, 0 elsewhere
    mask = np.zeros((img_height, img_width), dtype=np.uint8)
    for x0, y0, x1, y1 in boxes_xyxy:
        # Clamp to the image so a negative start is not read by NumPy as an
        # index counted from the end.
        left, right = max(int(x0), 0), min(int(x1), img_width)
        top, bottom = max(int(y0), 0), min(int(y1), img_height)
        if right > left and bottom > top:
            mask[top:bottom, left:right] = 255

    image_mask = Image.fromarray(mask)

    # Resize images and masks for inpainting
    image_resized = image.resize((512, 512))
    mask_resized = image_mask.resize((512, 512))
    mask_inverted = ImageChops.invert(mask_resized)

    # Generate variation
    # NOTE(review): `pipe` is a StableDiffusionXLImg2ImgPipeline, which does not
    # document a `mask_image` argument; depending on the installed diffusers
    # version it may be rejected or silently ignored. Confirm, and consider
    # StableDiffusionXLInpaintPipeline if masked generation is intended.
    variation = pipe(prompt=prompt, image=image_resized, mask_image=mask_inverted).images[0]

    # Resize variation back to original size
    variation = variation.resize(image.size)

    # Overlay original box components onto the variation: original pixels
    # where the mask is 255, generated pixels elsewhere.
    original_boxes_image = Image.composite(image, variation, image_mask)

    # Save output image
    original_boxes_image.save(output_image_path)

    # Copy corresponding YOLO label file to new label path; the boxes are
    # pasted back unchanged, so the original annotations still apply.
    shutil.copy(label_path, output_label_path)


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

RuntimeError: Failed to import diffusers.pipelines.onnx_utils because of the following error (look up to see its traceback):
DLL load failed while importing onnxruntime_pybind11_state: A dynamic link library (DLL) initialization routine failed.