In [1]:
from g4f.client import Client
import requests
from tqdm.auto import tqdm
import nest_asyncio
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

nest_asyncio.apply()
os.makedirs("generated", exist_ok=True)

client = Client()
objs = ["bicycle", "e-bike electric bicycle"]
prompts = [
    "a person pushing {obj} in entrance lobby, CCTV footage",
    "a person riding {obj} in city, photo-realistic",
    "a person riding {obj}",
]

def generate_image(obj, i, j, prompt):
    response = client.images.generate(
        model="flux",
        prompt=prompt.format(obj=obj),
        response_format="url",
    )
    image_url = response.data[0].url
    response = requests.get(image_url)
    with open(f"generated/{obj}_{i}_{j}.jpg", "wb") as f:
        f.write(response.content)

def generate_images():
    tasks = []
    with ThreadPoolExecutor(max_workers=50) as executor:
        for i in range(1000):
            for obj in objs:
                for j, prompt in enumerate(prompts):
                    tasks.append(executor.submit(generate_image, obj, i, j, prompt))

        for task in tqdm(as_completed(tasks), total=len(tasks)):
            _ = task.result()

generate_images()

  0%|          | 0/6000 [00:00<?, ?it/s]

Exception ignored in: <async_generator object Blackbox.create_async_generator at 0x70db5ab47c40>
Traceback (most recent call last):
  File "/home/alkin/.local/lib/python3.12/site-packages/g4f/client/__init__.py", line 348, in _generate_image_response
    break
RuntimeError: async generator ignored GeneratorExit
Exception ignored in: <async_generator object Blackbox.create_async_generator at 0x70db5a46dc60>
Traceback (most recent call last):
  File "/home/alkin/.local/lib/python3.12/site-packages/g4f/client/__init__.py", line 348, in _generate_image_response
    break
RuntimeError: async generator ignored GeneratorExit
Exception ignored in: <async_generator object Blackbox.create_async_generator at 0x70db5a334f40>
Traceback (most recent call last):
  File "/home/alkin/.local/lib/python3.12/site-packages/g4f/client/__init__.py", line 348, in _generate_image_response
    break
RuntimeError: async generator ignored GeneratorExit
Exception ignored in: <async_generator object Blackbox.create

In [2]:
# Label the images automatically with YOLOvv11-world

from ultralytics import YOLO
import os
import cv2

model = YOLO("yolov8x-worldv2.pt")
model.set_classes(["bicycle"])

generated_images_dir = "generated"
dataset_dir = "dataset"
images_dir = os.path.join(dataset_dir, "images")
labels_dir = os.path.join(dataset_dir, "labels")

os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

valid_extensions = [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]

for image_name in os.listdir(generated_images_dir):
    if not any(image_name.lower().endswith(ext) for ext in valid_extensions):
        print(f"Skipping non-image file: {image_name}")
        continue

    image_path = os.path.join(generated_images_dir, image_name)
    print(f"Processing: {image_path}")

    try:
        img = cv2.imread(image_path)
        if img is None:
            print(f"Invalid image file: {image_path}")
            continue
    except Exception as e:
        print(f"Error reading image {image_path}: {e}")
        continue

    try:
        results = model(source=image_path)
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")
        continue

    for result in results:
        boxes = result.boxes
        image = result.orig_img

        output_image_path = os.path.join(images_dir, image_name)
        cv2.imwrite(output_image_path, image)

        label_file_path = os.path.join(labels_dir, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file_path, "w") as label_file:
            for box in boxes:

                ################ IMPORTANT ################
                if "e-bike" in image_name:
                    print("e-bike")
                    class_id = 1
                elif "bicycle" in image_name:
                    print("bicycle")
                    class_id = 0
                ################ IMPORTANT ################

                x_center = (box.xywh[0][0] / result.orig_shape[1]).item()
                y_center = (box.xywh[0][1] / result.orig_shape[0]).item()
                width = (box.xywh[0][2] / result.orig_shape[1]).item()
                height = (box.xywh[0][3] / result.orig_shape[0]).item()

                label_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

print("Dataset generation complete.")

Processing: generated/e-bike electric bicycle_450_2.jpg

image 1/1 /home/alkin/Desktop/upwork/scrape-fly-tipper/generated/e-bike electric bicycle_450_2.jpg: 480x640 1 bicycle, 28.7ms
Speed: 0.9ms preprocess, 28.7ms inference, 20.2ms postprocess per image at shape (1, 3, 480, 640)
e-bike
Processing: generated/bicycle_873_2.jpg

image 1/1 /home/alkin/Desktop/upwork/scrape-fly-tipper/generated/bicycle_873_2.jpg: 480x640 1 bicycle, 15.6ms
Speed: 0.9ms preprocess, 15.6ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)
bicycle
Processing: generated/e-bike electric bicycle_279_0.jpg

image 1/1 /home/alkin/Desktop/upwork/scrape-fly-tipper/generated/e-bike electric bicycle_279_0.jpg: 480x640 1 bicycle, 16.4ms
Speed: 0.8ms preprocess, 16.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)
e-bike
Processing: generated/e-bike electric bicycle_407_2.jpg

image 1/1 /home/alkin/Desktop/upwork/scrape-fly-tipper/generated/e-bike electric bicycle_407_2.jpg: 480x640 1 bi

In [3]:
# Split the dataset

import os
import random
import shutil

dataset_dir = "dataset"
new_dir = "split_dataset"
train_dir = os.path.join(new_dir, "train")
val_dir = os.path.join(new_dir, "val")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(os.path.join(train_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(train_dir, "labels"), exist_ok=True)
os.makedirs(os.path.join(val_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(val_dir, "labels"), exist_ok=True)

split_ratio = .90

image_files = [f for f in os.listdir(os.path.join(dataset_dir, "images")) if f.endswith(('.jpg', '.png', '.jpeg'))]
random.shuffle(image_files)

split_index = int(len(image_files) * split_ratio)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

def copy_files(files, source_dir, target_dir):
    for file in files:
        shutil.copy(os.path.join(source_dir, "images", file), os.path.join(target_dir, "images", file))
        label_file = os.path.splitext(file)[0] + ".txt"
        shutil.copy(os.path.join(source_dir, "labels", label_file), os.path.join(target_dir, "labels", label_file))

copy_files(train_files, dataset_dir, train_dir)
copy_files(val_files, dataset_dir, val_dir)

print(f"Dataset split complete. Train: {len(train_files)}, Val: {len(val_files)}")

Dataset split complete. Train: 5400, Val: 600


In [4]:
# Add the train and val directories to the dataset.yaml

file_path = "/home/alkin/Desktop/upwork/scrape-fly-tipper/split_dataset/data.yaml"
content = """train: ../split_dataset/train/images
val: ../split_dataset/val/images
nc: 2
names: ["bicycle", "e-bike"]
"""
with open(file_path, 'w') as file:
    file.write(content)
print(f"File '{file_path}' has been created and written successfully.")

File '/home/alkin/Desktop/upwork/scrape-fly-tipper/split_dataset/data.yaml' has been created and written successfully.
