In [6]:
import cv2
import numpy as np
import os
import random
from tqdm import tqdm
import glob
from sklearn.model_selection import train_test_split
from PIL import Image, ImageEnhance

# CONFIG
# Fixed seed so the dataset can be regenerated exactly on a fresh kernel.
# Both RNGs are seeded because the generators draw from the stdlib `random`
# module as well as from NumPy's global generator.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

output_dir = "synthetic_squares"
img_dir = os.path.join(output_dir, "images")
lbl_dir = os.path.join(output_dir, "labels")
os.makedirs(img_dir, exist_ok=True)
os.makedirs(lbl_dir, exist_ok=True)

img_size = 224                # Side length (pixels) of the square output images
total_images = 2000           # Total images (across train/val)
min_squares = 1
max_squares = 3               # Up to N colored squares per image
distractor_shapes = True      # Add circles/triangles for difficulty
blur_prob = 0.3               # Probability of applying a Gaussian blur
brightness_jitter = 0.25      # Probability of applying a brightness change
real_bg_prob = 0.4            # Use real backgrounds X% of the time

# name -> (color tuple in OpenCV's BGR channel order, integer class id)
colors = {
    "blue":   ((255,  30,  30), 0),
    "red":    ((30,  30, 255), 1),
    "orange": ((0, 165, 255), 2),
    "yellow": ((0, 255, 255), 3),
    "purple": ((200,  80, 200), 4),
    "brown":  ((19,  69, 139), 5)
}
# Dict insertion order matches the class ids above, so index == class id.
class_names = list(colors.keys())

# Solid fill colors used when no real background image is picked.
backgrounds = [
    (240,240,240), (255,255,255), (200,200,200), (160,160,160),
    (220,200,170), (230,230,210)
]

# Optional: supply your own real image backgrounds (landscape, desk, etc)
real_bg_folder = "real_backgrounds"
# Sorted because glob order is filesystem-dependent; a stable order is needed
# for the seeded random.choice() to pick the same file on every run.
real_bg_files = sorted(glob.glob(os.path.join(real_bg_folder, "*.jpg")))

def make_background():
    """Create one background canvas for a synthetic image.

    With probability ``real_bg_prob`` (and only if ``real_bg_files`` is
    non-empty) a random file from ``real_bg_files`` is loaded and resized to
    ``img_size`` x ``img_size``. Otherwise a solid color from ``backgrounds``
    is used, with Gaussian noise (sigma 10) added 40% of the time.

    Returns:
        np.ndarray: uint8 array of shape (img_size, img_size, 3) in BGR
        channel order, contiguous and writable so cv2 can draw on it.
    """
    if random.random() < real_bg_prob and real_bg_files:
        # The context manager releases the file handle once pixels are copied.
        with Image.open(random.choice(real_bg_files)) as src:
            rgb = np.array(src.resize((img_size, img_size)).convert("RGB"))
        # PIL decodes to RGB, but the color table, the cv2 drawing calls and
        # cv2.imwrite all use BGR. Without this flip, real backgrounds would
        # be saved with red and blue swapped.
        bg = np.ascontiguousarray(rgb[:, :, ::-1])
    else:
        bg = np.full((img_size, img_size, 3), random.choice(backgrounds), dtype=np.uint8)
        # Add noise
        if random.random() < 0.4:
            noise = np.random.normal(0, 10, (img_size, img_size, 3))
            # Clip before the uint8 cast so values do not wrap around.
            bg = np.clip(bg + noise, 0, 255).astype(np.uint8)
    return bg

def random_square_params():
    """Draw the random placement and perspective parameters for one square.

    Returns:
        tuple: ``(cx, cy, size, x_squeeze, y_squeeze)`` where
            - ``cx``, ``cy`` are integer center coordinates, each within
              20%-80% of ``img_size``;
            - ``size`` is the integer side length, 18%-50% of ``img_size``;
            - ``x_squeeze`` in [0.4, 1.0] is the primary horizontal squeeze
              factor (1.0 = no squeeze);
            - ``y_squeeze`` in [0.85, 1.0] is the slight vertical squeeze
              factor (1.0 = no squeeze).
    """
    center_lo = int(0.2*img_size)
    center_hi = int(0.8*img_size)
    side_lo = int(0.18*img_size)
    side_hi = int(0.5*img_size)

    # Tuple elements are evaluated left to right, so the random draws happen
    # in the order: cx, cy, size, x_squeeze, y_squeeze.
    return (
        random.randint(center_lo, center_hi),
        random.randint(center_lo, center_hi),
        random.randint(side_lo, side_hi),
        random.uniform(0.4, 1.0),
        random.uniform(0.85, 1.0),
    )

def apply_random_blur(img):
    """Randomly soften ``img`` with Gaussian and/or horizontal motion blur.

    The two effects are drawn independently: a Gaussian blur with
    probability ``blur_prob`` (kernel side 3, 5 or 7) and a horizontal
    motion blur with probability 0.15 (kernel side 5, 9 or 15).

    Args:
        img: Image array accepted by the cv2 filtering functions.

    Returns:
        The filtered image, or ``img`` unchanged if neither effect was drawn.
    """
    if random.random() < blur_prob:
        k = random.choice([3,5,7])
        img = cv2.GaussianBlur(img, (k, k), 0)

    if random.random() < 0.15:
        # Simulate motion blur: an averaging kernel whose only non-zero
        # entries lie on the middle row, i.e. a horizontal box filter.
        length = random.choice([5,9,15])
        motion_kernel = np.zeros((length, length))
        middle_row = int((length-1)/2)
        motion_kernel[middle_row, :] = np.ones(length)
        motion_kernel = motion_kernel / length
        img = cv2.filter2D(img, -1, motion_kernel)

    return img

def apply_brightness(img):
    """Randomly brighten or darken ``img``.

    With probability ``brightness_jitter`` the image is scaled by a
    brightness factor drawn uniformly from [0.7, 1.3], using PIL's
    ``ImageEnhance.Brightness``.

    Args:
        img: Image array that ``Image.fromarray`` can convert.

    Returns:
        A new array with adjusted brightness, or ``img`` itself when no
        adjustment was drawn.
    """
    # Guard clause: most calls leave the image untouched.
    if not (random.random() < brightness_jitter):
        return img

    factor = random.uniform(0.7, 1.3)
    adjusted = ImageEnhance.Brightness(Image.fromarray(img)).enhance(factor)
    return np.array(adjusted)

def random_distractor(img):
    """Draw one random non-square shape (circle or triangle) onto ``img``.

    The shape gets a random color and is either filled (thickness -1) or
    outlined with a 2 px stroke. Drawing happens in place.

    Args:
        img: Image array to draw on; modified in place by the cv2 calls.

    Returns:
        The same ``img`` object, for call-chaining convenience.
    """
    shape = random.choice(["circle","triangle"])
    # randint's upper bound is exclusive, so 256 is needed to allow 255.
    color = tuple(np.random.randint(0,256,3).tolist())
    thickness = random.choice([-1,2])
    if shape == "circle":
        cx, cy = random.randint(0,img_size), random.randint(0,img_size)
        r = random.randint(15,50)
        cv2.circle(img, (cx,cy), r, color, thickness)
    elif shape == "triangle":
        # cv2 polygon functions require int32 points; np.random.randint
        # returns the platform default integer (int64 on most systems), which
        # OpenCV rejects.
        pts = np.random.randint(0,img_size, (3,2)).astype(np.int32)
        # polylines does not accept a negative thickness, so the outline is
        # always drawn at 2 px and the interior is filled separately.
        cv2.polylines(img, [pts], isClosed=True, color=color, thickness=thickness if thickness>0 else 2)
        if thickness == -1:
            cv2.fillPoly(img, [pts], color)
    return img

# --- Generation Loop ---
# For every image: paint a background, draw 1..N perspective-distorted colored
# squares, optionally add distractor shapes, apply blur/brightness
# augmentation, then write the image plus a label file with one line per
# square: "<class id> <x_center> <y_center> <width> <height>", all box values
# normalized by img_size.
print("Generating images...")
all_image_paths = []
available_classes = list(colors.keys())
for i in tqdm(range(total_images)):
    img = make_background()
    label_lines = []
    # Each class appears at most once per image. random.sample raises
    # ValueError when asked for more items than exist, so cap the count.
    nsq = min(random.randint(min_squares, max_squares), len(available_classes))
    used_classes = random.sample(available_classes, nsq)
    for cname in used_classes:
        bgr, classid = colors[cname]
        cx, cy, size, x_squeeze, y_squeeze = random_square_params()
        half = size / 2

        # Trapezoid centered on the origin: the top edge is squeezed mainly
        # in x and slightly in y, the bottom edge keeps the full width. This
        # approximates a square on the floor seen in perspective.
        pts = np.array([
            [-half * x_squeeze, -half * y_squeeze],  # Top-left - squeezed in x and slightly in y
            [half * x_squeeze, -half * y_squeeze],   # Top-right - squeezed in x and slightly in y
            [half, half],                           # Bottom-right - normal
            [-half, half]                           # Bottom-left - normal
        ], dtype=np.float32)

        # Apply random horizontal shear to simulate different viewing angles
        if random.random() < 0.7:
            shear_factor = random.uniform(-0.3, 0.3)
            shear_matrix = np.array([
                [1, shear_factor, 0],
                [0, 1, 0]
            ], dtype=np.float32)
            # Only the 2x2 linear part is used: x' = x + shear * y, y' = y.
            pts = np.dot(pts, shear_matrix[:,:2].T)

        # Move the shape from the origin to its position in the image.
        pts += np.array([cx, cy])
        pts = pts.astype(np.int32)
        cv2.fillConvexPoly(img, pts, bgr)

        # The polygon can extend past the canvas (large size + shear near the
        # border). cv2 only paints the in-image part, so clip the bounding box
        # to the visible pixel range to keep the label matching the image.
        x_min, y_min = np.clip(np.min(pts, axis=0), 0, img_size - 1)
        x_max, y_max = np.clip(np.max(pts, axis=0), 0, img_size - 1)
        if x_max <= x_min or y_max <= y_min:
            # Nothing visible: a zero-area box would be an invalid label.
            continue
        x_center = ((x_min + x_max) / 2) / img_size
        y_center = ((y_min + y_max) / 2) / img_size
        w = (x_max - x_min) / img_size
        h = (y_max - y_min) / img_size
        label_lines.append(f"{classid} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    # Add distractor shapes
    if distractor_shapes and random.random() < 0.5:
        n = random.randint(1,3)
        for _ in range(n):
            img = random_distractor(img)

    # Blurring & brightness
    img = apply_random_blur(img)
    img = apply_brightness(img)

    fname = f"synthetic_{i:05d}.jpg"
    img_path = os.path.join(img_dir, fname)
    lbl_path = os.path.join(lbl_dir, fname.replace(".jpg", ".txt"))
    # cv2.imwrite reports failure through its return value instead of raising;
    # surface it so a missing image cannot be silently paired with a label.
    if not cv2.imwrite(img_path, img):
        raise IOError(f"Failed to write image: {img_path}")
    with open(lbl_path, "w") as f:
        f.writelines(line + "\n" for line in label_lines)
    all_image_paths.append(img_path)

print("Dataset generated. Splitting into train/val...")

# --- Train/Val Split ---
# Fixed random_state keeps the 85/15 split identical across runs.
train_imgs, val_imgs = train_test_split(all_image_paths, test_size=0.15, random_state=42)
for split, img_list in [("train", train_imgs), ("val", val_imgs)]:
    split_img_dir = os.path.join(output_dir, "images", split)
    split_lbl_dir = os.path.join(output_dir, "labels", split)
    os.makedirs(split_img_dir, exist_ok=True)
    os.makedirs(split_lbl_dir, exist_ok=True)
    for img_path in img_list:
        base = os.path.basename(img_path)
        lbl_name = base.replace(".jpg",".txt")
        lbl_path = os.path.join(lbl_dir, lbl_name)
        # os.replace overwrites an existing destination on every platform.
        # os.rename raises on Windows when the file from a previous run is
        # already there, which made this cell fail on re-run.
        os.replace(img_path, os.path.join(split_img_dir, base))
        os.replace(lbl_path, os.path.join(split_lbl_dir, lbl_name))

print("Done! Final structure is:")
print(f"{output_dir}/images/train/  (images for training)")
print(f"{output_dir}/images/val/    (images for validation)")
print(f"{output_dir}/labels/train/  (labels for training)")
print(f"{output_dir}/labels/val/    (labels for validation)")

# --- YAML for YOLOv8 ---
# class_names is rendered with Python's list repr, which is also a valid
# YAML flow sequence of single-quoted strings.
yaml = f"""
path: {os.path.abspath(output_dir)}
train: images/train
val: images/val
nc: {len(colors)}
names: {class_names}
"""
with open(os.path.join(output_dir, "data.yaml"), "w") as f:
    f.write(yaml)

print("data.yaml written!")

Generating images...


  0%|          | 0/2000 [00:00<?, ?it/s]

100%|██████████| 2000/2000 [00:07<00:00, 270.38it/s]


Dataset generated. Splitting into train/val...
Done! Final structure is:
synthetic_squares/images/train/  (images for training)
synthetic_squares/images/val/    (images for validation)
synthetic_squares/labels/train/  (labels for training)
synthetic_squares/labels/val/    (labels for validation)
data.yaml written!


In [13]:
# Training YOLOv8 on the synthetic dataset
# Fine-tunes a pretrained YOLOv8 checkpoint on the dataset described by
# <output_dir>/data.yaml, validates it, then runs a single smoke-test
# prediction on one validation image.
import torch
from ultralytics import YOLO

# Query CUDA once so the config and the log message cannot disagree.
use_cuda = torch.cuda.is_available()

# Configuration for YOLOv8 training
config = {
    "model_size": "n",        # n (nano), s (small), m (medium), l (large), x (xlarge)
    "epochs": 50,             # Number of training epochs
    "batch_size": 16,         # Batch size
    "img_size": img_size,     # Use same image size as generated dataset
    "patience": 10,           # Early stopping patience
    "device": 0 if use_cuda else "cpu"  # Use GPU if available
}

print(f"Training YOLOv8{config['model_size']} model on synthetic squares dataset")
print(f"Using device: {'CUDA' if use_cuda else 'CPU'}")

# Initialize pre-trained YOLOv8 model
model = YOLO(f"yolov8{config['model_size']}.pt")

# Train the model. Kept under its own name so the training metrics are not
# overwritten by the prediction results further down.
train_results = model.train(
    data=os.path.join(output_dir, "data.yaml"),
    epochs=config['epochs'],
    imgsz=config['img_size'],
    batch=config['batch_size'],
    patience=config['patience'],
    device=config['device'],
    verbose=True
)

print("\nTraining complete!")
print(f"Model saved at: {os.path.join(model.trainer.save_dir, 'weights', 'best.pt')}")

# Validate the model
val_results = model.val()
print(f"Validation metrics: mAP@0.5={val_results.box.map50:.4f}, mAP@0.5:0.95={val_results.box.map:.4f}")

# Optional: Run inference on a sample image
# os.listdir order is arbitrary; a sorted *.jpg listing always picks the same
# sample and cannot return a non-image entry.
val_img_dir = os.path.join(output_dir, "images", "val")
val_images = sorted(glob.glob(os.path.join(val_img_dir, "*.jpg")))
if not val_images:
    raise FileNotFoundError(f"No validation images found in {val_img_dir}")
sample_img = val_images[0]
results = model.predict(source=sample_img, save=True, conf=0.25)
print(f"Inference test complete. Results saved at: {model.predictor.save_dir}")

Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\frjh19\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Training YOLOv8n model on synthetic squares dataset
Using device: CPU
Ultralytics 8.3.141  Python-3.10.4 torch-2.7.0+cpu CPU (13th Gen Intel Core(TM) i9-13900)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=synthetic_squares\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=No

100%|██████████| 755k/755k [00:00<00:00, 11.7MB/s]

Overriding model.yaml nc=80 with nc=6

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics




Transferred 319/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'
[34m[1mtrain: [0mFast image access  (ping: 0.00.0 ms, read: 3.42.3 MB/s, size: 13.5 KB)


[34m[1mtrain: [0mScanning C:\Users\frjh19\RoboticsProject\Robotics\synthetic_squares\labels\train... 1700 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1700/1700 [00:01<00:00, 1089.55it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\frjh19\RoboticsProject\Robotics\synthetic_squares\labels\train.cache
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 6.83.3 MB/s, size: 23.3 KB)


[34m[1mval: [0mScanning C:\Users\frjh19\RoboticsProject\Robotics\synthetic_squares\labels\val... 300 images, 0 backgrounds, 0 corrupt: 100%|██████████| 300/300 [00:00<00:00, 1113.54it/s]


[34m[1mval: [0mNew cache created: C:\Users\frjh19\RoboticsProject\Robotics\synthetic_squares\labels\val.cache




Plotting labels to runs\detect\train\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 224 train, 224 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G     0.9239      2.745      1.149         10        224: 100%|██████████| 107/107 [00:24<00:00,  4.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.34it/s]

                   all        300        605      0.663      0.594      0.635       0.51






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50         0G     0.7583      1.107      1.086         14        224: 100%|██████████| 107/107 [00:24<00:00,  4.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.65it/s]

                   all        300        605      0.876      0.844      0.912      0.768






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50         0G     0.7274     0.9635      1.076         10        224: 100%|██████████| 107/107 [00:24<00:00,  4.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.62it/s]

                   all        300        605      0.893      0.881      0.942      0.804






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50         0G     0.7038     0.8929      1.059         14        224: 100%|██████████| 107/107 [00:23<00:00,  4.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.70it/s]

                   all        300        605      0.946      0.911      0.953      0.839






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50         0G     0.6533     0.8191      1.037          8        224: 100%|██████████| 107/107 [00:24<00:00,  4.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.60it/s]

                   all        300        605      0.933      0.917      0.957      0.835






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50         0G     0.6192     0.7535      1.008         18        224: 100%|██████████| 107/107 [00:24<00:00,  4.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.77it/s]

                   all        300        605      0.954      0.906      0.959      0.852






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50         0G     0.5989     0.7419      1.009         14        224: 100%|██████████| 107/107 [00:24<00:00,  4.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.78it/s]

                   all        300        605      0.953      0.928      0.969      0.857






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50         0G     0.5967     0.7041      1.005         17        224: 100%|██████████| 107/107 [00:24<00:00,  4.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  5.49it/s]

                   all        300        605      0.979      0.921      0.969      0.865






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/107 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [12]:
!pip install ultralytics

Defaulting to user installation because normal site-packages is not writeable
Collecting ultralytics
  Downloading ultralytics-8.3.141-py3-none-any.whl.metadata (37 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.22.0-cp310-cp310-win_amd64.whl.metadata (6.3 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.141-py3-none-any.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 1.0/1.0 MB 23.7 MB/s eta 0:00:00
Downloading torchvision-0.22.0-cp310-cp310-win_amd64.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---------------------------------------- 1.7/1.7 MB 46.9 MB/s eta 0:00:00
Downloading ultralytics_thop-2.0.14-py3-none-any.whl