In [203]:
import subprocess
import cv2 
import numpy as np
import shutil
import os 

In [204]:
def run_conv2d(mode, engine, kernel_type, input_path, output_path): 
    cmd = [
        "./02-run.sh",
        "--mode", mode,
        "--engine", engine,
        "--ktype", kernel_type,
        "--ksize", "3",
        "--input", input_path,
        "--output", output_path,
        "--color", "rgb"
    ]
    
    result = subprocess.run(cmd, capture_output=True, text=True)
    
    if result.returncode != 0:
        print("Error running C++ program:")
        print(result.stderr)
        return None 
    
    return cv2.imread(output_path, cv2.IMREAD_GRAYSCALE)

In [205]:
def detect_rectangle(mode, engine, input_image):
    
    tmp_dir = "tmp"
    os.makedirs(tmp_dir, exist_ok=True)

    gx_img = run_conv2d(mode, engine, "sobel_x", input_image, tmp_dir + "/gx.png")
    gy_img = run_conv2d(mode, engine, "sobel_y", input_image, tmp_dir + "/gy.png")

    gradient = np.sqrt(gx_img.astype(float) ** 2 + gy_img.astype(float) ** 2)
    max_val = gradient.max()
    if max_val > 0:
        gradient = (gradient / max_val) * 255
        
    gradient = gradient.astype(np.uint8)

    shutil.rmtree(tmp_dir)

    # Edge detection
    binary = cv2.Canny(gradient, 50, 150)

    kernel = np.ones((3,3), np.uint8)
    binary = cv2.dilate(binary, kernel, iterations=2)

    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)

    best_box = None
    best_area = 0

    for i in range(1, num_labels):
        x = stats[i, cv2.CC_STAT_LEFT]
        y = stats[i, cv2.CC_STAT_TOP]
        w = stats[i, cv2.CC_STAT_WIDTH]
        h = stats[i, cv2.CC_STAT_HEIGHT]
        area = stats[i, cv2.CC_STAT_AREA]

        if area > best_area:
            best_area = area
            best_box = (x, y, w, h)

    return best_box



In [206]:
def generate_dataset(output_dir="dataset", num_images=300):
    os.makedirs(output_dir, exist_ok=True)

    ground_truths = {}

    IMG_SIZE = 256

    for i in range(num_images):
        # White background
        img = np.ones((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8) * 255

        shape_type = random.choice(["rectangle", "circle"])

        # Random color (avoid white)
        color = (
            random.randint(0, 200),
            random.randint(0, 200),
            random.randint(0, 200)
        )

        if shape_type == "rectangle":
            w = random.randint(40, 120)
            h = random.randint(40, 120)
            x = random.randint(0, IMG_SIZE - w - 1)
            y = random.randint(0, IMG_SIZE - h - 1)

            cv2.rectangle(img, (x, y), (x+w, y+h), color, -1)

            gt_box = (x, y, w, h)

        elif shape_type == "circle":
            r = random.randint(20, 60)
            cx = random.randint(r, IMG_SIZE - r - 1)
            cy = random.randint(r, IMG_SIZE - r - 1)

            cv2.circle(img, (cx, cy), r, color, -1)

            x = cx - r
            y = cy - r
            w = 2 * r
            h = 2 * r

            gt_box = (x, y, w, h)

        filename = f"img_{i:04d}.png"
        cv2.imwrite(os.path.join(output_dir, filename), img)

        ground_truths[filename] = {
            "shape": shape_type,
            "bbox": gt_box
        }

    with open(os.path.join(output_dir, "ground_truth.json"), "w") as f:
        json.dump(ground_truths, f, indent=4)

    print(f"Generated {num_images} images.")
    return ground_truths


In [207]:
def add_gaussian_noise_to_dataset(input_dir, output_dir, mean=0, std=15):
    """
    Adds Gaussian noise to all images in input_dir
    and saves them to output_dir.
    """

    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            input_path = os.path.join(input_dir, filename)
            output_path = os.path.join(output_dir, filename)

            img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)

            if img is None:
                continue

            # Generate noise
            noise = np.random.normal(mean, std, img.shape)
            noisy = img.astype(np.float32) + noise

            # Clip to valid range
            noisy = np.clip(noisy, 0, 255).astype(np.uint8)

            cv2.imwrite(output_path, noisy)

    print(f"Finished adding Gaussian noise (std={std}) to dataset.")


In [209]:
clean_dataset_dir = "dataset_clean"
noisy_dataset_dir = "dataset_noisy"

gt = generate_dataset(clean_dataset_dir, 10)

add_gaussian_noise_to_dataset(
    clean_dataset_dir,
    noisy_dataset_dir,
    std=15
)

Generated 10 images.
Finished adding Gaussian noise (std=15) to dataset.


In [210]:
import json

with open(clean_dataset_dir + "/ground_truth.json") as f:
    ground_truths = json.load(f)

In [211]:
def compute_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    yB = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    inter_w = max(0, xB - xA)
    inter_h = max(0, yB - yA)
    inter_area = inter_w * inter_h

    areaA = boxA[2] * boxA[3]
    areaB = boxB[2] * boxB[3]

    union = areaA + areaB - inter_area

    if union == 0:
        return 0

    return inter_area / union


In [212]:
TP = 0
FP = 0
FN = 0
ious = []

for filename, data in ground_truths.items():
    path = os.path.join(clean_dataset_dir, filename)

    gt_box = tuple(data["bbox"]) 
    
    pred_box = detect_rectangle("functional", "avx", path)

    if pred_box is None:
        FN += 1
        continue

    iou = compute_iou(gt_box, pred_box)
    ious.append(iou)

    if iou >= 0.8:
        TP += 1
    else:
        FP += 1



In [213]:
total = len(ground_truths)

accuracy = TP / total
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
mean_iou = np.mean(ious)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Mean IoU:", mean_iou)


Accuracy: 0.7
Precision: 0.7
Recall: 1.0
Mean IoU: 0.8316490910721328


In [214]:
import cv2
import os

def visualize_detection(image_path, gt_box=None, pred_box=None, save_path="debug_detection.png"):
    """
    Draw ground truth and predicted bounding boxes on image.
    
    Green  -> Ground truth
    Red    -> Prediction
    """

    # Load original image (color for visualization)
    img = cv2.imread(image_path)
    vis = img.copy()

    # Draw GT box
    if gt_box is not None:
        x, y, w, h = gt_box
        cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(vis, "GT", (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    # Draw predicted box
    if pred_box is not None:
        x, y, w, h = pred_box
        cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.putText(vis, "PRED", (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

    cv2.imwrite(save_path, vis)
    print(f"Saved visualization to {save_path}")


In [215]:
filename, data = next(iter(ground_truths.items()))
image_path = os.path.join(clean_dataset_dir, filename)

gt_box = tuple(data["bbox"])
pred_box = detect_rectangle("functional", "avx", image_path)

print("GT:", gt_box)
print("PRED:", pred_box)

visualize_detection(
    image_path,
    gt_box=gt_box,
    pred_box=pred_box,
    save_path="debug_result.png"
)

GT: (1, 69, 94, 94)
PRED: (np.int32(0), np.int32(66), np.int32(98), np.int32(100))
Saved visualization to debug_result.png
