In [204]:
import subprocess
import cv2 
import numpy as np
import shutil
import os 
import random
import json
import time

In [205]:
def run_conv2d(
    engine, 
    kernel_type, 
    kernel_size,
    input_path, 
    output_path,
    mode="functional"): 
    
    kernel_size_str = str(kernel_size)
    
    cmd = [
        "./02-run.sh",
        "--mode", mode,
        "--engine", engine,
        "--ktype", kernel_type,
        "--ksize", kernel_size_str,
        "--input", input_path,
        "--output", output_path,
        "--color", "rgb"
    ]
    
    result = subprocess.run(cmd, capture_output=True, text=True)
    
    if result.returncode != 0:
        print("Error running C++ program:")
        print(result.stderr)
        return None 
    
    return cv2.imread(output_path, cv2.IMREAD_GRAYSCALE)

In [206]:
def detect_rectangle(engine, input_image, mode="functional"):
        
    tmp_dir = "tmp"
    os.makedirs(tmp_dir, exist_ok=True)
    
    blur_img = run_conv2d(engine, "gaussian_blur", 7, input_image, tmp_dir + "/blur.png")

    gx_img = run_conv2d(engine, "sobel_x", 3, tmp_dir + "/blur.png", tmp_dir + "/gx.png")
    gy_img = run_conv2d(engine, "sobel_y", 3, tmp_dir + "/blur.png", tmp_dir + "/gy.png")
 
    gradient = np.sqrt(gx_img.astype(float) ** 2 + gy_img.astype(float) ** 2)
    max_val = gradient.max()
    if max_val > 0:
        gradient = (gradient / max_val) * 255
        
    gradient = gradient.astype(np.uint8)

    shutil.rmtree(tmp_dir)

    # Edge detection
    binary = cv2.Canny(gradient, 50, 150)

    kernel = np.ones((3,3), np.uint8)
    binary = cv2.dilate(binary, kernel, iterations=2)

    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)

    best_box = None
    best_area = 0

    for i in range(1, num_labels):
        x = stats[i, cv2.CC_STAT_LEFT]
        y = stats[i, cv2.CC_STAT_TOP]
        w = stats[i, cv2.CC_STAT_WIDTH]
        h = stats[i, cv2.CC_STAT_HEIGHT]
        area = stats[i, cv2.CC_STAT_AREA]

        if area > best_area:
            best_area = area
            best_box = (x, y, w, h)

    return best_box



In [207]:
def generate_dataset(output_dir="dataset", num_images=300):
    os.makedirs(output_dir, exist_ok=True)

    ground_truths = {}

    IMG_SIZE = 256

    for i in range(num_images):
        # White background
        img = np.ones((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8) * 255

        shape_type = random.choice(["rectangle", "circle"])

        # Random color (avoid white)
        color = (
            random.randint(0, 200),
            random.randint(0, 200),
            random.randint(0, 200)
        )

        if shape_type == "rectangle":
            w = random.randint(40, 120)
            h = random.randint(40, 120)
            x = random.randint(0, IMG_SIZE - w - 1)
            y = random.randint(0, IMG_SIZE - h - 1)

            cv2.rectangle(img, (x, y), (x+w, y+h), color, -1)

            gt_box = (x, y, w, h)

        elif shape_type == "circle":
            r = random.randint(20, 60)
            cx = random.randint(r, IMG_SIZE - r - 1)
            cy = random.randint(r, IMG_SIZE - r - 1)

            cv2.circle(img, (cx, cy), r, color, -1)

            x = cx - r
            y = cy - r
            w = 2 * r
            h = 2 * r

            gt_box = (x, y, w, h)

        filename = f"img_{i:04d}.png"
        cv2.imwrite(os.path.join(output_dir, filename), img)

        ground_truths[filename] = {
            "shape": shape_type,
            "bbox": gt_box
        }

    with open(os.path.join(output_dir, "ground_truth.json"), "w") as f:
        json.dump(ground_truths, f, indent=4)

    return ground_truths


In [208]:
def add_gaussian_noise_to_dataset(input_dir, output_dir, mean=0, std=15):
    """
    Adds Gaussian noise to all images in input_dir
    and saves them to output_dir.
    """

    os.makedirs(output_dir, exist_ok=True)
    
    shutil.copyfile(src=input_dir + "/ground_truth.json", dst=output_dir + "/ground_truth.json")

    for filename in os.listdir(input_dir):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            input_path = os.path.join(input_dir, filename)
            output_path = os.path.join(output_dir, filename)

            img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)

            if img is None:
                continue

            # Generate noise
            noise = np.random.normal(mean, std, img.shape)
            noisy = img.astype(np.float32) + noise

            # Clip to valid range
            noisy = np.clip(noisy, 0, 255).astype(np.uint8)

            cv2.imwrite(output_path, noisy)

In [209]:
clean_dataset_dir = "dataset_clean"
noisy_dataset_dir = "dataset_noisy"

n_images = 100

gt = generate_dataset(clean_dataset_dir, n_images)

add_gaussian_noise_to_dataset(
    clean_dataset_dir,
    noisy_dataset_dir,
    std=15
)

In [210]:
def compute_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    yB = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    inter_w = max(0, xB - xA)
    inter_h = max(0, yB - yA)
    inter_area = inter_w * inter_h

    areaA = boxA[2] * boxA[3]
    areaB = boxB[2] * boxB[3]

    union = areaA + areaB - inter_area

    if union == 0:
        return 0

    return inter_area / union


In [211]:
def compute_metrics(dataset, engine, threshold=0.75):

    TP = 0
    FP = 0
    FN = 0
    ious = []

    with open(os.path.join(dataset, "ground_truth.json")) as f:
        ground_truths = json.load(f)
        
    start_time = time.perf_counter()

    for filename, data in ground_truths.items():

        path = os.path.join(dataset, filename)
        gt_box = tuple(data["bbox"]) 

        pred_box = detect_rectangle(engine, path)

        if pred_box is None:
            FN += 1
            continue

        iou = compute_iou(gt_box, pred_box)
        ious.append(iou)

        if iou >= threshold:
            TP += 1
        else:
            FP += 1

    elapsed = time.perf_counter() - start_time
    
    total = len(ground_truths)

    accuracy  = TP / total if total > 0 else 0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall    = TP / (TP + FN) if (TP + FN) > 0 else 0
    mean_iou  = np.mean(ious) if ious else 0

    return accuracy, precision, recall, mean_iou, elapsed


In [214]:
engine  = "avx"
dataset = noisy_dataset_dir

accuracy, precision, recall, mean_iou, _ = compute_metrics(dataset, engine)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Mean IoU:", mean_iou)


Accuracy: 0.73
Precision: 0.73
Recall: 1.0
Mean IoU: 0.7781965604556355


In [215]:
import cv2
import os

def visualize_detection(image_path, gt_box=None, pred_box=None, save_path="debug_detection.png"):
    """
    Draw ground truth and predicted bounding boxes on image.
    
    Green  -> Ground truth
    Red    -> Prediction
    """

    # Load original image 
    img = cv2.imread(image_path)
    vis = img.copy()

    # Draw GT box
    if gt_box is not None:
        x, y, w, h = gt_box
        cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(vis, "GT", (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    # Draw predicted box
    if pred_box is not None:
        x, y, w, h = pred_box
        cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.putText(vis, "PRED", (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

    cv2.imwrite(save_path, vis)

In [216]:
def visualize_detection_n(
    dataset, 
    engine,
    output_dir="detection",
    n=10):
    
    with open(dataset + "/ground_truth.json") as f:
        ground_truths = json.load(f)
    
    os.makedirs(output_dir, exist_ok=True)

    for _, (filename, data) in zip(range(n), ground_truths.items()):
        
        image_path = os.path.join(dataset, filename)

        gt_box = tuple(data["bbox"])
        pred_box = detect_rectangle(engine, image_path)
        
        output_path = output_dir + "/" + filename 
        
        visualize_detection(
            image_path,
            gt_box=gt_box,
            pred_box=pred_box,
            save_path=output_path
        )
        

In [203]:
engine  = "avx"
dataset = noisy_dataset_dir

visualize_detection_n(dataset, engine, n=5)

In [217]:
engines = ["baseline", "sse", "avx"]
dataset = noisy_dataset_dir

for engine in engines:
    acc, prec, rec, miou, t = compute_metrics(dataset, engine)

    print(f"\n=== {engine.upper()} ===")
    print(f"time      : {t:.4f}s")
    print(f"accuracy  : {acc:.3f}")
    print(f"precision : {prec:.3f}")
    print(f"recall    : {rec:.3f}")
    print(f"mean IoU  : {miou:.3f}")



=== BASELINE ===
time      : 20.2938s
accuracy  : 0.730
precision : 0.730
recall    : 1.000
mean IoU  : 0.778

=== SSE ===
time      : 20.0794s
accuracy  : 0.730
precision : 0.730
recall    : 1.000
mean IoU  : 0.778

=== AVX ===
time      : 20.1451s
accuracy  : 0.730
precision : 0.730
recall    : 1.000
mean IoU  : 0.778
