In [3]:
import os
import time
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
import tensorflow as tf
import torch
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/home/ahsan/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [4]:
# Seeding for reproducibility
os.environ["PYTHONHASHSEED"] = str(42)
np.random.seed(42)
tf.random.set_seed(42)

# Hyperparameters
height = 512
width = 768
num_classes = 3

# Paths
dataset_path = "/home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test"
save_path = "/home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/combined_result"
yolo_model_path = "/home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/files/aug/best.pt"
unet_model_path = "/home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/files/aug/unet-multiclass.keras"

In [5]:
# Create Save Directory
os.makedirs(save_path, exist_ok=True)

In [9]:
# Load YOLO Model
yolo_model = YOLO(yolo_model_path)

# Load U-Net Model
unet_model = tf.keras.models.load_model(unet_model_path, compile=False)

In [10]:
# Load test images
test_x = sorted(glob(os.path.join(dataset_path, "images", "*.png")))

In [11]:


# Mapping grayscale values to class labels for segmentation mask
class_colors = {
    0: (0, 0, 0),       # Non-Drivable Area (black)
    1: (79, 247, 211),  # My Way (Greenish-Yellow)
    2: (247, 93, 79)    # Other Way (Red)
}

# Track inference times
time_taken = []

for x_path in tqdm(test_x, desc="Processing Images"):
    name = os.path.basename(x_path)
    original_img = cv2.imread(x_path, cv2.IMREAD_COLOR)

    if original_img is None:
        print(f"Error loading image {x_path}, skipping.")
        continue

    # Resize image for model input
    resized_img = cv2.resize(original_img, (width, height))
    img_input = resized_img / 255.0
    img_input = np.expand_dims(img_input, axis=0)

    ####### 1. YOLO Object Detection #######
    yolo_results = yolo_model(x_path)[0]
    yolo_output = original_img.copy()

    for result in yolo_results.boxes:
        x1, y1, x2, y2 = map(int, result.xyxy[0].tolist())  # Bounding box coordinates
        confidence = result.conf[0].item()
        label = int(result.cls[0].item())

        # Draw bounding boxes
        cv2.rectangle(yolo_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(yolo_output, f"Class {label}: {confidence:.2f}", 
                    (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Save YOLO output image
    yolo_save_path = os.path.join(save_path, f"{name.split('.')[0]}_yolo.png")
    cv2.imwrite(yolo_save_path, yolo_output)

    ####### 2. U-Net Road Segmentation #######
    start_time = time.time()
    unet_prediction = unet_model.predict(img_input)[0]
    time_taken.append(time.time() - start_time)

    # Convert model output to mask
    predicted_mask = np.argmax(unet_prediction, axis=-1).astype(np.uint8)

    # Convert mask to color format
    seg_colored = np.zeros((height, width, 3), dtype=np.uint8)
    for label, color in class_colors.items():
        seg_colored[predicted_mask == label] = color

    # Save segmentation mask
    seg_save_path = os.path.join(save_path, f"{name.split('.')[0]}_segmentation.png")
    cv2.imwrite(seg_save_path, seg_colored)

    ####### 3. Combined Image (YOLO + Segmentation) #######
    combined_output = cv2.addWeighted(yolo_output, 0.6, seg_colored, 0.4, 0)

    # Save combined output
    combined_save_path = os.path.join(save_path, f"{name.split('.')[0]}_combined.png")
    cv2.imwrite(combined_save_path, combined_output)

Processing Images:   0%|          | 0/19 [00:00<?, ?it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0008.png: 448x640 (no detections), 139.0ms
Speed: 3.1ms preprocess, 139.0ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 956ms/step


Processing Images:   5%|▌         | 1/19 [00:02<00:51,  2.85s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0010.png: 448x640 1 person, 84.3ms
Speed: 15.3ms preprocess, 84.3ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 735ms/step


Processing Images:  11%|█         | 2/19 [00:03<00:28,  1.70s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0014.png: 448x640 (no detections), 85.7ms
Speed: 0.8ms preprocess, 85.7ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 742ms/step


Processing Images:  16%|█▌        | 3/19 [00:04<00:21,  1.33s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0016.png: 448x640 (no detections), 91.0ms
Speed: 15.8ms preprocess, 91.0ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 739ms/step


Processing Images:  21%|██        | 4/19 [00:05<00:17,  1.16s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0035.png: 448x640 (no detections), 83.6ms
Speed: 0.8ms preprocess, 83.6ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 756ms/step


Processing Images:  26%|██▋       | 5/19 [00:06<00:14,  1.06s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0039.png: 448x640 (no detections), 82.3ms
Speed: 15.7ms preprocess, 82.3ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 723ms/step


Processing Images:  32%|███▏      | 6/19 [00:07<00:12,  1.00it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0040.png: 448x640 (no detections), 83.6ms
Speed: 0.9ms preprocess, 83.6ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 730ms/step


Processing Images:  37%|███▋      | 7/19 [00:08<00:11,  1.05it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0050.png: 448x640 1 person, 1 bicycle, 103.5ms
Speed: 17.6ms preprocess, 103.5ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 740ms/step


Processing Images:  42%|████▏     | 8/19 [00:09<00:10,  1.06it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0057.png: 448x640 (no detections), 102.0ms
Speed: 0.9ms preprocess, 102.0ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 741ms/step


Processing Images:  47%|████▋     | 9/19 [00:09<00:09,  1.07it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0060.png: 448x640 1 person, 1 rickshaw van, 92.5ms
Speed: 0.8ms preprocess, 92.5ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 729ms/step


Processing Images:  53%|█████▎    | 10/19 [00:11<00:09,  1.08s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0062.png: 448x640 1 person, 1 motorcycle, 87.0ms
Speed: 16.0ms preprocess, 87.0ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 728ms/step


Processing Images:  58%|█████▊    | 11/19 [00:12<00:08,  1.02s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0074.png: 448x640 (no detections), 106.8ms
Speed: 0.8ms preprocess, 106.8ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 721ms/step


Processing Images:  63%|██████▎   | 12/19 [00:13<00:06,  1.02it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0083.png: 448x640 (no detections), 84.4ms
Speed: 15.4ms preprocess, 84.4ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 738ms/step


Processing Images:  68%|██████▊   | 13/19 [00:14<00:06,  1.12s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0084.png: 448x640 1 person, 1 motorcycle, 88.2ms
Speed: 15.5ms preprocess, 88.2ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 725ms/step


Processing Images:  74%|███████▎  | 14/19 [00:15<00:05,  1.05s/it]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0093.png: 448x640 1 auto rickshaw, 1 private car, 90.6ms
Speed: 0.9ms preprocess, 90.6ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 735ms/step


Processing Images:  79%|███████▉  | 15/19 [00:16<00:03,  1.00it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0099.png: 448x640 1 rickshaw van, 92.1ms
Speed: 19.5ms preprocess, 92.1ms inference, 0.3ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 737ms/step


Processing Images:  84%|████████▍ | 16/19 [00:17<00:02,  1.03it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0104.png: 448x640 (no detections), 114.2ms
Speed: 0.9ms preprocess, 114.2ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 743ms/step


Processing Images:  89%|████████▉ | 17/19 [00:18<00:01,  1.05it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0109.png: 448x640 (no detections), 86.3ms
Speed: 0.8ms preprocess, 86.3ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 733ms/step


Processing Images:  95%|█████████▍| 18/19 [00:19<00:00,  1.08it/s]


image 1/1 /home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug/test/images/0115.png: 448x640 (no detections), 92.1ms
Speed: 15.4ms preprocess, 92.1ms inference, 0.2ms postprocess per image at shape (1, 3, 448, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 726ms/step


Processing Images: 100%|██████████| 19/19 [00:19<00:00,  1.05s/it]


In [None]:
# FPS Calculation
mean_time = np.mean(time_taken)
mean_fps = 1 / mean_time
print(f"Mean time taken per image: {mean_time:.4f} seconds")
print(f"Mean FPS: {mean_fps:.2f}")
