In [55]:
import numpy as np
import torch

from PIL import Image, ImageDraw, ImageFont
from torchvision import transforms

from model.model import DTSegmentationNetwork
from dataloader.dataloader import DTSegmentationDataset

import os
import time

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
# Load the model
model = torch.load('model_v8_0_029.pt')

In [5]:
PATH_TO_IMAGES = "testing/out/"
# Fetch all image names from the folder
image_names = [f for f in os.listdir(PATH_TO_IMAGES) if os.path.isfile(os.path.join(PATH_TO_IMAGES, f)) and f.startswith('image_')]
print(f"Found {len(image_names)} images")

Found 19 images


### Benchmarking

In [6]:
# For all images loaded, measure the average time needed to feed the image through the network
# device = torch.device('cpu')
# print(f"Using device {device}")
# model = model.to(device)
model.eval()
avg_time = 0
tensor_transform = transforms.ToTensor()
for image_name in image_names:
    # Load the image
    print(f"Loading image {image_name}...")
    start_time = time.time()
    img = Image.open(os.path.join(PATH_TO_IMAGES, image_name)).convert('RGB')
    # Convert the image to a tensor
    img = tensor_transform(img)
    convert_time = time.time() - start_time
    # Move the image to the device
    # img = img.to(device)
    # Feed the image through the network
    prediction = torch.argmax(model(img.unsqueeze(0)), dim=1)[0]
    prediction_time = time.time() - (start_time + convert_time)    
    avg_time += prediction_time

    # pil_prediction = transforms.ToPILImage()(DTSegmentationDataset.label_img_to_rgb(prediction))
    # pil_prediction.save(f"testing/out/{image_name.replace('image_', 'prediction_middlelane_')}")
    
avg_time /= len(image_names)
print(f"Average time needed to feed an image through the network: {avg_time:.3f} seconds")

Loading image image_1673208199.9477482.png...


[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.


Loading image image_1673208177.049033.png...
Loading image image_1673208078.994764.png...
Loading image image_1673208217.92832.png...
Loading image image_1673208224.555704.png...
Loading image image_1673208134.1926.png...
Loading image image_1673208122.1896372.png...
Loading image image_1673208206.550975.png...
Loading image image_1673208168.844874.png...
Loading image image_1673208193.854777.png...
Loading image image_1673208243.706476.png...
Loading image image_1673208152.717825.png...
Loading image image_1673208161.065065.png...
Loading image image_1673208187.4467032.png...
Loading image image_1673208237.516394.png...
Loading image image_1673208250.310034.png...
Loading image image_1673208181.921555.png...
Loading image image_1673208143.3403552.png...
Loading image image_1673208231.460027.png...
Average time needed to feed an image through the network: 0.165 seconds


### Trajectory calculation

In [None]:
# Calculate the mass center of green pixels in the prediction
# (the mass center of the ego lane)
# mass_center = np.array(np.where(np.array(prediction) == DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value'])).mean(axis=1).astype(int)

# # Draw a circle where the mass center is (used when detecting road and not lane (!))
# drawer = ImageDraw.Draw(merged)
# drawer.ellipse((mass_center[1] - 5, mass_center[0] - 5, mass_center[1] + 5, mass_center[0] + 5), fill=(255, 0, 0))
# # Draw a circle in the image middle + a vertical line in the middle
# drawer.ellipse((image.width // 2 - 5, image.height // 2 - 5, image.width // 2 + 5, image.height // 2 + 5), fill=(0, 0, 0))
# drawer.line((image.width // 2, 0, image.width // 2, image.height), fill=(0, 0, 0), width=3)
# # Draw a line from the center of the image to the mass center + a horizontal line from the mass center to the middle line
# drawer.line((image.width // 2, image.height // 2, mass_center[1], mass_center[0]), fill=(0, 0, 0), width=3)
# drawer.line((mass_center[1], mass_center[0], image.width // 2, mass_center[0]), fill=(0, 0, 0), width=3)
# # Add a caption to the line from the mass center to the middle line with its length
# drawer.text((mass_center[1] + 10, mass_center[0] - 10), f"{abs(mass_center[1] - image.width // 2)} px", fill=(0, 0, 0))

In [58]:
CUT_TOP_FACTOR = 0.2

# Generate a single image that has all images with their predictions overlayed in a grid (4 pictures per row)
final_image = Image.new('RGB', (640 * 4, int(480 * (1 - CUT_TOP_FACTOR)) * (len(image_names) // 4 + 1)))

for index, image_name in enumerate(image_names):
    # Load the image and the prediction
    image = Image.open(PATH_TO_IMAGES + image_name).convert("RGB")
    prediction = Image.open(PATH_TO_IMAGES + image_name.replace('image_', 'prediction_middlelane_')).convert("RGB")
    # Cut the top CUT_TOP_FACTOR % of the image and the prediction
    image = image.crop((0, int(image.height * CUT_TOP_FACTOR), image.width, image.height))
    prediction = prediction.crop((0, int(prediction.height * CUT_TOP_FACTOR), prediction.width, prediction.height))

    # Overlay the prediction on the image
    merged = Image.blend(image, prediction, 0.5)
    
    # Draw a line fitted all green pixels in the prediction if there are any
    if np.any(np.array(prediction) == DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value']):
        drawer = ImageDraw.Draw(merged)
        # Get all green pixels in the prediction
        lane_pixels = np.where(np.array(prediction) == DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value'])
        
        # Use a fitting function that is robust for outliers
        # (https://stackoverflow.com/questions/22239691/code-for-line-of-best-fit-of-a-scatter-plot-in-python)
        line = np.polyfit(lane_pixels[1], lane_pixels[0], 1, full=True)
        # Draw the line
        drawer.line((0, line[0][1], image.width, line[0][0] * image.width + line[0][1]), fill=(255, 0, 0), width=5)
        # # Add a caption with bigger font size to the line with its angle
        angle = np.arctan(line[0][0]) * 180 / np.pi + 90
        drawer.text((10, 10), f"Angle: {angle:.2f}°", fill=(255, 0, 0))
    
    if index == 0:
        print(f"Showing image {image_name} and its prediction")
        image.show()
        prediction.show()
    # Paste the image with the prediction and the lines on the final image
    width_offset, height_offset = index % 4 * 640, (index // 4) * int(480 * (1 - CUT_TOP_FACTOR))
    print(f"Pasting image {image_name} at ({width_offset}, {height_offset})")
    final_image.paste(merged, (width_offset, height_offset))

final_image.show()

Showing image image_1673208199.9477482.png and its prediction
Pasting image image_1673208199.9477482.png at (0, 0)
Pasting image image_1673208177.049033.png at (640, 0)
Pasting image image_1673208078.994764.png at (1280, 0)
Pasting image image_1673208217.92832.png at (1920, 0)
Pasting image image_1673208224.555704.png at (0, 384)
Pasting image image_1673208134.1926.png at (640, 384)
Pasting image image_1673208122.1896372.png at (1280, 384)
Pasting image image_1673208206.550975.png at (1920, 384)
Pasting image image_1673208168.844874.png at (0, 768)
Pasting image image_1673208193.854777.png at (640, 768)
Pasting image image_1673208243.706476.png at (1280, 768)
Pasting image image_1673208152.717825.png at (1920, 768)
Pasting image image_1673208161.065065.png at (0, 1152)
Pasting image image_1673208187.4467032.png at (640, 1152)
Pasting image image_1673208237.516394.png at (1280, 1152)
Pasting image image_1673208250.310034.png at (1920, 1152)
Pasting image image_1673208181.921555.png at (