In [1]:
# Step up one directory before the project-local imports below; the recorded output shows the resulting cwd.
# NOTE(review): not idempotent - re-running this cell moves up another level.
%cd ..

/home/romet/projects/ut/wp4/nvidia-e2e


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import random
import cv2


#import torch
#import torch.nn as nn
#import torchvision
#from torchvision import transforms

from tqdm.auto import tqdm

from dataloading.nvidia import NvidiaCropWide, Normalize, NvidiaDataset
from trainer import Trainer

%load_ext autoreload
%autoreload 2

In [3]:
# Pixel bounds of the window that crop() cuts out of each image.
xmin, xmax = 300, 1620
ymin, ymax = 520, 864

# Size of the cropped window.
width = xmax - xmin
height = ymax - ymin

# Down-scaling factor used by resize(); int() truncates the scaled sizes.
scale = 0.2
scaled_width = int(width * scale)
scaled_height = int(height * scale)

def crop(img):
    """Return the rows ymin:ymax and columns xmin:xmax of a 3-axis image, keeping the last axis whole."""
    rows = slice(ymin, ymax)
    cols = slice(xmin, xmax)
    return img[rows, cols, :]

def resize(img):
    """Resize `img` to scaled_width x scaled_height using linear interpolation."""
    target_size = (scaled_width, scaled_height)  # passed as cv2's dsize: (width, height)
    return cv2.resize(img, dsize=target_size, interpolation=cv2.INTER_LINEAR)

def normalise(img):
    """Divide every value of `img` by 255 and return the result."""
    scaled = img / 255
    return scaled

def preprocess(img):
    """Run the full preprocessing chain on an image: crop, then resize, then normalise."""
    return normalise(resize(crop(img)))

def read_image(image_path):
    """Load an image from disk, convert it from BGR to RGB and preprocess it.

    Args:
        image_path: Location of the image file (str or path-like).

    Returns:
        The result of `preprocess` applied to the RGB image.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    # str() lets callers pass pathlib.Path objects, which cv2.imread may reject.
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread reports failure by returning None rather than raising, which
        # would otherwise surface as an obscure error inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return preprocess(img)

def create_tensor(img):
    """Convert an (H, W, C) image array into a (1, C, H, W) float32 tensor.

    The tensor is placed on the GPU when CUDA is available, otherwise on the CPU.

    Args:
        img: Array-like image with the channel axis last.

    Returns:
        A float32 torch tensor with a leading batch axis of size 1.
    """
    # Imported locally: the notebook's top-level `import torch` is commented out,
    # so relying on a global name fails on a fresh kernel.
    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    img_tensor = torch.tensor(img, dtype=torch.float32, device=device)
    # Channel-last -> channel-first, then add the batch axis.
    return img_tensor.permute(2, 0, 1).unsqueeze(0)

In [4]:
# image_path = validset.image_paths[0]
# img = read_image(image_path)
# plt.imshow(img)

In [5]:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

import os

# Small record pairing a host-side buffer with its device-side counterpart;
# reads better than indexing into a 2-tuple.
class HostDeviceMem:
    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # !s forces str() on both members, exactly like explicit concatenation.
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        return str(self)

class TensorrtModel:
    """Wraps a TensorRT engine built from an ONNX file and runs inference with it.

    On construction the ONNX file is parsed, an engine is built, host/device
    buffers are allocated for every binding and an execution context is created.
    """

    def __init__(self, onnx_file_path):
        self.init_tensorrt(onnx_file_path)

    def predict(self, img):
        """Run inference on one input and return the first output.

        Args:
            img: Array-like input. It is flattened and copied into the first
                input's host buffer, being cast to that buffer's dtype.

        Returns:
            The first element of the first output buffer when that buffer is a
            numpy array, otherwise the first output unchanged.

        Raises:
            ValueError: If `img` holds more elements than the input buffer.
        """
        host_buffer = self.inputs[0].host
        flat = np.asarray(img).ravel()
        if flat.size > host_buffer.size:
            raise ValueError(
                "Input has {} elements but the engine input buffer holds only {}.".format(
                    flat.size, host_buffer.size))
        # Copy into the pre-allocated buffer instead of replacing it, so the buffer
        # made by allocate_buffers stays in use and dtype mismatches are cast
        # rather than being copied to the device as raw bytes.
        host_buffer[:flat.size] = flat
        out = self.do_inference_v2(self.context, self.bindings, self.inputs, self.outputs, self.stream)[0]
        if isinstance(out, np.ndarray):
            out = out[0]
        return out

    # Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
    def allocate_buffers(self, engine):
        """Allocate a page-locked host buffer and a device buffer for every binding.

        Returns:
            Tuple `(inputs, outputs, bindings, stream)` where `inputs`/`outputs`
            are lists of HostDeviceMem, `bindings` is the list of device buffer
            addresses in binding order and `stream` is a new CUDA stream.
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            pair = HostDeviceMem(host_mem, device_mem)
            if engine.binding_is_input(binding):
                inputs.append(pair)
            else:
                outputs.append(pair)
        return inputs, outputs, bindings, stream

    # This function is generalized for multiple inputs/outputs for full dimension networks.
    # inputs and outputs are expected to be lists of HostDeviceMem objects.
    def do_inference_v2(self, context, bindings, inputs, outputs, stream):
        """Copy inputs to the device, execute the context and copy outputs back.

        Returns:
            List with the host buffer of every output, in order.
        """
        # Transfer input data to the GPU.
        for inp in inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, stream)
        # Run inference.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        for out in outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, stream)
        # Wait for all queued work on the stream before reading the host buffers.
        stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]

    def build_engine_from_onnx(self, onnx_file_path):
        """Takes an ONNX file and creates a TensorRT engine to run inference with.

        Returns:
            The built engine, or None if the ONNX file could not be parsed.

        Raises:
            FileNotFoundError: If `onnx_file_path` does not exist.
        """
        # Fail before touching TensorRT; raising keeps the kernel alive, unlike exit().
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError('ONNX file {} not found.'.format(onnx_file_path))

        EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Logger() as trt_logger, trt.Builder(trt_logger) as builder, builder.create_network(
                EXPLICIT_BATCH) as network, trt.OnnxParser(network, trt_logger) as parser:
            builder.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # If the ONNX model was exported with a different batch size, the input
            # shape can be overridden here, e.g.:
            # network.get_input(0).shape = [1, 3, 128, 1024 ]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            return engine

    def init_tensorrt(self, onnx_file_path):
        """Build the engine, allocate its buffers and create the execution context.

        Raises:
            FileNotFoundError: If `onnx_file_path` does not exist.
            RuntimeError: If no engine could be built from the ONNX file.
        """
        engine = self.build_engine_from_onnx(onnx_file_path)
        if engine is None:
            raise RuntimeError(
                'Could not build a TensorRT engine from {}.'.format(onnx_file_path))
        # Keep a reference so the engine lives as long as the context that uses it.
        self.engine = engine
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(engine)
        self.context = engine.create_execution_context()

In [6]:
# Folder holding the exported model files; the ONNX export is turned into a TensorRT engine.
model_path = "models/20211124220319_nvidia-v3"
onnx_path = model_path + "/best.onnx"
tensorrt_model = TensorrtModel(onnx_path)

Loading ONNX file from path models/20211124220319_nvidia-v3/best.onnx...
Beginning ONNX file parsing
Completed parsing of ONNX file
Building an engine from file models/20211124220319_nvidia-v3/best.onnx; this may take a while...
Completed creating Engine


In [8]:
# Validation data: a single recording folder, read through the front wide camera.
root_path = Path("/home/romet/data2/datasets/rally-estonia/dataset")
valid_paths = [root_path.joinpath("2021-10-26-10-49-06_e2e_rec_ss20_elva")]
validset = NvidiaDataset(valid_paths, camera="front_wide")

/home/romet/data2/datasets/rally-estonia/dataset/2021-10-26-10-49-06_e2e_rec_ss20_elva: 33045
/home/romet/data2/datasets/rally-estonia/dataset/2021-10-26-10-49-06_e2e_rec_ss20_elva: 33045


In [9]:
tensorrt_predictions = []

for path in tqdm(validset.frames.image_path.to_numpy()):
    # Channel-last image -> channel-first with a leading batch axis of one.
    img = read_image(path).transpose(2, 0, 1)[np.newaxis, ...]
    # The engine input is fed as a C-contiguous float32 array.
    img = np.ascontiguousarray(img, dtype=np.float32)
    prediction = tensorrt_model.predict(img)
    tensorrt_predictions.append(prediction)

  0%|          | 0/33045 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Load the PyTorch checkpoint stored under the same model_path as the ONNX export.
trainer = Trainer("")
torch_model = trainer.load_model(f"{model_path}/best.pt")
# Presumably switches the loaded module to evaluation mode (verify load_model's return type);
# as the last expression of the cell, its return value is also displayed.
torch_model.eval()

In [None]:
import torch  # needed here: the top-of-notebook `import torch` is commented out

torch_predictions = []

# Inference only: disable autograd so no computation graph is recorded per frame.
with torch.no_grad():
    for path in tqdm(validset.frames.image_path.to_numpy()):
        img = read_image(path)
        img_tensor = create_tensor(img)
        prediction = torch_model(img_tensor).squeeze(1)
        # .item() stores a plain Python number instead of a tensor.
        torch_predictions.append(prediction.item())

In [None]:
# `transforms` is never imported elsewhere (the top-of-notebook import is commented out),
# so bring it into scope here to keep the cell runnable on a fresh kernel.
from torchvision import transforms

# Same validation recording, but preprocessed by the project's own transform pipeline.
tr = transforms.Compose([NvidiaCropWide(), Normalize()])
# NOTE(review): unlike the earlier dataset this one does not pass camera="front_wide" - confirm
# that the default camera matches what NvidiaCropWide expects.
validset_tr = NvidiaDataset(valid_paths, tr)
torch_transforms_predictions = trainer.predict(torch_model, validset_tr)

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 30))

# One panel per inference path, each drawn against the same reference series.
# NOTE(review): the reference is validset.vehicle_speed while the video cell treats the
# predictions as steering angles - confirm this is the intended ground truth.
panels = [
    (ax1, torch_transforms_predictions, "PyTorch model with transforms"),
    (ax2, torch_predictions, "PyTorch model with numpy/cv2 preprocessing"),
    (ax3, tensorrt_predictions, "TensorRT"),
]
for ax, predictions, title in panels:
    ax.plot(validset.vehicle_speed)
    ax.plot(predictions)
    ax.legend(["ground truth", "predictions"])
    ax.set_title(title)

In [None]:
import math
import cv2
import shutil
from skimage import io
import os
from moviepy.editor import ImageSequenceClip


def draw_steering_angle(frame, steering_angle, steering_wheel_radius, steering_position, size, color):
    """Draw a filled dot on the steering-wheel circle at the given angle.

    `steering_angle` is in degrees. Zero puts the dot straight above
    `steering_position`; positive angles move it towards smaller x.
    The dot is drawn into `frame` in place with radius `size`.
    """
    theta = np.pi / 2 + math.radians(steering_angle)
    dx = int(steering_wheel_radius * np.cos(theta))
    dy = int(steering_wheel_radius * np.sin(theta))
    # Image y grows downwards, hence the subtraction.
    marker = (steering_position[0] + dx, steering_position[1] - dy)
    cv2.circle(frame, marker, size, color, thickness=-1)
    
def draw_frames(dataset, predicted_angles, temp_frames_folder):
    """Render every dataset frame with true/predicted steering overlays and save it as a JPEG.

    Args:
        dataset: Iterable with a length, yielding dicts with an "image" tensor
            (channel-first) and a "steering_angle" in radians.
        predicted_angles: Sequence of predicted angles in radians, indexed by frame.
        temp_frames_folder: Existing folder that receives files named 00001.jpg, 00002.jpg, ...

    Raises:
        ValueError: If there are fewer predictions than frames.
    """
    if len(predicted_angles) < len(dataset):
        # Fail before rendering anything instead of hitting an IndexError midway.
        raise ValueError(
            f"Got {len(predicted_angles)} predictions for {len(dataset)} frames.")

    # Overlay geometry in pixels.
    # NOTE(review): these coordinates look tuned to a roughly 1920-wide, 1200+-tall frame - confirm.
    radius = 200
    steering_pos = (960, 1200)

    for frame_index, data in tqdm(enumerate(dataset), total=len(dataset)):
        # permute() only swaps strides, so the numpy view is not contiguous; OpenCV
        # drawing calls need a contiguous buffer to draw into.
        frame = np.ascontiguousarray(data["image"].permute(1, 2, 0).cpu().numpy())
        true_angle = math.degrees(data["steering_angle"])
        pred_angle = math.degrees(predicted_angles[frame_index])

        cv2.putText(frame, 'True: {:.2f}deg'.format(true_angle), (10, 1150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                    cv2.LINE_AA)
        cv2.putText(frame, 'Pred: {:.2f}deg'.format(pred_angle), (10, 1200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                    cv2.LINE_AA)

        cv2.circle(frame, steering_pos, radius, (255, 255, 255), 7)

        # The true-angle dot is larger so it stays visible beneath the prediction dot.
        draw_steering_angle(frame, true_angle, radius, steering_pos, 13, (0, 255, 0))
        draw_steering_angle(frame, pred_angle, radius, steering_pos, 9, (255, 0, 0))

        io.imsave(f"{temp_frames_folder}/{frame_index + 1:05}.jpg", frame)
        

def convert_frames_to_video(frames_folder, output_video_path, fps=25):
    """Assemble all .jpg files found under `frames_folder` into a video.

    Frames are ordered by their sorted path strings. The directory part of
    `output_video_path` is created if it does not exist yet.
    """
    Path(os.path.dirname(output_video_path)).mkdir(parents=True, exist_ok=True)

    frame_files = [str(entry) for entry in Path(frames_folder).glob('**/*.jpg') if entry.is_file()]
    frame_files.sort()

    print("Creating video {}, FPS={}".format(frames_folder, fps))
    ImageSequenceClip(frame_files, fps=fps).write_videofile(output_video_path)


In [None]:
output_video = "output/wide-v2-ss6.mp4"
temp_frames_folder = Path("./temp_frames")

# Start from an empty scratch folder; leftovers from an earlier run are discarded.
shutil.rmtree(temp_frames_folder, ignore_errors=True)
temp_frames_folder.mkdir()

# Render the annotated frames, then stitch them into the video.
draw_frames(validset, tensorrt_predictions, temp_frames_folder)
convert_frames_to_video(temp_frames_folder, output_video, fps=30.0)

# The individual frames are no longer needed once the video exists.
shutil.rmtree(temp_frames_folder, ignore_errors=True)

In [None]:
from IPython.display import HTML


# Embed the rendered video in the notebook output.
video_markup = f"""
<video width="640" height="480" controls>
  <source src="{output_video}" type="video/mp4">
</video>
"""
HTML(video_markup)

In [None]:
# Echo the path of the generated video file.
output_video