# Install TensorRT and PyCUDA Using pip

In [None]:
# Install the TensorRT Python packages and PyCUDA into the notebook runtime.
# nvidia-pyindex is installed first, before the NVIDIA-hosted package below
# (presumably it registers NVIDIA's package index with pip -- TODO confirm).
# NOTE(review): none of these installs is version-pinned; the output recorded
# with this notebook resolved tensorrt 8.6.1.post1. Consider pinning so a re-run
# gets the same TensorRT version the downloaded .trt engine was built for
# (assumption -- confirm against the engine's build environment).
!pip install nvidia-pyindex
!pip install --upgrade nvidia-tensorrt
# NOTE(review): pycuda is not imported by any cell visible here -- confirm it is still needed.
!pip install pycuda

Collecting nvidia-pyindex
  Downloading nvidia-pyindex-1.0.9.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: nvidia-pyindex
  Building wheel for nvidia-pyindex (setup.py) ... [?25l[?25hdone
  Created wheel for nvidia-pyindex: filename=nvidia_pyindex-1.0.9-py3-none-any.whl size=8418 sha256=20baf76efb576f259ab495c208504e3bfa731471df4becbc4ec004f65d8843a4
  Stored in directory: /root/.cache/pip/wheels/2c/af/d0/7a12f82cab69f65d51107f48bcd6179e29b9a69a90546332b3
Successfully built nvidia-pyindex
Installing collected packages: nvidia-pyindex
Successfully installed nvidia-pyindex-1.0.9
Collecting nvidia-tensorrt
  Downloading nvidia_tensorrt-99.0.0-py3-none-manylinux_2_17_x86_64.whl (17 kB)
Collecting tensorrt (from nvidia-tensorrt)
  Downloading tensorrt-8.6.1.post1.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tensorrt
  Building wheel for tensorrt (setup.py) ... [?25

# Import Python Modules

In [None]:
import cv2
import torch
import random
import time
import numpy as np
import tensorrt as trt
from collections import OrderedDict,namedtuple

# allows getting of files from google drive
import gdown

# Get Files from Google Drive

In [None]:
# Download the benchmark image archive and the serialized TensorRT engine from
# Google Drive. Both outputs are relative paths, so they land in the current
# working directory (the recorded output shows /content).

# performance test zip file url (a Google Drive ".../file/d/<id>/view" share link)
perform_test_zip_url = "https://drive.google.com/file/d/1bIwzoKGgnfFMZusOOKcJ61yIbv9mZKUn/view?usp=sharing"
perform_test_zip_output = "Performance Test Sets.zip"

# fuzzy=True is passed because the URL is a share link rather than a direct
# download URL (presumably this lets gdown extract the file id -- TODO confirm
# against the gdown documentation).
gdown.download(perform_test_zip_url, perform_test_zip_output, quiet = False, fuzzy = True)

# model file
# The file names suggest one engine per GPU type (A100 / V100 / T4); only the T4
# variant is active. When switching, also update PATH_TO_MODEL_WEIGHTS in the
# Constants cell, which hard-codes the T4 file name.
# model_url = "https://drive.google.com/file/d/1FcPeDwU1f56uWHqapy7Nuas1QgjwYyx7/view?usp=sharing"
# model_output = "yolov7-self-driving-A100.trt"

# model_url = "https://drive.google.com/file/d/1OizWs0pjEdCdTQj17-HpA0da7v9CMff0/view?usp=sharing"
# model_output = "yolov7-self-driving-v100.trt"

model_url = "https://drive.google.com/file/d/1-8qbCDQmtfOrErRvV274DG7ERnhyfyO9/view?usp=sharing"
model_output = "yolov7-self-driving-T4.trt"

# Last expression of the cell: gdown.download's return value is displayed as the
# cell output (the recorded output shows the output file name).
gdown.download(model_url, model_output, quiet = False, fuzzy = True)

Downloading...
From (original): https://drive.google.com/uc?id=1bIwzoKGgnfFMZusOOKcJ61yIbv9mZKUn
From (redirected): https://drive.google.com/uc?id=1bIwzoKGgnfFMZusOOKcJ61yIbv9mZKUn&confirm=t&uuid=82e4d526-5e6c-4077-9be1-42bef4aa1ec9
To: /content/Performance Test Sets.zip
100%|██████████| 3.05G/3.05G [00:47<00:00, 63.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-8qbCDQmtfOrErRvV274DG7ERnhyfyO9
To: /content/yolov7-self-driving-T4.trt
100%|██████████| 75.4M/75.4M [00:00<00:00, 83.9MB/s]


'yolov7-self-driving-T4.trt'

# Unzip Performance Tests Zip

In [None]:
! unzip "/content/Performance Test Sets.zip"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Performance Test Sets/Mixed/002_GOPR3761_049821.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_049830.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_049972.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_049973.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_049996.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050076.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050090.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050101.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050138.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050167.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050203.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050218.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050228.jpg  
  inflating: Performance Test Sets/Mixed/002_GOPR3761_050234.jpg

# Constants

In [None]:
# Location of the serialized TensorRT engine -- change to the relevant path.
PATH_TO_MODEL_WEIGHTS = "/content/yolov7-self-driving-T4.trt"

# Device on which the engine's input/output buffers are allocated.
GPU_DEVICE = torch.device("cuda:0")

# Root folder containing one sub-folder of images per test set.
PERFORMANCE_TEST_SOURCE = "/content/Performance Test Sets"

# Single ordered table of (class name, box colour). The position of each entry
# is the class index used to look names up from the model's class ids, so the
# order must not change.
_CLASS_PALETTE = (
    ('pedestrian', (199, 27, 185)),    # pink
    ('rider', (130, 140, 0)),          # dark green
    ('car', (97, 248, 37)),            # lime green
    ('truck', (255, 0, 0)),            # red
    ('bus', (24, 226, 195)),           # turquoise
    ('train', (255, 127, 117)),        # salmon
    ('motorcycle', (227, 217, 30)),    # yellow
    ('bicycle', (113, 10, 187)),       # purple
    ('traffic light', (28, 45, 199)),  # blue
    ('traffic sign', (255, 127, 0)),   # orange
)

# Class names indexed by class id.
CLASS_NAMES = [class_name for class_name, _ in _CLASS_PALETTE]

# Class name -> colour tuple used when drawing boxes and labels.
CLASS_COLOURS = dict(_CLASS_PALETTE)

# Deserialize TensorRT Engine (Fine-Tuned Model) and Set up Execution
This code prepares a TensorRT engine for predicting object locations and classes by setting up the necessary data structures and the execution context.

In [None]:
# init the tensor engine
# One record per engine I/O tensor: its name, numpy dtype, shape, the torch
# tensor that backs it on the GPU, and that tensor's raw device address.
binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(logger, namespace="")

# get model from given path and deserialize it
# Errors are allowed to propagate: every later cell needs `model`, so printing
# and carrying on would only turn the real failure into a confusing NameError.
with open(PATH_TO_MODEL_WEIGHTS, 'rb') as f, trt.Runtime(logger) as runtime:
    model = runtime.deserialize_cuda_engine(f.read())

# deserialize_cuda_engine reports failure by returning None instead of raising.
if model is None:
    raise RuntimeError(f'Failed to deserialize the model: {PATH_TO_MODEL_WEIGHTS}')

# Allocate one GPU buffer per engine tensor, keyed by tensor name in engine order.
bindings = OrderedDict()
for index in range(model.num_bindings):
    name = model.get_tensor_name(index)
    dtype = trt.nptype(model.get_tensor_dtype(name))
    shape = tuple(model.get_tensor_shape(name))
    data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(GPU_DEVICE)
    bindings[name] = binding(name, dtype, shape, data, int(data.data_ptr()))

# Device addresses in engine order; execute_v2 receives list(binding_addrs.values()).
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())

# allows for the execution of the model on data
context = model.create_execution_context()

# warmup 10 times
# The input is written under the same 'images' key that run_performance_test
# uses, and the tensor lives on the GPU. A different key would append an extra
# entry to binding_addrs for every later execute_v2 call, and a CPU tensor
# would hand the engine a host pointer.
for _ in range(10):
  warmup_input = torch.randn(1, 3, 640, 640).to(GPU_DEVICE)
  binding_addrs['images'] = int(warmup_input.data_ptr())
  context.execute_v2(list(binding_addrs.values()))

# Code to Resize Input Image to 640*640

In [None]:
def resize_img(img, new_img_size=(640, 640), padding_colour=(114, 114, 114)):
  """Scale an image to fit the target size and pad the remainder (letterbox).

  The aspect ratio is preserved: the image is scaled by the largest ratio
  that still fits inside ``new_img_size`` and the leftover space is filled
  with a constant-colour border split between the two opposite sides.

  Args:
    img: image array whose first two dimensions are (height, width).
    new_img_size: target size as (height, width). Defaults to (640, 640).
    padding_colour: border colour passed to ``cv2.copyMakeBorder``.
      Defaults to (114, 114, 114).

  Returns:
    A tuple ``(img, scale_ratio, (width_padding, height_padding))`` where
    ``img`` is the resized and padded image, ``scale_ratio`` is the factor
    applied to both dimensions, and the padding values are the per-side
    amounts (floats; they end in .5 when the total padding is odd).
  """
  current_img_size = img.shape[:2]  # (height, width)

  # calculate the minimum scale ratio to get the image to the new size
  scale_ratio = min(new_img_size[0] / current_img_size[0], new_img_size[1] / current_img_size[1])

  # scaled size before padding, in (width, height) order as cv2.resize expects
  new_unpadded = int(round(current_img_size[1] * scale_ratio)), int(round(current_img_size[0] * scale_ratio))

  # total padding needed on each axis, divided between the 2 sides
  width_padding = (new_img_size[1] - new_unpadded[0]) / 2
  height_padding = (new_img_size[0] - new_unpadded[1]) / 2

  # if the size of the input image and desired size are not the same then resize
  if current_img_size[::-1] != new_unpadded:
    img = cv2.resize(img, new_unpadded, interpolation=cv2.INTER_LINEAR)

  # The -0.1 / +0.1 offsets make a half-pixel share round down on the
  # top/left and up on the bottom/right, so the two sides always add up to
  # the full padding.
  top = int(round(height_padding - 0.1))
  bottom = int(round(height_padding + 0.1))
  left = int(round(width_padding - 0.1))
  right = int(round(width_padding + 0.1))

  # add padding to the image
  img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=padding_colour)
  return img, scale_ratio, (width_padding, height_padding)

# Postprocessing to Reverse the Input Image Resizing on the Model Predictions

In [None]:
def postprocessing(boxes, scale_ratio, width_height_padding):
  """Map boxes from the resized, padded image back to the source image.

  The padding offsets are subtracted and the result is divided by the scale
  ratio. ``boxes`` is modified in place and the same tensor is returned.

  Args:
    boxes: tensor whose last dimension matches the doubled padding sequence
      (four values when ``width_height_padding`` is a 2-tuple).
    scale_ratio: scale factor that was applied when resizing.
    width_height_padding: (width_padding, height_padding) per-side padding.

  Returns:
    ``boxes`` itself, rescaled to source-image coordinates.
  """
  # Repeating the sequence turns (w, h) into (w, h, w, h): one offset per coordinate.
  offsets = torch.tensor(width_height_padding * 2, device=boxes.device)
  return boxes.sub_(offsets).div_(scale_ratio)

# Function to Run the Performance Test on an Image

In [None]:
def run_performance_test(img_path):
  """Run one image through the full detection pipeline and time it.

  The total time starts after the image has been read from disk and covers
  preprocessing, inference, and drawing the detections on the image. The
  model time covers only the ``context.execute_v2`` call.

  Args:
    img_path: path of the image file to process.

  Returns:
    ``(total_ms, predict_ms)``: elapsed milliseconds for the whole sequence
    and for the inference call alone.

  Raises:
    ValueError: if ``cv2.imread`` cannot read an image from ``img_path``.
  """
  # get the image
  source_img = cv2.imread(img_path)
  if source_img is None:
    # cv2.imread returns None for a missing or undecodable file instead of raising
    raise ValueError(f'Could not read an image from {img_path!r}')

  # perf_counter is a monotonic, high-resolution clock intended for measuring
  # short durations; wall-clock time can jump if the system clock is adjusted
  total_start_time = time.perf_counter()

  # run preprocessing on the image
  img_pre = cv2.cvtColor(source_img, cv2.COLOR_BGR2RGB)
  image = img_pre.copy()

  # resize img, then reorder from (height, width, channel) to (1, channel, height, width)
  image, scale_ratio, width_height_padding = resize_img(image)
  image = image.transpose((2, 0, 1))
  image = np.expand_dims(image, 0)
  image = np.ascontiguousarray(image)

  img = image.astype(np.float32)
  img = torch.from_numpy(img).to(GPU_DEVICE)

  # normalise image
  img /= 255

  # run the image through the model: point the input binding at this image's
  # buffer; the outputs are written into the tensors held in `bindings`
  model_start_time = time.perf_counter()
  binding_addrs['images'] = int(img.data_ptr())
  context.execute_v2(list(binding_addrs.values()))
  model_end_time = time.perf_counter()

  # run postprocessing on the image
  nums = bindings['num_dets'].data
  boxes = bindings['det_boxes'].data
  scores = bindings['det_scores'].data
  classes = bindings['det_classes'].data

  # keep only the detections reported for the first (only) image in the batch
  num_dets = int(nums[0][0])
  boxes = boxes[0, :num_dets]
  scores = scores[0, :num_dets]
  classes = classes[0, :num_dets]

  for box, score, cl in zip(boxes, scores, classes):
      # map the box back to source-image pixels and unpack its two corners
      box = postprocessing(box, scale_ratio, width_height_padding).round().int()
      x1, y1, x2, y2 = box.tolist()
      name = CLASS_NAMES[int(cl)]
      colour = CLASS_COLOURS[name]
      name += ' ' + str(round(float(score), 3))
      cv2.rectangle(img_pre, (x1, y1), (x2, y2), colour, 2)

      # print text on top of box: filled label background, then the text itself
      (w, h), _ = cv2.getTextSize(name, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
      cv2.rectangle(img_pre, (x1, y1 - 20), (x1 + w, y1), colour, -1)
      cv2.putText(img_pre, name, (x1, y1 - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), thickness=1)

  total_end_time = time.perf_counter()

  # return time to predict and time to complete total sequence (preprocessing, predict, postprocessing)
  predict_ms = (model_end_time - model_start_time) * 1000
  total_ms = (total_end_time - total_start_time) * 1000
  return total_ms, predict_ms

# Run Performance Tests

In [None]:
import os
import pandas as pd

# Each sub-folder of PERFORMANCE_TEST_SOURCE is one test set. Stray files in
# the root are skipped because listing them as folders would raise.
resolution_folders = [
    entry for entry in os.listdir(PERFORMANCE_TEST_SOURCE)
    if os.path.isdir(os.path.join(PERFORMANCE_TEST_SOURCE, entry))
]

# column name -> Series of timings in milliseconds, in folder order
timing_columns = {}

for test_set_folder in resolution_folders:
  test_set_path = os.path.join(PERFORMANCE_TEST_SOURCE, test_set_folder)
  test_images = os.listdir(test_set_path)

  # Timings are collected in plain lists and converted once per folder;
  # concatenating a Series per image re-copies all earlier rows every time.
  total_times = []
  model_times = []

  for image in test_images:
    print(test_set_folder + '/' + image)

    total_time, model_time = run_performance_test(os.path.join(test_set_path, image))
    total_times.append(total_time)
    model_times.append(model_time)

  total_time_results_series = pd.Series(total_times, name = test_set_folder + ' Total Time', dtype='float')
  model_time_results_series = pd.Series(model_times, name = test_set_folder + ' Model Time', dtype='float')

  print(total_time_results_series)
  print(model_time_results_series)

  timing_columns[total_time_results_series.name] = total_time_results_series
  timing_columns[model_time_results_series.name] = model_time_results_series

# Series of different lengths are aligned on their 0..n-1 index, so shorter
# test sets are padded with NaN. An empty dict yields an empty DataFrame.
results_df = pd.DataFrame(timing_columns)

# export df as a csv
results_df.to_csv('output T4 GPU.csv')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
720/ca057c8b-e42f0882.jpg
720/bbef1eae-ab6f884c.jpg
720/c237ce66-cf2860b7.jpg
720/c30f959a-bd87aa16.jpg
720/bfe133fb-db319b45.jpg
720/b6d0b46a-ec482777.jpg
720/b6f2176b-20c2f527.jpg
720/bd656502-f04cf4da.jpg
720/be5ca360-3585455f.jpg
720/c586b898-4247945c.jpg
720/bd1b8b79-16ecdafa.jpg
720/b77405dc-08d4db28.jpg
720/ca551156-fb7d1680.jpg
720/c5ca1272-3f672c39.jpg
720/c65c2462-96e8ae92.jpg
720/c17b2387-d6a59eed.jpg
720/c9cf1065-fd69ddbf.jpg
720/bc0932fb-f2731c60.jpg
720/c8ffbba5-334918b7.jpg
720/bb799f71-b5357135.jpg
720/bcaa771b-806e4fcd.jpg
720/c503f2e8-55cc08bf.jpg
720/b7ad4512-be6f5823.jpg
720/b7205754-4faa3862.jpg
720/bb3bb8c0-b32626a2.jpg
720/fe1f55fa-19ba3600.jpg
720/bcd9f474-7f12bf92.jpg
720/bc88fd46-45149809.jpg
720/c093f8be-ca74d5ff.jpg
720/c33fd06b-7beab8d7.jpg
720/c0f5b0d0-6e438a33.jpg
720/c121b93f-b2ee73d4.jpg
720/c4a19ba6-fa93d45f.jpg
720/bfc8dd24-e8506ffa.jpg
720/c2fb790e-a8d3b8eb.jpg
720/c2d051a0-007fa10e.jpg

In [None]:
# Display the collected timings (milliseconds, as returned by run_performance_test):
# one "Total Time" / "Model Time" column pair per test-set folder. Test sets with
# fewer images than the longest one show NaN in their trailing rows.
results_df

Unnamed: 0,Mixed Total Time,Mixed Model Time,1080 Total Time,1080 Model Time,720 Total Time,720 Model Time
0,201.347828,10.801077,19.231796,11.403084,17.602921,12.396336
1,20.900965,10.787964,22.288799,11.438131,20.295620,12.451410
2,21.785736,10.826826,22.938967,11.459112,21.088362,12.438536
3,25.423527,10.843992,21.737099,11.440039,24.691820,12.579679
4,25.442362,10.848522,24.260044,11.471272,23.544550,12.498379
...,...,...,...,...,...,...
7995,23.387194,11.446476,20.271063,12.391806,,
7996,19.028425,11.393070,23.726463,12.471199,,
7997,18.823862,11.385202,19.980669,12.413740,,
7998,19.154787,11.437654,19.336939,12.403488,,
