In [None]:
import os

# Download code
if not os.path.exists('detect_isu.py'):
  !git clone https://github.com/donglaiw/yolov7

if os.path.exists('yolov7'):
  %cd yolov7

!pip install wget

# <b>1. Model setup</b>

In [None]:
import os
import numpy as np
np.random.seed(0)
# Download model weight
if not os.path.exists('yolov7.pt'):
  # Download model weight
  !curl -L -o yolov7.pt https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt

# Create the model
from detect_isu import ObjectDetector
detector = ObjectDetector()
detector.set_conf('weights', 'yolov7.pt')
detector.setup_model()

# <b>2. Test on one image</b>

## Read and display the image

In [None]:
import cv2
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (40,10)

# load the sample picture shipped with the repository
img = cv2.imread('inference/images/vehicle_test.png')

# OpenCV stores channels as BGR while matplotlib expects RGB,
# so the last axis is reversed before plotting
plt.imshow(img[..., ::-1])
plt.axis('off')
plt.title('Input image')
plt.show()

## Run vehicle detection

In [None]:
# Keep only the vehicle classes; with do_visual=False the detector hands back the box matrix
output_box = detector.detect(img, ['truck', 'bus', 'car'], do_visual=False)
print(f"detect {output_box.shape[0]} objects")
print("The output is a Nx6 matrix")
print("Each row is one detection: box coordinates, prediction confidence, class_id")
print('--------')
# Indexing row 0 of an empty result raises IndexError, so only show it when it exists
if output_box.shape[0] > 0:
  print("Let's look at the first detection")
  print(output_box[0])
else:
  print("No detection to show")

## Plot the detection result on the image

In [None]:
# Draw the boxes on an RGB view of the picture (cv2.imread gave us BGR)
output_visual = detector.plot_box(img[..., ::-1], output_box)
plt.imshow(output_visual)
plt.axis('off')
plt.title('Detetion results')
plt.show()

# <b>3. Test on one video</b>

## Display the video

In [None]:
from IPython.display import HTML
from base64 import b64encode

def show_video(video_path, video_width = 600):
  """Return an HTML video player with the file at `video_path` embedded inline.

  The whole file is read into memory and base64-encoded into a `data:` URL,
  so the player does not depend on the notebook server serving the file.

  Args:
    video_path: path of the video file to embed.
    video_width: width attribute written on the <video> tag.

  Returns:
    An `HTML` display object wrapping the <video> element.
  """
  # Read-only binary mode is enough (works on write-protected files), and the
  # context manager closes the handle as soon as the bytes are loaded.
  with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()
  video_url = f"data:video/mp4;base64,{b64encode(video_bytes).decode()}"
  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")
 
# Sample clip used for the video demo (path is relative to the current working directory)
video_source = 'inference/videos/vehicle_test.mp4'
# Last expression of the cell, so the returned HTML player is rendered as the cell output
show_video(video_source)

## Run detection and save result

We will run the detector on every frame from the video and save the output into a video.

In [None]:
import cv2
import os

class VideoWriter(object):
    """Write frames as numbered PNG files, then encode them into one video with ffmpeg.

    Frames are stored in a temporary folder derived from the output file name
    (`out.mp4` -> `out/`). `release()` runs ffmpeg over that folder and deletes it.

    Attributes are kept as plain strings/ints:
      output_file     -- path of the final video
      output_folder   -- temporary frame folder, always ending with '/'
      output_template -- printf-style file name pattern for the frames
      fps             -- frame rate handed to ffmpeg
      frame_id        -- index of the next frame to be written
    """

    def __init__(self, output_file, fps):
        """Create the temporary frame folder for `output_file`.

        Args:
          output_file: path of the video to produce.
          fps: frame rate of the produced video.
        """
        self.output_file = output_file
        # splitext only strips a real extension of the last path component;
        # slicing at rfind('.') would cut the last character off an
        # extension-less name or split at a dot inside a directory name.
        stem, extension = os.path.splitext(output_file)
        if not extension:
            # without an extension the folder would collide with the output file itself
            stem = output_file + '_frames'
        self.output_folder = stem + '/'
        self.output_template = '%05d.png'
        # makedirs also creates missing parent folders and tolerates an existing one
        os.makedirs(self.output_folder, exist_ok=True)
        self.fps = fps
        self.frame_id = 0

    def write(self, img):
        """Save `img` as the next numbered PNG frame and advance the frame counter."""
        frame_path = os.path.join(self.output_folder, self.output_template % self.frame_id)
        cv2.imwrite(frame_path, img)
        self.frame_id += 1

    def release(self):
        """Encode the saved frames into `output_file` and remove the temporary folder.

        Encoding is best effort: a missing or failing ffmpeg is reported with a
        message instead of an exception, and the frame folder is removed either way.
        """
        # local imports keep this class self-contained within its notebook cell
        import shutil
        import subprocess

        frame_pattern = os.path.join(self.output_folder, self.output_template)
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; -y overwrites an existing output instead of
        # waiting for an interactive confirmation.
        command = ['ffmpeg', '-y', '-framerate', str(self.fps), '-i', frame_pattern,
                   '-c:v', 'libx264', '-pix_fmt', 'yuv420p', self.output_file]
        try:
            result = subprocess.run(command)
            if result.returncode != 0:
                print(f'ffmpeg exited with code {result.returncode}; {self.output_file} may be missing or incomplete')
        except FileNotFoundError:
            print(f'ffmpeg executable not found; {self.output_file} was not created')
        finally:
            # remove the temp folder
            shutil.rmtree(self.output_folder, ignore_errors=True)



cap = cv2.VideoCapture(video_source)
# Fail early with a clear message instead of silently producing an empty result
if not cap.isOpened():
    raise IOError(f'cannot open video {video_source}')
fps = cap.get(cv2.CAP_PROP_FPS)
output_video_name = 'detection_output.mp4'
# number of detected vehicles, one entry per processed frame
output_counter = []
### to save time, only run it for the first 100 frames
max_frames = 100

# Remove the result of a previous run so it cannot be mistaken for this one
if os.path.exists(output_video_name):
    os.remove(output_video_name)

output_video = VideoWriter(output_video_name, fps)

frame_id = 0
try:
    # frame_id counts processed frames, so this stops after exactly max_frames
    while frame_id < max_frames:
        ret, frame = cap.read()
        if not ret:
            # no more frames could be read
            break
        if frame_id % 10 == 0:
            print(f'process frame {frame_id}')
        output_box = detector.detect(frame, ['truck', 'bus', 'car'], do_visual=False, verbose=False)
        output_visual = detector.plot_box(frame, output_box)
        output_counter.append(output_box.shape[0])
        output_video.write(output_visual)
        frame_id += 1
finally:
    # Always free the capture and finalize/clean up the writer, even if detection fails
    cap.release()
    output_video.release()

## Visualize result

Visualize the video.

In [None]:
# Embed the encoded detection video; as the cell's last expression, the player is rendered below
show_video(output_video_name)

Plot the vehicle count over time. This shows how AI can be used to monitor traffic automatically in real-time!!!

In [None]:
import numpy as np

plt.rcParams["figure.figsize"] = (10,10)
# frame index divided by the frame rate gives the timestamp of each count
elapsed_sec = np.arange(len(output_counter))/fps
plt.plot(elapsed_sec, output_counter)
plt.xlabel('Time (sec)', fontsize=18)
plt.ylabel('Number of vehicles', fontsize=18)

# <b> [Optional 1] Run with different options</b>


Repeat Section 2 above, but this time modify several parameters.

## Alter the confidence threshold
First, we can make the threshold for recognizing objects stricter or looser. Change the `conf_thres` parameter below to any value between 0 and 1 and observe the result. What can you deduce about this parameter? How would changing this parameter affect our traffic monitor application above?

In [None]:
# read the image
img = cv2.imread('inference/images/vehicle_test.png')

# conf_thres is the value to experiment with (any number between 0 and 1)
output_box = detector.detect(img, ['truck', 'bus', 'car'], do_visual=False,  conf_thres = 0.7)
print(f"detect {output_box.shape[0]} objects")
print("The output is a Nx6 matrix")
print("Each row is one detection: box coordinates, prediction confidence, class_id")
print('--------')
# A strict threshold can leave no detections at all; output_box[0] would then
# raise IndexError, so fall back to showing the plain input image.
if len(output_box) > 0:
  print("Let's look at the first detection")
  print(output_box[0])
  output_visual = detector.plot_box(img[:, :, ::-1], output_box)
  plt.imshow(output_visual)
  plt.title('Detetion results')
else:
  plt.imshow(img[:, :, ::-1])
  plt.title('Input image')
plt.axis('off')
plt.show()


## Alter the image with a 90 degree rotation
Play with the `np.rot90` call below and observe the changes to the detection result. What can you deduce about how image orientation affects the accuracy of detection?

Apply the rotation twice to see what happens if the image is rotated 180 degrees.

In [None]:
# read the image
import numpy as np

img = cv2.imread('inference/images/vehicle_test.png')
print(img.__class__)
# each np.rot90 call turns the array by a further 90 degrees
img90 = np.rot90(img)
img180 = np.rot90(img90)

# Run the same detect-and-display steps on both rotations
for rotated in (img90, img180):
  output_box = detector.detect(rotated, ['truck', 'bus', 'car'], do_visual=False,  conf_thres = 0.1)
  print(f"detect {output_box.shape[0]} objects")
  print("The output is a Nx6 matrix")
  print("Each row is one detection: box coordinates, prediction confidence, class_id")
  print('--------')
  if len(output_box) > 0:
    # only announce the first detection when there is one to print
    print("Let's look at the first detection")
    print(output_box[0])
    output_visual = detector.plot_box(rotated[:, :, ::-1], output_box)
    plt.imshow(output_visual)
    plt.title('Detetion results')
  else:
    # nothing detected: show the rotated input (BGR -> RGB) instead
    plt.imshow(rotated[:, :, ::-1])
    plt.title('Input image')
  plt.axis('off')
  plt.show()

## Different categories of objects

The model was trained on 80 categories of objects, not just trucks and cars. For example, it can also be used to recognize cats and dogs. Play with the 3 examples below to see how it can recognize just cats, just dogs, or both. 

[***Internet connectivity required***]: If you have the URL of an image you want to try, you can paste it here to see the result. Warning: some websites do not allow an image to be loaded directly from its URL; in that case, save the image first and load the saved file instead.

In [None]:
import wget
# there are also images in inference/images/
imageUrl = 'https://static01.nyt.com/images/2019/10/01/science/00SCI-CATS1/merlin_102054072_34962289-a2a4-4c52-9969-4b2719347e76-superJumbo.jpg?quality=75&auto=webp'
filename = wget.download(imageUrl)
img = cv2.imread(filename)
# cv2.imread reports an unreadable file by returning None instead of raising,
# e.g. when the website served something other than the image itself
if img is None:
  raise IOError(f"could not read an image from {filename}; save the picture locally and load that file instead")

output_box = detector.detect(img, ['cat'], do_visual=False)
print(f"detect {output_box.shape[0]} objects")
print("The output is a Nx6 matrix")
print("Each row is one detection: box coordinates, prediction confidence, class_id")
print('--------')
# an arbitrary picture may contain no cat, so guard the first-row access
if len(output_box) > 0:
  print("Let's look at the first detection")
  print(output_box[0])
  output_visual = detector.plot_box(img[:, :, ::-1], output_box)
  plt.imshow(output_visual)
  plt.title('Detetion results')
else:
  plt.imshow(img[:, :, ::-1])
  plt.title('Input image')
plt.axis('off')
plt.show()



In [None]:
imageUrl = 'https://epwn.org/wp-content/uploads/2021/08/many-many-dogs.jpg'
filename = wget.download(imageUrl)
img = cv2.imread(filename)
# cv2.imread reports an unreadable file by returning None instead of raising,
# e.g. when the website served something other than the image itself
if img is None:
  raise IOError(f"could not read an image from {filename}; save the picture locally and load that file instead")

output_box = detector.detect(img, ['dog'], do_visual=False)
print(f"detect {output_box.shape[0]} objects")
print("The output is a Nx6 matrix")
print("Each row is one detection: box coordinates, prediction confidence, class_id")
print('--------')
# an arbitrary picture may contain no dog, so guard the first-row access
if len(output_box) > 0:
  print("Let's look at the first detection")
  print(output_box[0])
  output_visual = detector.plot_box(img[:, :, ::-1], output_box)
  plt.imshow(output_visual)
  plt.title('Detetion results')
else:
  plt.imshow(img[:, :, ::-1])
  plt.title('Input image')
plt.axis('off')
plt.show()

Now put both cat and dog detection on the same image. Do you see any misclassification? What could be causing it?

In [None]:
imageUrl = 'https://mypetsashes.co.uk/wp-content/uploads/2015/08/Pet-Cremation-UK-650x650.jpg'
filename = wget.download(imageUrl)
img = cv2.imread(filename)
# cv2.imread reports an unreadable file by returning None instead of raising,
# e.g. when the website served something other than the image itself
if img is None:
  raise IOError(f"could not read an image from {filename}; save the picture locally and load that file instead")

output_box = detector.detect(img, ['cat', 'dog'], do_visual=False)
print(f"detect {output_box.shape[0]} objects")
print("The output is a Nx6 matrix")
print("Each row is one detection: box coordinates, prediction confidence, class_id")
print('--------')
# guard the first-row access: the picture may contain neither class
if len(output_box) > 0:
  print("Let's look at the first detection")
  print(output_box[0])

# always show the untouched input first (BGR -> RGB for matplotlib)
plt.imshow(img[:, :, ::-1])
plt.title('Input image')
plt.axis('off')
plt.show()

# then the annotated copy, when there is something to annotate
if len(output_box) > 0:
  output_visual = detector.plot_box(img[:, :, ::-1], output_box)
  plt.imshow(output_visual)
  plt.title('Detetion results')
  plt.axis('off')
  plt.show()

Play with more images from the `inference/images/` directory.

# <b>[Optional 2] Image processing</b>

## How are images represented?

Images are just a bunch of numbers! Let's download some images and see how a computer represents them.

In [None]:
# download two car images (one color, one grayscale)
# These lines call the `wget` command-line program through the shell (`!`),
# not the Python `wget` module; `-O <name>` fixes the name of each downloaded file.
! wget https://bc-cv.github.io/csci3343/public/kitti/kitti.png -O kitti.png
! wget https://bc-cv.github.io/csci3343/public/kitti/kitti_gray.png -O kitti_gray.png

Here's a grayscale image (each pixel is a single shade of gray between black and white).

In [None]:
import imageio.v3
import matplotlib.pyplot as plt

# load the downloaded grayscale picture into an array
image_gray = imageio.v3.imread('kitti_gray.png')

# one number per pixel: the array has only rows and columns
print('Image shape:', image_gray.shape)
print("It's a 2D matrix!\n", image_gray)

# draw it with a gray colormap, without axis ticks, under a descriptive title
plt.figure(figsize=(12, 10))
plt.imshow(image_gray, cmap='gray')
plt.axis('off')
plt.title('Grayscale image from a driving car')
plt.show()

Here's a color image! The representation is a little more complicated. Instead of one number representing each pixel, we have 3 numbers (in a 3D tensor) representing each pixel.

In [None]:
# load the downloaded color picture into an array
image = imageio.v3.imread('kitti.png')

# several numbers per pixel: the array gains a third axis
print('Image shape:', image.shape)
print("It's a 3D matrix!\n", image)

# draw it without axis ticks, under a descriptive title
plt.figure(figsize=(12, 10))
plt.imshow(image)
plt.axis('off')
plt.title('Color image from a driving car')
plt.show()

## Transforming images

Since images are just a bunch of numbers, we can apply transformations or operations on those numbers to change how an image looks. This is like applying an image filter on your camera photos on your phone!

### Basic transformations

Simple transformations can be done by adding a constant to every pixel value. A positive constant makes the image lighter; a negative one makes it darker!

In [None]:
import numpy as np

# we add a constant (100 by default) to each image pixel value
def lighten_image_operation(img, amount=100):
  """Return a brightened (or darkened) 8-bit copy of `img`.

  Args:
    img: numpy array of pixel values.
    amount: value added to every pixel; negative values darken the image.

  Returns:
    A uint8 array of the same shape, with results limited to the 0-255 range.
  """
  # work in float so the sum cannot wrap around before it is clipped
  shifted = img.astype(float) + amount
  return np.clip(shifted, 0, 255).astype(np.uint8)

lighter_image = lighten_image_operation(image)

# original on the left, brightened copy on the right
plt.figure(figsize=(12, 10))
panels = [(image, 'Original image from a driving car'),
          (lighter_image, 'Lighter image from a driving car')]
for column, (picture, caption) in enumerate(panels, start=1):
  plt.subplot(1, 2, column)
  plt.imshow(picture)
  plt.title(caption)
  # hide the axis ticks and frame
  plt.axis('off')
plt.show()

We can also multiply image pixel values to increase the contrast: bright areas become brighter and dark areas become darker.

In [None]:
def contrast_image_operation(img, factor=1.5, midpoint=128):
  """Return an 8-bit copy of `img` with its contrast scaled around `midpoint`.

  Args:
    img: numpy array of pixel values.
    factor: multiplier applied to each pixel's distance from `midpoint`;
      values above 1 increase contrast, values between 0 and 1 reduce it.
    midpoint: pixel value that is left unchanged by the scaling.

  Returns:
    A uint8 array of the same shape, with results limited to the 0-255 range.
  """
  # work in float so the intermediate values can leave 0-255 before clipping
  stretched = (img.astype(float) - midpoint) * factor + midpoint
  return np.clip(stretched, 0, 255).astype(np.uint8)

contrast_image = contrast_image_operation(image)

# left panel: untouched picture, right panel: contrast-adjusted picture
plt.figure(figsize=(12, 10))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.axis('off')
plt.title('Original image from a driving car')

plt.subplot(1, 2, 2)
plt.imshow(contrast_image)
plt.axis('off')
plt.title('Contrast image from a driving car')

plt.show()

### More advanced transformations

A <a href="https://www.adobe.com/creativecloud/photography/discover/sepia-photography.html">sepia filter</a> imitates an old chemical process for developing photographs; today it is a popular image style.

In [None]:
def sepia_filter_operation(img):
  """Apply a sepia color transform to an 8-bit image with 3 values per pixel.

  Every pixel (a row of 3 channel values scaled to 0-1) is multiplied by a
  fixed 3x3 matrix; the result is scaled back to 0-255 and clipped.

  Args:
    img: numpy array whose last axis has length 3.

  Returns:
    A uint8 array with the same shape as `img`.

  Raises:
    ValueError: if the last axis of `img` does not have length 3.
  """
  # Flattening to N x 3 would silently mix channels of e.g. a 4-channel image,
  # so reject anything that is not 3 values per pixel up front.
  if img.ndim < 1 or img.shape[-1] != 3:
    raise ValueError(f"expected an image with 3 channels, got shape {img.shape}")
  # Row i holds the contribution of input channel i to the three output channels.
  # NOTE(review): the row order looks like the usual sepia weights arranged for
  # blue-green-red input, while the notebook passes an image read with imageio;
  # confirm the intended channel order.
  color_trans = np.array([[0.189, 0.168, 0.131],
                          [0.769, 0.686, 0.534],
                          [0.393, 0.349, 0.272]])
  # reshape image into Nx3 and convert the range into 0-1
  # (previously the reshaped array was overwritten and never used)
  pixels = img.reshape(-1, 3) / 255.0
  # Sepia color transform
  img_sepia = np.matmul(pixels, color_trans)
  # convert it back to 0-255
  img_sepia = np.clip(img_sepia * 255, 0, 255).astype(np.uint8)
  # reshape it back
  return img_sepia.reshape(img.shape)

sepia_image = sepia_filter_operation(image)

# original on the left, sepia-toned copy on the right
plt.figure(figsize=(12, 10))
panels = [(image, 'Original image from a driving car'),
          (sepia_image, 'Sepia image from a driving car')]
for column, (picture, caption) in enumerate(panels, start=1):
  plt.subplot(1, 2, column)
  plt.imshow(picture)
  plt.title(caption)
  # hide the axis ticks and frame
  plt.axis('off')
plt.show()

A Gaussian filter can be applied to produce the effect of blurring the image.

In [None]:
kernel_size = 21 # size of blurring filter
sigma = 10 # degree of blur (larger values blur more)
# the blurring filter is square: kernel_size pixels on each side
blur_window = (kernel_size, kernel_size)
blurred_image = cv2.GaussianBlur(image, blur_window, sigma)

# left panel: untouched picture, right panel: blurred picture
plt.figure(figsize=(12, 10))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.axis('off')
plt.title('Original image from a driving car')

plt.subplot(1, 2, 2)
plt.imshow(blurred_image)
plt.axis('off')
plt.title('Blurry image from a driving car')

plt.show()

## How well does our model do?

With these transformed images, we can test how well our model does in different situations. What do these results tell us about the performance of our AI model under various situations?

In [None]:
# Each entry pairs one variant of the picture with the title of its panel
detection_inputs = [
  (image, 'Detetion results on the original image'),
  (lighter_image, 'Detetion results on the lighter image'),
  (sepia_image, 'Detetion results on the sepia image'),
  (blurred_image, 'Detetion results on the blurry image'),
]

# One row per variant, stacked vertically in a 4 x 1 grid
for row, (candidate, caption) in enumerate(detection_inputs, start=1):
  # with do_visual=True the detector's output is drawn directly with imshow
  output_visual = detector.detect(candidate, ['truck', 'bus', 'car'], do_visual=True,  conf_thres = 0.5)
  plt.subplot(4, 1, row)
  plt.imshow(output_visual)
  plt.title(caption, fontsize=18)
  plt.axis('off')

plt.show()
