In [1]:
#imports
import os

# TF_CPP_MIN_LOG_LEVEL is only honoured if it is set before TensorFlow is
# imported; setting it afterwards leaves the native info/warning log lines
# (oneDNN notice, missing CUDA libraries, ...) in the cell output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow.keras as K
import cv2

2023-04-17 11:01:49.944289: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-17 11:01:51.411158: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-17 11:01:51.411289: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [14]:
# YOLO Class
# Task 1. Process Outputs
class Yolo:
    """
    Object detection helper built around a Darknet Keras model.

    Holds the model, its class names, the anchor boxes and the two
    thresholds, and turns raw model outputs into filtered bounding boxes.
    """
    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
        """
        Class Constructor
        model_path: path to where Darknet Keras model is stored
        classes_path: path to list of class names used for Darknet model
            (one class name per line)
        class_t: float representing box score threshold for initial filtering
        nms_t: float representing the IOU threshold for non-max suppression
        anchors: numpy.ndarray of shape (outputs, anchor_boxes, 2) containing
        the anchor boxes
            outputs: number of outputs made by the Darknet model
            anchor_boxes: number of anchor boxes used for each prediction
            2: [anchor_box_width, anchor_box_height]

        Public Instance Attributes
            model: Darknet Keras Model
            class_names: list of the class names for the model
            class_t: box score threshold for initial filtering
            nms_t: IOU threshold for non-max suppression
            anchors: the anchor boxes
        """
        self.model = K.models.load_model(model_path)
        self.class_names = self._load_class_names(classes_path)
        self.class_t = class_t
        self.nms_t = nms_t
        self.anchors = anchors

    @staticmethod
    def _load_class_names(classes_path):
        """
        Read the class names file, one name per line.

        Surrounding whitespace (including a stray carriage return) is
        stripped and blank lines are skipped, so the last class is kept
        whether or not the file ends with a newline.

        Returns a list of str.
        """
        with open(classes_path) as file:
            return [line.strip() for line in file if line.strip()]

    @staticmethod
    def _sigmoid(z):
        """
        Element-wise sigmoid activation function
        """
        return 1 / (1 + np.exp(-z))

    def process_outputs(self, outputs, image_size):
        """
        Process outputs of Darknet model
        outputs: list of numpy.ndarrays containing predictions for a single
            image
            Each output will have the shape (grid_height,
                                             grid_width,
                                             anchor_boxes,
                                             4 + 1 + classes)
                grid_height: height of grid used for output
                grid_width: width of grid used for output
                anchor_boxes: number of anchor boxes used
                4: t_x, t_y, t_w, t_h
                1: box confidence
                classes: class probabilities for all classes
            The arrays in outputs are read only; they are never modified.
        image_size: numpy.ndarray containing the original image size
            [image_height, image_width]

        Returns a tuple of (boxes, box_confidences, box_class_probs)
            boxes: list of numpy.ndarrays of shape (grid_height,
                                                    grid_width,
                                                    anchor_boxes,
                                                    4)
                4: x1, y1, x2, y2
                    (x1, y1, x2, y2) should represent the boundary box relative
                    to original image
            box_confidences: a list of numpy.ndarrays of shape (grid_height,
                                                                grid_width,
                                                                anchor_boxes,
                                                                1)
                containing the confidences for each output, respectively
            box_class_probs: list of numpy.ndarrays of shape (grid_height,
                                                              grid_width,
                                                              anchor_boxes,
                                                              classes)
                containing the box's class probabilities for each output
        """
        image_height, image_width = image_size[0], image_size[1]

        # Keras image inputs are laid out as (batch, height, width, channels)
        input_height = self.model.input.shape[1]
        input_width = self.model.input.shape[2]

        boxes, box_confidences, box_class_probs = [], [], []

        for index, output in enumerate(outputs):
            grid_height, grid_width = output.shape[0], output.shape[1]

            # Grid cell offsets, shaped to broadcast against
            # (grid_height, grid_width, anchor_boxes):
            #   cx varies along the width axis, cy along the height axis
            cx = np.arange(grid_width).reshape(1, grid_width, 1)
            cy = np.arange(grid_height).reshape(grid_height, 1, 1)

            # Box centre and size as fractions of the image
            bx = (self._sigmoid(output[..., 0]) + cx) / grid_width
            by = (self._sigmoid(output[..., 1]) + cy) / grid_height
            bw = (np.exp(output[..., 2]) *
                  self.anchors[index, :, 0]) / input_width
            bh = (np.exp(output[..., 3]) *
                  self.anchors[index, :, 1]) / input_height

            # Convert (centre, size) to corner coordinates in pixels of the
            # original image. np.stack builds a new array, so the caller's
            # output array is left untouched.
            boxes.append(np.stack(((bx - bw * .5) * image_width,
                                   (by - bh * .5) * image_height,
                                   (bx + bw * .5) * image_width,
                                   (by + bh * .5) * image_height),
                                  axis=-1))

            box_confidences.append(self._sigmoid(output[..., 4:5]))
            box_class_probs.append(self._sigmoid(output[..., 5:]))

        return (boxes, box_confidences, box_class_probs)

    def filter_boxes(self, boxes, box_confidences, box_class_probs):
        """
        Determine which bounding boxes meet or exceed threshold
        boxes: list of numpy.ndarrays of shape (grid_height,
                                                grid_width,
                                                anchor_boxes,
                                                4)
            containing the processed boundary boxes for each output
        box_confidences: list of numpy.ndarrays of shape (grid_height,
                                                          grid_width,
                                                          anchor_boxes,
                                                          1)
            containing the processed box confidences for each output
        box_class_probs: list of numpy.ndarrays of shape (grid_height,
                                                          grid_width,
                                                          anchor_boxes,
                                                          classes)
            containing the preprocessed box class probabilities for each output

        Returns a tuple of (filtered_boxes, box_classes, box_scores)
            filtered_boxes: numpy.ndarray of shape (?, 4) containing all of the
                filtered bounding boxes
            box_classes: numpy.ndarray of shape (?,) containing the class
                number that each box in filtered_boxes predicts
            box_scores: numpy.ndarray of shape (?) containing the box scores
                for each box in filtered_boxes
            When no outputs are given, all three arrays are empty.
        """
        kept_boxes, kept_classes, kept_scores = [], [], []

        for box, confidence, class_probs in zip(boxes,
                                                box_confidences,
                                                box_class_probs):
            # Score of every class for every box; the confidence (last
            # dimension 1) broadcasts over the class axis
            class_scores = class_probs * confidence
            best_class = np.argmax(class_scores, axis=-1)
            best_score = np.max(class_scores, axis=-1)

            # Boolean mask over (grid_height, grid_width, anchor_boxes)
            keep = best_score >= self.class_t

            kept_boxes.append(box[keep])
            kept_classes.append(best_class[keep])
            kept_scores.append(best_score[keep])

        if not kept_boxes:
            return (np.empty((0, 4)),
                    np.empty((0,), dtype=int),
                    np.empty((0,)))

        # Concatenate once at the end instead of growing arrays per output
        return (np.concatenate(kept_boxes, axis=0),
                np.concatenate(kept_classes, axis=0).astype(int),
                np.concatenate(kept_scores, axis=0))

In [None]:
# 0-main: build the detector and show how it was configured
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
yolo.model.summary()
for label, value in (('Class names:', yolo.class_names),
                     ('Class threshold:', yolo.class_t),
                     ('NMS threshold:', yolo.nms_t),
                     ('Anchor boxes:', yolo.anchors)):
    print(label, value)

In [None]:
# 1-main: run process_outputs on random data shaped like the model outputs
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# Drawn in order 13 -> 26 -> 52 so the seeded random stream is consumed
# exactly as three consecutive randn calls would consume it
output1, output2, output3 = (np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52))
image_size = np.array([500, 700])  # [image_height, image_width]
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3], image_size)
print('Boxes:', boxes)
print('Box confidences:', box_confidences)
print('Box class probabilities:', box_class_probs)

In [15]:
# 2-main: process random outputs, then keep only boxes above the threshold
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# Drawn in order 13 -> 26 -> 52 so the seeded random stream is consumed
# exactly as three consecutive randn calls would consume it
output1, output2, output3 = (np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52))
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3], np.array([500, 700]))
# `boxes` is rebound here from the per-output list to the filtered array
boxes, box_classes, box_scores = yolo.filter_boxes(
    boxes, box_confidences, box_class_probs)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

Boxes: [[-213.74336488 -485.47886784  305.68206077  531.53467019]
 [ -62.82223363  -11.37138215  156.45267787   70.19663572]
 [ 190.62733946    7.65943712  319.201764     43.75737906]
 ...
 [ 647.78041714  491.58472667  662.00736941  502.60750466]
 [ 586.27543101  487.95333873  715.85860922  499.39422783]
 [ 666.1128673   481.29683099  728.88754319  501.09378706]]
Box classes: [19 54 29 ... 63 25 46]
Box scores: [0.7850503  0.67898563 0.81301861 ... 0.8012832  0.61427808 0.64562072]


In [None]:
# 3-main: process, filter, then apply non-max suppression
# NOTE(review): the Yolo class defined in this notebook has no
# non_max_suppression method yet - confirm it exists before running this cell.
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# Drawn in order 13 -> 26 -> 52 so the seeded random stream is consumed
# exactly as three consecutive randn calls would consume it
output1, output2, output3 = (np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52))
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3], np.array([500, 700]))
boxes, box_classes, box_scores = yolo.filter_boxes(
    boxes, box_confidences, box_class_probs)
boxes, box_classes, box_scores = yolo.non_max_suppression(
    boxes, box_classes, box_scores)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

In [None]:
# 4-main: load the images and display one picked at random
# NOTE(review): the Yolo class defined in this notebook has no load_images
# method yet - confirm it exists before running this cell.
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
images, image_paths = yolo.load_images('../data/yolo')
i = np.random.randint(low=0, high=len(images))
# The image path doubles as the window title
cv2.imshow(image_paths[i], images[i])
cv2.waitKey(0)  # 0 = block until a key is pressed
cv2.destroyAllWindows()

In [None]:
# 5-main: preprocess the images and display one preprocessed sample
# NOTE(review): the Yolo class defined in this notebook has no load_images or
# preprocess_images method yet - confirm they exist before running this cell.
np.random.seed(0)
# One row per model output; each row holds three [width, height] anchors
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='../data/yolo.h5',
            classes_path='../data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
images, image_paths = yolo.load_images('../data/yolo')
pimages, image_shapes = yolo.preprocess_images(images)
for batch in (pimages, image_shapes):
    print(type(batch), batch.shape)
i = np.random.randint(low=0, high=len(images))
print(images[i].shape, ':', image_shapes[i])
# The image path doubles as the window title
cv2.imshow(image_paths[i], pimages[i])
cv2.waitKey(0)  # 0 = block until a key is pressed
cv2.destroyAllWindows()

In [None]:
# 6-main: draw hand-written boxes on the "dog.jpg" image
# NOTE(review): the Yolo class defined in this notebook has no load_images or
# show_boxes method yet - confirm they exist before running this cell.
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
images, image_paths = yolo.load_images('../data/yolo')
boxes = np.array([[119.22100287, 118.62197718, 567.75985556, 440.44121152],
                    [468.53530752, 84.48338278, 696.04923556, 167.98947829],
                    [124.2043716, 220.43365057, 319.4254314 , 542.13706101]])
box_scores = np.array([0.99537075, 0.91536146, 0.9988506])
box_classes = np.array([1, 7, 16])
# Index of the first path containing "dog.jpg"; falls back to the first image
ind = 0
for i, name in enumerate(image_paths):
    if "dog.jpg" in name:
        ind = i
        break
# Use `ind`, not the loop variable `i`: when no path matches, `i` is left at
# the last index and the wrong image would be shown.
yolo.show_boxes(images[ind], boxes, box_classes, box_scores, "dog.jpg")

In [None]:
# 7-main: run the full prediction pipeline and print the "dog.jpg" result
# NOTE(review): the Yolo class defined in this notebook has no predict method
# yet - confirm it exists before running this cell.
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
predictions, image_paths = yolo.predict('../data/yolo')
# Initialise `ind` in this cell so it never depends on a value left behind by
# another cell (or raises NameError on a fresh kernel) when no path matches;
# the fallback of 0 matches the 6-main cell.
ind = 0
for i, name in enumerate(image_paths):
    if "dog.jpg" in name:
        ind = i
        break
print(image_paths[ind])
print(predictions[ind])