In [3]:
#imports
import os

# TensorFlow's C++ logger reads TF_CPP_MIN_LOG_LEVEL when the library is first
# loaded, so the variable has to be exported *before* the tensorflow import;
# setting it afterwards does not silence the start-up messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow.keras as K
import cv2

2023-05-30 21:07:05.997251: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-30 21:07:06.028215: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-30 21:07:06.028256: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [4]:
# YOLO Class
# Task 1. Process Outputs
class Yolo:
    """
    Object detector built around a Darknet (YOLO v3) Keras model.

    The class loads the model and its class names, decodes the raw network
    outputs into boxes on the original image, filters them by score, applies
    per-class non-max suppression, and can display/save annotated images.
    """
    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
        """
        Class Constructor
        model_path: path to where Darknet Keras model is stored
        classes_path: path to list of class names used for Darknet model
            (one class name per line)
        class_t: float representing box score threshold for initial filtering
        nms_t: float representing the IOU threshold for non-max suppression
        anchors: numpy.ndarray of shape (outputs, anchor_boxes, 2) containing
        the anchor boxes
            outputs: number of outputs made by the Darknet model
            anchor_boxes: number of anchor boxes used for each prediction
            2: [anchor_box_width, anchor_box_height]

        Public Instance Attributes
            model: Darknet Keras Model
            class_names: list of the class names for the model
            class_t: box score threshold for initial filtering
            nms_t: IOU threshold for non-max suppression
            anchors: the anchor boxes
        """
        self.model = K.models.load_model(model_path)
        with open(classes_path) as file:
            # splitlines() keeps the last class even when the file does not
            # end with a newline, and does not break names on '|'.
            self.class_names = file.read().splitlines()
        self.class_t = class_t
        self.nms_t = nms_t
        self.anchors = anchors

    def process_outputs(self, outputs, image_size):
        """
        Process outputs of Darknet model
        outputs: list of numpy.ndarrays containing predictions for a single
            image. The arrays are NOT modified.
            Each output will have the shape (grid_height,
                                             grid_width,
                                             anchor_boxes,
                                             4 + 1 + classes)
                grid_height: height of grid used for output
                grid_width: width of grid used for output
                anchor_boxes: number of anchor boxes used
                4: label_center[t_x, t_y], label_width[t_w], label_height[t_h]
                1: box confidence
                classes: class probabilities for all classes
        image_size: numpy.ndarray containing the original image size
            [image_height, image_width]

        Returns a tuple of (boxes, box_confidences, box_class_probs)
            boxes: list of numpy.ndarrays of shape (grid_height,
                                                    grid_width,
                                                    anchor_boxes,
                                                    4)
                4: x1, y1, x2, y2
                    (top_left[x1, y1], bot_right[x2, y2]) should represent the
                    boundary box relative to original image
            box_confidences: a list of numpy.ndarrays of shape (grid_height,
                                                                grid_width,
                                                                anchor_boxes,
                                                                1)
                containing the confidences for each output, respectively
            box_class_probs: list of numpy.ndarrays of shape (grid_height,
                                                              grid_width,
                                                              anchor_boxes,
                                                              classes)
                containing the box's class probabilities for each output
        """
        def sigmoid(z):
            """
            sigmoid activation function
            """
            return (1 / (1 + np.exp(-z)))

        # Model input is laid out (batch, height, width, channels)
        input_height = self.model.input.shape[1]
        input_width = self.model.input.shape[2]

        boxes, box_confidences, box_class_probs = [], [], []

        for idx, output in enumerate(outputs):
            grid_height, grid_width = output.shape[:2]

            # Column / row index of every grid cell; broadcasting spreads
            # them over the remaining axes (also valid for non-square grids).
            cx = np.arange(grid_width).reshape(1, grid_width, 1)
            cy = np.arange(grid_height).reshape(grid_height, 1, 1)

            # Box centre and size as fractions of the image
            bx = (sigmoid(output[..., 0]) + cx) / grid_width
            by = (sigmoid(output[..., 1]) + cy) / grid_height
            bw = (np.exp(output[..., 2]) *
                  self.anchors[idx, :, 0]) / input_width
            bh = (np.exp(output[..., 3]) *
                  self.anchors[idx, :, 1]) / input_height

            # Work on a copy so the caller's raw predictions stay untouched
            # (the copy keeps the dtype of the network output).
            box = output[..., :4].copy()

            # Move bounding box coordinates from center to corner and scale
            # them to the original image
            box[..., 0] = (bx - (bw * .5)) * image_size[1]
            box[..., 1] = (by - (bh * .5)) * image_size[0]
            box[..., 2] = (bx + (bw * .5)) * image_size[1]
            box[..., 3] = (by + (bh * .5)) * image_size[0]

            boxes.append(box)
            box_confidences.append(sigmoid(output[..., 4:5]))
            box_class_probs.append(sigmoid(output[..., 5:]))

        return (boxes, box_confidences, box_class_probs)

    def filter_boxes(self, boxes, box_confidences, box_class_probs):
        """
        Determine which bounding boxes meet or exceed threshold
        boxes: list of numpy.ndarrays of shape (grid_height,
                                                grid_width,
                                                anchor_boxes,
                                                4)
            containing the processed boundary boxes for each output
        box_confidences: list of numpy.ndarrays of shape (grid_height,
                                                          grid_width,
                                                          anchor_boxes,
                                                          1)
            containing the processed box confidences for each output
        box_class_probs: list of numpy.ndarrays of shape (grid_height,
                                                          grid_width,
                                                          anchor_boxes,
                                                          classes)
            containing the preprocessed box class probabilities for each output

        Returns a tuple of (filtered_boxes, box_classes, box_scores)
            filtered_boxes: numpy.ndarray of shape (?, 4) containing all of the
                filtered bounding boxes
            box_classes: numpy.ndarray of shape (?,) containing the class
                number that each box in filtered_boxes predicts
            box_scores: numpy.ndarray of shape (?) containing the box scores
                for each box in filtered_boxes
            When nothing is supplied, empty arrays of shape (0, 4), (0,) and
            (0,) are returned.
        """
        kept_boxes, kept_classes, kept_scores = [], [], []

        for box, confidence, class_probs in zip(boxes,
                                                box_confidences,
                                                box_class_probs):
            # Box score = class probability * objectness, computed once
            all_scores = class_probs * confidence
            score = np.max(all_scores, axis=-1)
            cls = np.argmax(all_scores, axis=-1)
            keep = score >= self.class_t

            kept_boxes.append(box[keep])
            kept_classes.append(cls[keep])
            kept_scores.append(score[keep])

        if not kept_boxes:
            return (np.zeros((0, 4)),
                    np.zeros((0,), dtype=int),
                    np.zeros((0,)))

        filtered_boxes = np.concatenate(kept_boxes, axis=0)
        box_classes = np.concatenate(kept_classes, axis=0).astype(int)
        # Scores have always been handed back as float64
        box_scores = np.concatenate(kept_scores, axis=0).astype(np.float64)

        return (filtered_boxes, box_classes, box_scores)

    def _iou(self, box1, box2):
        """
        Calculates IoU for two boxes given as (x1, y1, x2, y2).
        Returns 0.0 when the union has no positive area (degenerate boxes)
        instead of dividing by zero.
        """
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        intersection_area = max(0, x2 - x1) * max(0, y2 - y1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

        union_area = box1_area + box2_area - intersection_area
        # "not > 0" also catches NaN
        if not union_area > 0:
            return 0.0
        return intersection_area / union_area

    def non_max_suppression(self, filtered_boxes, box_classes, box_scores):
        """
        Method to suppress all non-max bounding boxes, class by class
        filtered_boxes: numpy.ndarray of shape (?, 4) containing all of the
            filtered bounding boxes
        box_classes: numpy.ndarray of shape (?,) containing the class number
            for the class that filtered_boxes predicts
        box_scores: numpy.ndarray of shape (?) containing the box scores for
            each box in filtered_boxes
        Returns a tuple of (box_predictions, predicted_box_classes,
            predicted_box_scores):
            box_predictions: numpy.ndarray shape (?, 4) containing all of the
                predicted bounding boxes ordered by class and box score
            predicted_box_classes: numpy.ndarray shape (?,) containing the
                class number for box_predictions ordered by class and box score
            predicted_box_scores: numpy.ndarray shape (?) containing the box
                scores for box_predictions ordered by class and box score
            With no input boxes, empty arrays of shape (0, 4), (0,) and (0,)
            are returned.
        """
        filtered_boxes = np.asarray(filtered_boxes)
        box_classes = np.asarray(box_classes)
        box_scores = np.asarray(box_scores)

        box_predictions = []
        predicted_box_classes = []
        predicted_box_scores = []

        for cls in np.unique(box_classes):
            idxs = np.where(box_classes == cls)
            cls_boxes = filtered_boxes[idxs]
            cls_box_scores = box_scores[idxs]

            while len(cls_boxes) > 0:
                max_score_idx = np.argmax(cls_box_scores)
                best_box = cls_boxes[max_score_idx]
                box_predictions.append(best_box)
                predicted_box_classes.append(cls)
                predicted_box_scores.append(cls_box_scores[max_score_idx])

                iou_scores = np.array([self._iou(best_box, box)
                                       for box in cls_boxes])
                suppressed = iou_scores >= self.nms_t
                # Always drop the box just selected; otherwise a zero-area
                # box (or nms_t > 1) would never leave the pool and the
                # loop would not terminate.
                suppressed[max_score_idx] = True

                cls_boxes = cls_boxes[~suppressed]
                cls_box_scores = cls_box_scores[~suppressed]

        if not box_predictions:
            return (np.zeros((0, 4)),
                    np.zeros((0,), dtype=int),
                    np.zeros((0,)))

        return (np.array(box_predictions),
                np.array(predicted_box_classes),
                np.array(predicted_box_scores))

    @staticmethod
    def load_images(folder_path):
        """
        Method to load images given a folder path
        Entries are visited in sorted order so the result is reproducible;
        entries OpenCV cannot decode (imread returns None) are skipped.
        Returns a tuple of (images, image_paths)
            images: list of images as numpy.ndarrays
            image_paths: list of paths to the individual images in images
        """
        images = []
        image_paths = []
        for photo in sorted(os.listdir(folder_path)):
            path = os.path.join(folder_path, photo)
            image = cv2.imread(path)
            if image is None:
                # Not a readable image (sub-directory, hidden file, ...)
                continue
            images.append(image)
            image_paths.append(path)
        return (images, image_paths)

    def preprocess_images(self, images):
        """
        images: list of images as numpy.ndarrays
        Resize image with inter-cubic interpolation
        Rescale all images to have pixel values in range [0, 1]
        Returns a tuple of (pimages, image_shapes)
            pimages: numpy.ndarray of shape (ni, input_h, input_w, 3)
                ni: number of images that were preprocessed
                input_h: input height for the Darknet model
                input_w: input width for the Darknet model
                3: number of color channels
            image_shapes: numpy.ndarray of shape (ni, 2) containing the
                original height and width of the images
                    2: (image_height, image_width)
        """
        pimages, image_shapes = [], []
        input_h = self.model.input.shape[1]
        input_w = self.model.input.shape[2]
        # cv2.resize expects the target size as (width, height)
        processed_size = (input_w, input_h)
        for image in images:
            resized = cv2.resize(image,
                                 processed_size,
                                 interpolation=cv2.INTER_CUBIC)
            pimages.append(resized / 255)
            image_shapes.append(image.shape[0:2])

        pimages = np.asarray(pimages)
        image_shapes = np.asarray(image_shapes)
        return (pimages, image_shapes)

    def show_boxes(self, image, boxes, box_classes, box_scores, file_name):
        """
        image: numpy.ndarray containing an unprocessed image
            (the boxes and labels are drawn directly onto this array)
        boxes: numpy.ndarray containing the boundary boxes for the image
        box_classes: numpy.ndarray containing the class indices for each box
        box_scores: numpy.ndarray containing the box scores for each box
        file_name: path to where the original image is stored
        Displays the image with all boundary boxes, class names, and box scores
            Boxes should be drawn with a blue line of thickness 2
            Class names and box scores should be drawn above each box in red
                Round box scores to 2 decimal places
                Text should be written 5 pixels above the top left corner
                Text should be written in FONT_HERSHEY_SIMPLEX
                Font scale should be 0.5
                Line thickness should be 1
                Use LINE_AA as the line type
            The window name should be the same as file_name
            If the `s` key is pressed:
                The image should be saved in the directory `detections` located
                    in the current directory
                If `detections` does not exist, create it
                The saved image should have the name file_name
            The image window is closed after any key press
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        # OpenCV colours are in BGR order
        blue = (255, 0, 0)
        red = (0, 0, 255)

        for i, box in enumerate(boxes):
            pt1 = (int(box[0]), int(box[1]))
            pt2 = (int(box[2]), int(box[3]))
            offset = (int(box[0]), int(box[1] - 5))
            label = "{} {:.2f}".format(self.class_names[int(box_classes[i])],
                                       box_scores[i])
            cv2.rectangle(image, pt1, pt2, blue, 2)
            cv2.putText(image, label, offset, font, 0.5, red, 1, cv2.LINE_AA)

        cv2.imshow(file_name, image)
        # Keep only the low byte: some platforms set extra high bits
        key = cv2.waitKey(0) & 0xFF

        if key == ord('s'):
            os.makedirs('detections', exist_ok=True)
            cv2.imwrite(os.path.join('detections', file_name), image)

        cv2.destroyAllWindows()

    def predict(self, folder_path):
        """
        Detects objects in photos located in folder_path and displays every
        image with its detections (see show_boxes)
        Returns a tuple of (predictions, image_paths)
            predictions: list of tuples for each image of
                (boxes, box_classes, box_scores)
            image_paths: list of image paths corresponding to each prediction
                in predictions
        """
        images, paths = self.load_images(folder_path)
        if not images:
            # Nothing to feed to the network
            return ([], paths)

        pimages, pimage_shapes = self.preprocess_images(images)
        darknet_pred_set = self.model.predict(pimages)
        # A single-output model hands back a bare array rather than a list
        if not isinstance(darknet_pred_set, (list, tuple)):
            darknet_pred_set = [darknet_pred_set]

        predictions = []
        for x, img in enumerate(images):
            # One slice per model output (scale) for this image
            darknet_pred = [scale_pred[x] for scale_pred in darknet_pred_set]

            boxes, box_confidences, box_class_probs = self.process_outputs(
                darknet_pred, pimage_shapes[x]
            )
            filtered_boxes, box_classes, box_scores = self.filter_boxes(
                boxes, box_confidences, box_class_probs
            )
            box_pred, pred_classes, pred_scores = self.non_max_suppression(
                filtered_boxes, box_classes, box_scores
            )
            predictions.append((box_pred, pred_classes, pred_scores))

            self.show_boxes(img,
                            box_pred,
                            pred_classes,
                            pred_scores,
                            os.path.basename(paths[x]))

        return (predictions, paths)




In [5]:
# 0-main: build the detector and report how it is configured
np.random.seed(0)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
yolo.model.summary()
# Echo the public attributes set by the constructor
print('Class names:', yolo.class_names)
print('Class threshold:', yolo.class_t)
print('NMS threshold:', yolo.nms_t)
print('Anchor boxes:', yolo.anchors)

2023-05-30 21:07:09.225119: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/bsbanotto/.local/lib/python3.8/site-packages/cv2/../../lib64:
2023-05-30 21:07:09.225624: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-05-30 21:07:09.225801: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (BensLaptop): /proc/driver/nvidia/version does not exist
2023-05-30 21:07:09.226685: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 416, 416, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 416, 416, 32  128        ['conv2d[0][0]']                 
 alization)                     )                                                             

In [6]:
# 1-main: decode random "network outputs" for a 500x700 image
np.random.seed(0)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# One fake output per detection scale, drawn in the order 13, 26, 52
output1, output2, output3 = [np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52)]
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3],
    np.array([500, 700]),
)
print('Boxes:', boxes)
print('Box confidences:', box_confidences)
print('Box class probabilities:', box_class_probs)

Boxes: [array([[[[-2.13743365e+02, -4.85478868e+02,  3.05682061e+02,
           5.31534670e+02],
         [-6.28222336e+01, -1.13713822e+01,  1.56452678e+02,
           7.01966357e+01],
         [-7.00753664e+02, -7.99011810e+01,  7.77777040e+02,
           1.24440730e+02]],

        [[ 6.52921110e+01,  1.43688377e+00,  1.14233396e+02,
           2.36524697e+01],
         [ 2.79501974e+01, -2.03937627e+01,  1.59898932e+02,
           5.02826906e+01],
         [ 6.17629106e+01, -8.31889643e+01,  1.02303202e+02,
           1.38551621e+02]],

        [[ 1.20166765e+02,  2.31461495e+01,  1.71676644e+02,
           3.82523882e+01],
         [ 3.39847297e+01, -3.21481177e+01,  2.45335593e+02,
           6.17333839e+01],
         [-5.41077407e+02, -4.37386113e+02,  7.94834201e+02,
           4.95448229e+02]],

        ...,

        [[ 4.37830908e+02, -1.12792212e+02,  6.59360033e+02,
           1.26273767e+02],
         [ 4.40039718e+02, -1.34968536e+02,  6.70734974e+02,
           1.75141946

In [7]:
# 2-main: decode random outputs, then keep only boxes above the threshold
np.random.seed(0)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# One fake output per detection scale, drawn in the order 13, 26, 52
output1, output2, output3 = [np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52)]
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3],
    np.array([500, 700]),
)
boxes, box_classes, box_scores = yolo.filter_boxes(
    boxes,
    box_confidences,
    box_class_probs,
)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

Boxes: [[-213.74336488 -485.47886784  305.68206077  531.53467019]
 [ -62.82223363  -11.37138215  156.45267787   70.19663572]
 [ 190.62733946    7.65943712  319.201764     43.75737906]
 ...
 [ 647.78041714  491.58472667  662.00736941  502.60750466]
 [ 586.27543101  487.95333873  715.85860922  499.39422783]
 [ 666.1128673   481.29683099  728.88754319  501.09378706]]
Box classes: [19 54 29 ... 63 25 46]
Box scores: [0.7850503  0.67898563 0.81301861 ... 0.8012832  0.61427808 0.64562072]


In [8]:
# 3-main: decode, threshold, then run non-max suppression
np.random.seed(0)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
# One fake output per detection scale, drawn in the order 13, 26, 52
output1, output2, output3 = [np.random.randn(side, side, 3, 85)
                             for side in (13, 26, 52)]
boxes, box_confidences, box_class_probs = yolo.process_outputs(
    [output1, output2, output3],
    np.array([500, 700]),
)
boxes, box_classes, box_scores = yolo.filter_boxes(
    boxes,
    box_confidences,
    box_class_probs,
)
boxes, box_classes, box_scores = yolo.non_max_suppression(
    boxes,
    box_classes,
    box_scores,
)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

Boxes: [[483.49145347 128.010205   552.78146847 147.87465464]
 [-38.91328475 332.66704009 102.94594841 363.78584864]
 [ 64.10861893 329.13266621 111.87941603 358.37523958]
 ...
 [130.0729606  467.20024928 172.42160784 515.90336094]
 [578.82381106  76.25699693 679.22893305 104.63320075]
 [169.12132771 304.32765204 251.1457077  342.16397829]]
Box classes: [ 0  0  0 ... 79 79 79]
Box scores: [0.80673525 0.80405611 0.78972362 ... 0.61758194 0.61455015 0.6001824 ]


In [9]:
# 4-main: load the sample folder and show one randomly chosen image
np.random.seed(0)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
images, image_paths = yolo.load_images('data/yolo')
# Random index in [0, len(images))
i = np.random.randint(len(images))
# The window is titled with the image's path; any key closes it
cv2.imshow(image_paths[i], images[i])
cv2.waitKey(0)
cv2.destroyAllWindows()



In [10]:
# 5-main: preprocess the sample folder and show one resized image
np.random.seed(3)
anchors = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])
yolo = Yolo(model_path='data/yolo.h5',
            classes_path='data/coco_classes.txt',
            class_t=0.6,
            nms_t=0.5,
            anchors=anchors)
images, image_paths = yolo.load_images('data/yolo')
pimages, image_shapes = yolo.preprocess_images(images)
# Type and shape of both returned arrays
print(type(pimages), pimages.shape)
print(type(image_shapes), image_shapes.shape)
# Random index in [0, len(images)); compare original shape with the record
i = np.random.randint(len(images))
print(images[i].shape, ':', image_shapes[i])
cv2.imshow(image_paths[i], pimages[i])
cv2.waitKey(0)
cv2.destroyAllWindows()

<class 'numpy.ndarray'> (13, 416, 416, 3)
<class 'numpy.ndarray'> (13, 2)
(424, 640, 3) : [424 640]


In [11]:
# 6-main: draw three known detections on dog.jpg
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('data/yolo.h5', 'data/coco_classes.txt', 0.6, 0.5, anchors)
images, image_paths = yolo.load_images('data/yolo')
boxes = np.array([[119.22100287, 118.62197718, 567.75985556, 440.44121152],
                    [468.53530752, 84.48338278, 696.04923556, 167.98947829],
                    [124.2043716, 220.43365057, 319.4254314 , 542.13706101]])
box_scores = np.array([0.99537075, 0.91536146, 0.9988506])
box_classes = np.array([1, 7, 16])
# Locate dog.jpg; fall back to the first image when it is not present
ind = 0
for i, name in enumerate(image_paths):
    if "dog.jpg" in name:
        ind = i
        break
# Index with `ind` (the search result), not the loop variable `i`, which
# points at the last image when the search finds nothing.
yolo.show_boxes(images[ind], boxes, box_classes, box_scores, "dog.jpg")



In [13]:
# 7-main: run the full pipeline on the folder and report dog.jpg
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('data/yolo.h5', 'data/coco_classes.txt', 0.6, 0.5, anchors)
predictions, image_paths = yolo.predict('data/yolo')
# Initialise `ind` here so the cell does not depend on a value left behind
# by an earlier cell; fall back to the first image if dog.jpg is missing.
ind = 0
for i, name in enumerate(image_paths):
    if "dog.jpg" in name:
        ind = i
        break
print(image_paths[ind])
print(predictions[ind])

data/yolo/dog.jpg
(array([[119.10174, 118.63829, 567.89417, 440.58704],
       [468.6808 ,  84.4819 , 695.9741 , 168.00749],
       [124.10596, 220.4373 , 319.45682, 542.3967 ]], dtype=float32), array([ 1,  7, 16]), array([0.99545461, 0.91439855, 0.99883264]))
