# MNIST Object detection using YOLOv4

In [9]:
import os

In [10]:
!git submodule update --init --recursive
if not os.path.exists('MNIST-ObjectDetection/data'):
    %pip install -r MNIST-ObjectDetection/requirements.txt
    %cd MNIST-ObjectDetection
    !python generate_data.py
    %cd ..

In [11]:
# GPU_NAME =  str(os.popen('nvidia-smi --query-gpu=name --format=csv,noheader').read()).strip()
# GPU_COMPUTE_CAPABILITY = str(os.popen('nvidia-smi --query-gpu=compute_cap --format=csv,noheader').read()).strip()
# # Remove newlines
# GPU_COMPUTE_CAPABILITY = GPU_COMPUTE_CAPABILITY.replace('.', '')

# print(f'GPU_NAME: {GPU_NAME}')
# print(f'GPU_COMPUTE_CAPABILITY: {GPU_COMPUTE_CAPABILITY}')
# # Build using make, enable OpenCV and CUDNN
# import re
# with open('darknet/Makefile', 'r') as f:
#     makefile = f.read()
# makefile = re.sub(r'GPU=0', 'GPU=1', makefile)
# makefile = re.sub(r'CUDNN=0', 'CUDNN=1', makefile)
# makefile = re.sub(r'CUDNN_HALF=0', 'CUDNN_HALF=1', makefile)
# makefile = re.sub(r'OPENCV=0', 'OPENCV=1', makefile)

# # Remove everything in arch= and remove lines after it if it ends with a \. Replace with GPU_COMPUTE_CAPABILITY
# makefile = re.sub(r'ARCH=(.*\\\n)*.*', f'ARCH={GPU_COMPUTE_CAPABILITY}', makefile)


# # Enable CUDNN_HALF if GPU compute capability >= 75
# if int(GPU_COMPUTE_CAPABILITY) >= 75:
#     makefile = re.sub(r'CUDNN_HALF=0', 'CUDNN_HALF=1', makefile)

# with open('darknet/Makefile', 'w') as f:
#     f.write(makefile)

# # Build darknet
# !(cd darknet && make -j)

In [12]:
# # Upload video to colab
# from google.colab import files
# uploaded = files.upload()
# video_path = list(uploaded.keys())[0]

video_path = "../Videos/Img_1688.mp4"

In [13]:
# if not os.path.exists("yolov4.weights"):
#   !curl https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4.weights -o yolov4.weights

# Load darknet dll
import ctypes
import os
import sys
import cv2
import numpy as np

print(cv2.__version__)
import darknet.darknet as darknet


# Load the network using opencv
net = cv2.dnn.readNet("yolov4.weights", "darknet/cfg/yolov4.cfg")
classes = open("darknet/data/coco.names").read().strip().split("\n")
colors = np.random.uniform(0, 255, 1000000)


# Set the backend to opencv
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
# Set the target to CUDA
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL_FP16)
font=cv2.FONT_HERSHEY_SIMPLEX


4.5.5


In [14]:
cv2.__version__
cv2.cuda.getCudaEnabledDeviceCount()

1

In [15]:
# Detect objects in the video
cap = cv2.VideoCapture(video_path)
# YoloV4 input size
width = 416
height = 416

while True:
    _, img = cap.read()
    if img is None:
        break
    # img = cv2.resize(img, (width, height))
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (width, height), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append((float(confidence)))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    if len(indexes) > 0:
        for i in indexes.flatten():
            x, y, w, h = boxes[i]
            # Convert coordinates to integers to image size
            x = int(x * (img.shape[1] / width))
            w = int(w * (img.shape[1] / width))
            y = int(y * (img.shape[0] / height))
            h = int(h * (img.shape[0] / height))
            
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label + " " + confidence, (x, y + 20), font, 1, color, 2)
    cv2.imshow("Image", img)
    key = cv2.waitKey(1)
    if key == 27:
        break


KeyboardInterrupt: 

In [None]:
# Use YOLOv4 to detect MNIST digits from images

# Structure is train -> images -> 0,1,2,3,4,5,6,7,8,9 -> 0.jpg, 1.jpg, 2.jpg, ...
# Structure is train -> labels -> 0,1,2,3,4,5,6,7,8,9 -> 0.txt, 1.txt, 2.txt, ...
# Same for test

data_path = 'MNIST-ObjectDetection/data/mnist_detection'
train_path = os.path.join(data_path, 'train')
test_path = os.path.join(data_path, 'test')

# Train YOLOv4 on MNIST dataset