In [54]:
import random
import glob
import cv2
import PIL
import io
import html
import time
import numpy as np
import matplotlib.pyplot as plt

from google.colab import drive
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode

%matplotlib inline

In [2]:
# Show the GPU, driver and CUDA version assigned to this runtime
!nvidia-smi

Thu Feb 16 15:51:18 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P0    29W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Mount google drive

In [None]:
drive.mount('/content/gdrive')
!ln -s /content/gdrive/My\ Drive/ /mydrive
!ls /mydrive

# Clone and build Darknet

In [4]:
# Clone the AlexeyAB/darknet repository into ./darknet
!git clone https://github.com/AlexeyAB/darknet.git

Cloning into 'darknet'...
remote: Enumerating objects: 15502, done.[K
remote: Total 15502 (delta 0), reused 0 (delta 0), pack-reused 15502[K
Receiving objects: 100% (15502/15502), 14.15 MiB | 22.39 MiB/s, done.
Resolving deltas: 100% (10404/10404), done.


In [None]:
# Switch OPENCV, GPU, CUDNN and LIBSO from 0 to 1 in the Makefile.
# The %cd below makes every later relative path (cfg/, data/, ./darknet)
# resolve from inside the darknet directory.
%cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
# NOTE(review): LIBSO=1 is presumably what builds the library loaded by the
# `from darknet import *` cell further down - confirm against the Makefile.
!sed -i 's/LIBSO=0/LIBSO=1/' Makefile

# Compile darknet
!make

# Prepare dataset in Cloud

In [6]:
# List the custom training assets stored in Google Drive
!ls /mydrive/yolov3

backup		    obj.data			  yolov3_custom.cfg
generate_train.py   obj.names			  yolov3_testing.cfg
Huli_and_Fence.zip  train_YOLOv3_colab_GPU.ipynb


In [7]:
# Copy the dataset .zip from Drive to the parent of the darknet directory
!cp /mydrive/yolov3/Huli_and_Fence.zip ../

In [None]:
# Unzip the archive into the darknet data/obj directory
!unzip ../Huli_and_Fence.zip -d data/obj

# Get & edit YOLOv3 Configuration file
* Make a copy & rename the original yolov3 config file;
* Update Config file -> classes & filters:
  1. `max_batches` = num_classes * 2000
  2. `steps` = 0.8 * `max_batches`, 0.9 * `max_batches`
  3. `classes` = 3 in three YOLO layers
  4. `filters` = (`classes` + 4 + 1) * 3 in three conv layers right before YOLO layers

In [None]:
# Make a copy of original yolov3.cfg file and rename it for training
!cp cfg/yolov3.cfg /mydrive/yolov3/yolov3_custom.cfg

In [None]:
# Make a copy of original yolov3.cfg file and rename it for testing
!cp cfg/yolov3.cfg /mydrive/yolov3/yolov3_testing.cfg

In [9]:
# Copy the edited training and testing .cfg files from Drive into ./cfg
!cp /mydrive/yolov3/yolov3_custom.cfg ./cfg
!cp /mydrive/yolov3/yolov3_testing.cfg ./cfg

# Extract and prepare dataset for training process
* Create a new directory (`data/obj`) inside `darknet/data/` and unpack the dataset into it
* Create `obj.names` (class names) and `obj.data` (dataset configuration)
* Create the `train.txt` and `test.txt` image lists

In [None]:
# Create .names and .data files required for darknet framework
!echo -e 'Huli\nDog\nFence' >> data/obj.names
!echo -e 'classes = 3\ntrain = data/train.txt\nvalid = data/test.txt\nnames = data/obj.names\nbackup = /mydrive/yolov3/backup' >> data/obj.data

In [10]:
# Copy the obj.names / obj.data files kept in Drive into ./data
# (replaces any files of the same name already there)
!cp /mydrive/yolov3/obj.names ./data
!cp /mydrive/yolov3/obj.data  ./data

In [None]:
# Get full path of all images
images_list = glob.glob("data/obj/Huli_and_Fence/*[jpg|png|jpeg]")
print(images_list)

In [None]:
# Number of image paths collected
len(images_list)

In [13]:
# Split dataset into train set and test set (15% held out for testing)
split_size = int(len(images_list)*0.15)

# For reproducibility
random.seed(42)
# Shuffle a copy to remove any possible ordering patterns; images_list itself
# is left untouched so re-running this cell reproduces the same split.
shuffled_images = list(images_list)
random.shuffle(shuffled_images)

# Slice with an explicit index: when split_size is 0, `[:-0]` would yield an
# empty training set and `[-0:]` would put every image in the test set.
split_index = len(shuffled_images) - split_size
train_data = shuffled_images[:split_index]
test_data = shuffled_images[split_index:]

In [None]:
# Sizes of the training and test sets
len(train_data), len(test_data)

In [15]:
# Create train.txt file containing paths of training images (one per line).
# The context manager closes the file even if the write fails.
with open("data/train.txt", "w") as file:
    file.write("\n".join(train_data))

In [16]:
# Create test.txt file containing paths of testing images (one per line).
# The context manager closes the file even if the write fails.
with open("data/test.txt", "w") as file:
    file.write("\n".join(test_data))

# Download YOLOv3 pretrained weights

In [None]:
# Get pretrained convolutional layers weights
!wget https://pjreddie.com/media/files/darknet53.conv.74

--2023-02-14 12:12:29--  https://pjreddie.com/media/files/darknet53.conv.74
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 162482580 (155M) [application/octet-stream]
Saving to: ‘darknet53.conv.74’


2023-02-14 12:12:39 (15.8 MB/s) - ‘darknet53.conv.74’ saved [162482580/162482580]



# Train on custom data

In [None]:
# Train YOLOv3 on the custom data, starting from the pretrained
# darknet53.conv.74 weights downloaded above
!./darknet detector train data/obj.data cfg/yolov3_custom.cfg darknet53.conv.74 -dont_show

In [None]:
# Continue training from where it left off, using the latest weights
# saved in the Drive backup folder
!./darknet detector train data/obj.data cfg/yolov3_custom.cfg /mydrive/yolov3/backup/yolov3_custom_last.weights -dont_show

# Evaluate trained model on test data

Calculate the mean Average Precision (mAP) of the trained model using the weights saved at different stages of training.

In [None]:
# Calculate the mean Average Precision (mAP) of the trained model
# for the yolov3_custom_6000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_6000.weights

In [None]:
# mAP for the yolov3_custom_5000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_5000.weights

In [None]:
# mAP for the yolov3_custom_4000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_4000.weights

In [None]:
# mAP for the yolov3_custom_3000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_3000.weights

In [None]:
# mAP for the yolov3_custom_2000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_2000.weights

In [None]:
# mAP for the yolov3_custom_1000.weights checkpoint
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_1000.weights

In [None]:
# mAP for the yolov3_custom_final.weights file
!./darknet detector map data/obj.data cfg/yolov3_testing.cfg /mydrive/yolov3/backup/yolov3_custom_final.weights

# Test on image sample

In [53]:
# Define helper function to show results
def imShow(path):
  """
  Display the image stored at `path` inline, upscaled 3x.

  Params:
          path: Path of an image file readable by OpenCV
  Raises:
          FileNotFoundError: If OpenCV cannot read an image from `path`
  """
  image = cv2.imread(path)
  # cv2.imread reports failure by returning None instead of raising
  if image is None:
    raise FileNotFoundError("Could not read image: {}".format(path))

  height, width = image.shape[:2]
  resized_image = cv2.resize(image, (3*width, 3*height), interpolation=cv2.INTER_CUBIC)

  fig = plt.gcf()
  fig.set_size_inches(18, 10)
  plt.axis("off")
  # Convert from OpenCV's BGR channel order to RGB before handing to matplotlib
  plt.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
  plt.show()

In [None]:
# Detect objects in a sample image (confidence threshold 0.3) and display
# the predictions.jpg file afterwards
!./darknet detector test data/obj.data cfg/yolov3_custom.cfg /mydrive/yolov3/backup/yolov3_custom_last.weights /mydrive/yolov3/IMG_7033.jpg -thresh 0.3
imShow('predictions.jpg')

# YOLOv3 on Webcam Videos

In [49]:
# Import the darknet functions used to perform object detections.
# Names are imported explicitly (instead of `import *`) so it is clear where
# each helper comes from and nothing else leaks into the notebook namespace.
from darknet import (
    bbox2points,
    copy_image_from_bytes,
    detect_image,
    free_image,
    load_network,
    make_image,
    network_height,
    network_width,
)

# Load in custom trained YOLOv3 architecture network
network, class_names, class_colors = load_network("cfg/yolov3_custom.cfg", "data/obj.data", "/mydrive/yolov3/backup/yolov3_custom_last.weights")
# Input size of the network; frames are resized to this before detection
width = network_width(network)
height = network_height(network)

# darknet helper function to run detection on image
def darknet_helper(img, width, height):
  """
  Run the globally loaded darknet `network` on a single OpenCV image.

  Params:
          img: OpenCV BGR image
          width: Width the image is resized to before detection
          height: Height the image is resized to before detection
  Returns:
          detections: Result of detect_image for the resized image
          width_ratio: Original image width divided by `width`
          height_ratio: Original image height divided by `height`
  """
  img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img_resized = cv2.resize(img_rgb, (width, height),
                              interpolation=cv2.INTER_LINEAR)

  # Get image ratios to convert bounding boxes to proper size
  img_height, img_width, _ = img.shape
  width_ratio = img_width/width
  height_ratio = img_height/height

  # Run model on darknet style image to get detections
  darknet_image = make_image(width, height, 3)
  try:
    copy_image_from_bytes(darknet_image, img_resized.tobytes())
    detections = detect_image(network, class_names, darknet_image)
  finally:
    # Release the darknet image even when detection raises
    free_image(darknet_image)

  return detections, width_ratio, height_ratio

In [50]:
# Define function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Turn a webcam frame received from JavaScript into an OpenCV image.

  Params:
          js_reply: Comma-separated string whose second field is the
                    base64-encoded image
  Returns:
          img: OpenCV BGR image
  """
  # The second comma-separated field carries the base64 payload
  encoded_frame = js_reply.split(',')[1]
  # Decode to raw bytes and view them as a uint8 buffer
  raw_buffer = np.frombuffer(b64decode(encoded_frame), dtype=np.uint8)
  # Decode the buffer into an OpenCV BGR image
  return cv2.imdecode(raw_buffer, flags=1)

# Define function to convert OpenCV Rectangle bounding box image into base64 byte string to be overlayed on video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA overlay as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
  Returns:
        bytes: Base64 image byte string, prefixed with 'data:image/png;base64,'
  """
  # `import PIL` alone does not load the PIL.Image submodule; import it
  # explicitly so this function does not depend on another library having
  # loaded it first. (Done locally because the notebook already binds the
  # name `Image` to IPython.display.Image.)
  import PIL.Image

  # Convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # Format bbox into png for return
  bbox_PIL.save(iobuf, format='png')
  # Format return string
  bbox_bytes = 'data:image/png;base64,{}'.format(b64encode(iobuf.getvalue()).decode('utf-8'))

  return bbox_bytes

In [51]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Display the JavaScript that drives the live webcam stream.

  The script defines `stream_frame(label, imgData)`, which on first call
  builds the preview DOM (status label, <video>, overlay <img>, stop
  instruction) and requests the camera. Each call then:
    * writes `label` into the status element when it is non-empty,
    * positions the overlay <img> over the video and sets its source to
      `imgData` when it is non-empty,
    * waits for the next animation frame, draws the video onto a 640x480
      canvas and resolves with the frame as a JPEG data URL (quality 0.8).
  Clicking the video, the overlay or the instruction text sets `shutdown`;
  the next `stream_frame` call then removes the DOM and returns ''.
  Otherwise the call returns an object with 'create', 'show' and 'capture'
  timings (ms) and the frame under 'img'.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);
      
      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Send the script to the notebook front end
  display(js)
  
def video_frame(label, bbox):
  """
  Send the status label and overlay to the browser and fetch the next frame.

  Params:
          label: Text written into the status element ('' leaves it unchanged)
          bbox: Overlay image as a data URL ('' leaves the overlay unchanged)
  Returns:
          data: Whatever the JavaScript `stream_frame` call evaluates to
  """
  import json

  # json.dumps yields a correctly quoted and escaped JavaScript string literal,
  # so quotes, backslashes or newlines in the arguments cannot break (or
  # inject code into) the generated call.
  data = eval_js('stream_frame({}, {})'.format(json.dumps(label), json.dumps(bbox)))
  return data

In [52]:
# Start streaming video from webcam
video_stream()
# Label for video
label_html = 'Capturing...'
# Initialize bounding box overlay to empty
bbox = ''
while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the stream was shut down. An empty "img" means the
    # stop click landed while a frame was being captured; decoding it would
    # raise an IndexError, so treat it as a stop request as well.
    if not js_reply or not js_reply["img"]:
        break

    # Convert JS response to OpenCV Image
    frame = js_to_image(js_reply["img"])

    # Create transparent overlay for bounding box, sized like the captured frame
    frame_height, frame_width = frame.shape[:2]
    bbox_array = np.zeros([frame_height, frame_width, 4], dtype=np.uint8)

    # Call our darknet helper on video frame
    detections, width_ratio, height_ratio = darknet_helper(frame, width, height)

    # Loop through detections and draw them on transparent overlay image.
    # The loop variable is named `box` so it does not shadow the `bbox`
    # overlay string that is sent back to the browser.
    for label, confidence, box in detections:
        left, top, right, bottom = bbox2points(box)
        # Scale from network input coordinates back to frame coordinates
        left, right = int(left * width_ratio), int(right * width_ratio)
        top, bottom = int(top * height_ratio), int(bottom * height_ratio)
        bbox_array = cv2.rectangle(bbox_array, (left, top), (right, bottom), class_colors[label], 2)
        bbox_array = cv2.putText(bbox_array, "{} [{:.2f}]".format(label, float(confidence)),
                          (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                          class_colors[label], 2)

    # Make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0).astype(np.uint8) * 255
    # Convert overlay of bbox into bytes so the next frame gets the new overlay
    bbox = bbox_to_bytes(bbox_array)

<IPython.core.display.Javascript object>