In [1]:
#  installing darknet
import os
if not os.path.exists('darknet'):
  # pull the darknet repo from github and compile it in the runtime
  !git clone https://github.com/AlexeyAB/darknet
  %cd darknet
  # reconfigure the Makefile file - a configuration file needed for installing darknet in the enviroment
  # sed = stream editor - a linux command to modify info in text files
  !sed -i 's/OPENCV=0/OPENCV=1/' Makefile
  # !!! In case you dont have a GPU, make sure to comment out the below 3 lines !!!
  !sed -i 's/GPU=0/GPU=1/' Makefile
  !sed -i 's/CUDNN=0/CUDNN=1/' Makefile
  !sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile
  # compile the darknet source code
  !make
  # install the torch_snippets package
  %pip install -q torch_snippets
  # download and extract the dataset - remove the zip file to save space
  !wget --quiet https://www.dropbox.com/s/agmzwk95v96ihic/open-images-bus-trucks.tar.xz
  !tar -xf open-images-bus-trucks.tar.xz
  !rm open-images-bus-trucks.tar.xz
  # fetch the pre-trained weights to make a sample prediction
  !wget --quiet https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights

Cloning into 'darknet'...
remote: Enumerating objects: 15909, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 15909 (delta 2), reused 2 (delta 2), pack-reused 15903 (from 2)[K
Receiving objects: 100% (15909/15909), 14.43 MiB | 24.15 MiB/s, done.
Resolving deltas: 100% (10710/10710), done.
/content/darknet
mkdir -p ./obj/
mkdir -p backup
mkdir -p results
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -DCUDNN_HALF -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -DOPENCV -DGPU -DCUDNN -I/usr/local/cudnn/include -DCUDNN_HALF -c ./src/image_opencv.cpp -o obj/image_opencv.o
[01m[K./src/image_opencv.cpp:[m[K In function ‘[01m[Kvoid draw_detections_cv_v3(void**, detection*, int, float, char**, image**, int, int)[m[K’:
  945 |      

In [2]:
# test whether the installation is successful
!./darknet detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights data/person.jpg

 CUDA-version: 12050 (12040)
, cuDNN: 9.2.1, CUDNN_HALF=1, GPU count: 1  
 CUDNN_HALF=1 
 OpenCV version: 4.5.4
 0 : compute_capability = 750, cudnn_half = 1, GPU: Tesla T4 
net.optimized_memory = 0 
mini_batch = 1, batch = 8, time_steps = 1, train = 0 
   layer   filters  size/strd(dil)      input                output
   0 Create CUDA-stream - 0 
 Create cudnn-handle 0 
conv     32       3 x 3/ 1    608 x 608 x   3 ->  608 x 608 x  32 0.639 BF
   1 conv     64       3 x 3/ 2    608 x 608 x  32 ->  304 x 304 x  64 3.407 BF
   2 conv     64       1 x 1/ 1    304 x 304 x  64 ->  304 x 304 x  64 0.757 BF
   3 route  1 		                           ->  304 x 304 x  64 
   4 conv     64       1 x 1/ 1    304 x 304 x  64 ->  304 x 304 x  64 0.757 BF
   5 conv     32       1 x 1/ 1    304 x 304 x  64 ->  304 x 304 x  32 0.379 BF
   6 conv     64       3 x 3/ 1    304 x 304 x  32 ->  304 x 304 x  64 3.407 BF
   7 Shortcut Layer: 4,  wt = 0, wn = 0, outputs: 304 x 304 x  64 0.006 BF
   8 conv  

setting up dataset format

In [3]:
# Write data/obj.names: the file containing the class names, one per line.
# Plain Python is used instead of `%%writefile` because a cell magic must be the
# very first line of its cell (it cannot follow a comment), and anything placed
# after the magic line would be written into the file itself.
with open('data/obj.names', 'w') as names_file:
  names_file.write('bus\ntruck\n')

Writing data/obj.names


In [4]:
# Write data/obj.data: the file describing the dataset parameters - the number
# of classes, the text files listing the train and validation image paths, the
# file containing the object names, and the folder where trained models are saved.
# Plain Python is used instead of `%%writefile` because a cell magic must be the
# very first line of its cell (it cannot follow a comment), and anything placed
# after the magic line would be written into the file itself.
obj_data_lines = [
  'classes = 2',
  'train = data/train.txt',
  'valid = data/val.txt',
  'names = data/obj.names',
  'backup = backup/',
]
with open('data/obj.data', 'w') as data_file:
  data_file.write('\n'.join(obj_data_lines) + '\n')

Writing data/obj.data


In [5]:
# move all images and ground-truth text files to the data/obj folder
!mkdir -p data/obj
!cp -r open-images-bus-trucks/images/* data/obj/
!cp -r open-images-bus-trucks/yolo_labels/all/{train,val}.txt data/
!cp -r open-images-bus-trucks/yolo_labels/all/labels/*.txt data/obj/

configuring the architecture

In [7]:
# create a copy of existing configuration and modify it in place
!cp cfg/yolov4-tiny-custom.cfg cfg/yolov4-tiny-bus-trucks.cfg
# max_batches to 4000 (since the dataset is small enough)
!sed -i 's/max_batches = 500200/max_batches=4000/' cfg/yolov4-tiny-bus-trucks.cfg
# number of sub-batches per batch
!sed -i 's/subdivisions=1/subdivisions=16/' cfg/yolov4-tiny-bus-trucks.cfg
# number of batches after which learning rate is decayed
!sed -i 's/steps=400000,450000/steps=3200,3600/' cfg/yolov4-tiny-bus-trucks.cfg
# number of classes is 2 as opposed to 80 (which is the number of COCO classes)
!sed -i 's/classes=80/classes=2/g' cfg/yolov4-tiny-bus-trucks.cfg
# in the classification and regression heads, change number of output convolution filters
# from 255 -> 21 and 57 -> 33, since we have fewer classes we don't need as many filters
!sed -i 's/filters=255/filters=21/g' cfg/yolov4-tiny-bus-trucks.cfg
!sed -i 's/filters=57/filters=33/g' cfg/yolov4-tiny-bus-trucks.cfg

training and testing the model

In [8]:
# get the weights from the following github lcations and store them in build/darknet/x64
!wget --quiet https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29
!cp yolov4-tiny.conv.29 build/darknet/x64/

In [9]:
# train the model
# -dont_show flag skips showing intermediate prediction images
# -mapLastAt will periodically print the mean average precision on the val data
!./darknet detector train data/obj.data cfg/yolov4-tiny-bus-trucks.cfg yolov4-tiny.conv.29 -dont_show -mapLastAt

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 total_bbox = 311761, rewritten_bbox = 0.267192 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 30 Avg (IOU: 0.819812), count: 3, class_loss = 0.462663, iou_loss = 0.315639, total_loss = 0.778302 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 37 Avg (IOU: 0.966851), count: 1, class_loss = 0.228492, iou_loss = 0.440577, total_loss = 0.669070 
 total_bbox = 311765, rewritten_bbox = 0.267188 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 30 Avg (IOU: 0.844684), count: 4, class_loss = 0.414338, iou_loss = 0.703193, total_loss = 1.117531 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 37 Avg (IOU: 0.000000), count: 1, class_loss = 0.076301, iou_loss = 0.000000, total_loss = 0.076301 
 total_bbox = 311769, rewritten_bbox = 0.267185 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 30 Avg (IOU: 0.749534), count: 4, class_los

In [14]:
from torch_snippets import Glob, stem, show, read
# upload your own images to a folder
image_paths = [str(f) for f in Glob('images-of-trucks-and-busses')]
for f in image_paths:
  !./darknet detector test data/obj.data cfg/yolov4-tiny-bus-trucks.cfg\
  backup/yolov4-tiny-bus-trucks_4000.weights {f}
  !mv predictions.jpg {stem(f)}_pred.jpg

for i in Glob('*_pred.jpg', silent=True):
  show(read(i, 1), sz=20)