# Python yolov3 demo of my implementation

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import cv2
import os
from torch.autograd import Variable
import matplotlib.pyplot as plt

from yolo.Darknet import *
from yolo.utils import *

  from .autonotebook import tqdm as notebook_tqdm


### Configuration variables - feel free to change them

In [2]:
# Some variables
weightsfile = 'yolov3.weights'
classfile = 'coco.names'
cfgfile = 'yolov3.cfg'
sample_img1 = 'dog-cycle-car.png'
input_dir = 'input'
output_directory = 'output'
nms_thesh = 0.5
CUDA = False
batch_size = 1

In [3]:
# Make sure both folders exitst
if not os.path.exists(input_dir):
    os.mkdir(input_dir)

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

### Load the model & weights

Darnket 53 is the base model used in the yolo paper. We load create the model using the configuration file and load the weights that can be directly used for inference.

In [4]:

# Load the model
print("Loading network.....")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
print("Loaded network with success.....")

# Load the weights
print("Loading the classes.....")
classes = load_classes(classfile)
print("Loaded classes with success.....")

# Some safety assertions
input_dimension = int(model.net_info["height"])
assert input_dimension % 32 == 0
assert input_dimension > 32

# If there's a GPU availible, put the model on GPU
if CUDA:
    model.cuda()

# Set the model in evaluation mode
model.eval()


Loading network.....
Loaded network with success.....
Loading the classes.....
Loaded classes with success.....


Darknet(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_0): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_1): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_2): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (3): Sequential(
      (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch

### Load the image and prepare them

The code works as follow. First of all we get all the path for the images. Then we rescale the images and we extract three differents list

- List of resized images
- List of images with base dimensio
- List of origin dimensions for the images

In [5]:
# Get the path of the images
print("Loading the images.....")
images_path = get_image_paths(input_dir)
print("Loaded the images with success.....")

# Preparing the images
print("Rescaling the images.....")
batches = list(map(prep_image, images_path, [input_dimension for x in range(len(images_path))]))
print("Rescaled images with success.....")

# Resized, original image with the dimension list
images_resized = [x[0] for x in batches]
image_origin = [x[1] for x in batches]
image_base_dimensions = [x[2] for x in batches]
image_base_dimensions = torch.FloatTensor(
    image_base_dimensions).repeat(1, 2)

# If cuda is available send the image to cuda
if CUDA:
    image_base_dimensions = image_base_dimensions.cuda()

Loading the images.....
Loaded the images with success.....
Rescaling the images.....
Rescaled images with success.....


### Inference

This functions works in two stes. First of all, feed the model with our images and get the results. The second function write_results apply non-max suppression on the images.

In [8]:
output = torch.zeros(size=(0,8))

for i,batch in enumerate(images_resized):
    # If cuda available put into cuda
    if CUDA:
        batch = batch.cuda()


    # Apply offsets to the result predictions
    # Tranform the predictions as described in the YOLO paper
    # flatten the prediction vector
    # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
    # Put every proposed box as a row.
    # Perform inference using the yolo model (Darknet 53)
    with torch.no_grad():
        prediction = model(batch, CUDA)

    # Get the predictions
    prediction = get_predictions(
        prediction, batch_size, confidence=0.5, num_classes=80, batch_offset=i*batch_size,nms_conf=nms_thesh)

    # Concatenate output
    output = torch.cat((output, prediction))

    if CUDA:
        torch.cuda.synchronize()

try:
    output
except NameError:
    print("No detections were made")
    exit()

### Output the images

The function output_image will save the image with the rectangles written in the images. For this purpose, we need to pass some variables.

In [9]:
output_images(image_base_dimensions,output,input_dimension,images_path,output_directory,images_resized,image_origin,classes)