An example of using YoloV3 to detect birds in images

In [None]:
import torch.utils.data
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.models as tvm
import numpy as np
import cv2
import os
import pandas as pd
from sklearn.metrics import confusion_matrix
import copy
import matplotlib.pyplot as plt
from PIL import Image
import sklearn.metrics as skms

YOLOv3 is an object detection model developed by Joseph Redmon and Ali Farhadi.

@article{yolov3, title={YOLOv3: An Incremental Improvement}, author={Redmon, Joseph and Farhadi, Ali}, journal = {arXiv}, year={2018} } 

We use it to detect birds in an image and then identify the species of each detected bird with a ResNet50 classifier.

In [None]:

def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here, the network returned by
            ``cv2.dnn.readNet``).

    Returns:
        List of layer names, one per unconnected output layer.
    """
    layer_names = net.getLayerNames()
    # The indices are 1-based and may arrive either flat ([a, b]) or nested
    # one-per-row ([[a], [b]]); flattening handles both shapes without
    # relying on a catch-all exception handler.
    out_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in out_indices]


def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw a labelled detection box on ``img``.

    The caption is looked up in the module-level ``classes`` list and the
    colour in the module-level ``COLORS`` table, both indexed by
    ``class_id``. ``confidence`` is accepted but not used.

    Args:
        img: Image to draw on.
        class_id: Index into ``classes`` / ``COLORS``.
        confidence: Detection confidence (unused).
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    caption = str(classes[class_id])
    box_color = COLORS[class_id]

    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)

    # Caption anchor is offset 10 px up and to the left of the box corner.
    caption_origin = (x - 10, y - 10)
    cv2.putText(
        img, caption, caption_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2
    )


def draw_final_prediction(img, label, x, y, x_plus_w, y_plus_h):
    """Draw a black box with a text label on ``img``.

    Args:
        img: Image to draw on.
        label: Text written next to the box.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    black = (0, 0, 0)
    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)

    cv2.rectangle(img, top_left, bottom_right, black, 2)

    # Label anchor is offset 10 px up and to the left of the box corner.
    cv2.putText(
        img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, black, 2
    )

In [None]:
# Path of the picture to analyse. NOTE: the detection cell later rebinds
# `image` to the loaded pixel array, so this name stops being a path there.
image = 'testbrd12.jpg'
# Text file read line by line to build the list of YOLO class names.
classes_path = 'yolov3.txt'
# Weights and configuration files handed to cv2.dnn.readNet.
weights = 'yolov3.weights'
config = 'yolov3.cfg'

In [None]:
# Species labels for the classifier. The list is indexed with the argmax of
# the classifier output, so the order presumably has to match the label order
# used when the model was trained -- TODO confirm against the training notebook.
class_names = [
 '01.Mourning Dove',
 '02.Common Grackle',
 '03.Cardinal',
 '04.American Goldfinch',
 '05.Blue Jay',
 '06.White Breasted Nuthatch',
 '07.House Sparrow',
 '08.Red Bellied Woodpecker',
 '09.Downy Woodpecker',
 '10.Red Winged Black Bird']

In [None]:
# Run YOLOv3 on the input picture, keep the boxes that survive non-maximum
# suppression, draw them on `image`, and save the result to
# "object-detection.jpg". Produces `untouched_image`, `boxes`, `class_ids`,
# `confidences` and `newi` (indices of the kept boxes).

# `image` starts out as a file path and is rebound to the pixel array below.
# Remember the path so that this cell can be executed more than once.
if isinstance(image, str):
    image_path = image
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

untouched_image = image.copy()  # clean copy without any drawings
Height, Width = image.shape[:2]
scale = 0.00392  # approximately 1/255

with open(classes_path, 'r') as f:
    classes = [line.strip() for line in f]

# One random colour per class, used by draw_prediction.
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

net = cv2.dnn.readNet(weights, config)

blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(get_output_layers(net))

class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

for out in outs:
    for detection in out:
        # Entries 0-3 are centre x, centre y, width, height as fractions of
        # the image size; entries from index 5 on are the per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > conf_threshold:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

newi = []  # indices of the boxes that are kept after clipping
# The indices may be flat or nested one-per-row; flattening handles both.
for i in np.asarray(indices).flatten():
    i = int(i)
    x, y, w, h = boxes[i]

    # Clip the box to the image so that it never extends past any edge.
    left = max(x, 0)
    top = max(y, 0)
    right = min(x + w, Width)
    bottom = min(y + h, Height)
    if right <= left or bottom <= top:
        continue  # nothing of the box lies inside the image

    # Store the clipped box so later cells crop with in-bounds coordinates.
    boxes[i] = [left, top, right - left, bottom - top]
    draw_prediction(image, class_ids[i], confidences[i],
                    round(left), round(top), round(right), round(bottom))
    newi.append(i)

cv2.imwrite("object-detection.jpg", image)

In [None]:
# Corner coordinates (left, top, right, bottom), rounded to whole pixels, of
# every detection kept in `newi`. Later cells use them to crop the picture.
newimages = [
    (round(x), round(y), round(x + w), round(y + h))
    for x, y, w, h in (boxes[i] for i in newi)
]

In [None]:
# Build a ResNet50 whose final fully connected layer has one output per bird
# species, then load the fine-tuned weights.
md = tvm.resnet50()

IN_FEATURES = md.fc.in_features
OUTPUT_DIM = 10  # number of entries in class_names

fc = nn.Linear(IN_FEATURES, OUTPUT_DIM)
md.fc = fc

# Load the model trained in NewBirds.ipynb. map_location="cpu" lets a
# checkpoint saved on a GPU load on a machine without one; inference in this
# notebook runs on the CPU.
md.load_state_dict(torch.load("birdmd1.pth", map_location="cpu"))
md.eval()  # switch to evaluation mode

In [None]:
# The transformation used for testing differs from the one used for training;
# the training one needs to be more rigorous.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=255),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [None]:
# Classify every detected region with the ResNet50 model, draw the predicted
# species on a clean copy of the picture and save it to "finalimg.jpg".
allimagespic = []  # RGB crops, kept so a later cell can display them
nt = copy.deepcopy(untouched_image)

for img in newimages:
    left, top, right, bottom = img

    # Crop from the clean copy: `image` already has the YOLO boxes and labels
    # drawn on it, and those drawings would leak into the classifier input.
    newimg = untouched_image[top:bottom, left:right]
    if newimg.size == 0:
        continue  # degenerate box, nothing to classify

    # OpenCV stores pixels as BGR while matplotlib and PIL expect RGB.
    newimg1 = cv2.cvtColor(newimg, cv2.COLOR_BGR2RGB)
    allimagespic.append(newimg1)

    # Convert in memory instead of writing a temporary JPEG and reading it
    # back, which was lossy and touched the disk on every iteration.
    nimg = Image.fromarray(newimg1)
    tr = test_transform(nimg).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():  # inference only, no gradients needed
        logits = md(tr)
    pred_probab = torch.softmax(logits, dim=1)
    finalpred = class_names[int(pred_probab.argmax(1))]

    draw_final_prediction(nt, finalpred, left, top, right, bottom)
    print(f"Predicted class: {finalpred}")

cv2.imwrite("finalimg.jpg", nt)

In [None]:
# Show all cropped detections side by side in a single row.
n = len(allimagespic)
fig = plt.figure(figsize=(8, 8))  # width, height in inches

for i, img in enumerate(allimagespic):
    # Subplot positions are 1-based.
    fig.add_subplot(1, n, i + 1)
    plt.imshow(img)
plt.show()

In [None]:
# `image` was loaded with OpenCV and is in BGR order; matplotlib expects RGB,
# so convert first or the colours are displayed swapped.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))