In [4]:
from ultralytics import YOLO
import numpy as np
import cv2
import pandas as pd

### The Model

- The YOLO model was trained in a separate environment where CUDA is available.
- The training images are slices of frames from drone.mov, each approximately 640 x 640 pixels.
- The training used polygonal annotations to get better separation performance.
- We trained for 100 epochs; the resulting mAP is about 0.7.


In [5]:
# Load the trained weights from "model_last.pt" (path is relative to the working directory).
model = YOLO("model_last.pt")

### The test file

- We use a trimmed clip of drone.mov, since running the whole video takes a very long time just for testing.

In [10]:
# Trimmed clip of drone.mov used as the test input (path is relative to the working directory).
movie_path = "test_data/drone_trim.mov"

### Run

In [11]:
# iterate the video frames using generator
def video_frames(video_file):
    """Yield the frames of a video one at a time.

    Args:
        video_file: Path (or any other source accepted by ``cv2.VideoCapture``)
            of the video to read.

    Yields:
        Each frame exactly as returned by ``cap.read()``, in order, until a
        read fails (normally the end of the video).

    Notes:
        If the capture cannot be opened, nothing is yielded and no error is
        raised. The capture is released in every case: when the video is
        exhausted, when the consumer stops iterating early (for example by
        breaking out of its loop), and when an exception propagates.
    """
    cap = cv2.VideoCapture(video_file)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    finally:
        # Runs on normal exhaustion and also when the generator is closed
        # while suspended at the yield, so the capture handle never leaks.
        cap.release()

In [12]:
# Run the detector on every frame of the clip, draw the detections, and record
# both the annotated frames and the per-frame detection counts.
resimages   = []    # annotated frames, one per processed video frame
rcounts     = []    # number of detections in each processed frame

n_rows, n_cols  = 2, 3              # every frame is cut into a 2 x 3 grid of tiles
mark_colour     = (50, 0, 255)      # colour shared by the circles and the count label
frames          = video_frames(movie_path)

try:
    # use the generator function to iterate over the frames
    for frame in frames:
        # Convert to grayscale, then back to a three-channel image.
        grayf       = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grayf3d     = cv2.cvtColor(grayf, cv2.COLOR_GRAY2BGR)
        rcount      = 0

        # Tile size. With integer division, leftover rows/columns at the
        # bottom/right edge (if any) are not part of any tile.
        frame_h, frame_w = grayf3d.shape[:2]
        ph          = frame_h // n_rows
        pw          = frame_w // n_cols

        # Slice the image into 6 parts, in row-major order.
        imgs = [
            grayf3d[ph*i:ph*(i+1), pw*j:pw*(j+1)]
            for i in range(n_rows)
            for j in range(n_cols)
        ]

        for img in imgs:
            results     = model.predict(source=img, save=False, imgsz=640)
            box         = results[0].boxes
            rcount      += len(box)

            # Circle each detection, using coordinates local to the tile.
            for cbox in box.xywh:
                # Distinct names so the frame dimensions are not overwritten.
                bx, by, bw, bh = cbox
                ctr     = (int(bx), int(by))
                rad     = int((bw + bh) / 4)    # mean of half-width and half-height
                cv2.circle(img, ctr, rad, mark_colour, 2)

        # Stitch the annotated tiles back together: rows first, then stack the rows.
        tile_rows   = [cv2.hconcat(imgs[r*n_cols:(r+1)*n_cols]) for r in range(n_rows)]
        img_concat  = cv2.vconcat(tile_rows)

        count_text = f"count: {rcount}"
        cv2.putText(img_concat, count_text, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, mark_colour, 4, cv2.LINE_8)

        cv2.imshow("frame", img_concat) # show the frame
        if cv2.waitKey(1) == 27: # if the user presses ESC, exit the loop
            break

        # NOTE: every annotated frame is kept in memory until the video is written.
        resimages.append(img_concat)
        rcounts.append(rcount)
finally:
    # Always stop reading the video and close the preview window, including
    # when the loop is left early or an error is raised part-way through.
    frames.close()
    cv2.destroyAllWindows()


0: 544x640 55 pas, 118.6ms
Speed: 0.0ms preprocess, 118.6ms inference, 0.0ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 37 pas, 2 pos, 30.7ms
Speed: 0.0ms preprocess, 30.7ms inference, 3.0ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 52 pas, 4 pos, 31.1ms
Speed: 0.0ms preprocess, 31.1ms inference, 2.0ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 32 pas, 2 pos, 23.9ms
Speed: 4.0ms preprocess, 23.9ms inference, 0.0ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 21 pas, 23.0ms
Speed: 3.0ms preprocess, 23.0ms inference, 0.0ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 23 pas, 22.8ms
Speed: 2.0ms preprocess, 22.8ms inference, 5.7ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 55 pas, 25.1ms
Speed: 4.7ms preprocess, 25.1ms inference, 2.7ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 37 pas, 3 pos, 24.7ms
Speed: 6.0ms preprocess, 24.7ms inference, 2.0ms postprocess per image at s

### Generate video as result

- The resulting video is saved as "result.avi".

In [13]:
# Encode the annotated frames in `resimages` into "result.avi" (MJPG codec).
if not resimages:
    # Without this guard the cell would fail with an unexplained IndexError.
    raise RuntimeError("No annotated frames to write; run the detection cell first.")

img = resimages[0]
height, width, _ = img.shape
print(height, width)

# VideoWriter expects the frame size as (width, height); 10 is the frame rate of the output.
video = cv2.VideoWriter("result.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10, (width, height))

try:
    if not video.isOpened():
        raise RuntimeError('Could not open "result.avi" for writing.')

    # Loop over the images and write them to the video
    for image in resimages:
        video.write(image)
finally:
    # Release the video writer even if writing fails part-way through.
    video.release()

2160 3840


### Simple Logic to compute total tree counts

The algorithm:
1. The count is executed for each frame. 
2. For every new frame, if its count is larger than the highest count seen so far, add the difference to the total count.

This algorithm does not take into account whether the drone returns to a previously seen view. For better precision, we need further feature engineering, more model training, and the drone's trajectory and camera orientation.

In [14]:
# Persist the per-frame detection counts so they can be analysed later.
df = pd.DataFrame(data={"tree_count": rcounts})
df.to_csv(path_or_buf="result.csv", index=False)

In [15]:
# get total count
# Per-frame counts are never negative, so starting both values at zero gives
# the same result as seeding them with the first frame, and it also copes with
# an empty `rcounts` (the total is then 0 instead of an IndexError).
total_count     = 0
prev_count      = 0     # highest per-frame count seen so far

for count in rcounts:
    # prev_count only advances when a new high is reached, so the total grows
    # by the amount each new high exceeds the previous one.
    if count > prev_count:
        total_count += count - prev_count
        prev_count = count

print(f"Total tree count is : {total_count}")

Total tree count is : 265
