In [2]:
import torch
import os
import matplotlib.pyplot as plt
import cv2
%matplotlib inline

In [3]:
model = torch.hub.load('../yolov5',
                       'custom', path='../yolov5/runs/train/exp/weights/best.pt',
                       source='local', device='cuda:0')

YOLOv5 🚀 v7.0-253-g63555c8 Python-3.9.18 torch-2.1.2 CUDA:0 (NVIDIA GeForce GTX 1080 Ti, 11177MiB)

Fusing layers... 
Model summary: 212 layers, 20881221 parameters, 0 gradients, 47.9 GFLOPs
Adding AutoShape... 


In [4]:
color_dict = {
    0: (255, 0, 0),    # Red
    1: (0, 255, 0),    # Green
    2: (0, 0, 255),    # Blue
    3: (200, 200, 0),  # Dark Yellow
    4: (0, 255, 255),  # Cyan
    5: (255, 0, 255),  # Magenta
    6: (128, 0, 128),  # Purple
    7: (255, 165, 0)   # Orange
}

# Video
vid = 'test.mp4'

In [5]:
def iou(box1, box2):
    xmin1, ymin1, xmax1, ymax1 = box1
    xmin2, ymin2, xmax2, ymax2 = box2

    inter_xmin = max(xmin1, xmin2)
    inter_ymin = max(ymin1, ymin2)
    inter_xmax = min(xmax1, xmax2)
    inter_ymax = min(ymax1, ymax2)

    if inter_xmin < inter_xmax and inter_ymin < inter_ymax:
        inter_area = (inter_xmax - inter_xmin) * (inter_ymax - inter_ymin)
    else:
        inter_area = 0

    # union
    box1_area = (xmax1 - xmin1) * (ymax1 - ymin1)
    box2_area = (xmax2 - xmin2) * (ymax2 - ymin2)
    union_area = box1_area + box2_area - inter_area

    if union_area == 0:
        return 0

    iou = inter_area / union_area
    return iou

In [11]:
# define a video capture object 
cap = cv2.VideoCapture(vid) 

fps = int(cap.get(cv2.CAP_PROP_FPS))
print('frames per second =',fps)

# frame video and height
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print('Frame width =', frame_width, ', Frame height =', frame_height)

# defining codec and videowriter oject
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Be sure to use lower case
out = cv2.VideoWriter('output.mp4', fourcc, fps, (frame_width, frame_height))


frames per second = 12
Frame width = 1280 , Frame height = 960


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [12]:
# classes object counts
classnames = ['pickup' , 'suv' , 'mini_car' , 'sedan', 'van', 'htv' , 'bike', 'mini']
counts = {i: 0 for i in classnames}  # 0-7

objects = []

obj_id = 0
obj_id_set = set()


# max frames to keep an undetected object
MAX_FRAMES = 2

# Define line for counting (start and end coordinates)
line_start = (420, 400)
line_end = (1260, 400)

while True:
    ret, frame = cap.read()

    # if no frame we break
    if not ret:
        break

    results = model(frame, size=640)
    df = results.pandas().xyxy[0]

    # List to store indices of detected objects
    detected_indices = []

    for ind in df.index:
        xmin = int(df['xmin'][ind])
        ymin = int(df['ymin'][ind])
        xmax = int(df['xmax'][ind])
        ymax = int(df['ymax'][ind])
        xc = int((xmin+xmax)/2)
        yc = int((ymin+ymax)/2)

        confidence = df['confidence'][ind]
        classNo = int(df['class'][ind])
        name = df['name'][ind]

        # ious with all the objects
        iou_values = [iou([xmin, ymin, xmax, ymax], obj['bbox']) for obj in objects]

        # checking if the object was previously there
        if iou_values and max(iou_values) > 0.1:
            max_iou_index = iou_values.index(max(iou_values))
            objects[max_iou_index]['bbox'] = [xmin, ymin, xmax, ymax]
            objects[max_iou_index]['name'] = name 
            objects[max_iou_index]['confidence'] = confidence
            objects[max_iou_index]['last_seen'] = 0  # Reset 'last seen' counter
            objects[max_iou_index]['class'] = classNo
            objects[max_iou_index]['track'].append((xc,yc))
            detected_indices.append(max_iou_index)
        else:
            # if the object wasnt there, its a new object
            objects.append({'id': obj_id, 'class': classNo, 'bbox': [xmin, ymin, xmax, ymax], 'name': name, 'confidence': confidence, 'last_seen': 0,  'track': []})
            obj_id += 1


        # # bounding box
        # cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color_dict[classNo], 2)

        # # text size and width
        # (text_width, text_height) = cv2.getTextSize(f'{name} {confidence:.2f}', cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0]

        # # putting background
        # cv2.rectangle(frame, (xmin-2,ymin), (xmin+text_width-2, ymin-text_height-4), color_dict[classNo], cv2.FILLED)

        # # object name
        # cv2.putText(frame, f'{name} {confidence:.2f}', (xmin, ymin-2), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,255,255), 2)
    
    # increment 'last seen' counter for undetected objects and remove old objects
    for i, obj in enumerate(objects):
        if i not in detected_indices:
            obj['last_seen'] += 1
        if obj['last_seen'] > MAX_FRAMES:
            objects.remove(obj)

    for obj in objects:
        
        xmin,ymin,xmax,ymax = obj['bbox']

        xc = int((xmin+xmax)/2)
        yc = int((ymin+ymax)/2)

        classNo = obj['class']
        name = obj['name']
        confidence = obj['confidence']
        id = obj['id']

        if yc > line_start[1] and id not in obj_id_set:
            counts[name] += 1
            obj_id_set.add(id)
            print(f'{name} with id {id} crossed the line')

        if id not in obj_id_set:
            # bounding box
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color_dict[classNo], 2)

            # text size and width
            (text_width, text_height) = cv2.getTextSize(f'{name} {confidence:.2f} {id}', cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0]

            # putting background
            cv2.rectangle(frame, (xmin-2,ymin), (xmin+text_width-2, ymin-text_height-4), color_dict[classNo], cv2.FILLED)

            # object name
            cv2.putText(frame, f'{name} {confidence:.2f} {id}', (xmin, ymin-2), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,255,255), 2)

            if len(obj['track']) > 1:
                for i in range(len(obj['track'])-1):
                    cv2.line(frame, obj['track'][i], obj['track'][i+1] , color_dict[classNo], 2)

    # Display counts
    y_offset = 30
    for class_name, count in counts.items():
        cv2.putText(frame, f'{class_name}: {count}', (1100, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2)
        y_offset += 30

    cv2.line(frame, line_start, line_end, (255,255,255), 2)
    
    cv2.imshow("Frame", frame)
    out.write(frame)

    print(counts)
    
    key = cv2.waitKey(0)

    if key==27:
        break

cap.release()
out.release()
cv2.destroyAllWindows()

pickup with id 0 crossed the line
pickup with id 1 crossed the line
sedan with id 2 crossed the line
sedan with id 4 crossed the line
htv with id 6 crossed the line
bike with id 7 crossed the line
bike with id 9 crossed the line
htv with id 8 crossed the line
sedan with id 10 crossed the line
sedan with id 11 crossed the line
sedan with id 13 crossed the line
suv with id 14 crossed the line
pickup with id 12 crossed the line
sedan with id 15 crossed the line
bike with id 16 crossed the line
pickup with id 17 crossed the line
pickup with id 19 crossed the line
sedan with id 20 crossed the line
htv with id 18 crossed the line
van with id 21 crossed the line
van with id 22 crossed the line
suv with id 23 crossed the line
mini_car with id 25 crossed the line
bike with id 27 crossed the line
mini_car with id 28 crossed the line
mini_car with id 29 crossed the line
sedan with id 30 crossed the line
bike with id 31 crossed the line
bike with id 32 crossed the line
suv with id 33 crossed the l

: 