In [1]:
import cv2
import numpy as np
import torch
import glob
import hashlib
import pandas as pd
from matplotlib import pyplot as plt
import math
from filterpy.kalman import KalmanFilter
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Load the 'yolov5s' model from the 'ultralytics/yolov5' torch.hub repository with
# pretrained weights. The resulting global `model` is called by run_seq() on each frame.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Using cache found in /home/uxio/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-4-24 Python-3.10.8 torch-2.0.0+cu117 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


[31m[1mrequirements:[0m /home/uxio/.cache/torch/hub/requirements.txt not found, check failed.


In [3]:
class Object:
    """A tracked detection backed by a constant-velocity Kalman filter.

    The filter state is ``[x, y, z, vx, vy, vz]`` (each position component is
    advanced by its velocity on every ``predict``) and the measurement is
    ``[x, y, z]``.

    Attributes:
        track_id: Identifier given to this track.
        centroid: Last known ``(x, y)`` centre of the bounding box.
        start_point: Top-left corner of the bounding box.
        end_point: Bottom-right corner of the bounding box.
        obj_class: Detector class of the object.
        disapear: True while the object is not matched to any detection.
        disapear_count: Number of consecutive frames without a match.
        centroid_pred: Integer ``(x, y)`` position predicted by the filter.
        f: The underlying ``KalmanFilter``.
    """

    def __init__(self, obj_id, centroid, start_point, end_point, obj_class):
        """Create a track from its first detection.

        Args:
            obj_id: Identifier for the new track.
            centroid: ``(x, y)`` centre of the first detection.
            start_point: Top-left corner of the first detection.
            end_point: Bottom-right corner of the first detection.
            obj_class: Detector class of the object.
        """
        self.track_id = obj_id
        self.centroid = centroid
        self.start_point = start_point
        self.end_point = end_point
        self.obj_class = obj_class

        # Attribute spelling is kept as-is: other cells read these names.
        self.disapear = False
        self.disapear_count = 0

        # Defined up front so the attribute exists even before the first
        # predict() call.
        self.centroid_pred = (int(centroid[0]), int(centroid[1]))

        self.f = KalmanFilter(6, 3)

        # Seed the state with the first detection instead of the origin;
        # starting at zero makes the first predictions sit at (0, 0) and only
        # slowly converge towards the real object. The third coordinate is 1
        # because that is the constant value update_id() feeds as measurement.
        self.f.x = np.array(
            [float(centroid[0]), float(centroid[1]), 1., 0., 0., 0.]
        )

        # State transition: position += velocity, velocity unchanged.
        self.f.F = np.asarray(
            [
                [1., 0., 0., 1., 0., 0.],
                [0., 1., 0., 0., 1., 0.],
                [0., 0., 1., 0., 0., 1.],
                [0., 0., 0., 1., 0., 0.],
                [0., 0., 0., 0., 1., 0.],
                [0., 0., 0., 0., 0., 1.]
            ]
        )

        # Measurement function: only the three position components are observed.
        self.f.H = np.array([
            [1., 0., 0., 0., 0., 0.],
            [0., 1., 0., 0., 0., 0.],
            [0., 0., 1., 0., 0., 0.]
        ])

    def predict(self):
        """Advance the filter one step and refresh ``centroid_pred``."""
        self.f.predict()
        self.centroid_pred = (int(self.f.x[0]), int(self.f.x[1]))
        

In [4]:
def get_rectangle_center(start_point,end_point):
    """Return the integer ``(x, y)`` centre of an axis-aligned rectangle.

    Args:
        start_point: ``(x, y)`` of the top-left corner.
        end_point: ``(x, y)`` of the bottom-right corner.

    Returns:
        Tuple ``(cx, cy)``; each coordinate is truncated with ``int``.
    """
    left, top = start_point[0], start_point[1]

    # Half extents along each axis.
    half_width = (end_point[0] - left) / 2
    half_height = (end_point[1] - top) / 2

    return (int(half_width + left), int(half_height + top))

In [5]:
def get_area_rectangle(start_point, end_point):
    """Return the area of an axis-aligned rectangle.

    Args:
        start_point: ``(x, y)`` of the top-left corner.
        end_point: ``(x, y)`` of the bottom-right corner.

    Returns:
        Width times height, computed from the corner differences.
    """
    span_x = end_point[0] - start_point[0]
    span_y = end_point[1] - start_point[1]
    return span_x*span_y

In [6]:
def draw_bb(image, obj):
    """Draw every tracked object onto ``image``.

    For each object this draws:
      * the detected box (colour derived from a hash of the class) and the
        detected centroid, only while the object is currently matched;
      * the predicted centroid together with the track id;
      * a black box of the last known size centred on the predicted centroid.

    Args:
        image: Image array passed to the OpenCV drawing functions.
        obj: Iterable of tracked objects exposing ``start_point``,
            ``end_point``, ``centroid``, ``centroid_pred``, ``obj_class``,
            ``track_id`` and ``disapear``.
    """
    for tracked in obj:
        # Deterministic colour per class: first three bytes of the SHA-256 digest.
        hash_value = hashlib.sha256(str(tracked.obj_class).encode()).hexdigest()
        rgb_color = tuple(int(hash_value[i:i+2], 16) for i in (0, 2, 4))

        pred_x, pred_y = tracked.centroid_pred[0], tracked.centroid_pred[1]

        # Detected box/centroid are only meaningful while the object is seen.
        if not tracked.disapear:
            image = cv2.rectangle(image, tracked.start_point, tracked.end_point, rgb_color, 3)
            image = cv2.circle(image, tracked.centroid, radius = 3, color = (255, 0, 0), thickness = 8)
        image = cv2.circle(image, tracked.centroid_pred, radius = 3, color = (0, 255, 0), thickness = 8)
        image = cv2.putText(image, str(tracked.track_id), (pred_x, pred_y - 7), 0, 1, (0, 0, 255), 2)

        # Predicted box: last known size centred on the predicted centroid.
        # No abs() here: taking the absolute value of a negative coordinate
        # would mirror a box that is leaving the frame back into the image.
        width, height = get_shape(tracked.start_point, tracked.end_point)
        start_point_p = (int(pred_x - width/2), int(pred_y - height/2))
        end_point_p = (int(pred_x + width/2), int(pred_y + height/2))
        image = cv2.rectangle(image, start_point_p, end_point_p,(0, 0, 0), 3)


In [7]:
def update_id(obj, df):
    """Match tracked objects to the detections of the current frame.

    For every tracked object the detections are filtered by centroid distance,
    bounding-box area difference and class. The closest remaining candidate is
    used to update the object's Kalman filter, box and centroid, and is then
    removed from ``df`` so it cannot be matched twice. Objects without any
    candidate are flagged as disappeared and their counter is increased.

    Args:
        obj: List of tracked objects; they are updated in place.
        df: DataFrame of detections with at least the columns ``center``,
            ``area``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
            Helper columns (``dist``, ``dist_thr``, ``area_thr``, ``cls``,
            ``true``) are written to it.

    Returns:
        The detections that were not matched to any tracked object.
    """
    dist_threshold = 300
    area_threshold = 50000

    for tracked in obj:
        if df.empty:
            # Nothing left to match against.
            tracked.disapear = True
            tracked.disapear_count += 1
            continue

        tracked_area = get_area_rectangle(tracked.start_point, tracked.end_point)

        df['dist'] = [math.dist(tracked.centroid, center) for center in df['center']]
        df['dist_thr'] = df['dist'] <= dist_threshold
        # Absolute difference: without it any detection smaller than the
        # tracked box would pass the area test regardless of its size.
        df['area_thr'] = (df['area'] - tracked_area).abs() <= area_threshold
        df['cls'] = df['class'] == tracked.obj_class
        df['true'] = df['dist_thr'] & df['area_thr'] & df['cls']

        candidates = df.loc[df['true']]
        if candidates.empty:
            tracked.disapear = True
            tracked.disapear_count += 1
            continue

        # Pick the closest candidate *among the candidates only*. Selecting by
        # `df.dist == min_dist` over the whole frame could pick a detection
        # that failed the class/area test, or several rows at once.
        nearest = int(candidates['dist'].to_numpy().argmin())
        match = candidates.iloc[nearest]
        center = match['center']

        # The filter measures (x, y, z); z is fed as the constant 1.
        tracked.f.update([center[0], center[1], 1])
        tracked.start_point = (int(match['xmin']), int(match['ymin']))
        tracked.end_point = (int(match['xmax']), int(match['ymax']))
        tracked.centroid = (center[0], center[1])
        tracked.disapear = False
        tracked.disapear_count = 0

        # A detection can only feed one track.
        df = df.drop(index=candidates.index[nearest])

    return df

In [8]:
def stereo_vision(points_left, points_right, focal_length=7.070493e+02, baseline=0.54):
    """Triangulate left-image points using their match in the right image.

    Each left point is paired with the first right point whose vertical
    offset is below 10 px and whose horizontal offset is below 100 px.
    Depth comes from the disparity, ``z = focal_length * baseline / disparity``,
    and the lateral coordinate from similar triangles, ``x = z * u / focal_length``.

    Args:
        points_left: Iterable of ``(u, v)`` pixel points in the left image.
        points_right: Iterable of ``(u, v)`` pixel points in the right image.
        focal_length: Focal length in pixels.
        baseline: Distance between the two cameras.

    Returns:
        A list with one ``(x, y, z)`` tuple per left point, in order. ``y`` is
        the left pixel row unchanged. Left points without a match yield the
        sentinel ``(-1, -1, -1)``.
    """
    threshold_y = 10
    threshold_x = 100
    three_points = []

    for point_l in points_left:
        for point_r in points_right:
            disparity = abs(point_l[0] - point_r[0])
            offset_y = abs(point_l[1] - point_r[1])
            if offset_y < threshold_y and disparity < threshold_x:
                # Avoid a division by zero for points with identical columns.
                if disparity == 0:
                    disparity = 0.1
                z = focal_length * baseline / disparity
                # Equivalent to z / tan(pi/2 - atan(u / f)); the previous
                # `90 - arctan2(...)` mixed degrees with radians.
                # NOTE(review): u is measured from the image origin, not from
                # the principal point - confirm against the calibration.
                x = z * point_l[0] / focal_length
                three_points.append((x, point_l[1], z))
                break
        else:
            # No right point satisfied both thresholds.
            three_points.append((-1, -1, -1))

    return three_points



In [9]:
def stereo_to_2D(x ,y, z):
    """Project a 3D point back to 2D image coordinates.

    The horizontal pixel coordinate is ``f * tan(pi/2 - atan2(z, x))``, which
    equals ``f * x / z`` for ``z > 0``. The vertical coordinate is passed
    through unchanged.

    Args:
        x: Lateral coordinate of the point.
        y: Vertical coordinate; returned as is.
        z: Depth of the point.

    Returns:
        Tuple ``(x_2d, y_2d)``. ``x_2d`` is ``0.0`` when both ``x`` and ``z``
        are zero.
    """
    f = 7.070493e+02
    y_2d = y
    if (x == 0) and (z == 0):
        x_2d = 0.0
    else:
        angle = np.arctan2(z, x)
        # arctan2 returns radians, so the complementary angle is pi/2 - angle
        # (the previous `90 - angle` mixed degrees with radians).
        x_2d = np.tan(np.pi / 2 - angle) * f
    return (x_2d, y_2d)


In [10]:
def get_shape(start, end):
    """Return ``(width, height)`` of the rectangle spanned by two corners.

    Args:
        start: ``(x, y)`` of the first corner.
        end: ``(x, y)`` of the opposite corner.
    """
    # Axis 0 is horizontal, axis 1 is vertical.
    return tuple(end[axis] - start[axis] for axis in (0, 1))

In [11]:
def add_new_obj(obj, df, id_number):
    """Create a new tracked object for every row of ``df``.

    Args:
        obj: List of tracked objects; new ones are appended in place.
        df: DataFrame of detections with columns ``xmin``, ``ymin``,
            ``xmax``, ``ymax`` and ``class``.
        id_number: Identifier to give to the first new object.

    Returns:
        The next unused identifier.
    """
    next_id = id_number
    # An empty frame simply yields no rows.
    for _, detection in df.iterrows():
        top_left = (int(detection.xmin), int(detection.ymin))
        bottom_right = (int(detection.xmax), int(detection.ymax))
        center = get_rectangle_center(top_left, bottom_right)
        obj.append(Object(next_id, center, top_left, bottom_right, detection['class']))
        next_id += 1
    return next_id

In [12]:
def update_object_list(obj):
    """Return a new list with only the objects not flagged as disappeared.

    Args:
        obj: Iterable of tracked objects exposing a ``disapear`` flag.
    """
    return [tracked for tracked in obj if not tracked.disapear]

In [13]:
def check_disapear(obj, max_disapear_count=43, xmin=50, xmax=1350):
    """Drop tracked objects that are lost for good.

    Objects that are currently matched are always kept. Disappeared objects
    are kept only while their centroid is horizontally inside
    ``(xmin, xmax)`` and they have been missing for fewer than
    ``max_disapear_count`` frames.

    Args:
        obj: Iterable of tracked objects exposing ``disapear``,
            ``disapear_count`` and ``centroid``.
        max_disapear_count: Number of missed frames at which a track is dropped.
        xmin: Left limit for the x coordinate of a disappeared object.
        xmax: Right limit for the x coordinate of a disappeared object.

    Returns:
        A new list with the objects that are still tracked.
    """
    kept = []
    for element in obj:
        if not element.disapear:
            kept.append(element)
            continue
        # Both limits apply to the x coordinate; comparing centroid[1] (y)
        # against xmax never removed objects leaving through the right edge.
        inside_frame = xmin < element.centroid[0] < xmax
        if inside_frame and element.disapear_count < max_disapear_count:
            kept.append(element)
    return kept

In [14]:
def unseen_obj(obj):
    """Move every disappeared object to its predicted centroid.

    Args:
        obj: Iterable of tracked objects exposing ``disapear``, ``centroid``
            and ``centroid_pred``; they are updated in place.
    """
    for tracked in obj:
        if not tracked.disapear:
            # Matched objects keep their measured centroid.
            continue
        tracked.centroid = tracked.centroid_pred

In [15]:
def _filter_detections(results, labels, confidence):
    """Keep detections of the wanted classes above ``confidence`` and add 'center' and 'area' columns."""
    detections = results.pandas().xyxy[0]
    wanted = detections.loc[detections['name'].isin(labels)].reset_index()
    # Explicit copy so the new columns are written to an independent frame.
    kept = wanted[wanted['confidence'] > confidence].copy()

    start_points = list(zip(kept.xmin.astype(int), kept.ymin.astype(int)))
    end_points = list(zip(kept.xmax.astype(int), kept.ymax.astype(int)))

    kept['center'] = [get_rectangle_center(start, end) for start, end in zip(start_points, end_points)]
    kept['area'] = [get_area_rectangle(start, end) for start, end in zip(start_points, end_points)]
    return kept


def run_seq(images_l, images_r, debug = 0):
    """Run detection and tracking over a stereo image sequence and display it.

    For every left/right pair: detect objects with the global ``model``, keep
    persons, bicycles and cars above the confidence threshold, discard left
    detections without a stereo match, update the existing tracks, create
    tracks for the remaining detections, run the Kalman prediction and draw
    the result in an OpenCV window.

    Args:
        images_l: Paths of the left images, in playback order.
        images_r: Paths of the right images, aligned with ``images_l``.
        debug: If truthy, wait for a key press after every frame; otherwise
            wait 50 ms per frame.

    Raises:
        FileNotFoundError: If an image of the sequence cannot be read.
    """
    labels = ['person','bicycle','car']
    confidence = 0.5
    unmatched_3d = (-1, -1, -1)  # sentinel returned by stereo_vision
    id_number = 0
    obj = []

    for fname_l, fname_r in zip(images_l, images_r):
        img_l = cv2.imread(fname_l)
        img_r = cv2.imread(fname_r)
        # cv2.imread signals failure by returning None rather than raising.
        if img_l is None or img_r is None:
            raise FileNotFoundError(f"Could not read stereo pair: {fname_l!r}, {fname_r!r}")

        # OpenCV loads BGR; the YOLOv5 hub model documents RGB for array
        # input, so the channel order is reversed for detection only.
        results_l = model([img_l[..., ::-1]])
        results_r = model([img_r[..., ::-1]])

        #Filter objects, only car, bikes and people
        data_filtered_l = _filter_detections(results_l, labels, confidence)
        data_filtered_r = _filter_detections(results_r, labels, confidence)

        #Get 3D points and drop left detections without a right-image match
        points_3D = stereo_vision(data_filtered_l.center.values, data_filtered_r.center.values)
        data_filtered_l['center3D'] = list(points_3D)
        # The mask is built in Python: comparing a column of tuples against a
        # tuple depends on how pandas interprets the right-hand side.
        has_match = np.array([point != unmatched_3d for point in points_3D], dtype=bool)
        data_filtered_l = data_filtered_l.loc[has_match].copy()

        #Update ID
        data_filtered_l = update_id(obj, data_filtered_l)
        obj = check_disapear(obj)
        #Add new obj
        id_number = add_new_obj(obj, data_filtered_l, id_number)

        #Predict Kalman
        for o in obj:
            o.predict()

        #Update unseen objects
        unseen_obj(obj)

        #Draw bounding box
        draw_bb(img_l, obj)

        #Plot
        cv2.imshow('img', img_l)
        cv2.waitKey(0 if debug else 50)
        

In [16]:
images_l = sorted(glob.glob('final_project_2023_rect/seq_03/image_02/data/*.png'))
images_r = sorted(glob.glob('final_project_2023_rect/seq_03/image_03/data/*.png'))
# `assert a, b` only tests `a` and uses `b` as the failure message, so each
# list is checked on its own.
assert images_l, "No left images found in final_project_2023_rect/seq_03/image_02/data"
assert images_r, "No right images found in final_project_2023_rect/seq_03/image_03/data"

try:
    run_seq(images_l, images_r)
finally:
    # Close the preview window even if the run is interrupted or fails.
    cv2.destroyAllWindows()