In [1]:
from configparser import ConfigParser
# import cv2
import math
import numpy as np
import pandas as pd
from pprint import pprint

In [2]:
## constants
############

CONFIG_PATH = './conf/config.ini'

In [3]:
## config
#########

config = ConfigParser()

config.read(CONFIG_PATH)
sequence_config = dict(config['Sequence'])
tracking_config = dict(config['Tracking'])

sequence_config['seq_length'] = config.getint("Sequence","seq_length")
tracking_config['scaled_height'] = config.getint("Tracking","scaled_height")
tracking_config['scaled_width'] = config.getint("Tracking","scaled_width")
tracking_config['similarity_threshold'] = config.getfloat("Tracking","similarity_threshold")
tracking_config['confidence_threshold'] = config.getfloat("Tracking","confidence_threshold")

In [4]:
## input
########

# pos       name                    description
# 1      frame number           frame in which the object is present
# 2      identity number        trajectory id (-1 default for no track)
# 3      bounding box x         x value from top left of bounding box
# 4      bounding box y         y value from top left of bounding box
# 5      bounding box width     width of bounding box in pixels
# 6      bounding box height    height of bounding box in pixels
# 7      confidence score      class detection confidence

In [5]:
## output
#########

# pos       name                    description
# 1      frame number           frame in which the object is present
# 2      identity number        trajectory id (-1 default for no track)
# 3      bounding box x         x value from top left of bounding box
# 4      bounding box y         y value from top left of bounding box
# 5      bounding box width     width of bounding box in pixels
# 6      bounding box height    height of bounding box in pixels
# 7      confidence score*      class detection confidence (gt: 1 or 0)
# 8      class*                 type of class (1 for pedestrian)
# 9      visibility*            percent visible (percent occluded = 1-visibility)

#        *no need to output these values, will be ignore by evaluator

In [6]:
class Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

In [7]:
class Distance(object):
    
    @staticmethod
    def l2_norm(obj1, obj2):
        p1, p2 = None, None
        
        if type(obj1) == Box:
            p1 = obj1.centroid
        elif type(obj1) == Point:
            p1 = obj1
        
        if type(obj1) == Box:
            p2 = obj2.centroid
        elif type(obj1) == Point:
            p2 = obj2
        
        return sqrt((p2.x-p1.x)**2 + (p2.y-p1.y)**2)
    
    @staticmethod
    def jaccard(obj1, obj2):
        
        if type(obj1) == Box and type(obj2) == Box:
            box1, box2 = obj1, obj2
            
            w_intersection = min(box1.x + box1.w, box2.x + box2.w) - max(box1.x, box2.x)
            h_intersection = min(box1.y + box1.h, box2.y + box2.y) - max(box1.y, box2.y)
            if w_intersection <= 0 or h_intersection <= 0: # No overlap
                return 0
            I = w_intersection * h_intersection
            U = box1.w * box1.h + box2.w * box2.h - I # Union = Total Area - I
            return I / U
    
        elif type(obj1) == numpy.ndarray and type(obj2) == numpy.ndarray:
            descriptors1, descriptors2 = obj1, obj2
            
            # brute force feature matching using manhattan distance
        
            bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)

            matches = bf.match(descriptors_1,descriptors_2)
            matches = sorted(matches, key = lambda x:x.distance)

            # TODO do we want to cutoff at a threshold?

            return len(matches) / (len(descriptors1)+len(descriptors2))
    
    @staticmethod
    def sift(box1, box2, img_path, frame_a_id, frame_b_id, new_w=tracking_config['scaled_width'], new_h=tracking_config['scaled_height']):
        
#         # get filepaths
        
#         img1_path = os.path.join(img_path, "%06d.jpg" % frame_a_id)
#         img2_path = os.path.join(img_path, "%06d.jpg" % frame_b_id)
        
#         # read image files as grayscale
        
#         img1 = cv2.imread(img1_path, 0)
#         img2 = cv2.imread(img2_path, 0)
        
#         # gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
#         # gray2 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        
#         # cut out the firt object from the first image
        
#         top_left, bottom_right = box1.coords        
#         start_x, start_y = top_left
#         end_x, end_y = bottom_right        
#         crop1 = img1[start_y:end_y, start_x:end_x]
        
#         # cut out the second object from the second image
        
#         top_left, bottom_right = box2.coords        
#         start_x, start_y = top_left
#         end_x, end_y = bottom_right        
#         crop2 = img2[start_y:end_y, start_x:end_x]
        
# #         # calculate the amount to scale in both directions
        
# #         img_1_x_scale = new_w/box1.w
# #         img_1_y_scale = new_h/box1.h
        
# #         img_1_x_scale = new_w/box2.w
# #         img_1_y_scale = new_h/box2.h
        
# #         # resize both cropped images
        
# #         scaled_img1 = cv2.resize(img1, None, fx=img_1_x_scale, fy=img_1_y_scale, interpolation = cv2.INTER_CUBIC)
# #         scaled_img2 = cv2.resize(img2, None, fx=img_2_x_scale, fy=img_2_y_scale, interpolation = cv2.INTER_CUBIC)

#         # TODO do I even need to scale? SIFT is Scale Invariant

#         scaled_img1 = cv2.resize(img1, (new_w, new_h), interpolation = cv2.INTER_CUBIC)
#         scaled_img2 = cv2.resize(img2, (new_w, new_h), interpolation = cv2.INTER_CUBIC)
        
#         cv2.imshow("img1", scaled_img1)
#         cv2.imshow("img2", scaled_img2)
        
#         cv2.waitKey(0)
#         cv2.destroyAllWindows()
        
#         # find keypoints and descriptors
        
#         # https://docs.opencv.org/4.3.0/da/df5/tutorial_py_sift_intro.html
        
#         sift = cv2.xfeatures2d.SIFT_create()
        
#         keypoints_1, descriptors_1 = sift.detectAndCompute(scaled_img1,None)
#         keypoints_2, descriptors_2 = sift.detectAndCompute(scaled_img2,None)

#         # brute force feature matching using manhattan distance
        
#         bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)

#         matches = bf.match(descriptors_1,descriptors_2)
#         matches = sorted(matches, key = lambda x:x.distance)
        
#         # TODO do we want to cutoff at a threshold?
        
#         pprint(matches)
#         pprint(keypoints1)
#         pprint(keypoints2)
        
#         return len(matches) / (len(descriptors_1)+len(descriptors_2))
        pass


In [8]:
class Box(object):
    def __init__(self, x, y, w, h, index=None, frame=None, track_id=None, conf=None):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.frame = frame
        self.index = index
        self.track_id = track_id
        self.conf = conf
    
    @property
    def coords(self):
        return(Point(self.x, self.y), Point(self.x+self.w, self.y-self.h))
    
    @property
    def centroid(self):
        return Point(self.x+(self.w*0.5), self.y-(self.h*0.5))
    
    # TODO: move the similarity/distance calculation out of Box, doesn't belong here
    
    def jaccard(self, box):
        w_intersection = min(self.x + self.w, box.x + box.w) - max(self.x, box.x)
        h_intersection = min(self.y +self.h, box.y + box.y) - max(self.y, box.y)
        if w_intersection <= 0 or h_intersection <= 0: # No overlap
            return 0
        I = w_intersection * h_intersection
        U = self.w * self.h + box.w * box.h - I # Union = Total Area - I
        return I / U

    def copy(self, offset_x=0, offset_y=0):
        return Box(
            self.x + offset_x, 
            self.y + offset_y, 
            self.w, self.h, 
            self.index, self.frame,
            self.track_id, self.conf
        )
    
    def __repr__(self):
        return "Box(%s, %s, %s, %s, %s, %s, %s, %s)" % (
            self.x, self.y, self.w, self.h, self.index, self.frame, self.track_id, self.conf
        )
    
    def __str__(self):
        return "index: %s, frame: %s, track: %s, x: %s, y: %s, w: %s, h: %s, conf: %s" % (
            self.index, self.frame, self.track_id, self.x, self.y, self.w, self.h, self.conf
        )

In [9]:
class Track(object):
    counter = 1
    
    def __init__(self, o):
        self.id = Track.counter
        Track.counter += 1
        
        if type(o) == list:
            boxes = o
            self.boxes = boxes
        elif type(o) == Box:
            box = o
            self.boxes = [box]
            
        self.is_active = True
    
    def add(self, box):
        box.track_id = self.id
        self.boxes.append(box)
    
    @staticmethod
    def angle(box1, box2):
        
        p1 = box1.centroid
        p2 = box2.centroid

        rads = math.atan2(p1.y-p2.y, p1.x-p2.x)
        # deg = math.degrees(rads)
        # return rads
        return rads
    
    @staticmethod
    def distance(box1, box2):
            
        p1 = box1.centroid
        p2 = box2.centroid

        distance = math.hypot((p2.x-p1.x),(p2.y-p1.y))
        # distance = sqrt((x2-x1)**2 + (y2-y1)**2)
        
        return distance
    
    def predict(self, frame_id):
        current = self.boxes[-1]
        
        if current.frame != frame_id - 1:
            return None
        
        if len(self.boxes)==1:
            predicted_location = current.copy()
        else:
            previous = self.boxes[-2]
            
            angle = Track.angle(previous, current)
            distance = Track.distance(previous, current)
            
            offset_x = distance*math.sin(angle)
            offset_y = distance*math.cos(angle)
            
            predicted_location = current.copy(offset_x, offset_y)
            
        predicted_location.frame += 1

        return predicted_location

    def __repr__(self):
        return "Track([<__main__.Box>]) count: %s" % len(self.boxes)
    
    def __str__(self):
        return "TrackID: %s, Frames: %s-%s" % (
            self.id, self.boxes[0].frame, self.boxes[-1].frame
        )
    
    def __len__(self):
        return len(self._boxes)

In [10]:
class Frame(object):
    def __init__(self, number, boxes=None):
        self.id = number
        
        if boxes is None:
            boxes = []
        
        self._boxes = boxes
    
    def add(self, box):
        if type(box) == Box:
            self._boxes.append(box)
        elif type(box) == list:
            self._boxes.extend(boxes)
    
    @property
    def objects(self):
        return self._boxes
    
    def __repr__(self):
        return "Frame(%s, [<__main__.Box>]) count: %s" % (self.id, len(self.boxes))
    
    def __str__(self):
        return "FrameID: %s, Count: %s" % (self.id, len(self.boxes))
    
    def __len__(self):
        return len(self._boxes)

In [11]:
class Detections(object):
    def __init__(self, data_path=tracking_config['detections_path']):
        self.data_path = data_path
        self.mean_width = None
        self.mean_height = None
        self._df = None
        self._frames = {}
        self.start_index = None
        self.end_index = None
        self.count = sequence_config['seq_length']
        self._pos = None
        self._load()
    
    def _load(self, conf_threshold=tracking_config['confidence_threshold']):
        
        header_list = ['frame','trajectory','x','y','w','h','confidence']
        dtype = {'frame':int,'trajectory':int,'x':float,'y':float,'w':float,'h':float,'confidence':float}
        
        df = pd.read_csv(self.data_path, names=header_list, dtype=dtype)
        self._df = df[df["confidence"] >= conf_threshold]
        
        self.mean_width = df["w"].mean()
        self.mean_height = df["h"].mean()

        # get the indices for the first and last frames

        self.start_index = df['frame'].min()
        self.end_index = df['frame'].max()

        for i in range(self.start_index, self.end_index+1):
            self._frames[i] = df.loc[df['frame']==i,:]
    
    def __iter__(self):
        return self
    
    def __next__(self):
        return self.next()
    
    def next(self):
        while True:
            if self._pos is None:
                self._pos = self.start_index        
            elif self._pos < self.end_index:
                current, self._pos = self._pos, self._pos + 1
                current_frame = self._frames[current]

                detection_boxes = []

                for i, detection in current_frame.iterrows():                
                    frame_no, traj_no, x, y, w, h, conf = detection
                    box = Box(x, y, w, h, i, frame_no, -1, conf)
                    detection_boxes.append(box)

                return Frame(current, detection_boxes)
            else:
                raise StopIteration()
        
    def __repr__(self):
        return "Detections(%s, %s, %s, %s, %s, %s)" % (
            self.data_path, self.mean_width, self.mean_height, 
            self.start_index, self.end_index, self.count
        )
    
    def __str__(self):
        return "path: %s, name: %s, mean_width: %s, mean_height: %s, start_index: %s, end_index: %s, count: %s)" % (
            self.data_path, sequence_config['name'], 
            self.mean_width, self.mean_height, 
            self.start_index, self.end_index, self.count
        )
    
    def __len__(self):
        # TODO this is really the length of the frames, that's how
        # I use it in trajectories... should rethink this
        return self.count

In [12]:
class Trajectories(object):
    def __init__(self):
        self._tracks = {}
        self.detections = Detections()
    
    def add(self, o):
        if type(o) == Box:
            box = o
            track = Track(box)
            box.track_id = track.id
            self._tracks[track.id] = track
        if type(o) == Track:
            track = o
            self._tracks[track.id] = track
    
    def _matching(self, predictions, current_frame):
        measure = tracking_config['distance_measure']
        
        if measure == 'eculidean':
            distance_func = Distance.l2_norm
        elif measure == 'iou':
            distance_func = Distance.jaccard
        else:
            distance_func = Distance.sift
            
        sim_vector = []
        computed = []
        
        for box_a in predictions:
            for box_b in current_frame.objects:
                
                # WARNING this is kind of dangerous because index is not 
                # a required parameter for Box. Tracking computed pairs
                # is meant to save time, but it might save too much time
                # if box indices are None.
                if box_a.index is None or box_b.index is None:
                    raise Exception("Boundary index may not be None.")                    
                this_pair = set([box_a.index, box_b.index])
                if this_pair not in computed:
                    # WARNING I think this is a problem because sift
                    # has unique parameters (img, frame, etc)
                    similarity = distance_func(box_a, box_b)
                    if similarity >= tracking_config['similarity_threshold']:
                        sim_vector.append((similarity, box_a, box_b))
                    computed.append(this_pair)

        sim_vector.sort(reverse=True)

        matching_pairs = []
        matched_objects = set([])

        for distance, box_1, box_2 in sim_vector:
            
            # we want to ensure that we only return the top matches, so we keep track
            # of the objects already matched and ignore less similar matches for these objects
            
            if box_1.index not in matched_objects or box_2.index not in matched_objects:                
            
                match = (box_1, box_2)
                matching_pairs.append(match)
                
                matched_objects.add(box_1.index)
                matched_objects.add(box_2.index)
        
        # unmatched detections in current frame need to become new tracks
        
#         for box_b in current_frame.objects:
#             if box_b.index not in matched_objects:
#                 self.add(box_b)

        print(matched_objects)
        
        return matching_pairs
    
    def calculate(self, start=0, end=None):
        if end is None:
            end = len(self.detections)
            
        # for frame in self.detections[start:end]: # Detections not subscriptable
        
        for frame in self.detections:
            if len(self._tracks)==0:
                for box in frame.objects:
                    self.add(box)
                continue

            predictions = []
            for track_id, track in self._tracks.items():
                prediction = track.predict(frame.id)
                if prediction is not None:
                    predictions.append(prediction)

            matching_pairs = self._matching(predictions, frame)
            for box_1, box_2 in matching_pairs:
                track = self._tracks[box_1.track_id]
                track.add(box_2)

    def output(self, path=None):
        if path is None:            
            for track_id, boxes in self._tracks.items():
                print(track_id, boxes)
        else:
            # write
            pass
    
    def __len__(self):
        return len(self._tracks)

In [13]:
trajectories = Trajectories()
trajectories.calculate()
trajectories.output()

{17536, 17537, 17538, 17539, 17540, 17541, 17542, 17543, 17544, 17545, 17547, 17548, 17549, 17550, 17551, 17552, 17553, 17554, 17555, 22710, 22711, 22713, 22714, 22715, 22716, 22717, 22718, 22719, 22720, 22721, 22722, 22723, 22724, 22726, 22727, 22728, 22729, 22730, 22731, 22732, 22733, 22734, 17531, 17533, 17534, 17535}
{18623, 22724, 18601, 18602, 18603, 18604, 18606, 18607, 18608, 18609, 18610, 18611, 18612, 18613, 22710, 18615, 18614, 22713, 18616, 22715, 22716, 18621, 22718, 18622, 18624, 22720, 22722, 22721, 22723, 22717, 22726, 22719, 18625, 22727, 22730, 22731, 22728, 22733, 22734, 22729, 22732, 22711, 18617, 22714, 18618, 18619}
{2468, 2478, 18602, 2463, 2465, 2466, 2467, 18604, 2469, 2470, 2471, 2472, 18601, 2473, 18603, 2476, 2474, 18606, 18607, 2479, 18608, 2482, 18611, 2484, 18613, 2486, 18615, 18614, 2488, 18616, 18612, 18609, 18621, 18622, 18617, 18624, 18625, 2475, 2481, 18619, 2480, 18623, 18610, 2483, 2487, 18618, 2477}
{12808, 12809, 12810, 12812, 12813, 12814, 12815