In [1]:
from configparser import ConfigParser
import cv2
import errno
import logging
import os
import math
import matplotlib.pyplot as plt
import pandas as pd
from pprint import pprint
from random import randint
from skimage.measure import compare_ssim
import sys
from statistics import mean
import unittest

In [2]:
## constants ##

# Location of the sequence on disk and of the files inside it; main() hands
# these four values to the Sequence constructor.
SEQUENCE_PATH = "MOT20/train/MOT20-01/"
CONFIG_FILENAME = "seqinfo.ini"
DETECTIONS_DIR = "det"
DETECTIONS_FILENAME = "det.txt"

# Tuning values read by track().
# this should be read from a config
# Largest centroid distance at which a track tail and a new detection are
# still scored against each other.
SEARCH_AREA = 200
# DISTANCE_THRESHOLD = 0.1
# Smallest combined score for a pair to be kept as a match candidate.
SIMILARITY_THRESHOLD = 0.24
# A track whose last detection is more than this many frames behind the
# current frame is no longer considered for extension.
MEMORY_WINDOW = 30

# Not used by the active scoring formula in track().
# NOTE(review): presumably typical per-measure scores kept for normalisation
# experiments -- confirm before relying on them.
SSIM_MEAN = 0.25
SIFT_MEAN = 0.025
SURF_MEAN = 0.07

In [3]:
## logging ##

def configure_logging(level=logging.DEBUG, stream=None):
    """Attach exactly one stream handler to this module's logger.

    Re-running the cell used to add one more handler each time, so every
    message was printed once per run. Existing handlers are now removed
    before the new one is attached, which makes the cell idempotent.

    Args:
        level: threshold applied to both the logger and the handler.
        stream: file-like target for the handler; ``sys.stdout`` when None.

    Returns:
        tuple: ``(logger, handler)``.
    """
    logger = logging.getLogger(__name__)

    # drop whatever an earlier run of this cell attached
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    handler = logging.StreamHandler(sys.stdout if stream is None else stream)
    handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
    handler.setLevel(level)

    logger.addHandler(handler)
    logger.setLevel(level)

    return logger, handler


log, out_hdlr = configure_logging()

In [4]:
class Tools(object):
    """Namespace for the similarity measures used to associate detections.

    Every public measure is a static method taking two detection objects and
    returning a number.
    """

    class Similarity(object):
        """Groups the spatial and the visual measures."""

        class Spatial(object):
            """Measures computed from bounding-box geometry only."""

            @staticmethod
            def euclidean(det_a, det_b):
                """Return the Euclidean distance between the two centroids.

                Args:
                    det_a, det_b: objects exposing ``centroid`` with ``x``
                        and ``y`` attributes.

                Returns:
                    float: distance between the centroids.
                """
                center_a = det_a.centroid
                center_b = det_b.centroid

                return math.sqrt(
                    (center_b.x - center_a.x) ** 2 + (center_b.y - center_a.y) ** 2
                )

            @staticmethod
            def iou(det_a, det_b):
                """Return the intersection-over-union of the two boxes.

                Args:
                    det_a, det_b: objects exposing ``x``, ``y`` (top-left
                        corner), ``w`` and ``h``.

                Returns:
                    ``0`` when the boxes do not overlap, otherwise
                    ``intersection / (union + 1e-5)``.
                """
                # describe each box as [x1, y1, x2, y2]: (x1, y1) is the top
                # left corner and (x2, y2) the bottom right corner
                a = [det_a.x, det_a.y, det_a.x + det_a.w, det_a.y + det_a.h]
                b = [det_b.x, det_b.y, det_b.x + det_b.w, det_b.y + det_b.h]

                # boundary of the intersection rectangle
                x1 = max(a[0], b[0])  # rightmost x of the top left points
                y1 = max(a[1], b[1])  # lowest y of the top left points
                x2 = min(a[2], b[2])  # leftmost x of the bottom right points
                y2 = min(a[3], b[3])  # highest y of the bottom right points

                width = x2 - x1
                height = y2 - y1

                # no overlap, nothing more to compute
                if width <= 0 or height <= 0:
                    return 0

                area_of_intersection = width * height

                # adding both areas counts the intersection twice, so it is
                # subtracted once to obtain the union
                a_area = (a[2] - a[0]) * (a[3] - a[1])
                b_area = (b[2] - b[0]) * (b[3] - b[1])
                area_of_union = a_area + b_area - area_of_intersection

                # protect against division by zero
                epsilon = 1e-5

                return area_of_intersection / (area_of_union + epsilon)

        class Visual(object):
            """Measures computed from the detections' image patches."""

            @staticmethod
            def ssim(det_a, det_b):
                """Return the SSIM score of the two ``eq_gray`` patches.

                Both patches are resized to the shape of whichever patch has
                more elements before they are compared.

                Args:
                    det_a, det_b: objects exposing ``eq_gray`` image arrays.

                Returns:
                    The score reported by ``compare_ssim``.
                """
                patch_a = det_a.eq_gray
                patch_b = det_b.eq_gray

                if patch_a.size > patch_b.size:
                    h, w = patch_a.shape[0], patch_a.shape[1]
                else:
                    h, w = patch_b.shape[0], patch_b.shape[1]

                patch_a_scaled = cv2.resize(
                    patch_a, (w, h), interpolation=cv2.INTER_AREA)
                patch_b_scaled = cv2.resize(
                    patch_b, (w, h), interpolation=cv2.INTER_AREA)

                # NOTE(review): Sequence.next builds eq_gray from a grayscale
                # conversion, yet multichannel=True is passed here -- confirm
                # this is intended before changing it, the tracker thresholds
                # were tuned against the current scores.
                score, diff = compare_ssim(
                    patch_a_scaled, patch_b_scaled, full=True, multichannel=True)

                return score

            @staticmethod
            def _ratio_test_score(matches, ratio=0.7):
                """Return the share of matches that pass Lowe's ratio test.

                Args:
                    matches: sequence of k-nearest-neighbour results, each a
                        sequence of objects with a ``distance`` attribute,
                        best neighbour first.
                    ratio: a match is "good" when the best distance is
                        smaller than ``ratio`` times the second best.

                Returns:
                    float: ``good / (len(matches) + 1e-5)``.
                """
                good_matches = 0

                for neighbours in matches:
                    # a query can come back with fewer than two neighbours;
                    # the ratio test is undefined for it, so it counts as
                    # "not good" instead of raising on unpacking
                    if len(neighbours) < 2:
                        continue

                    best, second_best = neighbours[0], neighbours[1]
                    if best.distance < ratio * second_best.distance:
                        good_matches += 1

                epsilon = 1e-5

                return good_matches / (len(matches) + epsilon)

            @staticmethod
            def _feature_match_score(extractor, det_a, det_b):
                """Score two detections by matching local feature descriptors.

                Descriptors are extracted from both ``patch`` images with
                ``extractor``, matched with a FLANN k-d tree matcher (k=2)
                and scored with ``_ratio_test_score``.

                Args:
                    extractor: object providing ``detectAndCompute(image, mask)``.
                    det_a, det_b: objects exposing ``patch`` image arrays.

                Returns:
                    ``0`` when descriptors are missing or matching fails,
                    otherwise the ratio-test score.
                """
                _, descriptors_a = extractor.detectAndCompute(det_a.patch, None)
                _, descriptors_b = extractor.detectAndCompute(det_b.patch, None)

                # nothing to match when either patch yielded no descriptors
                if descriptors_a is None or descriptors_b is None:
                    return 0

                FLANN_INDEX_KDTREE = 0
                index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
                search_params = dict(checks=50)   # or pass empty dictionary

                flann = cv2.FlannBasedMatcher(index_params, search_params)

                try:
                    matches = flann.knnMatch(descriptors_a, descriptors_b, k=2)
                except Exception as ex:
                    # best effort: an unmatched pair scores 0, but the reason
                    # is no longer dropped silently
                    logging.getLogger(__name__).debug(
                        "Feature matching failed: %s", ex)
                    return 0

                return Tools.Similarity.Visual._ratio_test_score(matches)

            @staticmethod
            def sift(det_a, det_b):
                """Return the SIFT ratio-test score of the two patches."""
                return Tools.Similarity.Visual._feature_match_score(
                    cv2.xfeatures2d.SIFT_create(), det_a, det_b)

            @staticmethod
            def surf(det_a, det_b):
                """Return the SURF ratio-test score of the two patches."""
                return Tools.Similarity.Visual._feature_match_score(
                    cv2.xfeatures2d.SURF_create(), det_a, det_b)

In [5]:
class Sequence(object):
    """An image sequence with optional per-frame detections.

    The constructor reads the sequence configuration file, indexes the image
    files and, when a detections file is given, loads the detections.
    Iterating over the object yields one ``Sequence.Frame`` per image, in
    ascending frame-id order.
    """

    ## Point ##

    class Point(object):
        """A 2-D point."""

        def __init__(self, x, y):
            self.x = x
            self.y = y

    ## ImageInfo ##

    class ImageInfo(object):
        """Location and size of one image file.

        ``frame_id`` is the file name without its last extension, as an int.
        """

        def __init__(self, img_dir, filename, height, width):
            self.img_dir = img_dir
            self.filename = filename
            self.path = os.path.join(img_dir, filename)
            self.frame_id = int('.'.join(filename.split('.')[:-1]))
            self.height = height
            self.width = width

        def __str__(self):
            return "Frame: %s\tHeight: %s\t Width: %s" % (
                self.frame_id, self.height, self.width
            )

        def __repr__(self):
            return "ImageInfo(%s, %s, %s, %s)" % (
                self.img_dir, self.filename, self.height, self.width
            )

    ## Detection ##

    class Detection(object):
        """One bounding box in one frame.

        ``x``/``y`` is the top-left corner, ``w``/``h`` the size. ``patch``
        and ``eq_gray`` are filled in by ``Sequence.next`` once the frame
        image has been read.
        """

        def __init__(self, index, frame_id, track, x, y, w, h, confidence, patch=None, eq_gray=None):
            self.index = index
            self.frame = frame_id
            self.trajectory = track
            self.x = x
            self.y = y
            self.w = w
            self.h = h
            self.confidence = confidence
            self.patch = patch
            self.eq_gray = eq_gray

        @property
        def centroid(self):
            """Centre of the box as a ``Sequence.Point``."""
            return Sequence.Point(self.x+0.5*self.w, self.y+0.5*self.h)

        def __str__(self):
            return "Index: %s, Frame: %s, Track: %s, x: %s, y: %s, w: %s, h: %s, conf: %s" % (
                self.index, self.frame, self.trajectory, self.x, self.y,
                self.w, self.h, self.confidence
            )

        def __repr__(self):
            return "Detection(%s, %s,%s,%s,%s,%s,%s,%s)" % (
                self.index, self.frame, self.trajectory, self.x, self.y,
                self.w, self.h, self.confidence
            )

    ## Frame ##

    class Frame(object):
        """One step of the iteration: image info, pixels and detections."""

        def __init__(self, index, image_info, image=None, detections=None):
            self.index = index
            self.image_info = image_info
            self.image = image
            self.detections = detections

        def __str__(self):
            if self.detections:
                return "%s, detection count: %s" % (self.image_info, len(self.detections))
            else:
                return "%s" % self.image_info

        def __repr__(self):
            return "Frame(%s)" % self.image_info

    ## Sequence ##

    def __init__(self, sequence_path, config_filename, detections_dir=None, detections_filename=None):
        """Load configuration, image index and (optionally) detections.

        Args:
            sequence_path: root directory of the sequence.
            config_filename: name of the ini file inside ``sequence_path``.
            detections_dir: directory (relative to ``sequence_path``) holding
                the detections file; detections are skipped when missing.
            detections_filename: name of the detections file.

        Raises:
            FileNotFoundError: the config or the detections file is missing.
        """
        self.root_path = sequence_path
        self.config_filename = config_filename
        self.config_path = os.path.join(sequence_path, config_filename)
        # was only ever initialised under a misspelt name, which left
        # detections_path undefined when no detections were requested
        self.detections_path = None
        self.detecitons_path = None  # misspelt legacy name, kept for compatibility
        self.name = None
        self.length = None
        self.frame_rate = None
        self.height = None
        self.width = None
        self.image_path = None
        self.image_ext = None
        self._image_infos = None
        self._pos = None
        self._load_conf()
        self._load_image_infos()
        self._detections = None
        self._tracks = None

        # attributes of questionable value
        self._mean_width = None
        self._mean_height = None

        if detections_dir and detections_filename:
            self.detections_path = os.path.join(
                sequence_path, detections_dir, detections_filename
            )
            self._load_detections()
        else:
            log.info("No detections specified, loading only images.")

    def _load_conf(self):
        """Read the ``Sequence`` section of the config file into attributes.

        Raises:
            FileNotFoundError: ``self.config_path`` is not a file.
        """
        log.info("Loading sequence configuration.")

        if not os.path.isfile(self.config_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.config_path)

        config = ConfigParser()
        config.read(self.config_path)
        sequence_config = dict(config['Sequence'])

        self.name = sequence_config['name']
        self.length = int(sequence_config['seqlength'])
        # frame rate, height and width are kept as the strings read from the file
        self.frame_rate = sequence_config['framerate']
        self.height = sequence_config['imheight']
        self.width = sequence_config['imwidth']
        self.image_path = os.path.join(self.root_path, sequence_config['imdir'])
        self.image_ext = sequence_config['imext'].replace('.','')

    def _load_image_infos(self):
        """Index the image files of the configured type, sorted by frame id."""
        log.info("Loading image info.")

        dir_contents = os.listdir(self.image_path)
        file_type = self.image_ext
        image_infos = [Sequence.ImageInfo(self.image_path, x, self.height, self.width)
                       for x in dir_contents if x.split('.')[-1] == file_type]

        image_infos.sort(key=lambda x: x.frame_id)

        if len(image_infos) != self.length:
            # the two values used to be reported in swapped order
            msg = ("The sequence length: %s, and image count: %s, do not match." % (
                self.length,
                len(image_infos)
            ))
            log.debug(msg)

        self._image_infos = image_infos

    def _load_detections(self):
        """Read the detections file and group its rows by frame.

        Sets ``self._detections`` to a dict mapping frame id to a list of
        ``Sequence.Detection`` and records the mean box width and height.

        Raises:
            FileNotFoundError: ``self.detections_path`` is not a file.
        """
        log.info("Loading detections.")

        if not os.path.isfile(self.detections_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.detections_path)

        header_list = ['frame','trajectory','x','y','w','h','confidence', 'gt', 'class', 'vis']
        index_col = []# ['frame','trajectory']
        usecols = ['frame','trajectory','x','y','w','h','confidence']
        dtype = {'frame':int,'trajectory':int,'x':float,'y':float,'w':float,'h':float,'confidence':float}

        df = pd.read_csv(
            self.detections_path,
            names = header_list,
            index_col = index_col,
            usecols = usecols,
            dtype = dtype
        )

        # detections have confidence either 0 or 1, i'm not going to attempt
        # to track low confidence detections at this time
        # df = df[df["confidence"] == 1]

        self._mean_width = df["w"].mean()
        self._mean_height = df["h"].mean()

        self._detections = Sequence._group_detections(df)

    @staticmethod
    def _group_detections(df):
        """Turn a detections DataFrame into ``{frame id: [Detection, ...]}``.

        Every frame id between the smallest and the largest one present gets
        an entry, so a frame without detections maps to an empty list. The
        previous implementation keyed each list on the frame number of the
        last row it had iterated, which made an empty frame overwrite the
        list of the frame before it.

        Args:
            df: DataFrame with the columns ``frame``, ``trajectory``, ``x``,
                ``y``, ``w``, ``h`` and ``confidence``.

        Returns:
            dict: frame id -> list of ``Sequence.Detection`` in row order;
            empty when ``df`` has no rows.
        """
        if df.empty:
            return dict()

        first_frame = int(df['frame'].min())
        last_frame = int(df['frame'].max())

        grouped = {frame_id: [] for frame_id in range(first_frame, last_frame + 1)}

        # a single pass over the rows instead of one filter per frame
        for row in df.itertuples(index=True):
            frame_id = int(row.frame)
            grouped[frame_id].append(
                Sequence.Detection(
                    index=row.Index,
                    frame_id=frame_id,
                    track=int(row.trajectory),
                    x=int(round(row.x)),
                    y=int(round(row.y)),
                    w=int(round(row.w)),
                    h=int(round(row.h)),
                    confidence=row.confidence
                )
            )

        return grouped

    def __iter__(self):
        # start over so the sequence can be walked more than once; without
        # the reset a second loop over the same object yields nothing
        self._pos = 0
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Return the next ``Sequence.Frame``.

        Reads the image and, when detections exist for the frame, attaches
        the cropped ``patch`` and its histogram-equalised grayscale version
        (``eq_gray``) to each detection.

        Raises:
            StopIteration: every image has been returned.
        """
        if self._pos is None:
            self._pos = 0

        if self._pos >= len(self._image_infos):
            raise StopIteration()

        current, self._pos = self._pos, self._pos + 1
        current_info = self._image_infos[current]
        frame_id = current_info.frame_id

        image = None

        try:
            image = cv2.imread(current_info.path)
        except Exception as ex:
            log.debug(ex)

        current_frame_detections = None

        if self._detections:

            if frame_id in self._detections.keys():
                current_frame_detections = self._detections[frame_id]
                if image is not None:
                    image_height, image_width = image.shape[0], image.shape[1]

                    for detection in current_frame_detections:
                        # clamp the box to the image: a negative start would
                        # wrap around in the slice and give an empty patch
                        x1 = max(0, detection.x)
                        y1 = max(0, detection.y)
                        x2 = min(image_width, detection.x + detection.w)
                        y2 = min(image_height, detection.y + detection.h)

                        # box entirely outside the image: nothing to crop
                        if x2 <= x1 or y2 <= y1:
                            continue

                        patch = image[y1:y2, x1:x2]
                        detection.patch = patch

                        gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
                        detection.eq_gray = cv2.equalizeHist(gray)

        return Sequence.Frame(
            frame_id, current_info, image, current_frame_detections
        )

    def __str__(self):
        return("Sequence: %s\nPath: %s\nImage Path: %s\nResolution: %sx%s\nFrame Rate: %s\nLength: %s\nType: %s"
          % (
              self.name,
              self.root_path,
              self.image_path,
              self.width,
              self.height,
              self.frame_rate,
              self.length,
              self.image_ext
          ))

    def __repr__(self):
        return("Sequence(%s, %s)" % (
            self.root_path,
            self.config_filename
        ))

In [6]:
def track(sequence):
    """Assign a trajectory id to every detection of ``sequence``.

    The detections of the first frame each start a track. For every later
    frame, the last detection ("tail") of each track that was extended
    within ``MEMORY_WINDOW`` frames is compared with every detection of the
    frame whose centroid lies within ``SEARCH_AREA``. Pairs are scored with
    a weighted mix of SSIM, SIFT, SURF and distance; pairs scoring at least
    ``SIMILARITY_THRESHOLD`` are matched greedily, best score first.
    Detections left unmatched start new tracks, in detection order.

    Side effects: sets ``detection.trajectory`` on the detections, stores
    the tracks in ``sequence._tracks`` (track id -> list of detections) and
    prints ``frame:next_track_id`` after each frame.

    Args:
        sequence: an iterable of frames exposing ``index`` and
            ``detections``; its ``_tracks`` attribute is overwritten.
    """
    spatial_measure = Tools.Similarity.Spatial.euclidean
    # spatial_measure = Tools.Similarity.Spatial.iou

    # this should be read from a config
    search_area = SEARCH_AREA
    similarity_threshold = SIMILARITY_THRESHOLD
    memory_window = MEMORY_WINDOW

    # weights of the combined score; the weighted sum is divided by
    # score_divisor
    ssim_weight = 0.7
    sift_weight = 3
    surf_weight = 2.25
    distance_weight = 0.35
    score_divisor = 4

    track_counter = 1
    is_first_frame = True

    log.info("Calculating trajectories.")

    for current_frame in sequence:

        # a frame without loaded detections carries None
        detections = current_frame.detections or []

        if is_first_frame:

            sequence._tracks = dict()

            # initialize trajectories to all detections in the first frame
            for detection in detections:
                detection.trajectory = track_counter
                sequence._tracks[track_counter] = [detection]
                track_counter += 1

            is_first_frame = False

        else:

            # tails of the tracks that were extended recently enough
            active_tails = [
                trajectory[-1] for trajectory in sequence._tracks.values()
                if (current_frame.index - trajectory[-1].frame) <= memory_window
            ]

            sim_ranking = []

            # every (tail, detection) pair occurs exactly once in these
            # loops, so no bookkeeping of visited pairs is needed
            for det_a in active_tails:
                for det_b in detections:
                    dist = spatial_measure(det_a, det_b)

                    if dist > search_area:
                        continue

                    # the visual measures need both patches; they are
                    # missing when a frame image could not be read
                    if (det_a.patch is None or det_b.patch is None
                            or det_a.eq_gray is None or det_b.eq_gray is None):
                        continue

                    ssim_score = Tools.Similarity.Visual.ssim(det_a, det_b)
                    sift_score = Tools.Similarity.Visual.sift(det_a, det_b)
                    surf_score = Tools.Similarity.Visual.surf(det_a, det_b)

                    distance_score = (search_area-dist)/search_area
                    score = (
                        ssim_score*ssim_weight
                        + sift_score*sift_weight
                        + surf_score*surf_weight
                        + distance_score*distance_weight
                    )/score_divisor

                    if score >= similarity_threshold:
                        sim_ranking.append((score, det_a, det_b))

            sim_ranking.sort(key=lambda x: x[0], reverse=True)

            matched_detections = set()

            # greedy assignment: best pair first, each detection used once
            for score, det_a, det_b in sim_ranking:
                if det_a.index not in matched_detections and det_b.index not in matched_detections:
                    track_id = det_a.trajectory

                    det_b.trajectory = track_id
                    sequence._tracks[track_id].append(det_b)

                    matched_detections.add(det_a.index)
                    matched_detections.add(det_b.index)

            # walk the detections in their original order so that new track
            # ids are assigned deterministically (a set of objects has no
            # stable iteration order)
            for det in detections:
                if det.index in matched_detections:
                    continue

                det.trajectory = track_counter
                sequence._tracks[track_counter] = [det]
                track_counter += 1

        print("%s:%s" % (current_frame.index, track_counter), end=" ")

    log.info("Finished calculating trajectories.")

In [7]:
def output(sequence, data_output_path='MOT20/train/%s.txt' % "MOT20-01",
           video_output_path='output/%s.mp4' % "MOT20-01", fps=25.0):
    """Write the tracking result as a text file and an annotated video.

    For every frame that has detections, each detection with a trajectory id
    of at least 1 is written as one line
    ``frame,trajectory,x,y,w,h,1,1,confidence`` and drawn (box plus id) on
    the frame image. Annotated frames are appended to the video as soon as
    they are drawn instead of being collected in memory first.

    Args:
        sequence: object exposing ``_detections`` (frame id -> detections)
            and ``_image_infos`` (entries with ``frame_id`` and ``path``).
        data_output_path: destination of the text file.
        video_output_path: destination of the video file.
        fps: frame rate passed to the video writer.
    """
    # get some nice colors: 100 random (box colour, text colour) pairs

    colors = []
    offset = 50
    for _ in range(100):
        r = randint(0,255)
        g = randint(0,255)
        b = randint(0,255)

        if r < offset or g < offset or b < offset:
            # brighten for the text colour, staying inside the 0-255 range
            text_rgb = tuple(min(255, channel + offset) for channel in (r, g, b))
        else:
            text_rgb = (r-offset, g-offset, b-offset)

        colors.append(((r,g,b), text_rgb))

    # image files come from the sequence's own index rather than from a
    # hard-coded directory

    image_paths = {info.frame_id: info.path for info in (sequence._image_infos or [])}
    detections_by_frame = sequence._detections or {}

    # make sure the output directories exist

    for target in (data_output_path, video_output_path):
        target_dir = os.path.dirname(target)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    # NOTE(review): a comment here used to say "Be sure to use the lower
    # case" while the code passes the upper-case code below -- confirm which
    # one the installed OpenCV build accepts.
    fourcc = cv2.VideoWriter_fourcc(*'AVC1')
    video_writer = None

    # process frames, create images and output csv

    try:
        with open(data_output_path, "w") as data_out:
            for index in sorted(detections_by_frame.keys()):
                detections = detections_by_frame[index]

                image_path = image_paths.get(int(index))
                img = cv2.imread(image_path) if image_path is not None else None

                if img is None:
                    # keep writing the data lines, there is just no picture
                    log.debug("No readable image for frame %s, skipping it in the video." % index)

                for detection in detections:
                    if detection.trajectory < 1:
                        continue

                    if img is not None:
                        color_index = detection.trajectory % 100
                        box_color, text_color = colors[color_index]
                        top_left = (detection.x, detection.y)
                        bottom_right = (detection.x+detection.w, detection.y+detection.h)
                        thickness = 2

                        cv2.rectangle(img, top_left, bottom_right, box_color, thickness)

                        font = cv2.FONT_HERSHEY_SIMPLEX
                        anchor_point = (detection.x, detection.y+20)
                        scale = 0.75
                        line_type = cv2.LINE_AA

                        cv2.putText(img, str(detection.trajectory), anchor_point, font, scale, text_color, thickness, line_type)

                    data_out.write("%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(
                        index,
                        int(detection.trajectory),
                        int(detection.x),
                        int(detection.y),
                        int(detection.w),
                        int(detection.h),
                        1,1,
                        detection.confidence
                    ))

                if img is not None:
                    if video_writer is None:
                        # open the writer with the size of the first frame
                        height, width = img.shape[0], img.shape[1]
                        video_writer = cv2.VideoWriter(
                            video_output_path, fourcc, fps, (width, height))

                    video_writer.write(img)
    finally:
        # release the video file even when a frame fails half way through
        if video_writer is not None:
            video_writer.release()

In [8]:
def main(argv=None):
    """Run the whole pipeline on the default sequence.

    Builds a ``Sequence`` from the module-level path constants, runs
    ``track`` on it and then ``output``.

    Args:
        argv: currently ignored; argument parsing is not implemented yet, so
            the module-level defaults are always used.

    Returns:
        The tracked ``Sequence``.
    """
    tracked_sequence = Sequence(
        SEQUENCE_PATH,
        CONFIG_FILENAME,
        DETECTIONS_DIR,
        DETECTIONS_FILENAME,
    )

    track(tracked_sequence)
    output(tracked_sequence)

    return tracked_sequence

# let's hold on to this in case we need it for something
sequence = main()

2020-05-05 19:45:38,566 Loading sequence configuration.
2020-05-05 19:45:38,570 Loading image info.
2020-05-05 19:45:38,575 Loading detections.
2020-05-05 19:45:39,658 Calculating trajectories.
1:31 



2:31 3:31 4:31 5:31 6:31 7:31 8:32 9:32 10:32 11:32 12:33 13:33 14:33 15:33 16:35 17:35 18:36 19:36 20:36 21:36 22:36 23:36 24:36 25:36 26:36 27:36 28:36 29:37 30:37 31:37 32:38 33:38 34:38 35:38 36:38 37:38 38:38 39:39 40:40 41:40 42:40 43:40 44:40 45:40 46:40 47:42 48:43 49:43 50:43 51:43 52:43 53:43 54:44 55:44 56:44 57:44 58:44 59:44 60:45 61:45 62:46 63:46 64:46 65:46 66:47 67:47 68:47 69:47 70:47 71:47 72:47 73:47 74:47 75:47 76:47 77:48 78:48 79:51 80:52 81:53 82:55 83:55 84:56 85:56 86:57 87:58 88:59 89:59 90:60 91:62 92:62 93:62 94:62 95:62 96:62 97:62 98:62 99:62 100:62 101:62 102:62 103:62 104:62 105:62 106:62 107:63 108:63 109:63 110:63 111:63 112:64 113:65 114:65 115:65 116:65 117:65 118:69 119:69 120:69 121:70 122:70 123:70 124:71 125:71 126:71 127:71 128:71 129:71 130:71 131:71 132:72 133:73 134:73 135:74 136:74 137:74 138:74 139:74 140:74 141:74 142:74 143:74 144:75 145:75 146:75 147:75 148:75 149:77 150:77 151:77 152:78 153:78 154:79 155:79 156:80 157:80 158:80 159:80 

In [None]:
class Tests(unittest.TestCase):
    """Placeholder test case; the hooks and the test are still empty."""

    def setUp(self):
        # load testing config
        pass

    def tearDown(self):
        # was spelt "teadDown", a name unittest never calls
        # load regular config?
        pass

    def test_something_or_other(self):
        # let's just make sure we hit all execution paths
        pass

# Run the tests inside the notebook: argv=[''] gives unittest a placeholder
# argument list instead of the kernel's own command line, and exit=False
# keeps unittest.main from calling sys.exit() when it is done.
unittest.main(argv=[''], verbosity=3, exit=False)