In [19]:
from configparser import ConfigParser
import math
import os
from pprint import pprint

import cv2
import numpy as np
import pandas as pd

In [23]:
## constants
############

# Location of the INI configuration file, given relative to the current
# working directory; it is parsed with ConfigParser in the config cell.
CONFIG_PATH = './conf/config.ini'

In [24]:
## config
#########

# Parse the INI file at CONFIG_PATH.
#
# read_file() on an explicitly opened handle is used instead of read():
# read() silently skips files it cannot open, so a missing or mistyped path
# would only surface later as a KeyError on the section lookups below.
# open() raises FileNotFoundError naming the offending path instead.
config = ConfigParser()

with open(CONFIG_PATH) as config_file:
    config.read_file(config_file)

# Plain-dict snapshots of the two sections used by the rest of the notebook.
# ConfigParser yields every value as str, so numeric options must be
# converted (int()/float()) at the point of use.
sequence_config = dict(config['Sequence'])
tracking_config = dict(config['Tracking'])

In [None]:
## input
########

# pos       name                    description
# 1      frame number           frame in which the object is present
# 2      identity number        trajectory id (-1 default for no track)
# 3      bounding box x         x value from top left of bounding box
# 4      bounding box y         y value from top left of bounding box
# 5      bounding box width     width of bounding box in pixels
# 6      bounding box height    height of bounding box in pixels
# 7      confidence score      class detection confidence

In [None]:
## output
#########

# pos       name                    description
# 1      frame number           frame in which the object is present
# 2      identity number        trajectory id (-1 default for no track)
# 3      bounding box x         x value from top left of bounding box
# 4      bounding box y         y value from top left of bounding box
# 5      bounding box width     width of bounding box in pixels
# 6      bounding box height    height of bounding box in pixels
# 7      confidence score*      class detection confidence (gt: 1 or 0)
# 8      class*                 type of class (1 for pedestrian)
# 9      visibility*            percent visible (percent occluded = 1-visibility)

#        *no need to output these values; they will be ignored by the evaluator

In [3]:
class Point(object):
    """A 2-D point exposing its coordinates as the attributes ``x`` and ``y``."""

    def __init__(self, x, y):
        # Both coordinates are stored exactly as passed in; no conversion
        # or validation takes place.
        self.x, self.y = x, y

In [None]:
class Keypoints(object):
    """Placeholder type; instances currently carry no state."""

    def __init__(self):
        """Create an instance without setting any attributes."""
        return None

In [None]:
class Distance(object):
    """Static distance / similarity measures between detections.

    The class is used purely as a namespace: every member is a static method.
    """

    @staticmethod
    def _as_point(obj):
        """Return the point representing ``obj``: a Box's centroid, or the Point itself."""
        if isinstance(obj, Box):
            return obj.centroid
        if isinstance(obj, Point):
            return obj
        raise TypeError("expected a Box or a Point, got %s" % type(obj).__name__)

    @staticmethod
    def _match_descriptors(descriptors1, descriptors2):
        """Return cross-checked brute-force (L1) matches, sorted by ascending distance.

        An empty list is returned when either descriptor set is ``None`` or
        empty, so callers never hand such input to the matcher.
        """
        if descriptors1 is None or descriptors2 is None:
            return []
        if len(descriptors1) == 0 or len(descriptors2) == 0:
            return []

        # brute force feature matching using manhattan distance
        bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)
        matches = bf.match(descriptors1, descriptors2)
        return sorted(matches, key=lambda match: match.distance)

    @staticmethod
    def _match_ratio(matches, descriptors1, descriptors2):
        """Return ``len(matches) / (len(descriptors1) + len(descriptors2))``, or 0 if there are no descriptors."""
        count1 = 0 if descriptors1 is None else len(descriptors1)
        count2 = 0 if descriptors2 is None else len(descriptors2)
        total = count1 + count2
        if total == 0:
            return 0

        # TODO do we want to cutoff at a threshold?
        # NOTE(review): the denominator counts matched descriptors twice, so
        # this is not a strict intersection-over-union — confirm that this
        # normalisation is the intended one.
        return len(matches) / total

    @staticmethod
    def _crop(img, box):
        """Cut the region covered by ``box`` (top-left x/y, size w/h) out of ``img``."""
        # Clamp at 0: a negative start index would make numpy count from the
        # opposite edge of the image instead of cropping at the border.
        start_x = max(int(box.x), 0)
        start_y = max(int(box.y), 0)
        end_x = max(int(box.x + box.w), 0)
        end_y = max(int(box.y + box.h), 0)
        return img[start_y:end_y, start_x:end_x]

    @staticmethod
    def norm_l2(obj1, obj2):
        """Euclidean distance between two objects.

        Args:
            obj1: a Box (its centroid is used) or a Point.
            obj2: a Box (its centroid is used) or a Point.

        Returns:
            The straight-line distance between the two points.

        Raises:
            TypeError: if either argument is neither a Box nor a Point.
        """
        p1 = Distance._as_point(obj1)
        p2 = Distance._as_point(obj2)
        return math.hypot(p2.x - p1.x, p2.y - p1.y)

    @staticmethod
    def jaccard(obj1, obj2):
        """Overlap score for two boxes or for two descriptor arrays.

        Args:
            obj1: a Box, or a numpy array of feature descriptors.
            obj2: an object of the same kind as ``obj1``.

        Returns:
            For two boxes: intersection area divided by union area (0 when
            they do not overlap). For two descriptor arrays: the number of
            cross-checked matches divided by the total number of descriptors.

        Raises:
            TypeError: if the arguments are not two Boxes or two numpy arrays.
        """
        if isinstance(obj1, Box) and isinstance(obj2, Box):
            box1, box2 = obj1, obj2

            w_intersection = min(box1.x + box1.w, box2.x + box2.w) - max(box1.x, box2.x)
            h_intersection = min(box1.y + box1.h, box2.y + box2.h) - max(box1.y, box2.y)
            if w_intersection <= 0 or h_intersection <= 0:  # No overlap
                return 0
            intersection = w_intersection * h_intersection
            # Union = total area of both boxes minus the doubly counted overlap
            union = box1.w * box1.h + box2.w * box2.h - intersection
            return intersection / union

        if isinstance(obj1, np.ndarray) and isinstance(obj2, np.ndarray):
            descriptors1, descriptors2 = obj1, obj2
            matches = Distance._match_descriptors(descriptors1, descriptors2)
            return Distance._match_ratio(matches, descriptors1, descriptors2)

        raise TypeError(
            "jaccard expects two Box objects or two numpy arrays, got %s and %s"
            % (type(obj1).__name__, type(obj2).__name__))

    @staticmethod
    def sift(box1, box2, img_path, frame_a_id, frame_b_id, new_w=tracking_config['scaled_width'], new_h=tracking_config['scaled_height'], debug=False):
        """Appearance similarity of two detections based on SIFT feature matches.

        Args:
            box1: Box of the first object, located in frame ``frame_a_id``.
            box2: Box of the second object, located in frame ``frame_b_id``.
            img_path: directory holding the frames as ``%06d.jpg`` files.
            frame_a_id: frame number of the first image.
            frame_b_id: frame number of the second image.
            new_w: width both crops are resized to; may be a numeric string
                as delivered by the config file.
            new_h: height both crops are resized to; same rules as ``new_w``.
            debug: when True, show the resized crops in blocking OpenCV
                windows and pretty-print the matches and keypoints.

        Returns:
            Number of cross-checked matches divided by the total number of
            descriptors of both crops; 0 when a crop is empty or no
            descriptors are found.

        Raises:
            FileNotFoundError: if one of the frame images cannot be read.
        """
        # config values arrive as strings; cv2.resize needs integers
        new_w = int(float(new_w))
        new_h = int(float(new_h))

        # get filepaths

        img1_path = os.path.join(img_path, "%06d.jpg" % frame_a_id)
        img2_path = os.path.join(img_path, "%06d.jpg" % frame_b_id)

        # read image files as grayscale; imread returns None instead of raising

        img1 = cv2.imread(img1_path, cv2.IMREAD_GRAYSCALE)
        if img1 is None:
            raise FileNotFoundError("could not read image: %s" % img1_path)
        img2 = cv2.imread(img2_path, cv2.IMREAD_GRAYSCALE)
        if img2 is None:
            raise FileNotFoundError("could not read image: %s" % img2_path)

        # cut each object out of its own image

        crop1 = Distance._crop(img1, box1)
        crop2 = Distance._crop(img2, box2)
        if crop1.size == 0 or crop2.size == 0:
            # nothing of the box lies inside the image, so nothing can match
            return 0

        # TODO do I even need to scale? SIFT is Scale Invariant

        scaled_img1 = cv2.resize(crop1, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
        scaled_img2 = cv2.resize(crop2, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

        if debug:
            cv2.imshow("img1", scaled_img1)
            cv2.imshow("img2", scaled_img2)

            cv2.waitKey(0)
            cv2.destroyAllWindows()

        # find keypoints and descriptors

        # https://docs.opencv.org/4.3.0/da/df5/tutorial_py_sift_intro.html

        # Prefer cv2.SIFT_create when the installed build exposes it and fall
        # back to the contrib location otherwise.
        create_sift = getattr(cv2, "SIFT_create", None)
        if create_sift is None:
            create_sift = cv2.xfeatures2d.SIFT_create
        detector = create_sift()

        keypoints_1, descriptors_1 = detector.detectAndCompute(scaled_img1, None)
        keypoints_2, descriptors_2 = detector.detectAndCompute(scaled_img2, None)

        matches = Distance._match_descriptors(descriptors_1, descriptors_2)

        if debug:
            pprint(matches)
            pprint(keypoints_1)
            pprint(keypoints_2)

        return Distance._match_ratio(matches, descriptors_1, descriptors_2)


In [4]:
class Box(object):
    """Axis-aligned bounding box of one detection.

    ``(x, y)`` is the top-left corner and ``(w, h)`` the size, so the box
    spans ``x .. x + w`` horizontally and ``y .. y + h`` vertically.

    Attributes:
        x, y: coordinates of the top-left corner.
        w, h: width and height.
        frame: frame the box belongs to, or None.
        index: optional index value stored with the box, or None.
    """

    def __init__(self, x, y, w, h, frame=None, index=None):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.frame = frame
        self.index = index

    @property
    def coords(self):
        """``(top_left, bottom_right)`` corners of the box as two Points."""
        return (Point(self.x, self.y), Point(self.x + self.w, self.y + self.h))

    @property
    def centroid(self):
        """Centre of the box as a Point."""
        return Point(self.x + (self.w * 0.5), self.y + (self.h * 0.5))

    # TODO: move the similarity/distance calculation out of Box, doesn't belong here

    def jaccard(self, box):
        """Intersection-over-union of this box and ``box``.

        Args:
            box: the other Box.

        Returns:
            Intersection area divided by union area; 0 when the boxes do not
            overlap (touching edges count as no overlap).
        """
        w_intersection = min(self.x + self.w, box.x + box.w) - max(self.x, box.x)
        h_intersection = min(self.y + self.h, box.y + box.h) - max(self.y, box.y)
        if w_intersection <= 0 or h_intersection <= 0:  # No overlap
            return 0
        intersection = w_intersection * h_intersection
        # Union = total area of both boxes minus the doubly counted overlap
        union = self.w * self.h + box.w * box.h - intersection
        return intersection / union

    def copy(self, offset_x=0, offset_y=0):
        """Return a new Box of the same size, frame and index, shifted by the given offsets."""
        return Box(
            self.x + offset_x,
            self.y + offset_y,
            self.w, self.h,
            self.frame, self.index)

In [6]:
class Track(object):
    """A trajectory: the ordered boxes of one object across frames.

    Attributes:
        id: unique identifier taken from the class-wide ``counter``.
        boxes: list of Box objects, oldest first.
        is_active: flag initialised to True.
    """

    # next id to hand out; shared by all tracks
    counter = 1

    def __init__(self, boxes):
        """Start a track from a single Box or from a list of boxes.

        Raises:
            TypeError: if ``boxes`` is neither a list nor a Box.
        """
        # Validate before consuming an id so a failed construction leaves
        # the counter untouched.
        if isinstance(boxes, list):
            # copy so later add() calls do not modify the caller's list
            initial_boxes = list(boxes)
        elif isinstance(boxes, Box):
            initial_boxes = [boxes]
        else:
            raise TypeError(
                "boxes must be a Box or a list of Box, got %s" % type(boxes).__name__)

        self.id = Track.counter
        Track.counter += 1

        self.boxes = initial_boxes
        self.is_active = True

    def add(self, box):
        """Append ``box`` as the newest position of the track."""
        self.boxes.append(box)

    @staticmethod
    def angle(box1, box2):
        """Direction of travel from ``box1`` to ``box2``, in radians.

        The angle is ``atan2(dy, dx)`` of the displacement between the two
        centroids.
        """
        p1 = box1.centroid
        p2 = box2.centroid

        return math.atan2(p2.y - p1.y, p2.x - p1.x)

    @staticmethod
    def distance(box1, box2):
        """Euclidean distance between the centroids of ``box1`` and ``box2``."""
        p1 = box1.centroid
        p2 = box2.centroid

        return math.hypot(p2.x - p1.x, p2.y - p1.y)

    def predict(self, frame):
        """Predict the box of this track in ``frame``.

        Args:
            frame: frame number to predict for; it must directly follow the
                frame of the newest box.

        Returns:
            A new Box for ``frame``, or None when the track is empty or
            ``frame`` is not the immediate successor of the newest box's
            frame. With a single box the position is kept unchanged; with
            more, the newest box is shifted by the displacement between the
            two most recent boxes.
        """
        if not self.boxes:
            return None

        current = self.boxes[-1]

        if current.frame is None or current.frame + 1 != frame:
            return None

        if len(self.boxes) == 1:
            predicted_location = current.copy()
        else:
            previous = self.boxes[-2]

            angle = Track.angle(previous, current)
            distance = Track.distance(previous, current)

            # polar -> cartesian: x follows the cosine, y the sine of an
            # angle obtained from atan2(dy, dx)
            offset_x = distance * math.cos(angle)
            offset_y = distance * math.sin(angle)

            predicted_location = current.copy(offset_x, offset_y)

        predicted_location.frame += 1

        return predicted_location

In [8]:
class Frame(object):
    """The boxes detected in a single frame.

    Attributes:
        id: the frame number.
    """

    def __init__(self, number, boxes=None):
        """Create frame ``number``, optionally pre-filled with ``boxes``."""
        self.id = number

        # a fresh list per instance avoids sharing one default between frames
        if boxes is None:
            boxes = []

        self._boxes = boxes

    def add(self, box):
        """Add one Box, or every Box of a list/tuple, to the frame.

        Raises:
            TypeError: if ``box`` is neither a Box nor a list/tuple.
        """
        if isinstance(box, Box):
            self._boxes.append(box)
        elif isinstance(box, (list, tuple)):
            self._boxes.extend(box)
        else:
            # dropping a detection silently would be hard to notice later
            raise TypeError(
                "expected a Box or a list of Box, got %s" % type(box).__name__)

    @property
    def objects(self):
        """The list of boxes stored in this frame."""
        return self._boxes

In [None]:
class Detections(object):
    """Detections read from a CSV file, grouped per frame.

    Attributes:
        data_path: path of the detections CSV file.
        mean_width: mean box width of the loaded detections, or None.
        mean_height: mean box height of the loaded detections, or None.
    """

    def __init__(self, data_path=tracking_config['detections_path']):
        self.data_path = data_path
        self._df = None      # detections that passed the confidence filter
        self._frames = {}    # frame number -> rows of that frame
        self.mean_width = None
        self.mean_height = None

    def load(self, conf_threshold=tracking_config['confidence_threshold']):
        """Read the CSV file and keep detections with sufficient confidence.

        The file is read without a header row; its columns are taken to be
        frame, trajectory, x, y, w, h, confidence. Rows whose confidence is
        below ``conf_threshold`` are discarded before the mean box size and
        the per-frame tables are computed. Every frame number from the first
        to the last remaining frame (inclusive) gets an entry, which is an
        empty table when no detection survived in that frame.

        Args:
            conf_threshold: minimum confidence to keep a detection; may be a
                numeric string as delivered by the config file.
        """
        header_list = ['frame', 'trajectory', 'x', 'y', 'w', 'h', 'confidence']
        dtype = {'frame': int, 'trajectory': int, 'x': float, 'y': float,
                 'w': float, 'h': float, 'confidence': float}

        # config values arrive as strings; compare numerically
        conf_threshold = float(conf_threshold)

        df = pd.read_csv(self.data_path, names=header_list, dtype=dtype)
        df = df[df["confidence"] >= conf_threshold]
        self._df = df

        # start from a clean slate so calling load() again leaves no stale frames
        self._frames = {}

        if df.empty:
            self.mean_width = None
            self.mean_height = None
            return

        self.mean_width = df["w"].mean()
        self.mean_height = df["h"].mean()

        # get the numbers of the first and last frames

        start = int(df['frame'].min())
        end = int(df['frame'].max())

        # split once instead of scanning the whole table for every frame
        rows_by_frame = {frame_id: rows for frame_id, rows in df.groupby('frame')}
        no_rows = df.iloc[0:0]

        for i in range(start, end + 1):  # + 1: the last frame is included
            self._frames[i] = rows_by_frame.get(i, no_rows)

In [9]:
class Trajectories(object):
    """Collection of tracks, keyed by track id.

    Attributes:
        df: the table handed in at construction time, or None.
    """

    def __init__(self, df=None):
        self.df = df
        # track id -> Track; a dict because add() stores tracks by their id
        self._tracks = {}

    def add(self, o):
        """Register a track.

        Args:
            o: a Track, stored under its id, or a Box, which is wrapped in a
                new Track first. Objects of any other type are ignored.
        """
        if isinstance(o, Box):
            t = Track(o)
            self._tracks[t.id] = t
        elif isinstance(o, Track):
            self._tracks[o.id] = o

    def calculate(self, start=0, end=None):
        """Placeholder: not implemented yet, currently does nothing."""
        if len(self._tracks) == 0:
            pass

In [None]:
class Tracking(object):
    """Entry point that wires the loaded detections to the trajectory store.

    Attributes:
        detections: a Detections instance that has already been loaded.
        trajectories: a Trajectories instance built on the detections table.
    """

    def __init__(self):
        self.detections = Detections()
        # Load first: Trajectories takes the detections table as its
        # constructor argument.
        self.detections.load()
        self.trajectories = Trajectories(self.detections._df)