In [2]:
import cv2
import numpy as np
import os
from scipy import ndimage

def compute_likelihood_map(frame, object_hypothesis, object_model):
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    object_hist = object_model['histogram']
    likelihood_map = cv2.calcBackProject([hsv], [0, 1], object_hist, [0, 180, 0, 256], 1)
    likelihood_map = cv2.normalize(likelihood_map, None, 0, 1, cv2.NORM_MINMAX)
    return likelihood_map

def compute_cumulative_histograms(likelihood_map, object_region, surrounding_region):
    h_o = np.histogram(likelihood_map[tuple(object_region)], bins=256, range=(0, 1))[0]
    h_s = np.histogram(likelihood_map[tuple(surrounding_region)], bins=256, range=(0, 1))[0]
    c_o = np.cumsum(h_o) / np.sum(h_o)
    c_s = np.cumsum(h_s) / np.sum(h_s)
    return c_o, c_s

def adaptive_threshold(c_o, c_s):
    
    valid_thresholds = np.where(c_o + c_s >= 1)[0]
    costs = 2 * c_o[valid_thresholds] - np.append(c_o[valid_thresholds[1:]], [1]) + c_s[valid_thresholds]
    tau_star = valid_thresholds[np.argmin(costs)] / 255.0
    return tau_star

def estimate_scale(likelihood_map, object_hypothesis, tau):
    segmentation = likelihood_map > tau
    safe_region = np.zeros_like(segmentation)
    top, left, bottom, right = object_hypothesis
    safe_region[top:bottom, left:right] = True
    safe_region[top+1:bottom-1, left+1:right-1] = False  # Inner 80% (approximated)
    
    labeled, num_features = ndimage.label(segmentation)
    object_mask = np.zeros_like(segmentation)
    
    for label in range(1, num_features + 1):
        component = labeled == label
        if np.any(component & safe_region):
            avg_likelihood = np.mean(likelihood_map[component])
            if avg_likelihood > tau:
                object_mask |= component
    
    if np.sum(object_mask) == 0:
        return object_hypothesis
    
    rows, cols = np.where(object_mask)
    top, bottom = np.min(rows), np.max(rows)
    left, right = np.min(cols), np.max(cols)
    return np.array([top, left, bottom, right])

def generate_hypotheses(initial_hypothesis, scale_factors, translations, image_shape):
 
    hypotheses = []
    top, left, bottom, right = initial_hypothesis
    height = bottom - top
    width = right - left
    
    for scale in scale_factors:
        # Scale the bounding box
        new_height = int(height * scale)
        new_width = int(width * scale)
        
        for dy, dx in translations:
            # Translate the bounding box
            new_top = max(0, top + dy)
            new_left = max(0, left + dx)
            new_bottom = min(image_shape[0], new_top + new_height)
            new_right = min(image_shape[1], new_left + new_width)
            
            # Add the new hypothesis if it's within the image bounds
            hypotheses.append((new_top, new_left, new_bottom, new_right))
    
    return hypotheses
def compute_visual_score(likelihood_map, object_hypothesis):
    top, left, bottom, right = object_hypothesis
    return np.sum(likelihood_map[top:bottom, left:right])
def compute_spatial_score(object_hypothesis, previous_center, sigma):

    top, left, bottom, right = object_hypothesis
    rows, cols = np.mgrid[top:bottom, left:right]
    distances = (rows - previous_center[0])**2 + (cols - previous_center[1])**2
    spatial_score = np.sum(np.exp(-distances / (2 * sigma**2)))
    return spatial_score
def find_best_hypothesis(likelihood_map, hypotheses, previous_center, sigma=10):
    best_hypothesis = None
    best_score = -np.inf

    for hypothesis in hypotheses:
        visual_score = compute_visual_score(likelihood_map, hypothesis)
        spatial_score = compute_spatial_score(hypothesis, previous_center, sigma)
        total_score = visual_score * spatial_score

        if total_score > best_score:
            best_score = total_score
            best_hypothesis = hypothesis

    return best_hypothesis
def update_object_hypothesis(prev_hypothesis, scale_estimate, lambda_s,likelihood_map):
    # Example usage
    # initial_hypothesis = (100, 150, 200, 250)  # Example bounding box: (top, left, bottom, right)
    scale_factors = [0.2, 1.0, .2]  # Scale down to 80%, keep original, scale up to 120%
    translations = [(0, 0), (-5, -5), (5, 5), (-10, 0), (10, 0)]  # Various translations

    # Assuming image shape is (height, width) 
    image_shape = (360, 640)  # Example image size
    hypotheses = generate_hypotheses(prev_hypothesis, scale_factors, translations, image_shape)
    print("hypotheses")
    print(type( hypotheses))
    print(scale_estimate.shape)
    best_hypothesis = find_best_hypothesis(likelihood_map, hypotheses, prev_hypothesis, sigma=10)
    print("best_hypothesis",np.array( best_hypothesis).shape)
    return np.array( best_hypothesis)

def update_object_model(frame, object_hypothesis):
    top, left, bottom, right = object_hypothesis
    object_region = frame[top:bottom, left:right]
    hsv = cv2.cvtColor(object_region, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
    cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
    return {'histogram': hist}

def track_object(frames, initial_hypothesis, lambda_s=0.9, max_scale_change=0.9):
    object_hypothesis = initial_hypothesis
    object_model = None
    
    for frame in frames:
        if object_model is None:
            object_model = update_object_model(frame, object_hypothesis)
        
        likelihood_map = compute_likelihood_map(frame, object_hypothesis, object_model)
        print(likelihood_map.shape,frame.shape)


        top, left, bottom, right = object_hypothesis
        object_region = (slice(top, bottom), slice(left, right))
        surrounding_region = (slice(max(0, top-20), min(frame.shape[0], bottom+20)),
                              slice(max(0, left-20), min(frame.shape[1], right+20)))
        
        c_o, c_s = compute_cumulative_histograms(likelihood_map, object_region, surrounding_region)
        tau_star = adaptive_threshold(c_o, c_s)
        
        scale_estimate = estimate_scale(likelihood_map, object_hypothesis, tau_star)
        
        prev_area = np.prod(object_hypothesis[2:] - object_hypothesis[:2])
        new_area = np.prod(scale_estimate[2:] - scale_estimate[:2])
        
        scale_change = np.abs(new_area / prev_area - 1)
        print(object_hypothesis, scale_estimate,scale_change)
        print("scale_change",scale_change)
        # object_hypothesis = scale_estimate
        # if scale_change <= max_scale_change:
        object_hypothesis = update_object_hypothesis(object_hypothesis, scale_estimate, lambda_s,likelihood_map)
            # object_model = update_object_model(frame, object_hypothesis)
            # object_hypothesis = scale_estimate
        yield frame, object_hypothesis, likelihood_map

def main():
    sequence_folder = 'sequence/'
    frame_files = sorted([f for f in os.listdir(sequence_folder) if f.endswith('.jpg') or f.endswith('.png')])
    
    # Given object location
    initial_bbox = (246, 208, 23, 23)  # Converted to integer pixel values for bounding box
    
    initial_hypothesis = np.array([initial_bbox[1], initial_bbox[0], 
                                   initial_bbox[1] + initial_bbox[3], 
                                   initial_bbox[0] + initial_bbox[2]])
    
    frames = (cv2.imread(os.path.join(sequence_folder, f)) for f in frame_files)
    # for frame in frames:
    #     cv2.imshow('Object Tracking', frame)
    #     cv2.waitKey(0)
    
    for frame, object_hypothesis, likelihood_map in track_object(frames, initial_hypothesis):
        vis_frame = frame.copy()
        top, left, bottom, right = object_hypothesis
        cv2.rectangle(vis_frame, (left, top), (right, bottom), (0, 255, 0), 2)
        
        color_map = cv2.applyColorMap((likelihood_map * 255).astype(np.uint8), cv2.COLORMAP_JET)
        vis_likelihood = cv2.addWeighted(frame, 0.1, color_map, 0.9, 0)
        
        vis = np.hstack((vis_frame, vis_likelihood))
        cv2.imshow('Object Tracking', vis)
        # cv2.waitKey(0)
        key = cv2.waitKey(0) & 0xFF
        if key == 27:  # ESC key
            break
    
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()


(360, 640) (360, 640, 3)
[208 246 231 269] [208 246 218 256] 0.8109640831758034
scale_change 0.8109640831758034
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[203 241 226 264] [207 263 212 263] 1.0
scale_change 1.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[193 241 216 264] [193 241 216 264] 0.0
scale_change 0.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[183 241 206 264] [183 241 206 264] 0.0
scale_change 0.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[178 236 201 259] [178 236 201 259] 0.0
scale_change 0.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[178 236 201 259] [178 236 201 259] 0.0
scale_change 0.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[178 236 201 259] [178 236 201 259] 0.0
scale_change 0.0
hypotheses
<class 'list'>
(4,)
best_hypothesis (4,)
(360, 640) (360, 640, 3)
[173 23

  c_o = np.cumsum(h_o) / np.sum(h_o)


ValueError: attempt to get argmin of an empty sequence