### Part 1

Alpha-Beta filter

In [1]:
import json
import cv2 as cv
import numpy as np

def load_obj_each_frame(data_file):
    with open(data_file, 'r') as file:
        frame_dict = json.load(file)
    return frame_dict

def alpha_beta_filter(initial_position, initial_velocity, alpha, beta, observations, dt=1):
    estimated_position = initial_position
    estimated_velocity = initial_velocity
    estimates = []

    for observation in observations:
        # Prediction step
        predicted_position = estimated_position + estimated_velocity * dt
        predicted_velocity = estimated_velocity

        # Update step (if observation is available)
        if observation != [-1, -1]:
            residual = observation - predicted_position
            estimated_position = predicted_position + alpha * residual
            estimated_velocity = predicted_velocity + (beta * residual) / dt
        else:
            # use the prediction if observation is missing
            estimated_position = predicted_position
            estimated_velocity = predicted_velocity

        estimates.append([estimated_position, estimated_velocity])

    return estimates

def draw_target_object_center(video_file, obj_centers):
    count = 0
    cap = cv.VideoCapture(video_file)
    ok, image = cap.read()

    # Initialize Alpha-Beta filter parameters
    initial_position = np.array([313, 229])     # Initial guess for position
    initial_velocity = np.array([-0.47328952, -0.3911483])  # Initial guess for velocity
    alpha = 0.4     # Position update factor
    beta = 0.0005   # Velocity update factor
    
    filtered_estimates = alpha_beta_filter(initial_position, initial_velocity, alpha, beta, obj_centers)

    # Save in a JSON file
    estimated_positions = [x[0] for x in filtered_estimates]
    part_1_object_tracking = [[int(round(x)), int(round(y))] for x, y in estimated_positions]
    output_data = {"obj": part_1_object_tracking}
    # with open('part_1_object_tracking.json', 'w', encoding='utf-8') as file:
    #     json.dump(output_data, file, ensure_ascii=False, indent=None)
    # print("Successfully saved in part_1_object_tracking.json!")
    with open('part_1_object_tracking_with_ab.json', 'w', encoding='utf-8') as file:
        json.dump(output_data, file, ensure_ascii=False, indent=None)
    print("Successfully saved in part_1_object_tracking_with_ab.json!")

    # Visualize the smoothed track
    # vidwrite = cv.VideoWriter("part_1_demo.mp4", cv.VideoWriter_fourcc(*'MP4V'), 30, (700,500))
    vidwrite = cv.VideoWriter("part_1_demo_with_ab.mp4", cv.VideoWriter_fourcc(*'MP4V'), 30, (700,500))
    while ok:
        if count < len(filtered_estimates):
            pos, _ = filtered_estimates[count]
            pos_x, pos_y = pos
        else:
            break

        count += 1
        ###### !!! #######
        # Make sure the video is resized. Otherwise the coords in the data file won't work.
        image = cv.resize(image, (700, 500)) 
        ###### !!! #######
        # Draw the circle at the estimated position
        # if pos_x != -1 and pos_y != -1:
        #     image = cv.circle(image, (int(pos_x), int(pos_y)), 1, (0, 0, 255), 2)
        # Draw the Line
        for i in range(count):
            pos, _ = filtered_estimates[i]
            pos_x, pos_y = pos
            image = cv.circle(image, (int(pos_x), int(pos_y)), 1, (0, 0, 255), 2)
        vidwrite.write(image)
        
        ok, image = cap.read()

    vidwrite.release()

In [2]:
frame_dict = load_obj_each_frame("object_to_track.json")
video_file = "commonwealth.mp4"
draw_target_object_center(video_file,frame_dict['obj'])

Successfully saved in part_1_object_tracking.json!


OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Kalman filter

In [77]:
import json
import cv2 as cv
import numpy as np

def load_obj_each_frame(data_file):
    with open(data_file, 'r') as file:
        frame_dict = json.load(file)
    return frame_dict

def kalman_filter(initial_state, initial_covariance, transition_matrix, observation_matrix, process_noise, measurement_noise, observations):
    state_estimate = initial_state
    covariance_estimate = initial_covariance
    estimates = []

    for observation in observations:
        # Prediction step
        predicted_state = transition_matrix @ state_estimate
        predicted_covariance = transition_matrix @ covariance_estimate @ transition_matrix.T + process_noise

        # Update step
        if observation != [-1, -1]:
            observation = np.array(observation)
            innovation = observation - (observation_matrix @ predicted_state)
            innovation_covariance = observation_matrix @ predicted_covariance @ observation_matrix.T + measurement_noise

            kalman_gain = predicted_covariance @ observation_matrix.T @ np.linalg.inv(innovation_covariance)

            state_estimate = predicted_state + kalman_gain @ innovation
            covariance_estimate = (np.eye(len(initial_state)) - kalman_gain @ observation_matrix) @ predicted_covariance
        else:
            # skip this step if observation is missing
            state_estimate = predicted_state
            covariance_estimate = predicted_covariance

        estimates.append(state_estimate.tolist())

    return estimates

def draw_target_object_center(video_file, obj_centers):
    count = 0
    cap = cv.VideoCapture(video_file)
    ok, image = cap.read()

    # https://www.bzarg.com/p/how-a-kalman-filter-works-in-pictures/
    # Initialize Kalman filter parameters
    # The result from the alpha-beta filter looks good, and the car seems to be moving at a constant speed, so B is not necessary.
    initial_state = np.array([313, 229, -0.5, -0.4])   # X
    initial_covariance = np.eye(4)  # naive P
    transition_matrix = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])  # A, assume dt=1
    observation_matrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])     # H
    process_noise = np.eye(4) * (10**2)     # Q
    measurement_noise = np.eye(2) * (100**2)    # R
    
    filtered_estimates = kalman_filter(initial_state, initial_covariance, transition_matrix, observation_matrix, process_noise, measurement_noise, obj_centers)

    print([row[0] for row in filtered_estimates])
    print([row[1] for row in filtered_estimates])
    
    # Save in a JSON file
    estimated_positions = [[row[0], row[1]] for row in filtered_estimates]
    part_1_object_tracking = [[int(round(x)), int(round(y))] for x, y in estimated_positions]
    output_data = {"obj": part_1_object_tracking}
    # with open('part_1_object_tracking.json', 'w', encoding='utf-8') as file:
    #     json.dump(output_data, file, ensure_ascii=False, indent=None)
    # print("Successfully saved in part_1_object_tracking.json!")
    with open('part_1_object_tracking_with_kalman.json', 'w', encoding='utf-8') as file:
        json.dump(output_data, file, ensure_ascii=False, indent=None)
    print("Successfully saved in part_1_object_tracking_with_kalman.json!")

    # Visualize the smoothed track
    # vidwrite = cv.VideoWriter("part_1_demo.mp4", cv.VideoWriter_fourcc(*'MP4V'), 30, (700,500))
    vidwrite = cv.VideoWriter("part_1_demo_with_kalman.mp4", cv.VideoWriter_fourcc(*'MP4V'), 30, (700,500))
    while ok:
        if count < len(filtered_estimates):
            pos_x, pos_y, _, _ = filtered_estimates[count]
        else:
            break

        count += 1
        ###### !!! #######
        # Make sure the video is resized. Otherwise the coords in the data file won't work.
        image = cv.resize(image, (700, 500)) 
        ###### !!! #######
        # Draw the circle at the estimated position
        # if pos_x != -1 and pos_y != -1:
        #     image = cv.circle(image, (int(pos_x), int(pos_y)), 1, (0, 0, 255), 2)
        # Draw the Line
        for i in range(count):
            pos_x, pos_y, _, _ = filtered_estimates[i]
            image = cv.circle(image, (int(pos_x), int(pos_y)), 1, (0, 0, 255), 2)
        vidwrite.write(image)
        
        ok, image = cap.read()

    vidwrite.release()

In [78]:
frame_dict = load_obj_each_frame("object_to_track.json")
video_file = "commonwealth.mp4"
draw_target_object_center(video_file,frame_dict['obj'])

[312.5, 312.0, 311.463223510059, 311.96412652290616, 311.79353195600083, 311.6229373890955, 309.41496085075187, 307.623700496864, 306.75715437911293, 305.8906082613619, 305.0240621436108, 304.15751602585976, 303.2909699081087, 302.42442379035765, 301.5578776726066, 300.69133155485554, 299.8247854371045, 298.9582393193534, 298.09169320160237, 297.2251470838513, 296.35860096610025, 295.4920548483492, 294.62550873059814, 293.7589626128471, 292.89241649509603, 292.025870377345, 287.09905222555494, 286.5281260412982, 285.69134588651025, 285.1948165750661, 284.2867007793066, 284.2091833807781, 283.71629466612274, 283.0428735197883, 282.3694523734539, 281.6960312271195, 281.02261008078506, 280.34918893445064, 279.6757677881162, 278.1555765940647, 276.64902077759797, 275.7664218123676, 274.8838228471372, 274.60027535791954, 273.8185863982694, 272.4609897275252, 271.57847482677687, 270.69595992602854, 269.8134450252802, 268.93093012453187, 268.04841522378354, 267.1659003230352, 266.283385422286

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
