In [1]:
import torch as pt
import pandas as pd
import numpy as np
import pyproj as pp
import tqdm
import gc
import optuna
import atd2025

In [2]:
# Read dataset in
data = pd.read_csv("https://www.maserv.work/ATD/model2/ucf_atd_model/datasets/dataset1_truth.csv")
data["time"] = pd.to_datetime(data["time"])

In [3]:
wgs84 = pp.CRS.from_epsg(4326)
utm = pp.CRS.from_epsg(32616)
to_utm = pp.Transformer.from_crs(wgs84, utm)

rad_earth = 6371000

x, y = to_utm.transform(data["lat"].to_numpy(), data["lon"].to_numpy())

x = x.astype("float32")
y = y.astype("float32")

# Read in rest of data
t = data["time"]
t = (t - t[0]).astype("timedelta64[s]").astype("int").astype("float32")
speed = data["speed"].to_numpy().astype("float32")
course = data["course"].to_numpy().astype("float32") * np.pi / 180

x = pt.tensor(x)
y = pt.tensor(y)
t = pt.tensor(t)
dx = pt.tensor(np.sin(course) * speed)
dy = pt.tensor(np.cos(course) * speed)

gc.collect()

20

In [5]:
def objective(trial: optuna.Trial):
    max_size = data.shape[0]
    state_size = 4

    # Keeps all tracks we have previously found
    mus = pt.zeros((max_size, state_size, 1))
    covs = pt.zeros((max_size, state_size, state_size))
    last_times = pt.zeros((max_size))
    track_id = pt.zeros((max_size), dtype=pt.long)

    # Used to keep track of where we are when filtering
    idxs = pt.arange(max_size)

    noise_x = trial.suggest_float("noise_x", 1e-3, 500)
    noise_y = trial.suggest_float("noise_y", 1e-3, 500)
    noise_dx = trial.suggest_float("noise_dx", 1e-3, 5)
    noise_dy = trial.suggest_float("noise_dy", 1e-3, 5)
    
    # Noise propogation constants
    qx = trial.suggest_float("qx", 1e-5, 1)
    qy = trial.suggest_float("qy", 1e-5, 1)

    # Man dist cutoff
    cut_dist = trial.suggest_float("cut_dist", 1e-5, 1000)

    meas_noise = pt.diag(pt.tensor([noise_x, noise_dx, noise_y, noise_dy], dtype=pt.float32))
    # H = pt.zeros((state_size - 1, state_size))
    H = pt.eye(state_size)

    # Prior state covariance matrix
    prior_cov = (pt.eye(state_size) * 0.1)
    prior_cov[:state_size, :state_size] += meas_noise

    # Generate state transition matrix for CTRV
    def F_gen(dt, filter):
        F = pt.zeros((pt.sum(filter), state_size, state_size))
        
        # Set constants
        F[:, 0, 0] = 1
        F[:, 1, 1] = 1
        F[:, 2, 2] = 1
        F[:, 3, 3] = 1

        F[:, 0, 1] = dt
        F[:, 2, 3] = dt

        return F

    # Generate state noise matrix for CTRV
    def Q_gen(dt, filter):
        Q = pt.zeros((pt.sum(filter), state_size, state_size))
        dt3 = (dt ** 3) / 3
        dt2 = (dt ** 2) / 2
        
        Q[:, 0, 0] = qx * dt3
        Q[:, 0, 1] = qx * dt2
        Q[:, 1, 0] = qx * dt2
        Q[:, 1, 1] = qx * dt

        Q[:, 2, 2] = qy * dt3
        Q[:, 2, 3] = qy * dt2
        Q[:, 3, 2] = qy * dt2
        Q[:, 3, 3] = qy * dt

        return Q

    # Add new track to list
    def add_track(next_track_id, i):
        # Create state
        ti = t[i]
        xi = x[i]
        yi = y[i]
        dxi = dx[i]
        dyi = dy[i]

        # Update
        track_id[i] = next_track_id
        mus[next_track_id] = pt.tensor([xi, dxi, yi, dyi])[:, None]
        covs[next_track_id] = prior_cov
        last_times[next_track_id] = ti
        
    # Update an old track in the list
    def update_track(best_id, mu, cov, S, y_resid, time, i):
        # Calculate Kalman gain / intermediate value
        K = (pt.linalg.solve(S, H) @ cov).T
        imk = pt.eye(state_size) - (K @ H)
        
        # Calculate posterior mu/cov
        post_mu = mu + (K @ y_resid) 
        post_cov = (imk @ cov @ imk.T) + (K @ meas_noise @ K.T)

        # Update
        track_id[i] = best_id
        mus[best_id] = post_mu
        covs[best_id] = post_cov
        last_times[best_id] = time

    # Actual algorithm
    next_track_id = 0
    for i, time in tqdm.tqdm(enumerate(t), total=t.shape[0]):
        if next_track_id == 0:
            add_track(next_track_id, i)
            next_track_id += 1
            continue

        
        filter = (time - last_times[:next_track_id]) >= 2
        
        if not pt.any(filter):
            add_track(next_track_id, i)
            next_track_id += 1
            continue
        else:
            # Check best manhalobis distance, update if below threshold
            prev_times = last_times[:next_track_id][filter]
            dt = time - prev_times

            filtered_idxs = idxs[:next_track_id][filter]
            est_cov = covs[:next_track_id][filter]
            est_mu = mus[:next_track_id][filter]

            # Get F and Q
            F = F_gen(dt, filter)
            Q = Q_gen(dt, filter)

            # Get prior guess of state
            est_mu = F @ est_mu
            est_cov = (F @ est_cov @ F.mT) + Q

            # Get prior guess of measurement
            est_mu_y = H @ est_mu
            est_cov_y = (H @ est_cov @ H.T) + meas_noise

            # Get man dist between measurement and guess for measurement
            measurement = pt.tensor([x[i], dx[i], y[i], dy[i]])[:, None]
            measurement_resid = measurement - est_mu_y
            man_dist = (measurement_resid.mT @ pt.linalg.solve(est_cov_y, measurement_resid)).squeeze()

            best_man_dist_idx = pt.argmin(man_dist)
            best_man_dist = man_dist[best_man_dist_idx]
            best_track_id = track_id[filtered_idxs[best_man_dist_idx]]
            if best_man_dist <= cut_dist:
                # Update track
                mu = est_mu[best_man_dist_idx]
                cov = est_cov[best_man_dist_idx]
                S = est_cov_y[best_man_dist_idx]
                y_resid = measurement_resid[best_man_dist_idx]
                update_track(best_track_id, mu, cov, S, y_resid, time, i)
            else:
                # New track
                add_track(next_track_id, i)
                next_track_id += 1
                continue
    
    tids = track_id.numpy()
    pids = data["point_id"]

    pd.DataFrame({"point_id": pids, "track_id": tids}).to_csv("grademe.csv")
    gc.collect()
    return atd2025.accuracy.evaluate_predictions("grademe.csv", "https://www.maserv.work/ATD/model2/ucf_atd_model/datasets/dataset1_truth.csv")

In [6]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

[I 2025-10-03 21:38:28,401] A new study created in memory with name: no-name-fa90eadd-57a6-4237-886f-30ea72d8613a
100%|██████████| 102861/102861 [01:51<00:00, 918.41it/s]
[I 2025-10-03 21:40:23,555] Trial 0 finished with value: 0.050087010625990414 and parameters: {'noise_x': 175.49884434381377, 'noise_y': 349.6329169813476, 'noise_dx': 2.265604663288929, 'noise_dy': 0.8637702035210998, 'qx': 0.06465484014658862, 'qy': 0.7536058878017058, 'cut_dist': 706.8559998949561}. Best is trial 0 with value: 0.050087010625990414.
100%|██████████| 102861/102861 [01:43<00:00, 997.42it/s]
[I 2025-10-03 21:42:09,780] Trial 1 finished with value: 0.05563333041677604 and parameters: {'noise_x': 266.52238501798956, 'noise_y': 1.5744541675416657, 'noise_dx': 3.762546392129636, 'noise_dy': 3.2400046087281056, 'qx': 0.5031044985578316, 'qy': 0.27736575399047414, 'cut_dist': 600.9029684584173}. Best is trial 1 with value: 0.05563333041677604.
100%|██████████| 102861/102861 [01:37<00:00, 1057.80it/s]
[I 2025

In [29]:
pt.max(track_id)

tensor(11596)

In [28]:
tids = track_id.numpy()
pids = data["point_id"]

pd.DataFrame({"point_id": pids, "track_id": tids}).to_csv("grademe.csv")