In [None]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import tqdm

from ucf_atd_model.datasets.create_link_data import haversine_distance_m, project_forward

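Evaluates the accuracy of an oracle classifier (one that links each point to a track using the ground-truth track id) under various screen sizes, where the screen size is the number of kinematically closest candidate tracks (`n_neighbors`) the oracle is allowed to choose from.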

In [None]:
def subset(lastpts, i):
    """Return the leading ``i`` entries of every per-track field.

    Args:
        lastpts: Mapping holding one sliceable sequence under each of the keys
            "time", "lat", "lon", "speed", "course" and "track_id_true".
        i: Number of leading entries to keep from each sequence.

    Returns:
        A new dict with exactly those six keys, each mapped to
        ``lastpts[key][:i]``. Any other keys in ``lastpts`` are ignored.
    """
    fields = ("time", "lat", "lon", "speed", "course", "track_id_true")
    head = {}
    for name in fields:
        head[name] = lastpts[name][:i]
    return head

def setidx(lastpts, i, val):
    """Overwrite slot ``i`` of every per-track field with the values in ``val``.

    Args:
        lastpts: Mapping of field name to a mutable, index-assignable sequence
            (keys "time", "lat", "lon", "speed", "course", "track_id_true").
            Modified in place.
        i: Slot to overwrite.
        val: Record indexable by the same six keys. ``val["time"]`` must
            provide a ``to_numpy()`` method; its result is what gets stored.

    Returns:
        None.
    """
    # The timestamp is the only field that is converted before being stored.
    lastpts["time"][i] = val["time"].to_numpy()
    for name in ("lat", "lon", "speed", "course", "track_id_true"):
        lastpts[name][i] = val[name]

# Module-level placeholder; the driver loop further down rebinds this name to
# the second value returned by run_v31_ml_enhanced_tracker.
metric = []

def run_v31_ml_enhanced_tracker(df, n_neighbors):
    """Link points into tracks with an oracle matcher behind a k-nearest screen.

    Despite the name, no learned model is consulted: the ground-truth id in
    ``track_id_true`` decides whether a point may join a track. Points are
    processed in ascending ``time`` order. For each point, the last point of
    every existing track is tested, and the point joins the lowest-numbered
    track that satisfies all of the following:

    * the track's last point has the same ``track_id_true``;
    * the track's last point is strictly earlier (gap > 0 after truncation to
      whole seconds);
    * the ``haversine_distance_m`` between the two points is below
      ``gap * 30 * 0.5144``;
    * the track's kinematic error (distance between the point and the position
      given by ``project_forward`` from the track's last point) is among the
      ``n_neighbors`` smallest errors over all existing tracks.

    If no track qualifies, the point starts a new track.

    Args:
        df: DataFrame with columns "time", "lat", "lon", "speed", "course",
            "track_id_true" and "point_id". It is not modified; a time-sorted
            copy is used internally.
        n_neighbors: Size of the kinematic screen (k). Must be at least 1.

    Returns:
        A 2-tuple ``(assignments, None)`` where ``assignments`` is a DataFrame
        with columns "point_id" and "track_id", in time-sorted order.

    Raises:
        ValueError: If ``n_neighbors`` is smaller than 1.
    """
    if n_neighbors < 1:
        raise ValueError("n_neighbors must be at least 1")

    print("\n--- Starting V31: The ML-Enhanced Tracker ---")
    df = df.sort_values('time').reset_index(drop=True)

    n = df.shape[0]
    # Collected per point and written to the frame once after the loop,
    # instead of one df.loc write per row.
    assigned = np.full(n, -1, dtype=np.int64)
    next_track_id = 0

    # Slot t holds the most recent point of track t. There can never be more
    # tracks than points, so n slots always suffice.
    lastPtInTrack = {
        "time": np.repeat(pd.Timestamp(year=1970, month=1, day=1, hour=0, minute=0, second=0).to_numpy(), n),
        "lat": np.repeat(-1.0, n),
        "lon": np.repeat(-1.0, n),
        "speed": np.repeat(-1.0, n),
        "course": np.repeat(-1.0, n),
        "track_id_true": np.repeat(-1.0, n)
    }

    for i in tqdm.tqdm(range(n)):
        p_current = df.iloc[i]
        track_id = -1

        # The very first point has no existing track to be compared against.
        if next_track_id > 0:
            active = subset(lastPtInTrack, next_track_id)

            # Whole seconds elapsed since each track's last point.
            time_diff = (p_current["time"].to_numpy() - active["time"]).astype("timedelta64[s]").astype("int")

            # NOTE(review): presumably a 30-knot speed cap converted to m/s
            # (1 kn ~ 0.5144 m/s) - confirm with the dataset owner.
            max_dist_m = time_diff * 30 * 0.5144
            real_dist = haversine_distance_m(active["lat"], active["lon"], p_current["lat"], p_current["lon"])

            # project_forward's result is unpacked as the second position
            # passed to haversine_distance_m.
            kinematic_errors = haversine_distance_m(
                p_current["lat"],
                p_current["lon"],
                *project_forward(active['lat'], active['lon'], active['speed'], active['course'], time_diff),
            )

            # k-th smallest error (or the largest one when fewer than k tracks
            # exist); a partial partition avoids fully sorting on every point.
            kth = min(n_neighbors, len(kinematic_errors)) - 1
            error_cutoff = np.partition(kinematic_errors, kth)[kth]

            candidates = (
                (active["track_id_true"] == p_current["track_id_true"])
                & (time_diff > 0)
                & (kinematic_errors <= error_cutoff)
                & (real_dist < max_dist_m)
            )
            if np.any(candidates):
                # argmax of a boolean mask is the index of its first True.
                track_id = int(np.argmax(candidates))

        if track_id < 0:
            track_id = next_track_id
            next_track_id += 1

        assigned[i] = track_id
        setidx(lastPtInTrack, track_id, p_current)

    df['track_id'] = assigned
    return df[['point_id', 'track_id']], None


# Screen sizes (values of n_neighbors) to evaluate, one output file per value.
neigborList = [2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 5000]

# --- Load Data ---
# The truth table does not depend on the screen size, so it is fetched and
# normalised once rather than on every iteration. The tracker sorts into a new
# frame before writing to it, so sharing truth_df across runs is safe.
truth_df = pd.read_csv("https://www.maserv.work/ATD/model2/ucf_atd_model/datasets/dataset1_truth.csv")

truth_df["time"] = pd.to_datetime(truth_df["time"])
# Keep only the time of day by moving every timestamp onto 1970-01-01.
truth_df["time"] = truth_df["time"].apply(lambda x: datetime.combine(datetime(1970, 1, 1, 0, 0, 0).date(), x.time()))
# Preserve the ground-truth id under its own name; the tracker writes its
# assignments to "track_id".
truth_df["track_id_true"] = truth_df["track_id"]

# DataFrame.to_csv does not create missing parent directories.
os.makedirs("oracle_out", exist_ok=True)

# --- Run ---
for neighbor in neigborList:
    output, metric = run_v31_ml_enhanced_tracker(truth_df, neighbor)
    output.to_csv(f"oracle_out/k_{neighbor}.csv", index=False)


--- Starting V31: The ML-Enhanced Tracker ---


100%|██████████| 102861/102861 [00:55<00:00, 1855.96it/s]
