In [5]:
import json
import math

In [9]:
# Ground truth: 12 joints per frame, coordinates given in meters (m).
# Some joint positions may be missing in some frames of the sequence.
ground_truth_json_path = "ground_truth/seq_002_j12_t20260211_223142.json"

# Predictions: 15 joints per frame, coordinates given in millimeters (mm).
# Every joint is predicted for every frame of the sequence, so none are missing from the JSON.
prediction_json_path = "predictions/seq_002_j15_t20260211_223141.json"


# Mapping from predicted joint indices to ground-truth joint indices; only 10 such mappings can be made.
# Each pair in the list below is (joint_idx_predicted, joint_idx_truth).
joint_mapping_pairs = [(11, 0), (10, 1), (9, 2), (5, 3), (4, 4), (3, 5), (14, 6), (13, 7), (8, 8), (7, 9)]

# JSON files are conventionally UTF-8; pass the encoding explicitly so that
# loading does not depend on the platform's locale default.
with open(ground_truth_json_path, 'r', encoding='utf-8') as f:
    ground_truth_data = json.load(f)

with open(prediction_json_path, 'r', encoding='utf-8') as f:
    prediction_data = json.load(f)

In [10]:
# Re-index the predicted joints so that indices correspond exactly to the ground truth; for example, joint index 0 must be the right-hand wrist in both datasets
# Change the coordinate scaling: convert everything to meters
# The predictions use a completely different coordinate system: swap the y and z coordinates and negate every component
def convert_predictions(pred_data, mapping):
    """Re-index and re-express predicted joints in the ground-truth convention.

    For every frame, each (predicted index, ground-truth index) pair in
    ``mapping`` whose predicted joint exists in that frame is converted:
    the coordinates are divided by 1000 (millimeters to meters), the y and z
    components are swapped, and all three components are negated, i.e.
    (x, y, z) becomes (-x, -z, -y).

    Args:
        pred_data: dict of frame id -> dict of joint key (string) -> [x, y, z].
        mapping: iterable of (joint_idx_predicted, joint_idx_truth) pairs.

    Returns:
        dict of frame id -> dict of ground-truth joint key (string) ->
        [x, y, z]. Every input frame gets an entry, possibly empty; predicted
        joints that are not part of ``mapping`` are dropped.
    """
    mm_per_m = 1000.0

    def _to_truth_axes(point_mm):
        # Scale first, then negate, with y and z trading places
        px, py, pz = point_mm
        return [-(px / mm_per_m), -(pz / mm_per_m), -(py / mm_per_m)]

    def _remap_frame(frame_joints):
        remapped = {}
        for source_idx, target_idx in mapping:
            # The mapping holds ints while JSON object keys are strings
            source_key, target_key = str(source_idx), str(target_idx)
            if source_key not in frame_joints:
                continue
            remapped[target_key] = _to_truth_axes(frame_joints[source_key])
        return remapped

    return {
        frame_key: _remap_frame(frame_joints)
        for frame_key, frame_joints in pred_data.items()
    }

def compare_sequences(ground_truth, processed_preds):
    """Compute per-frame, per-joint Euclidean distances between two sequences.

    Only frames present in both inputs are compared, and within such a frame
    only joints present in both. A joint whose coordinates are ``None`` on
    either side (e.g. a JSON ``null`` for a missing position) is skipped in
    the same way as an absent joint, instead of raising ``TypeError``.

    Args:
        ground_truth: dict of frame id -> dict of joint id -> [x, y, z].
        processed_preds: dict with the same layout, already expressed with the
            same joint ids, units and axes as ``ground_truth`` (for example
            the output of ``convert_predictions``).

    Returns:
        dict of frame id -> dict of joint id -> distance (float), in the unit
        of the inputs. Ground-truth frames without a prediction are omitted;
        a shared frame with no comparable joints maps to an empty dict.
    """
    frame_diffs = {}

    # Iterate over frames present in Ground Truth
    for frame_id, gt_joints in ground_truth.items():
        if frame_id not in processed_preds:
            continue

        pred_joints = processed_preds[frame_id]
        joint_diffs = {}

        # Iterate over joints present in Ground Truth frame
        for joint_id, gt_coord in gt_joints.items():
            if joint_id not in pred_joints:
                continue

            pred_coord = pred_joints[joint_id]

            # A joint can be listed but carry no position (null); there is
            # nothing to measure in that case, so treat it as missing.
            if gt_coord is None or pred_coord is None:
                continue

            # Euclidean distance over the x, y, z components
            joint_diffs[joint_id] = math.sqrt(
                (gt_coord[0] - pred_coord[0])**2 +
                (gt_coord[1] - pred_coord[1])**2 +
                (gt_coord[2] - pred_coord[2])**2
            )

        frame_diffs[frame_id] = joint_diffs

    return frame_diffs

In [11]:
# Bring the predictions into the ground-truth convention (joint indices, meters, axes),
# then measure the per-joint error for every frame the two sequences share
processed_preds = convert_predictions(prediction_data, joint_mapping_pairs)
differences = compare_sequences(ground_truth_data, processed_preds)

# Output results
print("Differences (Euclidean Distance in Meters) per Frame per Joint:")
for frame_id, diff_data in differences.items():
    print(f"\nFrame {frame_id}:")
    total_error = 0
    count = 0
    for joint_id, dist in diff_data.items():
        print(f"  Joint {joint_id}: {dist:.4f} m")
        total_error += dist
        count += 1
    # A frame with no comparable joints has no average to report (and would divide by zero)
    if count > 0:
        print(f"  [Average Error for Frame {frame_id}]: {total_error/count:.4f} m")

# Optional: Print processed prediction sample to verify transform.
# The ground truth may lack some joints, so look the sample up with .get():
# a missing frame or joint prints None instead of ending the cell with a KeyError.
print("\n--- Verification Sample ---")
print(f"GT Frame 0 Joint 0: {ground_truth_data.get('0', {}).get('0')}")
print(f"Pred Frame 0 Joint 0 (mapped from 11): {processed_preds.get('0', {}).get('0')}")

Differences (Euclidean Distance in Meters) per Frame per Joint:

Frame 0:
  Joint 0: 0.1261 m
  Joint 1: 0.1036 m
  Joint 2: 0.1295 m
  Joint 3: 0.0430 m
  Joint 4: 0.1064 m
  Joint 5: 0.0447 m
  Joint 6: 0.1519 m
  Joint 7: 0.1513 m
  Joint 8: 0.0160 m
  Joint 9: 0.0766 m
  [Average Error for Frame 0]: 0.0949 m

Frame 1:
  Joint 0: 0.1270 m
  Joint 1: 0.1012 m
  Joint 2: 0.1269 m
  Joint 3: 0.0407 m
  Joint 4: 0.1036 m
  Joint 5: 0.0473 m
  Joint 6: 0.1515 m
  Joint 7: 0.1506 m
  Joint 8: 0.0147 m
  Joint 9: 0.0770 m
  [Average Error for Frame 1]: 0.0940 m

Frame 2:
  Joint 0: 0.1276 m
  Joint 1: 0.0980 m
  Joint 2: 0.1265 m
  Joint 3: 0.0432 m
  Joint 4: 0.1029 m
  Joint 5: 0.0447 m
  Joint 6: 0.1539 m
  Joint 7: 0.1500 m
  Joint 8: 0.0125 m
  Joint 9: 0.0781 m
  [Average Error for Frame 2]: 0.0937 m

Frame 3:
  Joint 0: 0.1199 m
  Joint 1: 0.0905 m
  Joint 2: 0.1225 m
  Joint 3: 0.0429 m
  Joint 4: 0.1081 m
  Joint 5: 0.0484 m
  Joint 6: 0.1511 m
  Joint 7: 0.1469 m
  Joint 8: 0.012