In [1]:
import json
import pyarrow.parquet as pq
import numpy as np

Define a function to read Parquet data files.

In [2]:
def read_parquet(parquet_path):
    '''
    Read a Parquet file, extract the embedded dataset metadata, and rebuild
    the per-vehicle track dictionary.

    The table is loaded with ``pq.read_table``. If the schema metadata has a
    ``b'dataset_meta'`` entry, it is decoded as UTF-8 JSON and returned as a
    dict. Each table row becomes one entry of the result, keyed by the row's
    ``vehicle_id``; the remaining columns form the value dict.

    Progress and a few sanity checks are printed along the way.

    :param parquet_path: the path to the Parquet file
    :return: ``(restored_tracks, restored_meta)``. ``restored_tracks`` maps
        ``vehicle_id`` to a dict of that row's other columns.
        ``restored_meta`` is the decoded ``dataset_meta`` dict, or an empty
        dict when the file carries no such entry.
    '''
    print("\n--- Reading back from Parquet ---")

    # 1. Read Parquet file
    table = pq.read_table(parquet_path)

    # 2. Extract and parse Metadata
    # schema.metadata is None when no key/value metadata is attached, so
    # normalise it to a dict before looking anything up.
    file_meta = table.schema.metadata or {}

    # Always bound, so the return statement works on the warning path too.
    restored_meta = {}

    if b'dataset_meta' in file_meta:
        restored_meta_json = file_meta[b'dataset_meta'].decode('utf-8')
        restored_meta = json.loads(restored_meta_json)

        print("\n[Success] Meta embedded in Parquet found:")
        # Print all meta info
        print(restored_meta)
        print(f"  - Location: {restored_meta.get('location_name')}")
        print(f"  - Lane Map (Dict): {restored_meta.get('lane_sequence_to_movement_map')}")
    else:
        print("\n[Warning] 'dataset_meta' key not found in Parquet header.")

    # 3. Convert back to DataFrame to view data
    df_read = table.to_pandas()

    print("\n[Success] Trajectory Data loaded:")
    print(f"  - Shape: {df_read.shape}")
    print(f"  - Columns: {list(df_read.columns)}")

    # Verify complex structure (pixel_corners); skipped when there is no row
    # or no such column, instead of failing on iloc[0] / the column lookup.
    if not df_read.empty and 'pixel_corners' in df_read.columns:
        sample_corners = df_read['pixel_corners'].iloc[0]
        print(sample_corners)
        print(f"  - Sample pixel_corners type: {type(sample_corners)}")
        if sample_corners is not None:
            print(f"  - Sample pixel_corners shape (len): {len(sample_corners)} (should be 5)")
    else:
        print("  - [Warning] No 'pixel_corners' sample available (empty table or missing column).")

    # 4. Convert back to Dict
    print("\n--- Converting DataFrame back to Dict ---")
    restored_tracks = {}
    duplicate_ids = 0

    if 'vehicle_id' not in df_read.columns:
        print("[Warning] 'vehicle_id' column not found; no tracks can be restored.")
    else:
        # One record (dict of column -> value) per table row
        for record in df_read.to_dict(orient='records'):
            vid = record.pop('vehicle_id')
            if vid in restored_tracks:
                # Later rows still win, but the overwrite is reported below.
                duplicate_ids += 1
            restored_tracks[vid] = record

    if duplicate_ids:
        print(f"[Warning] {duplicate_ids} row(s) reused an existing vehicle_id; the last row was kept.")

    print(f"[Success] Converted back to Dict. Total tracks: {len(restored_tracks)}")
    if restored_tracks:
        sample_vid = next(iter(restored_tracks))
        print(f"  - Sample Vehicle ID: {sample_vid}")
        print(f"  - Sample Keys in Track Dict: {list(restored_tracks[sample_vid].keys())[:5]} ...")
    return restored_tracks, restored_meta

In [3]:
# Relative path to the demo Parquet file
parquet_file_path = "data/Hurong_20220617_B3_F1_demo.parquet"

# Load the per-vehicle track dict and the embedded dataset metadata;
# both names are reused by the cells below.
tracks, meta = read_parquet(parquet_file_path)


--- Reading back from Parquet ---

[Success] Meta embedded in Parquet found:
{'data_file_name': 'Hurong_20220617_B3_F1_demo', 'location_id': 'B3', 'location_name': 'HurongFreeway-Nanjing-Jiangsu-China', 'frame_interval': 0.1, 'start_timestamp_ms': 1655420392139, 'start_datetime': '2022-06-17 06:59:52', 'total_duration': 300.1, 'timestamp_timezone': 'Asia/Shanghai', 'spatial_unit': 'm', 'dataset_version': '1.0.0', 'lane_sequence_to_movement_map': {'2-50-5': 'Left Turn', '2-50-3': 'Straight Ahead', '4-50-1': 'Straight Ahead', '4-50-5': 'Right Turn', '6-50-1': 'Right Turn', '6-50-3': 'Right Turn', '4-3': 'U Turn', '2-1': 'U Turn', '6-5': 'U Turn'}, 'total_vehicle_count': 356, 'unique_lane_ids': [1, 2, 3, 4, 5, 6, 50]}
  - Location: HurongFreeway-Nanjing-Jiangsu-China
  - Lane Map (Dict): {'2-50-5': 'Left Turn', '2-50-3': 'Straight Ahead', '4-50-1': 'Straight Ahead', '4-50-5': 'Right Turn', '6-50-1': 'Right Turn', '6-50-3': 'Right Turn', '4-3': 'U Turn', '2-1': 'U Turn', '6-5': 'U Turn'}


In [4]:
# Show two metadata fields; 'Unknown' is printed when a key is absent
print("Location:", meta.get('location_name', 'Unknown'))
print("Lane Map:", meta.get('lane_sequence_to_movement_map', 'Unknown'))

Location: HurongFreeway-Nanjing-Jiangsu-China
Lane Map: {'2-50-5': 'Left Turn', '2-50-3': 'Straight Ahead', '4-50-1': 'Straight Ahead', '4-50-5': 'Right Turn', '6-50-1': 'Right Turn', '6-50-3': 'Right Turn', '4-3': 'U Turn', '2-1': 'U Turn', '6-5': 'U Turn'}


In [5]:
# Preview one track: its vehicle ID, its leading keys, and its first frames
if tracks:
    # First vehicle ID in the dict's iteration order
    sample_vid = next(iter(tracks))
    print(f"Sample Vehicle ID: {sample_vid}")
    print(f"Keys: {list(tracks[sample_vid])[:10]}")
    # Only the first 10 frame indices are shown
    print(f"Frame Indices (first 10): {tracks[sample_vid].get('frame_index', [])[:10]}")

Sample Vehicle ID: 8
Keys: ['vehicle_class', 'vehicle_width', 'vehicle_length', 'frame_index', 'lane_id', 'pixel_x', 'pixel_y', 'ground_x', 'ground_y', 'pixel_corners']
Frame Indices (first 10): [0 1 2 3 4 5 6 7 8 9]


Plot space-time diagram

In [None]:
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import os

Create an animation of vehicle motion in the XY-plane coordinate system.

In [8]:

import matplotlib.animation as animation
from matplotlib.collections import PolyCollection
from IPython.display import HTML
import numpy as np
import matplotlib.pyplot as plt

def _corners_to_quad(corners):
    """Return ``corners`` as a finite (4, 2) float array, or None if that is not possible."""
    try:
        arr = np.asarray(corners)
        if arr.dtype == object:
            # Nested containers (e.g. an object array holding one small array
            # per corner) are rebuilt element by element into a numeric array.
            arr = np.array([np.asarray(point, dtype=float) for point in arr], dtype=float)
        else:
            arr = arr.astype(float)
    except (TypeError, ValueError):
        # Ragged, non-numeric or scalar input cannot describe a polygon.
        return None

    if arr.ndim == 1 and arr.size == 8:
        # Flat layout: 8 values reshaped into 4 rows of 2
        arr = arr.reshape(4, 2)
    elif arr.shape != (4, 2):
        return None

    # NaN/inf vertices would corrupt the axis limits computed by the caller.
    if not np.isfinite(arr).all():
        return None
    return arr


def animate_vehicle_contours(trajectory_data, frame_step=1):
    """
    Animate vehicle contours (``ground_corners``) frame by frame.

    Every track that has both ``'frame_index'`` and ``'ground_corners'`` with
    matching lengths contributes one polygon per frame. A polygon is accepted
    when it can be read as 8 flat values or as a (4, 2) array of finite
    numbers; anything else is skipped and counted.

    :param trajectory_data: dictionary of vehicle tracks
    :param frame_step: render every ``frame_step``-th frame of the sorted
        frame list; the default of 1 renders all frames
    :return: an ``IPython.display.HTML`` object wrapping the JS animation, or
        None when no drawable polygon was found
    :raises ValueError: if ``frame_step`` is smaller than 1
    """
    frame_step = int(frame_step)
    if frame_step < 1:
        raise ValueError("frame_step must be a positive integer")

    # 1. Reorganize data by frame
    frames_data = {}
    valid_polygons = 0
    skipped_polygons = 0

    print("Reorganizing data for animation...")
    # To determine plot limits
    min_x, max_x = float('inf'), float('-inf')
    min_y, max_y = float('inf'), float('-inf')

    for vid, track in trajectory_data.items():
        if 'ground_corners' not in track or 'frame_index' not in track:
            continue

        frame_indices = track['frame_index']
        ground_corners = track['ground_corners']
        if frame_indices is None or ground_corners is None:
            continue

        # Ensure lengths match
        if len(frame_indices) != len(ground_corners):
            continue

        for frame_idx, corners in zip(frame_indices, ground_corners):
            # The frame is registered even if this polygon is rejected, so a
            # frame without drawable vehicles still shows up as an empty frame.
            frame_polys = frames_data.setdefault(frame_idx, [])

            c_arr = _corners_to_quad(corners)
            if c_arr is None:
                skipped_polygons += 1
                continue

            frame_polys.append(c_arr)
            valid_polygons += 1

            # Update bounds
            min_x = min(min_x, c_arr[:, 0].min())
            max_x = max(max_x, c_arr[:, 0].max())
            min_y = min(min_y, c_arr[:, 1].min())
            max_y = max(max_y, c_arr[:, 1].max())

    if skipped_polygons:
        print(f"Skipped {skipped_polygons} polygon(s) with unsupported shape or non-finite values.")

    sorted_frames = sorted(frames_data.keys())
    # Without at least one valid polygon the bounds are still +/-inf and the
    # axis limits below could not be set.
    if not sorted_frames or valid_polygons == 0:
        print("No valid frame data found.")
        return None

    print(f"Animation range: Frame {sorted_frames[0]} to {sorted_frames[-1]}")
    print(f"X range: {min_x:.2f} to {max_x:.2f}")
    print(f"Y range: {min_y:.2f} to {max_y:.2f}")

    # Create figure
    fig, ax = plt.subplots(figsize=(15, 10))

    # Initial setup
    polys = PolyCollection([], edgecolors='black', facecolors='cyan', alpha=0.6)
    ax.add_collection(polys)

    # Set axis limits with some margin (5% of the extent, 10 if the extent is zero)
    margin_x = (max_x - min_x) * 0.05
    margin_y = (max_y - min_y) * 0.05
    if margin_x == 0:
        margin_x = 10
    if margin_y == 0:
        margin_y = 10

    ax.set_xlim(min_x - margin_x, max_x + margin_x)
    ax.set_ylim(min_y - margin_y, max_y + margin_y)
    ax.set_aspect('equal')
    ax.set_xlabel("Ground X (m)")
    ax.set_ylabel("Ground Y (m)")

    title_text = ax.set_title("")

    def update(frame_idx):
        # Swap in the polygons of this frame and refresh the title
        verts = frames_data.get(frame_idx, [])
        polys.set_paths(verts)
        title_text.set_text(f"Frame: {frame_idx}")
        return polys, title_text

    # Create animation over every frame_step-th frame (all frames by default)
    selected_frames = sorted_frames[::frame_step]
    ani = animation.FuncAnimation(fig, update, frames=selected_frames, interval=100, blit=True)

    plt.close(fig) # Prevent static plot from showing up

    print("Generating animation...")
    return HTML(ani.to_jshtml())


In [None]:
# Run animation (might take a while for large datasets).
# The call is the cell's last expression, so its return value is what the cell displays.
animate_vehicle_contours(tracks)

In [6]:
def _collapse_consecutive_lanes(lane_ids):
    """Drop consecutive repeats from ``lane_ids`` while keeping the order."""
    lane_seq = []
    for lid in lane_ids:
        if not lane_seq or lane_seq[-1] != lid:
            lane_seq.append(lid)
    return lane_seq


def _find_movement(lane_seq, patterns):
    """Return ``(key, name)`` of the first pattern found as a contiguous run in ``lane_seq``, else ``(None, "Undefined")``."""
    for key, name, key_list in patterns:
        n = len(key_list)
        if any(lane_seq[i:i + n] == key_list for i in range(len(lane_seq) - n + 1)):
            return key, name
    return None, "Undefined"


def analysis_movement_data(trajectory_data, meta_data, boundary_margin=3):
    '''
    Analyze movement data from trajectory data.

    For every track, consecutive duplicate ``lane_id`` values are collapsed
    into a lane sequence. The sequence is then matched against the keys of
    ``meta_data['lane_sequence_to_movement_map']`` (strings such as
    ``"2-50-5"``); the first key, in map order, that occurs as a contiguous
    run names the movement. Unmatched tracks count as ``"Undefined"`` with an
    OD key of ``"<first lane>-<last lane>"``, unless they start or end within
    ``boundary_margin`` frames of the global frame range, in which case they
    are excluded and only reported as filtered.

    Statistics are printed; only the movement counts are returned.

    :param trajectory_data: dict mapping vehicle ID to a track dict with
        ``'lane_id'`` and ``'frame_index'`` sequences
    :param meta_data: dataset metadata dict; ``None`` or a missing map is
        treated as an empty map
    :param boundary_margin: number of frames at either end of the global
        frame range inside which an Undefined track is discarded (default 3)
    :return: dict mapping movement name to vehicle count, or ``{}`` when no
        track has any frame
    '''
    print("\n--- Analyzing Movement Data ---")

    lane_sequence_to_movement_map = (meta_data or {}).get('lane_sequence_to_movement_map') or {}

    # Parse every key once instead of once per vehicle. A key that is not a
    # '-'-separated list of integers is skipped rather than aborting the run.
    patterns = []
    for key, name in lane_sequence_to_movement_map.items():
        try:
            # Convert key "20-30" to list [20, 30]
            key_list = [int(part) for part in key.split('-')]
        except ValueError:
            print(f"[Warning] Skipping malformed lane sequence key: {key!r}")
            continue
        patterns.append((key, name, key_list))

    # 1. Calculate global frame range to filter partial trajectories
    # (first/last element are used, i.e. frame_index is taken as ordered)
    all_start_frames = []
    all_end_frames = []
    for track in trajectory_data.values():
        frames = track.get('frame_index')
        if frames is not None and len(frames) > 0:
            all_start_frames.append(frames[0])
            all_end_frames.append(frames[-1])

    if not all_start_frames:
        print("No valid frames found.")
        return {}

    global_min_frame = min(all_start_frames)
    global_max_frame = max(all_end_frames)

    print(f"Global Frame Range: {global_min_frame} - {global_max_frame}")

    # Initialize counters
    movement_counts = {}
    od_counts = {}

    total_vehicles = len(trajectory_data)
    valid_movement_vehicles = 0
    undefined_filtered_count = 0

    for vid, track in trajectory_data.items():
        lane_ids = track.get('lane_id')
        if lane_ids is None or len(lane_ids) == 0:
            continue

        # Deduplicate lane_id while preserving order to get lane_sequence
        lane_seq = _collapse_consecutive_lanes(lane_ids)
        if len(lane_seq) == 0:
            continue

        matched_key, movement_name = _find_movement(lane_seq, patterns)

        if matched_key is not None:
            od_key = matched_key
        else:
            od_key = f"{lane_seq[0]}-{lane_seq[-1]}"

        # Check if we need to filter (only for Undefined movements)
        if movement_name == "Undefined":
            vehicle_frames = track.get('frame_index')
            # Ensure vehicle_frames is valid and not empty
            if vehicle_frames is None or len(vehicle_frames) == 0:
                continue

            v_start = vehicle_frames[0]
            v_end = vehicle_frames[-1]

            # Exclusion condition: track touches the first or last
            # boundary_margin frames of the recording
            if v_start < global_min_frame + boundary_margin or v_end > global_max_frame - boundary_margin:
                undefined_filtered_count += 1
                continue

        # Count
        movement_counts[movement_name] = movement_counts.get(movement_name, 0) + 1
        od_counts[od_key] = od_counts.get(od_key, 0) + 1

        if movement_name != "Undefined":
            valid_movement_vehicles += 1

    print(f"Total Vehicles: {total_vehicles}")
    print(f"Identified Movements: {valid_movement_vehicles}")
    print(f"Filtered Undefined Vehicles (Time Boundary): {undefined_filtered_count}")

    print("\n[Movement Statistics]")
    for name, count in sorted(movement_counts.items(), key=lambda x: x[1], reverse=True):
        print(f"  - {name}: {count}")

    print("\n[OD Pair Statistics (Top 10)]")
    for od, count in sorted(od_counts.items(), key=lambda x: x[1], reverse=True)[:10]:
        mapped_name = lane_sequence_to_movement_map.get(od, "Undefined")
        print(f"  - {od} ({mapped_name}): {count}")

    return movement_counts

In [None]:
# Print the movement / OD statistics for the loaded tracks.
# The call is the cell's last expression, so the returned counts dict is what the cell displays.
analysis_movement_data(tracks, meta)