# Imports

In [15]:
import os
import random
import shutil

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from PIL import Image
from scipy.interpolate import interp1d

from concurrent.futures import ProcessPoolExecutor
from scenarionet import read_dataset_summary, read_scenario

# Nuscenes

### Read scenario dict

In [None]:
# Load the dataset summary of the nuScenes pickle directory and read the first
# listed scenario so that its structure can be inspected interactively.
# The names defined here (nuscenes_pkl_dir, scenario_ids, mapping, scenario)
# are reused by later cells.
nuscenes_pkl_dir = '/data/tii/data/nuscenes/nuscenes_trainval_pkl'
dataset_summary, scenario_ids, mapping = read_dataset_summary(dataset_path=nuscenes_pkl_dir)

scenario = read_scenario(dataset_path=nuscenes_pkl_dir, mapping=mapping, scenario_file_name=scenario_ids[0])

#--- scenario keys
print('Scenario keys : ', scenario.keys())

#--- agent tracks
# NOTE(review): the agent id below is hard-coded; presumably it exists in the
# scenario at scenario_ids[0] -- a scenario without it would fail on the lookup.
print('\n===== Agent tracks =====') 
print('Scenario tracks keys <=> Agents ids : ', scenario['tracks'].keys())
print('Agent 6eac5d14e53743079cd2008e74aec375 keys : ', scenario['tracks']['6eac5d14e53743079cd2008e74aec375'].keys())
print('Agent 6eac5d14e53743079cd2008e74aec375 state keys : ', scenario['tracks']['6eac5d14e53743079cd2008e74aec375']['state'].keys())
#print(scenario['tracks']['6eac5d14e53743079cd2008e74aec375']['state']['position'])

#--- map features
# NOTE(review): 'boundary_0' is likewise a hard-coded map feature key.
print('\n===== Map Features =====') 
print('Scenario map features keys : ', scenario['map_features'].keys())
print('Map feature boundary_0 keys : ', scenario['map_features']['boundary_0'].keys())
print('Map feature boundary_0 type : ', scenario['map_features']['boundary_0']['type'])
#print(scenario['map_features']['boundary_0']['polyline'])

### Agent features

In [None]:
def process_scenario(scenario_name, mapping, dataset_path, output_dir):
    """
    Export the (x, y) tracks of the moving agents of one scenario to a .npy file.

    Only tracks typed 'PEDESTRIAN', 'CYCLIST' or 'VEHICLE' are kept. The first two
    position columns of every kept track are stacked into one array, which is saved
    in `output_dir` under the scenario file name with '.pkl' replaced by '.npy'.

    Parameters:
    scenario_name (str): Scenario file name forwarded to `read_scenario`.
    mapping: Mapping returned by `read_dataset_summary`, forwarded to `read_scenario`.
    dataset_path (str): Directory holding the scenario files.
    output_dir (str): Destination directory (created if it does not exist).
    """
    kept_types = {'PEDESTRIAN', 'CYCLIST', 'VEHICLE'}
    loaded = read_scenario(dataset_path=dataset_path, mapping=mapping, scenario_file_name=scenario_name)

    # Only 'position' is exported. The original author listed further per-step state
    # entries that could be added as extra columns: 'valid' (seq_length,),
    # 'heading' (seq_length,), 'velocity' (seq_length, 2), 'height', 'width' and
    # 'length' (seq_length, 1 each).
    xy_tracks = [
        track['state']['position'][:, :2]  # position is (seq_length, 3); keep x and y
        for track in loaded['tracks'].values()
        if track['type'] in kept_types
    ]
    stacked = np.array(xy_tracks)

    # Save processed scenario
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, scenario_name.replace('.pkl', '.npy'))
    np.save(target, stacked)
    print(f'Scenario of shape {stacked.shape} and id "{scenario_name}" has been saved!')


def main():
    """Convert every scenario listed in the nuScenes dataset summary to a .npy file of agent tracks."""
    pkl_root = '/data/tii/data/nuscenes_trainval_pkl/'
    npy_root = '/data/tii/data/nuscenes_trainval_npy/'
    _summary, names, file_mapping = read_dataset_summary(dataset_path=pkl_root)

    for name in names:
        process_scenario(name, file_mapping, pkl_root, npy_root)


if __name__ == "__main__":
    main()

### Map features

##### Visualize


In [None]:
# Plot every map feature (polylines and polygons) of the current `scenario` in thin
# gray lines and overlay the trajectories of the moving agents in blue.
plt.figure(figsize=(10, 10))

for k in scenario['map_features'].keys():
    feature = scenario['map_features'][k]
    # To restrict the plot to some feature types, filter on feature['type'] here, e.g.
    # 'ROAD_LINE_BROKEN_SINGLE_WHITE', 'ROAD_LINE_SOLID_SINGLE_YELLOW',
    # 'LANE_SURFACE_STREET', 'LANE_SURFACE_UNSTRUCTURE'.
    for kk in feature.keys():
        # Polylines and polygons are drawn the same way.
        if kk in ('polyline', 'polygon'):
            points = feature[kk]
            plt.plot(points[:, 0], points[:, 1], color='gray', linewidth=0.3)

# Track types listed by the original author:
# ('PEDESTRIAN', 'CYCLIST', 'TRAFFIC_CONE', 'TRAFFIC_BARRIER', 'VEHICLE')
tracks = scenario['tracks']
# Iterate over the values only: the previous loop bound the track key to `id`,
# which shadowed the builtin for the rest of the notebook session.
for track in tracks.values():
    if track['type'] in ('PEDESTRIAN', 'CYCLIST', 'VEHICLE'):
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points (steps whose x or y is exactly zero)
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

# Display the figure, then release it
plt.show()
plt.close()

In [None]:
# Overlay the agent trajectories stored for scene-0160 on the map array stored for
# the same scene (both loaded from previously exported .npy files).
map1 = np.load('/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_norm_npy/sd_nuscenes_v1.0-trainval_scene-0160.npy')
data1 = np.load('/data/tii/data/nuscenes_trainval_clean_train/sd_nuscenes_v1.0-trainval_scene-0160.npy')
plt.figure(figsize=(10, 10))

# Map rows in thin gray.
for map_line in map1:
    plt.plot(map_line[:, 0], map_line[:, 1], color='gray', linewidth=0.3)

# Agent rows in blue, skipping the steps where x or y is exactly zero.
for trajectory in data1:
    has_zero = (trajectory[:, 0] == 0.0) | (trajectory[:, 1] == 0.0)
    kept_xy = trajectory[~has_zero]
    plt.plot(kept_xy[:, 0], kept_xy[:, 1], color='b')

plt.show()
plt.close()

##### Extract  

In [None]:
def interpolate_to_fixed_length(trajectory, num_timesteps=10, kind='linear'):
    """
    Interpolate a variable-length trajectory to a fixed number of timesteps.

    The rows of `trajectory` are treated as samples at the integer steps 0..L-1 and
    the x and y columns are evaluated at `num_timesteps` evenly spaced positions over
    that same range, so the first and last points are preserved.

    Parameters:
    trajectory (np.ndarray): Input trajectory of shape (L, 2), where L is the number of
        timesteps. Extra columns are ignored.
    num_timesteps (int): The number of timesteps to interpolate to (default is 10).
    kind: choose from 'linear', 'quadratic', 'cubic' (default is 'linear').

    Returns:
    np.ndarray: Interpolated trajectory of shape (num_timesteps, 2).

    Raises:
    ValueError: If `trajectory` has no rows.
    """
    trajectory = np.asarray(trajectory)
    L = trajectory.shape[0]  # Original length of the trajectory
    if L == 0:
        raise ValueError('Cannot interpolate an empty trajectory.')
    if L == 1:
        # interp1d needs at least two samples; a single point can only be repeated.
        return np.tile(trajectory[:, :2].astype(float), (num_timesteps, 1))

    original_timesteps = np.arange(L, dtype=float)
    new_timesteps = np.linspace(0, L - 1, num_timesteps)

    # Interpolate x and y in one call along the time axis. The new steps stay inside
    # [0, L - 1]; 'extrapolate' only guards against round-off at the two ends.
    interp_func = interp1d(original_timesteps, trajectory[:, :2], kind=kind, axis=0, fill_value='extrapolate')
    return np.asarray(interp_func(new_timesteps), dtype=float)


def slice_array_based_on_condition(arr, epsilon):
    """
    Split a point sequence wherever two consecutive points jump by more than `epsilon`.

    A break is placed between rows i and i+1 when the absolute difference of their
    x values (column 0) or of their y values (column 1) exceeds `epsilon`.

    Parameters:
    arr (np.ndarray): Points of shape (N, 2); only columns 0 and 1 are compared.
    epsilon (float): Largest per-axis step that still counts as continuous.

    Returns:
    list: Consecutive pieces of `arr`, in order. Empty list when `arr` has no rows.
    """
    if len(arr) == 0:
        return []

    # Vectorized jump detection: callers pass arrays with tens of thousands of rows,
    # which made the previous row-by-row Python loop needlessly slow.
    steps = np.abs(np.diff(arr[:, :2], axis=0))
    break_after = np.flatnonzero((steps > epsilon).any(axis=1))

    # A break after row i means the next piece starts at row i + 1.
    return np.split(arr, break_after + 1)

In [None]:
# Build the map array of the first scenario: crop every map feature to the area
# covered by the agents (plus a margin of 50), split it where consecutive kept
# points are far apart, and resample every piece to 128 points.
# `scenario_ids[:1]` replaces `scenario_ids[0]`, which iterated over the characters
# of a single file name instead of over scenarios.
for sc in scenario_ids[:1]:
    print(f'Map of the scenario {sc} is being processed...')
    scenario = read_scenario(dataset_path=nuscenes_pkl_dir, mapping=mapping, scenario_file_name=sc)
    plt.figure(figsize=(10, 10))

    # Determine the min max position of agents in the scenario.
    # The maxima start at -inf (not 0.) so that scenarios lying entirely at negative
    # coordinates get a correct bounding box.
    tracks = scenario['tracks']
    min_x, min_y = float('inf'), float('inf')
    max_x, max_y = float('-inf'), float('-inf')
    for track in tracks.values():
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            min_x = min(min_x, valid_points[:, 0].min())
            min_y = min(min_y, valid_points[:, 1].min())
            max_x = max(max_x, valid_points[:, 0].max())
            max_y = max(max_y, valid_points[:, 1].max())
            # plot the agents traj
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

    # Crop the map feature of the scenario and filter undesired points
    arr_list = []
    for k in scenario['map_features'].keys():
        feature = scenario['map_features'][k]
        for kk in feature.keys():
            if kk not in ('polyline', 'polygon'):
                continue
            arr = feature[kk][:, :2]
            if arr.shape[0] <= 1:
                continue
            # interpolate to a dense polyline before cropping
            arr = interpolate_to_fixed_length(arr, num_timesteps=50000, kind='linear')
            # crop to the agents' bounding box, enlarged by 50 on every side
            cropped_arr = arr[
                (arr[:, 0] >= min_x - 50) &
                (arr[:, 0] <= max_x + 50) &
                (arr[:, 1] >= min_y - 50) &
                (arr[:, 1] <= max_y + 50)]

            if cropped_arr.shape[0] > 1:
                # slice the cropped array so that points separated by the crop are
                # not joined by a spurious segment
                cropped_arr_slices = slice_array_based_on_condition(cropped_arr, epsilon=50)
                for arr_slice in cropped_arr_slices:
                    if arr_slice.shape[0] > 1:
                        # interpolate every piece to the same fixed length
                        arr_slice = interpolate_to_fixed_length(arr_slice, num_timesteps=128, kind='linear')
                        arr_list.append(arr_slice)

    arr_map = np.array(arr_list)
    print(f'Shape of the map array: {arr_map.shape}')

    # plot the map
    for map_line in arr_map:
        plt.plot(map_line[:, 0], map_line[:, 1], color='gray', linewidth=0.3)
    # Show and release the figure, as the Waymo and Argoverse cells do; otherwise one
    # open figure per scenario accumulates.
    plt.show()
    plt.close()

    # NOTE(review): saving is currently disabled, although the message below says "saved".
    #dir_path = '/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_npy/'
    #os.makedirs(dir_path, exist_ok=True)
    #full_path = os.path.join(dir_path, sc)
    #new_file_path = full_path.replace('.pkl', '.npy')
    #np.save(new_file_path, arr_map)
    print(f'Map of the scenario {sc} saved in npy format!\n')

##### Normalize 

In [None]:
def list_npy_files(directory):
    """
    List all .npy files in the specified directory.

    Parameters:
    directory (str): Directory to scan (not recursive).

    Returns:
    list: `directory` joined with every entry name ending in '.npy', sorted by name so
    that the processing order is reproducible (os.listdir returns entries in arbitrary order).
    """
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory)) if name.endswith('.npy')]

def normalize_and_standardize_excluding_padding(data, mean, std):
    """
    Standardize the non-padding points of `data` as (value - mean) / std.

    A point (one vector along the last axis) counts as padding when all of its
    components are zero; padding points are copied through unchanged.

    Parameters:
    data (np.ndarray): Array whose last axis holds the coordinates of one point.
    mean: Per-coordinate offset, broadcast against the selected points.
    std: Per-coordinate divisor, broadcast against the selected points.

    Returns:
    np.ndarray: A new array with the same shape as `data`; `data` itself is not modified.
    """
    is_padding = np.all(data == 0, axis=-1)
    keep = ~is_padding
    result = np.array(data, copy=True)
    result[keep] = (data[keep] - mean) / std
    return result

def process_grouped_npy_files(input_directory, output_directory, mean, std):
    """
    Standardize every .npy file of `input_directory` and save the result under the
    same file name in `output_directory` (created if it does not exist).

    Parameters:
    input_directory (str): Directory scanned with `list_npy_files`.
    output_directory (str): Directory receiving the processed files.
    mean, std: Statistics forwarded to `normalize_and_standardize_excluding_padding`.
    """
    os.makedirs(output_directory, exist_ok=True)

    for source_path in list_npy_files(input_directory):
        standardized = normalize_and_standardize_excluding_padding(np.load(source_path), mean, std)
        destination = os.path.join(output_directory, os.path.basename(source_path))
        np.save(destination, standardized)
        print(f"Processed and saved {destination}")

def main(
    input_directory='/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_npy',
    output_directory='/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_norm_npy',
):
    """
    Standardize the nuScenes map .npy files with fixed mean/std statistics.

    The two directories were commented out in the body, which made the call below fail
    with a NameError; they are now parameters whose defaults are those same paths.

    Parameters:
    input_directory (str): Directory holding the map .npy files to process.
    output_directory (str): Directory receiving the standardized files.
    """
    mean = [1117.28378752, 1231.38483692]
    std = [600.29805033, 427.54111417]
    print(f'mean and std from padded trainval: (mean: {mean}, std: {std})')

    # Process files to normalize and standardize
    process_grouped_npy_files(input_directory, output_directory, mean, std)


if __name__ == "__main__":
    main()

##### Scale

In [None]:
def list_npy_files(directory):
    """
    List all .npy files in the specified directory.

    Parameters:
    directory (str): Directory to scan (not recursive).

    Returns:
    list: `directory` joined with every entry name ending in '.npy', sorted by name so
    that the processing order is reproducible (os.listdir returns entries in arbitrary order).
    """
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory)) if name.endswith('.npy')]

def process_grouped_npy_files(input_directory, output_directory, scale_factor):
    """
    Multiply every .npy array of `input_directory` by `scale_factor` and save the
    result under the same file name in `output_directory` (created if it does not exist).

    Parameters:
    input_directory (str): Directory scanned with `list_npy_files`.
    output_directory (str): Directory receiving the scaled files.
    scale_factor: Factor applied to the whole loaded array.
    """
    os.makedirs(output_directory, exist_ok=True)

    for source_path in list_npy_files(input_directory):
        scaled = np.load(source_path) * scale_factor
        destination = os.path.join(output_directory, os.path.basename(source_path))
        np.save(destination, scaled)
        print(f"Processed and saved {destination}")

def main(
    input_directory='/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_norm_npy',
    output_directory='/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_norm_npy',
):
    """
    Scale the standardized nuScenes map .npy files by a factor of 100.

    The two directories were commented out in the body, which made the call below fail
    with a NameError; they are now parameters whose defaults are those same paths.

    Parameters:
    input_directory (str): Directory holding the .npy files to scale.
    output_directory (str): Directory receiving the scaled files.
    """
    # NOTE(review): by default input and output are the same directory, so the files
    # are overwritten in place and every additional run multiplies them by 100 again.
    process_grouped_npy_files(input_directory, output_directory, scale_factor=100)


if __name__ == "__main__":
    main()

# Waymo

### Read scenario dict

In [None]:
# Load the dataset summary of the Waymo pickle directory and read the first listed
# scenario so that its structure can be inspected interactively.
# This rebinds dataset_summary, scenario_ids, mapping and scenario, which later
# cells reuse.
waymo_pkl_dir = '/data/tii/data/waymo/pkl'
dataset_summary, scenario_ids, mapping = read_dataset_summary(dataset_path=waymo_pkl_dir)

scenario = read_scenario(dataset_path=waymo_pkl_dir, mapping=mapping, scenario_file_name=scenario_ids[0])
print('Scenario keys : ', scenario.keys())

#--- agent tracks
# NOTE(review): the track key '0' is hard-coded; presumably it exists in the scenario
# at scenario_ids[0].
print('\n===== Agent tracks =====') 
print('Scenario tracks keys <=> Agents ids : ', scenario['tracks'].keys())
print('Agent 0 keys : ', scenario['tracks']['0'].keys())
print('Agent 0 state keys : ', scenario['tracks']['0']['state'].keys())
#print(scenario['tracks']['0']['state']['position'])

#--- map features
# NOTE(review): '2' is likewise a hard-coded map feature key.
print('\n===== Map Features =====') 
print('Scenario map features keys : ', scenario['map_features'].keys())
print('Map feature 2 keys : ', scenario['map_features']['2'].keys())
print('Map feature 2 type : ', scenario['map_features']['2']['type'])
#print(scenario['map_features']['2']['polyline'])

### Agent features

In [None]:
def process_scenario(scenario_name, mapping, dataset_path, output_dir):
    """
    Export the (x, y) tracks of the moving agents of one scenario to a .npy file.

    Only tracks typed 'PEDESTRIAN', 'CYCLIST' or 'VEHICLE' are kept. The first two
    position columns of every kept track are stacked into one array, which is saved
    in `output_dir` under the scenario file name with '.pkl' replaced by '.npy'.

    Parameters:
    scenario_name (str): Scenario file name forwarded to `read_scenario`.
    mapping: Mapping returned by `read_dataset_summary`, forwarded to `read_scenario`.
    dataset_path (str): Directory holding the scenario files.
    output_dir (str): Destination directory (created if it does not exist).
    """
    kept_types = {'PEDESTRIAN', 'CYCLIST', 'VEHICLE'}
    loaded = read_scenario(dataset_path=dataset_path, mapping=mapping, scenario_file_name=scenario_name)

    # Keep x and y only; further state entries could be added as extra columns here.
    xy_tracks = [
        track['state']['position'][:, :2]
        for track in loaded['tracks'].values()
        if track['type'] in kept_types
    ]
    stacked = np.array(xy_tracks)

    # Save processed scenario
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, scenario_name.replace('.pkl', '.npy'))
    np.save(target, stacked)
    print(f'Scenario of shape {stacked.shape} and id "{scenario_name}" has been saved!')


def main():
    """Convert every scenario listed in the Waymo dataset summary to a .npy file of agent tracks."""
    pkl_root = '/data/tii/data/waymo/pkl/'
    npy_root = '/data/tii/data/waymo/npy/'
    _summary, names, file_mapping = read_dataset_summary(dataset_path=pkl_root)

    for name in names:
        process_scenario(name, file_mapping, pkl_root, npy_root)


if __name__ == "__main__":
    main()

### Map features

##### Visualize

In [None]:
# Plot every map feature (polylines and polygons) of the current `scenario` in thin
# gray lines and overlay the trajectories of the moving agents in blue.
plt.figure(figsize=(10, 10))

for k in scenario['map_features'].keys():
    feature = scenario['map_features'][k]
    # To restrict the plot to some feature types, filter on feature['type'] here, e.g.
    # 'ROAD_LINE_BROKEN_SINGLE_WHITE', 'ROAD_LINE_SOLID_SINGLE_YELLOW',
    # 'LANE_SURFACE_STREET', 'LANE_SURFACE_UNSTRUCTURE'.
    for kk in feature.keys():
        # Polylines and polygons are drawn the same way.
        if kk in ('polyline', 'polygon'):
            points = feature[kk]
            plt.plot(points[:, 0], points[:, 1], color='gray', linewidth=0.3)

# Track types listed by the original author:
# ('PEDESTRIAN', 'CYCLIST', 'TRAFFIC_CONE', 'TRAFFIC_BARRIER', 'VEHICLE')
tracks = scenario['tracks']
# Iterate over the values only: the previous loop bound the track key to `id`,
# which shadowed the builtin for the rest of the notebook session.
for track in tracks.values():
    if track['type'] in ('PEDESTRIAN', 'CYCLIST', 'VEHICLE'):
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points (steps whose x or y is exactly zero)
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

# Display the figure, then release it
plt.show()
plt.close()

##### Extract

In [60]:
def interpolate_to_fixed_length(trajectory, num_timesteps=10, kind='linear'):
    """
    Interpolate a variable-length trajectory to a fixed number of timesteps.

    The rows of `trajectory` are treated as samples at the integer steps 0..L-1 and
    the x and y columns are evaluated at `num_timesteps` evenly spaced positions over
    that same range, so the first and last points are preserved.

    Parameters:
    trajectory (np.ndarray): Input trajectory of shape (L, 2), where L is the number of
        timesteps. Extra columns are ignored.
    num_timesteps (int): The number of timesteps to interpolate to (default is 10).
    kind: choose from 'linear', 'quadratic', 'cubic' (default is 'linear').

    Returns:
    np.ndarray: Interpolated trajectory of shape (num_timesteps, 2).

    Raises:
    ValueError: If `trajectory` has no rows.
    """
    trajectory = np.asarray(trajectory)
    L = trajectory.shape[0]  # Original length of the trajectory
    if L == 0:
        raise ValueError('Cannot interpolate an empty trajectory.')
    if L == 1:
        # interp1d needs at least two samples; a single point can only be repeated.
        return np.tile(trajectory[:, :2].astype(float), (num_timesteps, 1))

    original_timesteps = np.arange(L, dtype=float)
    new_timesteps = np.linspace(0, L - 1, num_timesteps)

    # Interpolate x and y in one call along the time axis. The new steps stay inside
    # [0, L - 1]; 'extrapolate' only guards against round-off at the two ends.
    interp_func = interp1d(original_timesteps, trajectory[:, :2], kind=kind, axis=0, fill_value='extrapolate')
    return np.asarray(interp_func(new_timesteps), dtype=float)


def slice_array_based_on_condition(arr, epsilon):
    """
    Split a point sequence wherever two consecutive points jump by more than `epsilon`.

    A break is placed between rows i and i+1 when the absolute difference of their
    x values (column 0) or of their y values (column 1) exceeds `epsilon`.

    Parameters:
    arr (np.ndarray): Points of shape (N, 2); only columns 0 and 1 are compared.
    epsilon (float): Largest per-axis step that still counts as continuous.

    Returns:
    list: Consecutive pieces of `arr`, in order. Empty list when `arr` has no rows.
    """
    if len(arr) == 0:
        return []

    # Vectorized jump detection: callers pass arrays with tens of thousands of rows,
    # which made the previous row-by-row Python loop needlessly slow.
    steps = np.abs(np.diff(arr[:, :2], axis=0))
    break_after = np.flatnonzero((steps > epsilon).any(axis=1))

    # A break after row i means the next piece starts at row i + 1.
    return np.split(arr, break_after + 1)

In [None]:
# Build the map array of the first three Waymo scenarios: crop every map feature to
# the area covered by the agents (plus a margin of 50), split it where consecutive
# kept points are far apart, and resample every piece to 128 points.
# NOTE(review): ft_list is never filled in this cell; kept in case another cell reads it.
ft_list = []
for sc in scenario_ids[:3]:
    print(f'Map of the scenario {sc} is being processed...')
    scenario = read_scenario(dataset_path=waymo_pkl_dir, mapping=mapping, scenario_file_name=sc)
    plt.figure(figsize=(10, 10))

    # Determine the min max position of agents in the scenario.
    # The maxima start at -inf (not 0.) so that scenarios lying entirely at negative
    # coordinates get a correct bounding box.
    tracks = scenario['tracks']
    min_x, min_y = float('inf'), float('inf')
    max_x, max_y = float('-inf'), float('-inf')
    for track in tracks.values():
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            min_x = min(min_x, valid_points[:, 0].min())
            min_y = min(min_y, valid_points[:, 1].min())
            max_x = max(max_x, valid_points[:, 0].max())
            max_y = max(max_y, valid_points[:, 1].max())
            # plot the agents traj
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

    # Crop the map feature of the scenario and filter undesired points
    arr_list = []
    for k in scenario['map_features'].keys():
        feature = scenario['map_features'][k]
        for kk in feature.keys():
            if kk not in ('polyline', 'polygon'):
                continue
            arr = feature[kk][:, :2]
            if arr.shape[0] <= 1:
                continue
            # interpolate to a dense polyline before cropping
            arr = interpolate_to_fixed_length(arr, num_timesteps=50000, kind='linear')
            # crop to the agents' bounding box, enlarged by 50 on every side
            cropped_arr = arr[
                (arr[:, 0] >= min_x - 50) &
                (arr[:, 0] <= max_x + 50) &
                (arr[:, 1] >= min_y - 50) &
                (arr[:, 1] <= max_y + 50)]

            if cropped_arr.shape[0] > 1:
                # slice the cropped array so that points separated by the crop are
                # not joined by a spurious segment
                cropped_arr_slices = slice_array_based_on_condition(cropped_arr, epsilon=50)
                for arr_slice in cropped_arr_slices:
                    if arr_slice.shape[0] > 1:
                        # interpolate every piece to the same fixed length
                        arr_slice = interpolate_to_fixed_length(arr_slice, num_timesteps=128, kind='linear')
                        arr_list.append(arr_slice)

    arr_map = np.array(arr_list)
    print(f'Shape of the map array: {arr_map.shape}')

    #-- plot the map
    for map_line in arr_map:
        plt.plot(map_line[:, 0], map_line[:, 1], color='gray')
    plt.show()
    plt.close()

    # NOTE(review): saving is currently disabled, although the message below says "saved".
    #dir_path = '/data/tii/data/waymo/maps/npy'
    #os.makedirs(dir_path, exist_ok=True)
    #full_path = os.path.join(dir_path, sc)
    #new_file_path = full_path.replace('.pkl', '.npy')
    #np.save(new_file_path, arr_map)
    print(f'Map of the scenario {sc} saved in npy format!\n')

##### Normalize 

In [None]:
def list_npy_files(directory):
    """
    List all .npy files in the specified directory.

    Parameters:
    directory (str): Directory to scan (not recursive).

    Returns:
    list: `directory` joined with every entry name ending in '.npy', sorted by name so
    that the processing order is reproducible (os.listdir returns entries in arbitrary order).
    """
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory)) if name.endswith('.npy')]

def normalize_and_standardize_excluding_padding(data, mean, std):
    """
    Standardize the non-padding points of `data` as (value - mean) / std.

    A point (one vector along the last axis) counts as padding when all of its
    components are zero; padding points are copied through unchanged.

    Parameters:
    data (np.ndarray): Array whose last axis holds the coordinates of one point.
    mean: Per-coordinate offset, broadcast against the selected points.
    std: Per-coordinate divisor, broadcast against the selected points.

    Returns:
    np.ndarray: A new array with the same shape as `data`; `data` itself is not modified.
    """
    is_padding = np.all(data == 0, axis=-1)
    keep = ~is_padding
    result = np.array(data, copy=True)
    result[keep] = (data[keep] - mean) / std
    return result

def process_grouped_npy_files(input_directory, output_directory, mean, std):
    """
    Standardize every .npy file of `input_directory` and save the result under the
    same file name in `output_directory` (created if it does not exist).

    Parameters:
    input_directory (str): Directory scanned with `list_npy_files`.
    output_directory (str): Directory receiving the processed files.
    mean, std: Statistics forwarded to `normalize_and_standardize_excluding_padding`.
    """
    os.makedirs(output_directory, exist_ok=True)

    for source_path in list_npy_files(input_directory):
        standardized = normalize_and_standardize_excluding_padding(np.load(source_path), mean, std)
        destination = os.path.join(output_directory, os.path.basename(source_path))
        np.save(destination, standardized)
        print(f"Processed and saved {destination}")

def main(
    input_directory='/data/tii/data/waymo/maps/npy/',
    output_directory='/data/tii/data/waymo/maps/norm_npy/',
):
    """
    Standardize the Waymo map .npy files with fixed mean/std statistics.

    The two directories were commented out in the body, which made the call below fail
    with a NameError; they are now parameters whose defaults are those same paths.

    Parameters:
    input_directory (str): Directory holding the map .npy files to process.
    output_directory (str): Directory receiving the standardized files.
    """
    #-- modify these values with the mean and std from the padded scenarios
    mean = [1645.6687,  568.7339]
    std = [5140.911, 6306.616]
    print(f'mean and std from padded trainval: (mean: {mean}, std: {std})')

    # Process files to normalize and standardize
    process_grouped_npy_files(input_directory, output_directory, mean, std)


if __name__ == "__main__":
    main()

##### Scale 

In [None]:
def list_npy_files(directory):
    """
    List all .npy files in the specified directory.

    Parameters:
    directory (str): Directory to scan (not recursive).

    Returns:
    list: `directory` joined with every entry name ending in '.npy', sorted by name so
    that the processing order is reproducible (os.listdir returns entries in arbitrary order).
    """
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory)) if name.endswith('.npy')]

def process_grouped_npy_files(input_directory, output_directory, scale_factor):
    """
    Multiply every .npy array of `input_directory` by `scale_factor` and save the
    result under the same file name in `output_directory` (created if it does not exist).

    Parameters:
    input_directory (str): Directory scanned with `list_npy_files`.
    output_directory (str): Directory receiving the scaled files.
    scale_factor: Factor applied to the whole loaded array.
    """
    os.makedirs(output_directory, exist_ok=True)

    for source_path in list_npy_files(input_directory):
        scaled = np.load(source_path) * scale_factor
        destination = os.path.join(output_directory, os.path.basename(source_path))
        np.save(destination, scaled)
        print(f"Processed and saved {destination}")

def main(
    input_directory='/data/tii/data/waymo/maps/norm_npy/',
    output_directory='/data/tii/data/waymo/maps/norm_npy/',
):
    """
    Scale the standardized Waymo map .npy files by a factor of 100.

    The two directories were commented out in the body, which made the call below fail
    with a NameError; they are now parameters whose defaults are those same paths.

    Parameters:
    input_directory (str): Directory holding the .npy files to scale.
    output_directory (str): Directory receiving the scaled files.
    """
    # NOTE(review): by default input and output are the same directory, so the files
    # are overwritten in place and every additional run multiplies them by 100 again.
    process_grouped_npy_files(input_directory, output_directory, scale_factor=100)


if __name__ == "__main__":
    main()

# Argoverse 2

### Read scenario dict

In [None]:
# Load the dataset summary of the Argoverse 2 train pickle directory and read the
# first listed scenario so that its structure can be inspected interactively.
# This rebinds dataset_summary, scenario_ids, mapping and scenario, which later
# cells reuse.
argoverse_pkl_dir = '/data/tii/data/argoverse/pkl/train_pkl'
dataset_summary, scenario_ids, mapping = read_dataset_summary(dataset_path=argoverse_pkl_dir)

scenario = read_scenario(dataset_path=argoverse_pkl_dir, mapping=mapping, scenario_file_name=scenario_ids[0])

#--- scenario keys
print('Scenario keys : ', scenario.keys())
print('Scenario length : ', scenario['length'])

#--- agent tracks
# NOTE(review): the track key '50513' is hard-coded; presumably it exists in the
# scenario at scenario_ids[0].
print('\n===== Agent tracks =====') 
print('Scenario tracks keys <=> Agents ids : ', scenario['tracks'].keys())
print('Agent 50513 keys : ', scenario['tracks']['50513'].keys())
print('Agent 50513 state keys : ', scenario['tracks']['50513']['state'].keys())
#print(scenario['tracks']['50513']['state']['position'])

#--- map features
# NOTE(review): '781506721' is likewise a hard-coded map feature key.
print('\n===== Map Features =====') 
print('Scenario map features keys : ', scenario['map_features'].keys())
print('Map feature 781506721 keys : ', scenario['map_features']['781506721'].keys())
print('Map feature 781506721 type : ', scenario['map_features']['781506721']['type'])
#print(scenario['map_features']['781506721']['polyline'])

### Agent features

In [None]:
def process_scenario(scenario_name, mapping, dataset_path, output_dir):
    """
    Export the (x, y) tracks of the moving agents of one scenario to a .npy file.

    Only tracks typed 'PEDESTRIAN', 'CYCLIST' or 'VEHICLE' are kept. The first two
    position columns of every kept track are stacked into one array, which is saved
    in `output_dir` under the scenario file name with '.pkl' replaced by '.npy'.

    Parameters:
    scenario_name (str): Scenario file name forwarded to `read_scenario`.
    mapping: Mapping returned by `read_dataset_summary`, forwarded to `read_scenario`.
    dataset_path (str): Directory holding the scenario files.
    output_dir (str): Destination directory (created if it does not exist).
    """
    kept_types = {'PEDESTRIAN', 'CYCLIST', 'VEHICLE'}
    loaded = read_scenario(dataset_path=dataset_path, mapping=mapping, scenario_file_name=scenario_name)

    # Keep x and y only; further state entries could be added as extra columns here.
    xy_tracks = [
        track['state']['position'][:, :2]
        for track in loaded['tracks'].values()
        if track['type'] in kept_types
    ]
    stacked = np.array(xy_tracks)

    # Save processed scenario
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, scenario_name.replace('.pkl', '.npy'))
    np.save(target, stacked)
    print(f'Scenario of shape {stacked.shape} and id "{scenario_name}" has been saved!')


def main():
    """Convert every scenario listed in the Argoverse val dataset summary to a .npy file of agent tracks."""
    pkl_root = '/data/tii/data/argoverse/pkl/val_pkl/'
    npy_root = '/data/tii/data/argoverse/npy/val_npy/'
    _summary, names, file_mapping = read_dataset_summary(dataset_path=pkl_root)

    for name in names:
        process_scenario(name, file_mapping, pkl_root, npy_root)


if __name__ == "__main__":
    main()


### Map features

##### Visualize

In [None]:
# Read the fifth listed scenario, plot all of its map features (polylines and
# polygons) in thin gray lines and overlay the moving agents' trajectories in blue.
scenario = read_scenario(dataset_path=argoverse_pkl_dir, mapping=mapping, scenario_file_name=scenario_ids[4])

plt.figure(figsize=(10, 10))

for k in scenario['map_features'].keys():
    feature = scenario['map_features'][k]
    # To restrict the plot to some feature types, filter on feature['type'] here, e.g.
    # 'ROAD_LINE_BROKEN_SINGLE_WHITE', 'ROAD_LINE_SOLID_SINGLE_YELLOW',
    # 'LANE_SURFACE_STREET', 'LANE_SURFACE_UNSTRUCTURE'.
    for kk in feature.keys():
        # Polylines and polygons are drawn the same way.
        if kk in ('polyline', 'polygon'):
            points = feature[kk]
            plt.plot(points[:, 0], points[:, 1], color='gray', linewidth=0.3)

# Track types listed by the original author:
# ('PEDESTRIAN', 'CYCLIST', 'TRAFFIC_CONE', 'TRAFFIC_BARRIER', 'VEHICLE')
tracks = scenario['tracks']
# Iterate over the values only: the previous loop bound the track key to `id`,
# which shadowed the builtin for the rest of the notebook session.
for track in tracks.values():
    if track['type'] in ('PEDESTRIAN', 'CYCLIST', 'VEHICLE'):
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points (steps whose x or y is exactly zero)
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

# Display the figure, then release it
plt.show()
plt.close()

##### Extract

In [None]:
def interpolate_to_fixed_length(trajectory, num_timesteps=10, kind='linear'):
    """
    Interpolate a variable-length trajectory to a fixed number of timesteps.

    The rows of `trajectory` are treated as samples at the integer steps 0..L-1 and
    the x and y columns are evaluated at `num_timesteps` evenly spaced positions over
    that same range, so the first and last points are preserved.

    Parameters:
    trajectory (np.ndarray): Input trajectory of shape (L, 2), where L is the number of
        timesteps. Extra columns are ignored.
    num_timesteps (int): The number of timesteps to interpolate to (default is 10).
    kind: choose from 'linear', 'quadratic', 'cubic' (default is 'linear').

    Returns:
    np.ndarray: Interpolated trajectory of shape (num_timesteps, 2).

    Raises:
    ValueError: If `trajectory` has no rows.
    """
    trajectory = np.asarray(trajectory)
    L = trajectory.shape[0]  # Original length of the trajectory
    if L == 0:
        raise ValueError('Cannot interpolate an empty trajectory.')
    if L == 1:
        # interp1d needs at least two samples; a single point can only be repeated.
        return np.tile(trajectory[:, :2].astype(float), (num_timesteps, 1))

    original_timesteps = np.arange(L, dtype=float)
    new_timesteps = np.linspace(0, L - 1, num_timesteps)

    # Interpolate x and y in one call along the time axis. The new steps stay inside
    # [0, L - 1]; 'extrapolate' only guards against round-off at the two ends.
    interp_func = interp1d(original_timesteps, trajectory[:, :2], kind=kind, axis=0, fill_value='extrapolate')
    return np.asarray(interp_func(new_timesteps), dtype=float)


def slice_array_based_on_condition(arr, epsilon):
    """
    Split a point sequence wherever two consecutive points jump by more than `epsilon`.

    A break is placed between rows i and i+1 when the absolute difference of their
    x values (column 0) or of their y values (column 1) exceeds `epsilon`.

    Parameters:
    arr (np.ndarray): Points of shape (N, 2); only columns 0 and 1 are compared.
    epsilon (float): Largest per-axis step that still counts as continuous.

    Returns:
    list: Consecutive pieces of `arr`, in order. Empty list when `arr` has no rows.
    """
    if len(arr) == 0:
        return []

    # Vectorized jump detection: callers pass arrays with tens of thousands of rows,
    # which made the previous row-by-row Python loop needlessly slow.
    steps = np.abs(np.diff(arr[:, :2], axis=0))
    break_after = np.flatnonzero((steps > epsilon).any(axis=1))

    # A break after row i means the next piece starts at row i + 1.
    return np.split(arr, break_after + 1)

In [None]:
# Build the map array of the first three Argoverse scenarios: crop every map feature
# to the area covered by the agents (plus a margin of 50), split it where consecutive
# kept points are far apart, and resample every piece to 128 points.
for sc in scenario_ids[:3]:
    print(f'Map of the scenario {sc} is being processed...')
    scenario = read_scenario(dataset_path=argoverse_pkl_dir, mapping=mapping, scenario_file_name=sc)
    plt.figure(figsize=(10, 10))

    # Determine the min max position of agents in the scenario.
    # The maxima start at -inf (not 0.) so that scenarios lying entirely at negative
    # coordinates get a correct bounding box.
    tracks = scenario['tracks']
    min_x, min_y = float('inf'), float('inf')
    max_x, max_y = float('-inf'), float('-inf')
    for track in tracks.values():
        xyz = track['state']['position']  #-- array of shape (seq_length, 3)
        # Filter out invalid points
        valid_points = xyz[(xyz[:, 0] != 0.0) & (xyz[:, 1] != 0.0)]
        if valid_points.shape[0] > 0:
            min_x = min(min_x, valid_points[:, 0].min())
            min_y = min(min_y, valid_points[:, 1].min())
            max_x = max(max_x, valid_points[:, 0].max())
            max_y = max(max_y, valid_points[:, 1].max())
            # plot the agents traj
            plt.plot(valid_points[:, 0], valid_points[:, 1], color='b')

    # Crop the map feature of the scenario and filter undesired points
    arr_list = []
    for k in scenario['map_features'].keys():
        feature = scenario['map_features'][k]
        for kk in feature.keys():
            if kk not in ('polyline', 'polygon'):
                continue
            arr = feature[kk][:, :2]
            if arr.shape[0] <= 1:
                continue
            # interpolate to a dense polyline before cropping
            arr = interpolate_to_fixed_length(arr, num_timesteps=50000, kind='linear')
            # crop to the agents' bounding box, enlarged by 50 on every side
            cropped_arr = arr[
                (arr[:, 0] >= min_x - 50) &
                (arr[:, 0] <= max_x + 50) &
                (arr[:, 1] >= min_y - 50) &
                (arr[:, 1] <= max_y + 50)]

            if cropped_arr.shape[0] > 1:
                # slice the cropped array so that points separated by the crop are
                # not joined by a spurious segment
                cropped_arr_slices = slice_array_based_on_condition(cropped_arr, epsilon=50)
                for arr_slice in cropped_arr_slices:
                    if arr_slice.shape[0] > 1:
                        # interpolate every piece to the same fixed length
                        arr_slice = interpolate_to_fixed_length(arr_slice, num_timesteps=128, kind='linear')
                        arr_list.append(arr_slice)

    arr_map = np.array(arr_list)
    print(f'Shape of the map array: {arr_map.shape}')

    #-- plot the map
    for map_line in arr_map:
        plt.plot(map_line[:, 0], map_line[:, 1], color='gray', linewidth=0.3)
    plt.show()
    plt.close()

    # NOTE(review): saving is currently disabled, although the message below says "saved".
    # NOTE(review): the disabled path below points at the Waymo directory -- presumably a
    # copy-paste leftover; confirm the Argoverse destination before re-enabling.
    #dir_path = '/data/tii/data/waymo/maps/npy'
    #os.makedirs(dir_path, exist_ok=True)
    #full_path = os.path.join(dir_path, sc)
    #new_file_path = full_path.replace('.pkl', '.npy')
    #np.save(new_file_path, arr_map)
    print(f'Map of the scenario {sc} saved in npy format!\n')

##### Normalize

In [None]:
def list_npy_files(directory):
    """Return the paths (directory joined with name) of every entry ending in '.npy'."""
    npy_paths = []
    for entry in os.listdir(directory):
        if entry.endswith('.npy'):
            npy_paths.append(os.path.join(directory, entry))
    return npy_paths

def normalize_and_standardize_excluding_padding(data, mean, std):
    """Return a copy of ``data`` where every non-padding entry is mapped to (value - mean) / std.

    An entry along the last axis counts as padding when all of its components are zero;
    such entries are left untouched in the returned copy.
    """
    result = np.copy(data)
    # True wherever at least one component of the last axis is non-zero.
    keep = np.any(data != 0, axis=-1)
    result[keep] = (data[keep] - mean) / std
    return result

def process_grouped_npy_files(input_directory, output_directory, mean, std):
    """Standardize every .npy file of ``input_directory`` and write it to ``output_directory``.

    Each array is passed through ``normalize_and_standardize_excluding_padding`` with the
    given ``mean`` and ``std`` and saved under its original file name.
    """
    os.makedirs(output_directory, exist_ok=True)

    for src_path in list_npy_files(input_directory):
        result = normalize_and_standardize_excluding_padding(np.load(src_path), mean, std)
        dst_path = os.path.join(output_directory, os.path.basename(src_path))
        np.save(dst_path, result)
        print(f"Processed and saved {dst_path}")

def main():
    """Standardize the map .npy files with hard-coded mean/std values.

    NOTE(review): `input_directory` and `output_directory` are not assigned in this
    function (the assignments below are commented out), so the call at the end only
    works if both names already exist as globals, e.g. from a previously executed
    notebook cell - confirm before running.
    """
    #input_directory = '/data/tii/data/argoverse/maps/npy/'
    #output_directory = '/data/tii/data/waymo/argoverse/norm_npy/'
    
    #-- modify these values with the mean and std from the padded scenarios
    #-- train_npy
    mean = [2486.6545, 1101.2592]
    std = [2729.8386, 1496.0399]
    #-- val_npy
    # NOTE(review): these assignments overwrite the train_npy values above, so only the
    # val_npy statistics are actually used - confirm this is intended.
    mean = [2926.3154, 1114.3304]
    std = [3138.867,  1613.2261]
    
    print(f'mean and std from padded trainval: (mean: {mean}, std: {std})')
    
    # Process files to normalize and standardize
    process_grouped_npy_files(input_directory, output_directory, mean, std)

if __name__ == "__main__":
    main()

##### Scale

In [None]:
def list_npy_files(directory):
    """Collect the full path of each '.npy' entry found directly inside ``directory``."""
    npy_names = (name for name in os.listdir(directory) if name.endswith('.npy'))
    return [os.path.join(directory, name) for name in npy_names]

def process_grouped_npy_files(input_directory, output_directory, scale_factor):
    """Multiply every .npy array of ``input_directory`` by ``scale_factor``.

    The scaled arrays are saved in ``output_directory`` under their original file names.
    """
    os.makedirs(output_directory, exist_ok=True)

    for src_path in list_npy_files(input_directory):
        rescaled = np.load(src_path) * scale_factor
        dst_path = os.path.join(output_directory, os.path.basename(src_path))
        np.save(dst_path, rescaled)
        print(f"Processed and saved {dst_path}")

def main():
    """Scale the map .npy files by a factor of 100.

    NOTE(review): `input_directory` and `output_directory` are not assigned here (the
    assignments below are commented out); they must already exist as globals. The two
    commented-out paths are identical, which would overwrite the inputs in place -
    confirm the intended directories before running.
    """
    #input_directory = '/data/tii/data/argoverse/maps/norm_npy/'
    #output_directory = '/data/tii/data/argoverse/maps/norm_npy/'

    # Scale every file by a constant factor
    process_grouped_npy_files(input_directory, output_directory, scale_factor=100)

if __name__ == "__main__":
    main()

# Agents preprocessing

#### 1- Sample the sequence length to 13. 

In [None]:
def list_npy_files(directory):
    """Build the list of '.npy' file paths located directly in ``directory``."""
    matches = filter(lambda name: name.endswith('.npy'), os.listdir(directory))
    return [os.path.join(directory, name) for name in matches]

def process_npy_files(input_directory, output_directory, step=12):
    """Subsample every .npy array along its second axis and save the result.

    Each array found in ``input_directory`` keeps one entry out of ``step`` along
    axis 1 (``data[:, ::step, :]``) and is written to ``output_directory`` under the
    same file name.
    """
    os.makedirs(output_directory, exist_ok=True)

    for src_path in list_npy_files(input_directory):
        full_seq = np.load(src_path)
        print('Data shape', full_seq.shape)
        subsampled = full_seq[:, ::step, :]
        print('Sampled data shape: ', subsampled.shape)
        dst_path = os.path.join(output_directory, os.path.basename(src_path))
        np.save(dst_path, subsampled)
        print(f"Cropped, sampled, and saved {dst_path}")

def main():
    """Subsample the Argoverse agent arrays with a stride of 9."""
    # Strides noted by the author: 12 for nuScenes, 16 for Waymo, 9 for Argoverse.
    process_npy_files(
        '/data/tii/data/argoverse/npy',
        '/data/tii/data/argoverse/npy_prep',
        step=9,
    )

if __name__ == "__main__":
    main()

#### 2- Filter fully padded agents and out-of-range scenarios

In [None]:
def remove_zero_agents(scenario):
    """
    Drop every agent whose whole trajectory consists of zeros.

    Parameters:
    - scenario: numpy array of shape (N, L, D) (agents, sequence length, dimension).

    Returns:
    - numpy array containing only the agents that have at least one non-zero value.
    """
    # Shape (N,): True for agents that are zero at every step and in every dimension.
    is_all_zero = np.all(scenario == 0.0, axis=(1, 2))
    return scenario[np.logical_not(is_all_zero)]


# Directory where scenarios are stored
data_dir = '/data/tii/data/argoverse/npy_prep'

# Iterate over each file in the directory
# (every directory entry is loaded; there is no '.npy' extension filter here)
for filename in os.listdir(data_dir):
    file_path = os.path.join(data_dir, filename)
    scenario = np.load(file_path)
    
    # Remove all-zero agents
    filtered_scenario = remove_zero_agents(scenario)
    
    # Save the filtered scenario
    # output_path is built from data_dir and filename, i.e. the same path that was
    # just loaded, so each scenario file is replaced by its filtered version.
    output_path = os.path.join(data_dir, filename)
    np.save(output_path, filtered_scenario)
    
    print(f"Processed and saved {filename} with shape original shape {scenario.shape} and new shape {filtered_scenario.shape}")


In [None]:
# Directory where scenarios are stored
# NOTE(review): both assignments below are commented out. `data_dir` is set by the
# previous cell, but `output_dir` is not assigned anywhere in this cell and must
# already exist as a global - confirm before running.
#data_dir = '/data/tii/data/waymo/npy'
#output_dir = '/data/tii/data/waymo/npy_'
os.makedirs(output_dir, exist_ok=True)

# Set threshold for filtering scenarios (e.g., values in the y dimension should not exceed 81127)
threshold = 81127

# Iterate over each file in the directory
for filename in os.listdir(data_dir):
    file_path = os.path.join(data_dir, filename)
    scenario = np.load(file_path)
    
    # Check if any value in the y dimension (index 1) lies outside [-threshold, threshold]
    if np.any(scenario[:, :, 1] > threshold) or np.any(scenario[:, :, 1] < -threshold):
        print(f'Scenario {filename} discarded due to out-of-range values')
    else:
        # Save the scenario unchanged in the output directory
        output_path = os.path.join(output_dir, filename)
        np.save(output_path, scenario)
        print(f'Scenario {filename} saved')


#### 3- Filter stationary agents from prep dataset

In [None]:
def calculate_total_movement(data_part):
    """Return, per agent, the summed Euclidean length of its valid consecutive steps.

    A step is treated as padding when all of its components are zero. A displacement
    between two consecutive steps only counts when neither of the two is padding.
    """
    # (N, L): True for steps that are not padding
    is_real_step = np.logical_not(np.all(data_part == 0., axis=2))

    # (N, L-1): True where both ends of a displacement are real steps
    both_real = np.logical_and(is_real_step[:, :-1], is_real_step[:, 1:])

    # Displacements between consecutive steps, zeroed out where invalid
    step_deltas = np.diff(data_part, axis=1)
    masked_deltas = step_deltas * both_real[:, :, np.newaxis]

    # Length of each displacement, accumulated along the sequence
    return np.linalg.norm(masked_deltas, axis=2).sum(axis=1)

def remove_stationary_agents(data, min_distance):
    """
    Keep only the agents that move more than ``min_distance`` in both trajectory parts.

    The sequence axis is split at index 8: the first 8 steps form the history and the
    remaining steps form the future. Padded steps are ignored by
    ``calculate_total_movement``.

    Parameters:
        data (numpy.ndarray): Agent trajectories of shape (N, L, D) (agents, sequence
                              length, dimension).
        min_distance (float): Total distance an agent must exceed, in the history and
                              in the future, to be kept.

    Returns:
        tuple: (filtered data, per-agent history movement, per-agent future movement).
               The two movement arrays cover all N input agents.
    """
    past_motion = calculate_total_movement(data[:, :8, :])
    upcoming_motion = calculate_total_movement(data[:, 8:, :])

    # An agent is kept only when it moves enough in the history AND in the future.
    moved_in_past = past_motion > min_distance
    moved_in_future = upcoming_motion > min_distance
    keep = moved_in_past & moved_in_future

    return data[keep], past_motion, upcoming_motion


def main():
    """Filter stationary agents out of every scenario in `input_dir`.

    Each file is loaded, passed through `remove_stationary_agents`, and saved to
    `output_dir` under the same name unless no agent is left. The per-scenario
    movement arrays are collected in two lists that are only consumed by the
    commented-out histogram code at the end.
    """
    # set at the minimum distance travelled 
    # for historical and future trajectories per agent
    min_distance=4
    input_dir = '/data/tii/data/nuscenes/npy_prep'
    output_dir = '/data/tii/data/nuscenes/npy_clean'
    os.makedirs(output_dir, exist_ok=True)
    
    history_total_movement_list, future_total_movement_list = [], []
    # Every directory entry is loaded; there is no '.npy' extension filter here.
    for filename in os.listdir(input_dir):
        data = np.load(os.path.join(input_dir, filename))
        filtered_data, history_total_movement, future_total_movement = remove_stationary_agents(data, min_distance=min_distance)
        
        history_total_movement_list.append(history_total_movement)
        future_total_movement_list.append(future_total_movement)
        
        if filtered_data.shape[0] != 0:  # Only save non-empty filtered data
            output_path = os.path.join(output_dir, filename)
            np.save(output_path, filtered_data)
            print(f"Shape of the original data {data.shape} and the filtered data {filtered_data.shape}")
            print(f"Processed and saved {output_path}")
        else:
            print(f"All agents are stationnary in {filename}, so the scenario wasn't retained")
    
    # Disabled diagnostic: histogram of the total movement of the retained agents.
    #flattened_history = np.concatenate(history_total_movement_list)
    #flattened_history = flattened_history[flattened_history > min_distance]
    #flattened_future = np.concatenate(future_total_movement_list)
    #flattened_future = flattened_future[flattened_future > min_distance]
    #mean_future = np.mean(flattened_future)
    #mean_history = np.mean(flattened_history)

    #plt.figure(figsize=(10, 6))
    #plt.hist(flattened_history, bins=10, edgecolor='g', alpha=0.3, label="History Movement")
    #plt.hist(flattened_future, bins=10, edgecolor='b', alpha=0.3, label="Future Movement")

    #plt.axvline(mean_history, color='g', linestyle='dashed', linewidth=1.5, label=f"History Mean: {mean_history:.2f}")
    #plt.axvline(mean_future, color='b', linestyle='dashed', linewidth=1.5, label=f"Future Mean: {mean_future:.2f}")

    #plt.title("Histogram of Total Movement for Future and History Trajectories")
    #plt.xlabel("Total Movement")
    #plt.ylabel("Frequency")
    #plt.legend()
    #plt.grid(axis='y', linestyle='--', alpha=0.7)
    #plt.show()
    
              
if __name__ == "__main__":
    main()


In [None]:
# Plot history (black) and future (green) trajectories of a sample of cleaned scenarios.
data_real_dir = '/data/tii/data/nuscenes/npy_clean'
max_plots = 100  # not used in this cell; the sample is chosen by the slice below
figsize = (10, 10)
#fig = plt.figure(figsize=figsize)
# Every 8th file among the first 850 (sorted by name) is plotted.
for l_real in sorted(os.listdir(data_real_dir))[0:850:8]:
    print(l_real)
    data_real = np.load(os.path.join(data_real_dir, l_real))
    # The future slice starts at index 7, so it shares one step with the history slice
    # (presumably so that the two plotted segments connect - confirm).
    data_hist, data_future = data_real[:, :8, :], data_real[:, 7:, :]
    print(f'Real data has shape: {data_real.shape}')

    # Loop through each scene and plot separately
    plt.figure(figsize=figsize) 
    for ag in [i for i in range(data_hist.shape[0])]:  # Loop through each agent in the scene   
        agent_hist, agent_future = data_hist[ag], data_future[ag] 
        
        # Keep only the steps whose x AND y are both non-zero; a step with a single
        # coordinate exactly equal to 0.0 is therefore dropped as well.
        valid_agent_future = agent_future[(agent_future[:, 0] != 0.0) & (agent_future[:, 1] != 0.0)]
        plt.plot(valid_agent_future[:, 0], valid_agent_future[:, 1], 'g-o', linewidth=1.2, markersize=2.8)
        
        valid_agent_hist = agent_hist[(agent_hist[:, 0] != 0.0) & (agent_hist[:, 1] != 0.0)]
        plt.plot(valid_agent_hist[:, 0], valid_agent_hist[:, 1], 'k-o', linewidth=1.2, markersize=2.8)
    plt.show() 
    plt.close()
        
#plt.show() 
#plt.close()

#### 4- Standardize the x,y positions through a dataset specific mean and std

In [None]:
def calculate_dataset_statistics(input_dir):
    """
    Calculate Mean/Std and Min/Max values over all non-padded entries of a dataset.

    Every file of ``input_dir`` is loaded with ``np.load``. Entries whose components
    along the last axis are all zero are treated as padding and excluded; the
    statistics are then computed over axis 0 of the stacked remaining entries.

    Parameters:
        input_dir (str): Directory containing the scenario arrays.

    Returns:
        tuple: (mean, std, min, max), one value per component of the last axis.

    Raises:
        ValueError: If ``input_dir`` contains no file.
    """
    scenario_list = []
    for filename in os.listdir(input_dir):
        scenario = np.load(os.path.join(input_dir, filename))
        #scenario_list.append(scenario.reshape(-1, 2)) # if including padding
        mask = np.any(scenario != 0, axis=-1)
        scenario_list.append(scenario[mask])
    print('The dataset contains', len(scenario_list), 'scenarios.')

    # np.concatenate would fail with a generic message on an empty list.
    if not scenario_list:
        raise ValueError(f'No scenario files found in {input_dir}')

    dataset = np.concatenate(scenario_list, axis=0)
    mean = np.mean(dataset, axis=0)
    std = np.std(dataset, axis=0)
    # Named *_xy so the builtins min() and max() are not shadowed.
    min_xy = np.min(dataset, axis=0)
    max_xy = np.max(dataset, axis=0)
    print(f'Mean: {mean}, Std: {std}, Min: {min_xy}, Max: {max_xy}')
    return mean, std, min_xy, max_xy
 
def normalize(data, min, max):
    """Rescale the non-padding entries of ``data`` with 2 * (value - min) / (max - min) - 1.

    Entries whose last-axis components are all zero are treated as padding and kept
    unchanged. A new array is returned; ``data`` is not modified.
    """
    rescaled = np.copy(data)
    keep = np.any(data != 0, axis=-1)
    value_range = max - min
    rescaled[keep] = 2 * (data[keep] - min) / value_range - 1
    return rescaled

def standardize(data, mean, std):
    """Map the non-padding entries of ``data`` to (value - mean) / std.

    Entries whose last-axis components are all zero are treated as padding and kept
    unchanged. A new array is returned; ``data`` is not modified.
    """
    standardized = np.copy(data)
    keep = np.any(data != 0, axis=-1)
    standardized[keep] = (data[keep] - mean) / std
    return standardized


def main():
    """Standardize every scenario of the cleaned Argoverse set with dataset-wide statistics."""
    src_dir = '/data/tii/data/argoverse/npy_clean'
    dst_dir = '/data/tii/data/argoverse/npy_stand'
    os.makedirs(dst_dir, exist_ok=True)

    # Step 1: dataset-wide statistics. The min/max values would only be needed by
    # normalize(), which is not applied here.
    mean, std, _lowest, _highest = calculate_dataset_statistics(src_dir)

    # Step 2: standardize each scenario and store it under the same file name.
    for filename in os.listdir(src_dir):
        loaded = np.load(os.path.join(src_dir, filename))
        standardized = standardize(loaded, mean, std)
        np.save(os.path.join(dst_dir, filename), standardized)
        print(f'Scenario {filename} processed and saved.')
    print("Processing complete.")

if __name__ == "__main__":
    main()


#### 5- Scale the standardized dataset

In [None]:
def main():
    """Multiply every scenario array by a constant scale factor and save the result.

    NOTE(review): `input_dir` and `output_dir` are not assigned in this function (the
    assignments below are commented out), so they must already exist as globals,
    e.g. from a previously executed notebook cell - confirm before running.
    """
    scale_factor = 100  # choose the scale to avoid values near zero 
    #input_dir = '/data/tii/data/argoverse/3_npy_stand'
    #output_dir = '/data/tii/data/argoverse/4_npy_scaled_100'
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Process each scenario
    for filename in os.listdir(input_dir):
        file_path = os.path.join(input_dir, filename)
        scenario = np.load(file_path)
        
        # Scale scenario:
        scaled_scenario = scenario * scale_factor
        
        # Save processed scenario under the same file name
        output_path = os.path.join(output_dir, filename)
        np.save(output_path, scaled_scenario)
        print(f'Scenario {filename} scaled and saved.')
        
    print("Scenarios scaling complete.")

if __name__ == "__main__":
    main()

#### 6- Visualize histogram of x or y positions

In [None]:
# Histogram (log-scaled counts) of all values at index 1 of the last axis, over every
# file of the directory.
input_directory = '/data/tii/data/waymo/npy_stand'

# Initialize an empty list to accumulate y positions
all_y_positions = []

# Iterate over each file in the directory and accumulate y positions
for filename in os.listdir(input_directory):
    file_path = os.path.join(input_directory, filename)
    data = np.load(file_path)
    
    # Extract y positions and append to the list
    # No padding mask is applied here, so zero-padded steps contribute zeros.
    y_positions = data[:, :, 1].flatten()
    all_y_positions.extend(y_positions)

# Convert the accumulated y positions to a numpy array for plotting
all_y_positions = np.array(all_y_positions)

# Plot the histogram
plt.figure(figsize=(10, 6))
plt.hist(all_y_positions, bins=100, color='skyblue', edgecolor='black', alpha=0.7)
plt.title('Histogram of Y Positions Across Dataset')
plt.xlabel('Y Position')
plt.ylabel('Frequency')
plt.yscale('log')  # Use log scale for better visibility of distribution
plt.grid(axis='y', linestyle='--', linewidth=0.7)
plt.axvline(0, color='red', linestyle='--', label='Y = 0')
plt.legend()

# Place 10 evenly spaced x-axis ticks on a range symmetric around zero
max_value = int(np.ceil(np.max(np.abs(all_y_positions))))
plt.xticks(np.linspace(-max_value, max_value, num=10, endpoint=True))

# Show the plot
plt.show()


#### 7- train test split from main dir

In [25]:
def split_dataset(main_dir, train_dir, test_dir, train_ratio=0.8, seed=None):
    """
    Copy the files of a dataset directory into train and test directories.

    The files are shuffled, the first ``int(n * train_ratio)`` go to ``train_dir`` and
    the rest to ``test_dir``. Files are copied, so ``main_dir`` is left untouched.

    Parameters:
        main_dir (str): Path to the main dataset directory containing the files.
        train_dir (str): Directory receiving the train share of the files.
        test_dir (str): Directory receiving the remaining files.
        train_ratio (float): Proportion of files copied to the train directory
                             (default is 0.8 for 80%).
        seed (int | None): Optional seed for the shuffle. With a seed, the same
                           directory content always yields the same split; with
                           ``None`` (default) the global ``random`` state is used.
    """
    # Only regular files can be copied with shutil.copy; skip sub-directories.
    # Sorting makes the seeded shuffle independent of the os.listdir() order.
    all_files = sorted(
        name for name in os.listdir(main_dir)
        if os.path.isfile(os.path.join(main_dir, name))
    )

    # Shuffle the file list for randomness
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(all_files)

    # Split files for train and test
    split_index = int(len(all_files) * train_ratio)
    train_files = all_files[:split_index]
    test_files = all_files[split_index:]

    # Create train and test directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Copy each share to its destination directory
    for target_dir, file_names in ((train_dir, train_files), (test_dir, test_files)):
        for file_name in file_names:
            shutil.copy(os.path.join(main_dir, file_name), os.path.join(target_dir, file_name))

    print(f"Copied {len(train_files)} files to {train_dir}")
    print(f"Copied {len(test_files)} files to {test_dir}")

# Example usage: copy the scaled nuScenes scenarios into the train/test directories
# with the default 80/20 ratio.
main_dir = "/data/tii/data/nuscenes/4_npy_scaled_100"
train_dir = "/data/tii/data/autod/train_merged_scaled_100"
test_dir = "/data/tii/data/autod/test_merged_scaled_100"
split_dataset(main_dir, train_dir, test_dir)


Copied 674 files to /data/tii/data/autod/train_merged_scaled_100
Copied 169 files to /data/tii/data/autod/test_merged_scaled_100


#### 8- Pad scenarios to max number of agents

In [14]:
# Paths to your dataset
input_directory = '/data/tii/data/nuscenes/3_npy_stand'

# Step 1: Collect the number of agents N (first array dimension) of every .npy file
# and report the three largest and three smallest distinct values.
max_N = 0
min_N = float('inf')
unique_N = set()  # To store unique N values

for filename in os.listdir(input_directory):
    if filename.endswith('.npy'):
        file_path = os.path.join(input_directory, filename)
        data = np.load(file_path)
        # Unpacking requires every array to be 3-dimensional.
        N, L, D = data.shape
        # Update max and min N
        max_N = max(max_N, N)
        min_N = min(min_N, N)
        # Store the unique N values
        unique_N.add(N)

# Convert the set of unique N values to a sorted list
sorted_N = sorted(unique_N)

# Get the second and third minimum if they exist
second_min_N = sorted_N[1] if len(sorted_N) > 1 else None
third_min_N = sorted_N[2] if len(sorted_N) > 2 else None

# Get the second and third maximum if they exist
second_max_N = sorted_N[-2] if len(sorted_N) > 1 else None
third_max_N = sorted_N[-3] if len(sorted_N) > 2 else None

print(f"Maximum number of agents (N) across all files: {max_N}")
print(f"Second maximum number of agents (N): {second_max_N}")
print(f"Third maximum number of agents (N): {third_max_N}")

print(f"Minimum number of agents (N) across all files: {min_N}")
print(f"Second minimum number of agents (N): {second_min_N}")
print(f"Third minimum number of agents (N): {third_min_N}")

Maximum number of agents (N) across all files: 44
Second maximum number of agents (N): 40
Third maximum number of agents (N): 33
Minimum number of agents (N) across all files: 1
Second minimum number of agents (N): 2
Third minimum number of agents (N): 3


In [None]:
# Paths to your dataset
# NOTE(review): both assignments are commented out. `input_directory` is set by an
# earlier cell, but `output_directory` is not assigned in this cell and must already
# exist as a global - confirm before running.
#input_directory = '/data/tii/data/argoverse/npy'
#output_directory = '/data/tii/data/argoverse/npy_prep'
os.makedirs(output_directory, exist_ok=True)

# NOTE(review): hard-coded target; it overrides any max_N computed by a previous cell.
# Confirm it matches the dataset being padded.
max_N = 49
# Step 2: Pad each file to max_N
for filename in os.listdir(input_directory):
    if filename.endswith('.npy'):
        file_path = os.path.join(input_directory, filename)
        data = np.load(file_path)
        N, L, D = data.shape
        
        if N < max_N:
            # Create a new array with the shape (max_N, L, D) and fill it with zeros
            # (np.zeros is called without a dtype, so the result may not keep data.dtype)
            padded_data = np.zeros((max_N, L, D))
            
            # Copy the original data into the new array
            padded_data[:N, :, :] = data
            
            # Save the padded data
            output_path = os.path.join(output_directory, filename)
            np.save(output_path, padded_data)
        else:
            # N >= max_N: the array is saved unchanged, so a file with more than
            # max_N agents keeps all of its agents.
            output_path = os.path.join(output_directory, filename)
            np.save(output_path, data)

print("Padding completed and saved to the output directory.")

#### 0- Other

##### Move/Rename files from dir_A to dir_D knowing dir_B

In [None]:
import os
import shutil

def move_matching_files(dir_A, dir_B, dir_D):
    """
    Move to dir_D every entry of dir_A whose name also exists in dir_B.

    Parameters:
    dir_A (str): The source directory from which to move files.
    dir_B (str): The reference directory providing the names to look for.
    dir_D (str): The destination directory (created if missing).
    """
    os.makedirs(dir_D, exist_ok=True)

    # Names present in the reference directory, as a set for fast membership tests
    reference_names = set(os.listdir(dir_B))

    for candidate in os.listdir(dir_A):
        if candidate not in reference_names:
            continue
        shutil.move(os.path.join(dir_A, candidate), os.path.join(dir_D, candidate))
        print(f"Moved: {candidate} from {dir_A} to {dir_D}")

if __name__ == "__main__":
    # Move to npy_test the files of npy_train whose names appear in npy_clean_test.
    dir_A = '/data/tii/data/nuscenes/npy_train'  
    dir_B = '/data/tii/data/nuscenes/npy_clean_test'  
    dir_D = '/data/tii/data/nuscenes/npy_test'  

    move_matching_files(dir_A, dir_B, dir_D)


In [None]:
def rename_files(dir_a, dir_b, dir_c):
    """
    Move the files of dir_b into dir_c, renaming them after the files of dir_a.

    Both directory listings are sorted by name and paired position by position: the
    i-th file of dir_b is moved to dir_c under the name of the i-th file of dir_a.
    The files of dir_a themselves are not touched.

    Parameters:
    dir_a (str): Directory providing the new file names.
    dir_b (str): Directory whose files are moved and renamed.
    dir_c (str): Destination directory (created if missing).

    Returns:
    None. If the two directories do not hold the same number of entries, an error
    message is printed and nothing is moved.
    """
    # Get sorted list of files from both directories
    files_a = sorted(os.listdir(dir_a))
    files_b = sorted(os.listdir(dir_b))

    # The positional pairing only makes sense when both listings have the same length
    if len(files_a) != len(files_b):
        print("Error: The number of files in both directories must be the same.")
        return

    # The move fails when the destination directory does not exist
    os.makedirs(dir_c, exist_ok=True)

    for file_a, file_b in zip(files_a, files_b):
        # Move the file of B into C under the name taken from A
        shutil.move(os.path.join(dir_b, file_b), os.path.join(dir_c, file_a))
        print(f"Renamed {file_b} to {file_a}")

# Example usage: move the raster files of dir_b into dir_c, named after the files of dir_a
dir_a = '/data/tii/data/nuscenes_trainval_npy'
dir_b = '/data/tii/data/nuscenes_maps/nuscenes_trainval_raster_npy_copy'
dir_c = '/data/tii/data/nuscenes_maps/nuscenes_trainval_raster'

# Make sure the destination exists before moving files into it
os.makedirs(dir_c, exist_ok=True)
rename_files(dir_a, dir_b, dir_c)

##### Plot npy and rasterized maps

In [None]:
# Plot the first `max_plots` map arrays (sorted by file name), one figure per file.
map_dir = '/data/tii/data/nuscenes_maps/nuscenes_trainval_maps_test'
max_plots = 3
figsize = (10, 10)
for l in sorted(os.listdir(map_dir))[:max_plots]:
    print(l)
    data = np.load(os.path.join(map_dir, l))
    # One figure per file
    plt.figure(figsize=figsize) 
    # Each entry along the first axis is drawn as one gray line, using index 0 of the
    # last axis as x and index 1 as y.
    for ag in [i for i in range(data.shape[0])]:  # Loop through each entry of the first axis   
        plt.plot(data[ag, :, 0], data[ag, :, 1], 'gray', linewidth=0.8)
    plt.show() 
    plt.close()  

In [None]:
# Show the first four slices of the first ten raster arrays (sorted by file name).
map_dir = '/data/tii/data/nuscenes_maps/nuscenes_trainval_raster_test'

for l in sorted(os.listdir(map_dir))[:10]:
    print(l)
    data = np.load(os.path.join(map_dir, l))
    fig, axes = plt.subplots(1, 4, figsize=(16, 4))
    # Indexing data[0..3] requires at least four entries along the first axis.
    for i in range(4):
        axes[i].imshow(data[i], cmap='gray')  # Plot each slice in grayscale
        axes[i].set_title(f'Image {i+1}')  # Set title for each subplot
        axes[i].axis('off')  # Turn off axis labels and ticks

    plt.show()  # Display the plots

##### Replace padded values with saturation

In [None]:
def list_npy_files(directory):
    """Return the path of every directory entry whose name ends with '.npy'."""
    found = []
    for entry_name in os.listdir(directory):
        if not entry_name.endswith('.npy'):
            continue
        found.append(os.path.join(directory, entry_name))
    return found

def replace_padded_values(data):
    """Fill all-zero steps with the nearest following non-zero step of the same agent.

    Zero steps that have no non-zero step after them take the last non-zero step
    instead. Agents without any non-zero step are left as they are. ``data`` is
    modified in place and also returned.
    """
    for agent_idx in range(data.shape[0]):
        agent = data[agent_idx]
        is_pad = [bool(np.all(agent[t] == 0)) for t in range(data.shape[1])]
        if all(is_pad):
            # Nothing to copy from: the agent stays fully padded.
            continue

        last_real = max(t for t, pad in enumerate(is_pad) if not pad)
        next_real = None
        # Walk backwards so that next_real is always the closest real step after t.
        for t in range(len(is_pad) - 1, -1, -1):
            if not is_pad[t]:
                next_real = t
            elif next_real is not None:
                data[agent_idx, t] = data[agent_idx, next_real]
            else:
                # Trailing padding: repeat the last real step.
                data[agent_idx, t] = data[agent_idx, last_real]

    return data

def process_grouped_npy_files(input_directory, output_directory):
    """Apply ``replace_padded_values`` to every .npy file of ``input_directory``.

    The resulting arrays are saved in ``output_directory`` under their original names.
    """
    os.makedirs(output_directory, exist_ok=True)

    for src_path in list_npy_files(input_directory):
        filled = replace_padded_values(np.load(src_path))
        dst_path = os.path.join(output_directory, os.path.basename(src_path))
        np.save(dst_path, filled)
        print(f"Processed and saved {dst_path}")

def main():
    """Replace the padded steps of the normalized nuScenes vehicle arrays."""
    process_grouped_npy_files(
        '/data/tii/data/nuscenes_trainval_veh_norm_npy',
        '/data/tii/data/nuscenes_trainval_veh_norm_satur_npy',
    )

if __name__ == "__main__":
    main()

##### Add padding to seq length

In [None]:
# Define paths (the output directory is created if it does not exist)
input_directory = '/data/tii/data/nuscenes_trainval_veh_processed_maxag_npy'
output_directory = '/data/tii/data/nuscenes_trainval_veh_final_npy'
os.makedirs(output_directory, exist_ok=True)

# Define the desired sequence length after padding
desired_length = 20

# Function to pad sequences
def pad_sequence(sequence, pad_len, position):
    """
    Zero-pad a sequence to the desired length.

    Args:
        sequence (np.ndarray): The original sequence of shape (L, D).
        pad_len (int): The total length to pad the sequence to.
        position (str): Where to add padding. Options are 'beginning', 'middle',
            'end', or 'beginning_end'. 'middle' inserts the padding between the
            first ``L // 2`` steps and the remaining ones; 'beginning_end' puts
            ``pad // 2`` rows in front and the rest at the end.

    Returns:
        np.ndarray: The padded sequence of shape (pad_len, D).

    Raises:
        ValueError: If ``position`` is not one of the supported options, or if the
            sequence is longer than ``pad_len``.
    """
    original_length, feature_dim = sequence.shape[0], sequence.shape[1]
    pad_amount = pad_len - original_length

    if position == 'beginning':
        pad_before, pad_middle, pad_after = pad_amount, 0, 0
    elif position == 'middle':
        pad_before, pad_middle, pad_after = 0, pad_amount, 0
    elif position == 'end':
        pad_before, pad_middle, pad_after = 0, 0, pad_amount
    elif position == 'beginning_end':
        pad_before = pad_amount // 2
        pad_middle = 0
        pad_after = pad_amount - pad_before
    else:
        raise ValueError("Invalid position argument. Choose from 'beginning', 'middle', 'end', 'beginning_end'.")

    # Fail with a clear message instead of np.zeros' negative-dimension error.
    if pad_amount < 0:
        raise ValueError(
            f"Cannot pad a sequence of length {original_length} to a shorter length {pad_len}."
        )

    # The sequence is always split in two halves; only 'middle' puts rows between them.
    half = original_length // 2
    return np.vstack((
        np.zeros((pad_before, feature_dim)),
        sequence[:half, :],
        np.zeros((pad_middle, feature_dim)),
        sequence[half:, :],
        np.zeros((pad_after, feature_dim)),
    ))

# Loop through each file in the input directory
for filename in os.listdir(input_directory):
    if filename.endswith('.npy'):
        file_path = os.path.join(input_directory, filename)
        
        # Load the data
        data = np.load(file_path)
        
        # Initialize the padded data array: same first and last dimensions as data,
        # with the sequence axis set to desired_length
        padded_data = np.zeros((data.shape[0], desired_length, data.shape[2]))
        
        for i in range(data.shape[0]):
            # Decide where to add padding ('beginning', 'middle', 'end', 'beginning_end')
            # The position depends only on the agent index: agent i uses entry i % 4.
            positions = ['beginning', 'middle', 'end', 'beginning_end']
            pos = positions[i % len(positions)]  # For illustration, cycle through the positions
            
            padded_data[i] = pad_sequence(data[i], desired_length, pos)
        
        # Save the padded data under the same file name
        output_path = os.path.join(output_directory, filename)
        np.save(output_path, padded_data)

print("Padding completed and saved to the output directory.")


# Maps preprocessing