In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the three sampled tables (agents, frames, scenes) from the working directory.
# NOTE(review): the agents file carries the suffix "table0" while the frames and
# scenes files carry "table1" -- confirm these belong to the same sample.
agents_df, frame_df, scene_df = map(
    pd.read_csv,
    ('rand_agents_table0.csv', 'rand_frames_table1.csv', 'rand_scenes_table1.csv'),
)

In [None]:
# Attach the scene each agent row belongs to by looking its frame_index up in frame_df.
# The default inner join is kept, so agent rows whose frame_index is missing from
# frame_df are dropped.
#
# The guard makes this cell safe to re-run: merging a second time would collide on the
# already-present 'scene_index' column and produce 'scene_index_x' / 'scene_index_y',
# which breaks the later groupby on 'scene_index'.
if 'scene_index' not in agents_df.columns:
    agents_df = agents_df.merge(frame_df[['frame_index', 'scene_index']], on='frame_index')
agents_df

In [None]:

# Speed is the magnitude of the (velocity_x, velocity_y) vector. It is added to the
# per-row table BEFORE averaging on purpose: the mean of per-row speeds is not the same
# quantity as the speed computed from the mean velocity components.
agents_df['speed'] = np.sqrt(
    agents_df['velocity_x'].pow(2).add(agents_df['velocity_y'].pow(2))
)

# Label columns that are discarded from the per-track table
# (despite the variable's name, these are column names, not rows).
drop_rows = [
    'PERCEPTION_LABEL_NOT_SET',
    'PERCEPTION_LABEL_DONTCARE',
    'PERCEPTION_LABEL_MOTORCYCLIST',
    'PERCEPTION_LABEL_ANIMAL',
]

# One row per (scene_index, track_id), every other column averaged over that track's rows.
# NOTE(review): .mean() is applied to all remaining columns -- presumably they are all
# numeric; confirm against the CSV schema.
agents_df_grouped = (
    agents_df
    .groupby(['scene_index', 'track_id'], as_index=False)
    .mean()
    .drop(columns=drop_rows)
)

# Positional drop of eight more columns (positions 12..19 after the drop above).
# NOTE(review): presumably the remaining unused label columns -- this depends on the
# exact column order of the CSV; confirm, and prefer dropping by name.
agents_df_grouped = agents_df_grouped.drop(columns=agents_df_grouped.columns[12:20])
agents_df_grouped

In [None]:
# Collapse the four averaged label columns into a single integer category per track.
# The position in this list is the category code written to PERCEPTION_CATEGORY:
#   0 = UNKNOWN, 1 = CAR, 2 = CYCLIST, 3 = PEDESTRIAN
label_columns = [
    'PERCEPTION_LABEL_UNKNOWN',
    'PERCEPTION_LABEL_CAR',
    'PERCEPTION_LABEL_CYCLIST',
    'PERCEPTION_LABEL_PEDESTRIAN',
]
unknown, car, cyclist, ped = (np.array(agents_df_grouped[name]) for name in label_columns)

category = np.array([unknown, car, cyclist, ped])  # one row per label, one column per track
category_arr = category.T                          # one row per track, one column per label

# argmax picks the label with the largest averaged value; ties resolve to the
# lowest code (so a track with all four values equal is coded 0).
categories = category_arr.argmax(axis=1)
agents_df_grouped['PERCEPTION_CATEGORY'] = categories
agents_df_grouped

In [None]:
def find_misrecorded_oversized_track_ids(agents_df_grouped, max_vehicle_length=19.8):
    """
    Select the tracks whose footprint is implausibly large, i.e. likely recording errors.

    A row is selected when extent_x OR extent_y is strictly greater than
    ``max_vehicle_length``. Both extents are checked because it is not known which
    of the two is the length and which is the width. Rows whose extents are NaN
    are never selected, since comparisons against NaN are False.

    Args:
        agents_df_grouped (pd.DataFrame): table with ``extent_x`` and ``extent_y``
            columns. Intended to be the agents table AFTER the groupby mean
            (one row per track), not the original per-row agents_df.
        max_vehicle_length (float): size threshold, in the same units as the extent
            columns. Defaults to 19.8, which the notebook author uses as the US
            legal maximum vehicle side length in meters.
            NOTE(review): the legal source of 19.8 m is not shown here -- confirm.

    Returns:
        (pd.DataFrame): the selected rows of ``agents_df_grouped`` with all columns
        and the original index labels kept; empty if nothing exceeds the threshold.
    """
    # Bracket access instead of attribute access, so the lookup cannot be shadowed
    # by a DataFrame attribute or method of the same name.
    x_too_big = agents_df_grouped["extent_x"] > max_vehicle_length
    y_too_big = agents_df_grouped["extent_y"] > max_vehicle_length
    return agents_df_grouped[x_too_big | y_too_big]

def remove_misrecorded_oversized_track_ids_from_given_table(input_agent_table,
                                                            misrecorded_oversized_track_ids_rows):
    """
    Drop every row of ``input_agent_table`` that belongs to a misrecorded track.

    A track is identified by its (scene_index, track_id) pair. This is a left
    anti-join: rows of ``input_agent_table`` whose pair appears in
    ``misrecorded_oversized_track_ids_rows`` are removed, all others are kept.

    Args:
        input_agent_table (pd.DataFrame): any agents table with ``scene_index`` and
            ``track_id`` columns (per-row or grouped).
        misrecorded_oversized_track_ids_rows (pd.DataFrame): table holding the
            (scene_index, track_id) pairs to remove; other columns are ignored.

    Returns:
        (pd.DataFrame): the surviving rows in their original order, with the same
        columns as ``input_agent_table``. The index comes from the merge (positional),
        not from ``input_agent_table``'s own index, and neither input is modified.
    """
    key_columns = ["scene_index", "track_id"]
    # Only the identifying pair matters; de-duplicating keeps the merge from
    # multiplying matched rows when the same pair is listed more than once.
    keys_to_remove = misrecorded_oversized_track_ids_rows[key_columns].drop_duplicates()

    # how='left' rather than 'outer': an outer join sorts the result by key and adds a
    # NaN-filled row for every removal key that is absent from the table, which upcasts
    # the table's integer columns to float. A left join keeps row order and dtypes.
    merged = pd.merge(input_agent_table, keys_to_remove, how="left",
                      on=key_columns, indicator=True)

    # 'left_only' marks rows with no match among the keys to remove.
    survivors = merged[merged["_merge"] == "left_only"]
    return survivors.drop(columns=["_merge"])


In [None]:
# Tracks flagged as oversized in the per-track table are treated as recording errors.
misrecorded_oversized_track_ids_rows = find_misrecorded_oversized_track_ids(agents_df_grouped)

# Purge those tracks from the per-row agents table ...
agents_df = remove_misrecorded_oversized_track_ids_from_given_table(
    agents_df, misrecorded_oversized_track_ids_rows
)
# ... and from the per-track table. The flagged rows were captured above, so
# overwriting agents_df_grouped here does not change which tracks are removed.
agents_df_grouped = remove_misrecorded_oversized_track_ids_from_given_table(
    agents_df_grouped, misrecorded_oversized_track_ids_rows
)

In [None]:
# Display the per-row agents table for inspection.
agents_df

In [None]:
# Display the per-track (grouped) agents table for inspection.
agents_df_grouped