### Mount Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The data cells in this notebook use relative 'drive/MyDrive/...' paths, so
# they resolve to this mount only while the working directory is /content.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Imports

In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Build one Parquet file containing only the camera == 1 rows from every
# camera-image Parquet file in the source folder.

# Source columns to keep, and the shorter names they get in the output
image_want_cols = ['key.segment_context_name', 'key.frame_timestamp_micros', 'key.camera_name', '[CameraImageComponent].image']
feature_rename_map = {'key.segment_context_name':'segment', 
                      'key.frame_timestamp_micros':'frame_timestamp',
                      'key.camera_name':'camera', 
                      '[CameraImageComponent].image':'image'}

# Define folder path
folder_path = 'drive/MyDrive/object_detection/data/waymo_camera_image_751_799/'

# Collect the filtered frame of each file in a list and concatenate once at
# the end. Calling pd.concat inside the loop re-copies every image blob loaded
# so far on each iteration, and seeding it with an empty all-object frame
# triggers pandas' empty-entry concat deprecation.
camera1_frames = []

# sorted() makes the row order reproducible (os.listdir order is arbitrary);
# the extension check skips anything in the folder that is not a Parquet file.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith('.parquet'):
        continue
    print(file)

    # Read in Parquet file, keeping only the wanted columns under short names
    data = pd.read_parquet(os.path.join(folder_path, file))[image_want_cols].rename(columns=feature_rename_map)

    # Keep only rows with camera == 1
    data = data.loc[data['camera'] == 1]

    camera1_frames.append(data)

if camera1_frames:
    image_df = pd.concat(camera1_frames, axis=0)
else:
    # No input files: still produce an empty frame with the expected schema
    image_df = pd.DataFrame(columns=list(feature_rename_map.values()))

# Define output file path
output_file_path = 'drive/MyDrive/object_detection/data/camera1_data_part16.parquet'

# Write image_df to Parquet file
image_df.to_parquet(output_file_path)

# Read the file back so the cell output shows what was actually written
pd.read_parquet(output_file_path)

training_camera_image_9288629315134424745_4360_000_4380_000.parquet
training_camera_image_898816942644052013_20_000_40_000.parquet
training_camera_image_9907794657177651763_1126_570_1146_570.parquet
training_camera_image_972142630887801133_642_740_662_740.parquet
training_camera_image_9415086857375798767_4760_000_4780_000.parquet
training_camera_image_9175749307679169289_5933_260_5953_260.parquet
training_camera_image_8859409804103625626_2760_000_2780_000.parquet
training_camera_image_8811210064692949185_3066_770_3086_770.parquet
training_camera_image_9465500459680839281_1100_000_1120_000.parquet
training_camera_image_9179922063516210200_157_000_177_000.parquet
training_camera_image_9142545919543484617_86_000_106_000.parquet
training_camera_image_9509506420470671704_4049_100_4069_100.parquet
training_camera_image_9547911055204230158_1567_950_1587_950.parquet
training_camera_image_9654060644653474834_3905_000_3925_000.parquet
training_camera_image_8938046348067069210_3800_000_3820_000.p

Unnamed: 0,segment,frame_timestamp,camera,image
9288629315134424745_4360_000_4380_000;1553902446613209,9288629315134424745_4360_000_4380_000,1553902446613209,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9288629315134424745_4360_000_4380_000;1553902446712972,9288629315134424745_4360_000_4380_000,1553902446712972,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9288629315134424745_4360_000_4380_000;1553902446812763,9288629315134424745_4360_000_4380_000,1553902446812763,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9288629315134424745_4360_000_4380_000;1553902446912592,9288629315134424745_4360_000_4380_000,1553902446912592,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9288629315134424745_4360_000_4380_000;1553902447012471,9288629315134424745_4360_000_4380_000,1553902447012471,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...,...,...
9758342966297863572_875_230_895_230;1507250252156193,9758342966297863572_875_230_895_230,1507250252156193,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9758342966297863572_875_230_895_230;1507250252256151,9758342966297863572_875_230_895_230,1507250252256151,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9758342966297863572_875_230_895_230;1507250252356151,9758342966297863572_875_230_895_230,1507250252356151,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9758342966297863572_875_230_895_230;1507250252456191,9758342966297863572_875_230_895_230,1507250252456191,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [None]:
# Combine every camera-box Parquet file in the source folder into a single
# Parquet file with shortened column names.

# Columns expected in the box files; they are placed first in the output
box_cols = ['key.segment_context_name', 'key.frame_timestamp_micros',
            'key.camera_name', 'key.camera_object_id',
            '[CameraBoxComponent].box.center.x',
            '[CameraBoxComponent].box.center.y', '[CameraBoxComponent].box.size.x',
            '[CameraBoxComponent].box.size.y', '[CameraBoxComponent].type',
            '[CameraBoxComponent].difficulty_level.detection',
            '[CameraBoxComponent].difficulty_level.tracking']

# Define folder path
folder_path = 'drive/MyDrive/object_detection/data/waymo_camera_box/'

# Collect each file's frame in a list and concatenate once at the end;
# pd.concat inside the loop re-copies all previously loaded rows every
# iteration, and seeding it with an empty all-object frame triggers pandas'
# empty-entry concat deprecation.
box_frames = []

# sorted() makes the row order reproducible (os.listdir order is arbitrary);
# the extension check skips anything in the folder that is not a Parquet file.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith('.parquet'):
        continue
    print(file)

    # Read in Parquet file
    data = pd.read_parquet(os.path.join(folder_path, file))
    box_frames.append(data)

if box_frames:
    box_df = pd.concat(box_frames, axis=0)
    # Keep the expected columns first, followed by any extra columns the
    # files carry; an expected column missing from the files is added empty.
    extra_cols = [col for col in box_df.columns if col not in box_cols]
    box_df = box_df.reindex(columns=box_cols + extra_cols)
else:
    # No input files: still produce an empty frame with the expected schema
    box_df = pd.DataFrame(columns=box_cols)

# NOTE(review): 'tracking_dificulty' is misspelled, but it becomes a column
# name in the written file; it is left unchanged here because code that reads
# the file may rely on it. Rename both together if you fix it.
box_col_map = {'key.segment_context_name':'segment', 'key.frame_timestamp_micros':'frame_timestamp',
               'key.camera_name':'camera', 'key.camera_object_id':'camera_object_id',
               '[CameraBoxComponent].box.center.x':'box_center_x',
               '[CameraBoxComponent].box.center.y':'box_center_y', 
               '[CameraBoxComponent].box.size.x':'box_size_x',
               '[CameraBoxComponent].box.size.y':'box_size_y', 
               '[CameraBoxComponent].type':'type',
               '[CameraBoxComponent].difficulty_level.detection':'detection_difficulty',
               '[CameraBoxComponent].difficulty_level.tracking':'tracking_dificulty'}
box_df = box_df.rename(columns=box_col_map)

# NOTE(review): the output is named 'camera1_box', but no camera filter is
# applied in this cell, so rows for every camera value present in the input
# are written. Confirm whether a `camera == 1` filter is intended.
box_df.to_parquet('drive/MyDrive/object_detection/data/camera1_box.parquet')