In [16]:
import os
import tensorflow as tf
import math
import numpy as np
import itertools


from waymo_open_dataset.utils import range_image_utils
from waymo_open_dataset.utils import transform_utils
from waymo_open_dataset.utils import  frame_utils
from waymo_open_dataset import dataset_pb2 as open_dataset
import hashlib
from tqdm import tqdm

TODO: As we want to use more tf records (we have them) add their paths here

In [2]:
filenames = ['/home/bcj/stanford/cs230/project/waymo_data/segment-10023947602400723454_1120_000_1140_000_with_camera_labels.tfrecord']

In [3]:
NUM_SAMPLES = 100

In [4]:
from waymo_open_dataset.utils.box_utils import is_within_box_3d

In [5]:
def prep_data(frame, num_points = 40000):
    mesh_vertices, instance_labels, semantic_labels, instance_bboxes = None, None, None, None
    
    # Transform frame to point cloud
    (range_images, camera_projections,
 range_image_top_pose) = frame_utils.parse_range_image_and_camera_projection(
    frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
    frame,
    range_images,
    camera_projections,
    range_image_top_pose) 
    
    # 3d points in vehicle frame.
    points_all = np.concatenate(points, axis=0)
    
    # sub sample points
    if num_points < 0 :
        mesh_vertices = points_all
    else:
        sub_samp_points = points_all[np.random.choice(np.arange(points_all.shape[0]), num_points),:]
        mesh_vertices = sub_samp_points
    
    
    num_detected_objects = len(frame.laser_labels)
    
    instance_labels = np.zeros((len(mesh_vertices),))
    semantic_labels = np.zeros((len(mesh_vertices),))
    instance_bboxes = np.zeros((num_detected_objects, 7))
    
    # loop through each ground truth object
    for instance_id, detected_object in enumerate(frame.laser_labels):
        box = detected_object.box
        points_in_bbox = is_within_box_3d(point=tf.convert_to_tensor(mesh_vertices, dtype=tf.float32), box=tf.convert_to_tensor(np.array([
            box.center_x,
            box.center_y,
            box.center_z,
            box.width,
            box.length,
            box.height,
            box.heading,
        ]).reshape((1,7)), dtype=tf.float32)).numpy()
        
        
        # Do a check if we have all points
        if num_points < 0:
            print(np.sum(points_in_bbox > 0 ), detected_object.num_lidar_points_in_box)
            
        return mesh_vertices, instance_labels, semantic_labels, instance_bboxes
        
        
        
        
        
        
    
        
        
                       

# Run data generation pipeline

In [17]:
dataset = tf.data.TFRecordDataset(filenames, compression_type='')
num_samples_so_far = 0
batch_size_for_print = 10

BASE_DATA_DIR = "."

for data in tqdm(dataset):
    
    # Read data from tfrecord
    frame = open_dataset.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    
    scan_name = hashlib.md5(str(frame.context).encode()).hexdigest()
    print(scan_name)
    
    # process frame into data format we want
    mesh_vertices, instance_labels, semantic_labels, instance_bboxes = prep_data(frame, 40000)
    
    # save to npy file with given schema (i.e. frame.context.name_{file_type}.npy)
    FILENAME_TEMPLATE = "{scan_name}_{data_type}.npy"
    
    ## Write mesh verticies
    with open(FILENAME_TEMPLATE.format(scan_name=scan_name, data_type="vert"), 'wb') as f:
        np.save(f, mesh_vertices)
        
    ## Write instance labels
    with open(FILENAME_TEMPLATE.format(scan_name=scan_name, data_type="ins_label"), 'wb') as f:
        np.save(f, instance_labels)
        
    ## Write semantic labels
    with open(FILENAME_TEMPLATE.format(scan_name=scan_name, data_type="sem_label"), 'wb') as f:
        np.save(f, semantic_labels)
    
    ## Write instance_bboxes labels
    with open(FILENAME_TEMPLATE.format(scan_name=scan_name, data_type="bbox"), 'wb') as f:
        np.save(f, instance_bboxes)
    
    
    
    # Logging and stopping when we've sampled enough
    num_samples_so_far += 1
    if num_samples_so_far > NUM_SAMPLES:
        break
    
    if not (num_samples_so_far % batch_size_for_print):
        print("{}% of samples written".format(round(100.0*num_samples_so_far/NUM_SAMPLES, 2)))

0it [00:00, ?it/s]

654b2f55434f07e7759c5f435487b7bc


1it [00:00,  1.19it/s]

67421721497544c3083a0bb82c868f7f


2it [00:01,  1.22it/s]

8df9e40cefbebac59f19dd2e163ebf26


3it [00:02,  1.25it/s]

202b494ee83899981d99e395f363e0cd


4it [00:03,  1.27it/s]

2dcdd7e14ec33275b92bf5c259b60306


5it [00:03,  1.28it/s]

eac3ad08210990d79bc68bf632058e47


6it [00:04,  1.28it/s]

eac3ad08210990d79bc68bf632058e47


7it [00:05,  1.28it/s]

c1af69d0bb131e26e8f9dcfa7a4b4483


8it [00:06,  1.29it/s]

f4d21d5427d1e432f37c855be116fd5d


9it [00:06,  1.29it/s]

0f02a426035c859101fb9d9d22c27fd9


10it [00:07,  1.29it/s]

10.0% of samples written
500a4111f59ad51fad832c5c0fc01fba


11it [00:08,  1.24it/s]

500a4111f59ad51fad832c5c0fc01fba


12it [00:09,  1.20it/s]

def11b8bdf33900ad92f7196d801a944


13it [00:10,  1.22it/s]

6e4df69f5647771358d871d47d674bba


14it [00:11,  1.24it/s]

6e4df69f5647771358d871d47d674bba


15it [00:11,  1.26it/s]

e3aeee8b80b45428501d703118669b3f


16it [00:12,  1.28it/s]

a415e9407788aca088766d11507011d2


17it [00:13,  1.29it/s]

9933c01db931831878950016fe0b7400


18it [00:14,  1.29it/s]

a911f582f4abd13e95695ed9da85530c


19it [00:14,  1.29it/s]

6827474c5c2b4d8b3e4f7fc2f378d15d


20it [00:15,  1.30it/s]

20.0% of samples written
ab7cb4088ee6d4139d1465960c807da1


21it [00:16,  1.30it/s]

ab7cb4088ee6d4139d1465960c807da1


22it [00:17,  1.25it/s]

6fe59cfa922bf5710a8a05671ad56163


23it [00:18,  1.22it/s]

ae543ba41f9e7ed4777e3b1953b9f7e3


24it [00:19,  1.12it/s]

6fe59cfa922bf5710a8a05671ad56163


25it [00:20,  1.13it/s]

6fe59cfa922bf5710a8a05671ad56163


26it [00:20,  1.17it/s]

567de87f5423ad5fe719c56326942597


27it [00:21,  1.21it/s]

7be3cd59fd1f51e473b819efd04a946d


28it [00:22,  1.24it/s]

f9da4f7ab906cea172f076ba1085fc0e


29it [00:23,  1.26it/s]

f50fcc5074f06c07a2904b5d24bb8ff7


30it [00:23,  1.27it/s]

30.0% of samples written
7be490126ad85b7aad03bb41d6d58110


31it [00:24,  1.29it/s]

758ab2dbc5f8298f21e8749ce83db2f0


32it [00:25,  1.20it/s]

758ab2dbc5f8298f21e8749ce83db2f0


33it [00:26,  1.13it/s]

55f1fb17d1a8c06eb9caad9b6f60524c


34it [00:27,  1.19it/s]

2aaf3bfa610cf3f96b455eabfea461fe


35it [00:28,  1.24it/s]

2aaf3bfa610cf3f96b455eabfea461fe


36it [00:28,  1.28it/s]

36a70fb0e2fdb34066aa28009a6c9100


37it [00:29,  1.31it/s]

b4ccc5783efcb60f0a2dd3da258de329


38it [00:30,  1.28it/s]

b4ccc5783efcb60f0a2dd3da258de329


39it [00:31,  1.19it/s]

52fbef56c089544e23d90d8fc5a3381d


40it [00:32,  1.10it/s]

40.0% of samples written
5d7795b2a1c8915f86b8d5912238a750


41it [00:33,  1.10it/s]

d6d3bd41b3f366aa868f07be85945983


42it [00:34,  1.17it/s]

07b3be048637b5ef145d3e7137f72e76


43it [00:34,  1.22it/s]

07b3be048637b5ef145d3e7137f72e76


44it [00:35,  1.27it/s]

9acdaa53164f6ddce6b0c73ce0ac42ab


45it [00:36,  1.30it/s]

615ea666a167c8d534d75f7f42b63052


46it [00:37,  1.32it/s]

342df20cb2b515b49b45628ecadd78c0


47it [00:37,  1.33it/s]

d8bd94408620e3a218a025a6988c35ca


48it [00:38,  1.32it/s]

3b08d7cbd6103397b023201b73aad5ac


49it [00:39,  1.30it/s]

8b560a08b25d3f0de7dddca9d7c1c07d


50it [00:40,  1.24it/s]

50.0% of samples written
da5f62ce327462517ccca46534b1d170


51it [00:41,  1.25it/s]

215ef0674de7346d8b90997ea9c7ffbc


52it [00:41,  1.27it/s]

2184a2b2fa37695c48d3ab48534edf46


53it [00:42,  1.28it/s]

6ba048c82d95fe1ebe1657b594d4974c


54it [00:43,  1.26it/s]

e92271090655ca4321f001fd8cf45e46


55it [00:44,  1.28it/s]

792433fa849f8966448f7b570036a068


56it [00:44,  1.28it/s]

b3084122e66294dd6d94509f382cbee8


57it [00:45,  1.23it/s]

3784eed7aab9207534514fd1928f8a1a


58it [00:46,  1.22it/s]

9838c85d6b2cf88a16591d4c6a30b9a3


59it [00:47,  1.10it/s]

9838c85d6b2cf88a16591d4c6a30b9a3


60it [00:48,  1.02s/it]

60.0% of samples written
9838c85d6b2cf88a16591d4c6a30b9a3


61it [00:49,  1.03it/s]

7930710c8488761eba6f0e69f62ccad8


62it [00:50,  1.09it/s]

13f4605e51175e4f42a12555bce629d2


63it [00:51,  1.14it/s]

13f4605e51175e4f42a12555bce629d2


64it [00:52,  1.16it/s]

f40c934a383e54fde3639ea344c13576


65it [00:53,  1.14it/s]

f502ef4972251ed004a36586eb5c3197


66it [00:54,  1.13it/s]

16464c015591a5a0b9fbbe90109ae304


67it [00:55,  1.04it/s]

87a25fea6d3a2830c755db1570038e43


68it [00:55,  1.10it/s]

ceb44e6cdd73da8508233b775cec4dd2


69it [00:56,  1.16it/s]

8b68d482d9e19536add9a28b580ed290


70it [00:57,  1.19it/s]

70.0% of samples written
7f71e2894edf4a0e51cec3e597609068


71it [00:58,  1.22it/s]

1852d05edf94fb35cc782ca10e9494a3


72it [00:59,  1.19it/s]

2f2243d86935e87a8074e469b3a3beae


73it [01:00,  1.18it/s]

84e1e84e6b2dfe95c93b0510989c8a29


74it [01:01,  1.02it/s]

d4a000bf4054ffa2e4d81ba2210c373e


75it [01:02,  1.09it/s]

12e44f25a9b56d687d5d43d85df1caf2


76it [01:02,  1.11it/s]

01f8730b362f840ed5d958d2792523ce


77it [01:03,  1.10it/s]

ae62b89981e299fa51bf13c6e6aeb460


78it [01:04,  1.10it/s]

dd0616d85bce898fcb9e6f475e2dd241


79it [01:05,  1.09it/s]

c4aaacd3f2ce5f5ac5936af97fee8010


80it [01:06,  1.07it/s]

80.0% of samples written
25fe5daf6cd861e37c3dbb44f50bca25


81it [01:07,  1.04it/s]

25fe5daf6cd861e37c3dbb44f50bca25


82it [01:08,  1.03s/it]

7da7763269943d13ba80c035414bc274


83it [01:10,  1.07s/it]

973eaec0bc347532c5c44cea21a97e8e


84it [01:11,  1.05s/it]

e7e3d0f0e4b9b4435070ae605dd30674


85it [01:12,  1.03s/it]

e7e3d0f0e4b9b4435070ae605dd30674


86it [01:12,  1.02it/s]

7da7763269943d13ba80c035414bc274


87it [01:13,  1.03it/s]

7da7763269943d13ba80c035414bc274


88it [01:14,  1.09it/s]

b970c1215d192a9ee23417147c685a39


89it [01:15,  1.02s/it]

b970c1215d192a9ee23417147c685a39


90it [01:16,  1.06it/s]

90.0% of samples written
5f10f591146077ae159d794fee78f67e


91it [01:17,  1.13it/s]

5f10f591146077ae159d794fee78f67e


92it [01:18,  1.19it/s]

ad20a7ca3ecd77b4fbfca932f47ee3f3


93it [01:18,  1.24it/s]

9b2d6c8799cb18df2a61bfe400110f7c


94it [01:19,  1.28it/s]

278e2f7be35c2931eb0d99b55c369b26


95it [01:20,  1.30it/s]

71b85b038f2eea47a158e6369a81b3f8


96it [01:21,  1.30it/s]

8d075faceac76048d799263d9b17affc


97it [01:21,  1.32it/s]

690198b290e95ea306ae43b3244625fe


98it [01:22,  1.34it/s]

ab8d846ed5871b854d844a743ecf1f04


99it [01:23,  1.34it/s]

c9008d4a55cb8e34ab77151bab9a186d


100it [01:24,  1.34it/s]

100.0% of samples written
faf867c616e823e2606d74e23bfb623b


100it [01:25,  1.17it/s]


In [20]:
! pwd

/home/bcj/stanford/cs230/project/orig_votenet/votenet_data
