In [29]:
import os
import time
import tensorflow.compat.v1 as tf
import pickle

tf.enable_eager_execution() # No need for session to be created. Function instances are run immediately. 

from waymo_open_dataset import dataset_pb2 as open_dataset
from google.cloud import storage

import concurrent.futures as concurr

# CONFIG
# Project name handed to storage.Client when the client is created.
project = "Waymo3DObjectDetection"
# Bucket whose 'training/' and 'validation/' prefixes are listed further down.
bucket_name = 'waymo_open_dataset_v_1_2_0_individual_files'
# Extension appended to every locally downloaded blob file name.
suffix = '.tfrecord'
# Local root (with trailing slash) for downloaded blobs and pickled frames.
# Resolved against the working directory at the moment this cell runs.
data_destination = os.getcwd() + "/data/"
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
download_batch_size = 1

def download_blob(blob, c):
    """Download one bucket blob to the local data directory.

    Args:
        blob: blob object from the bucket listing; only its
            `download_to_filename` method is used here.
        c: file counter, used to build the local name `blob_<c><suffix>`.

    Returns:
        Path of the downloaded file.
    """
    # Create the destination on first use so a fresh checkout does not fail
    # with FileNotFoundError; exist_ok keeps this safe when called from
    # several worker threads.
    os.makedirs(data_destination, exist_ok=True)
    fname = f"{data_destination}blob_{c}{suffix}"
    blob.download_to_filename(fname)
    return fname

def strip_frame(frame, idx, blob_idx):
    """Keep only the per-camera data of a frame, dropping the rest (e.g. LIDAR).

    Args:
        frame: parsed frame; read through its `images`, `camera_labels`,
            `context` and `timestamp_micros` fields.
        idx: position of the frame inside its blob, stored as 'time_frame_idx'.
        blob_idx: index of the source blob, stored as 'blob_idx'.

    Returns:
        Dict keyed by camera name. Each value holds 'image', 'velocity',
        'labels' (None when the frame has no labels entry for that camera)
        and a 'context' dict.

    Raises:
        IndexError: if the frame carries fewer than five images.
    """
    # NOTE(review): the dict key is still assigned from the position of the
    # image in `frame.images`; this assumes images arrive in exactly this
    # order — confirm against the dataset.
    camera_names = ["FRONT", "FRONT_LEFT", "SIDE_LEFT", "FRONT_RIGHT", "SIDE_RIGHT"]

    # Index the label groups by their camera id. `images` and `camera_labels`
    # are separate lists, so pairing them by position can attach one camera's
    # boxes to another camera's picture; the shared `name` field is the
    # reliable join key.
    labels_by_camera = {}
    for label_group in frame.camera_labels:
        labels_by_camera.setdefault(label_group.name, label_group)

    cam_dict = {}
    for i, camera in enumerate(camera_names):
        camera_image = frame.images[i]
        cam_dict[camera] = {
            'image': camera_image.image,
            'velocity': camera_image.velocity,
            'labels': labels_by_camera.get(camera_image.name),
            'context': {'stats': frame.context.stats,
                        'name': frame.context.name,
                        'blob_idx': blob_idx,
                        'time_frame_idx': idx,
                        'timestamp': frame.timestamp_micros},
        }
    return cam_dict

def save_frames(frames, blob_idx, dataset='training'):
    """Pickle every camera entry of every stripped frame for later preprocessing.

    Each entry is written to
    `<data_destination><dataset>/<camera>/blob_<blob_idx>_frame_<frame_idx>.pickle`.

    Args:
        frames: sequence of dicts mapping camera name -> per-camera dict.
        blob_idx: index of the blob the frames came from (part of the file name).
        dataset: split sub-directory under the data directory.

    Returns:
        None.
    """
    prepared_dirs = set()
    for frame_idx, frame in enumerate(frames):
        for camera, camera_dict in frame.items():
            camera_dir = f'{data_destination}{dataset}/{camera}'
            # Create the per-camera directory once per call; without this the
            # first write fails with FileNotFoundError on a fresh checkout.
            if camera_dir not in prepared_dirs:
                os.makedirs(camera_dir, exist_ok=True)
                prepared_dirs.add(camera_dir)
            with open(f'{camera_dir}/blob_{blob_idx}_frame_{frame_idx}.pickle', 'wb') as f:
                # Pickle the camera dictionary using the highest protocol available.
                pickle.dump(camera_dict, f, pickle.HIGHEST_PROTOCOL)
    return None

def load_frame(frame_idx, blob_idx, dataset='training', camera='FRONT'):
    """Load one per-camera frame dictionary previously written by save_frames.

    Args:
        frame_idx: index of the frame within its blob.
        blob_idx: index of the blob the frame came from.
        dataset: split sub-directory under the data directory.
        camera: camera sub-directory to read from.

    Returns:
        The unpickled per-camera dictionary.

    Raises:
        FileNotFoundError: if no pickle exists for the requested frame.
    """
    # Mirror the path layout used by save_frames; the old
    # '<dataset>/blob_<idx>.pickle' path is never written by it.
    path = f'{data_destination}{dataset}/{camera}/blob_{blob_idx}_frame_{frame_idx}.pickle'
    with open(path, 'rb') as f:
        # pickle.load takes no protocol argument (the protocol is detected
        # from the stream); passing one raises TypeError.
        # Only load pickles produced by this notebook: unpickling untrusted
        # files can execute arbitrary code.
        return pickle.load(f)


# Retrieve frames from selected files to download
def get_and_strip_frames_from_one_blob(downloaded_blob, blob_idx):
    """Parse every record of a downloaded TFRecord file into a stripped frame.

    Args:
        downloaded_blob: path of the local TFRecord file.
        blob_idx: index of the blob, forwarded to strip_frame.

    Returns:
        List with one strip_frame result per record, in file order.
    """
    records = tf.data.TFRecordDataset(downloaded_blob, compression_type='')
    stripped = []
    for position, record in enumerate(records):
        parsed = open_dataset.Frame()
        parsed.ParseFromString(bytearray(record.numpy()))
        # Keep only what strip_frame retains; the full parsed frame is dropped.
        stripped.append(strip_frame(parsed, position, blob_idx))
    return stripped

def download_process_save_1_blob(blob, blob_idx, dataset='training'):
    """Download one blob, pickle its stripped frames, then delete the download.

    Args:
        blob: blob object from the bucket listing.
        blob_idx: index used in the local file names and stored in each frame.
        dataset: split sub-directory the pickles are written under.

    Returns:
        The string `blob_<blob_idx>`.
    """
    print(f"Downloading blob_{blob_idx}")
    blob_fname = download_blob(blob, blob_idx)

    frames = get_and_strip_frames_from_one_blob(blob_fname, blob_idx)

    save_frames(frames, blob_idx, dataset)

    # The tfrecord is no longer needed once its frames are pickled. Remove the
    # exact path download_blob returned rather than rebuilding a relative
    # 'data/...' path, which breaks if the working directory or the configured
    # destination/suffix differ.
    os.remove(blob_fname)

    return f"blob_{blob_idx}"
          

In [32]:
# Connect to Cloud Storage under the configured project
storage_client = storage.Client(project=project)
# Fetch the bucket object for the configured bucket
bucket = storage_client.get_bucket(bucket_name)
# Materialise the listing of every blob under the training/ prefix
blobs = list(storage_client.list_blobs(bucket_name, prefix='training/'))

# Number of blobs found under the training/ prefix
n_blobs = len(blobs)
print(f'Total number of blobs is {n_blobs}')




Total number of blobs is 798


In [33]:
# TRAINING
start = time.time()

# One (blob, index, split) work item per listed blob, numbered from 0 in listing order
thread_iterable = [(blob, blob_idx, 'training') for blob_idx, blob in enumerate(blobs)]

downloaded_blobs = []
with concurr.ThreadPoolExecutor(max_workers=2) as executor:
    # Transpose the work items into three parallel argument streams for map()
    results = executor.map(download_process_save_1_blob, *zip(*thread_iterable))
    # Results are consumed in submission order; report elapsed time after each one
    for r in results:
        print(f'\n Time elapsed {time.time() - start}')
        downloaded_blobs.append(r)

end = time.time()
print(f'Total time taken {end - start}')


Downloading blob_0
Downloading blob_1
Downloading blob_2
 Time elapsed 24.397799730300903

Downloading blob_3

 Time elapsed 27.30626082420349
Downloading blob_4
 Time elapsed 50.84785008430481

Downloading blob_5

 Time elapsed 51.55322766304016
Downloading blob_6

 Time elapsed 77.04192209243774
Downloading blob_7
 Time elapsed 77.47947764396667

Downloading blob_8

 Time elapsed 102.2274956703186
Downloading blob_9
 Time elapsed 103.4895966053009

Downloading blob_10
 Time elapsed 126.99525213241577

Downloading blob_11
 Time elapsed 132.9642391204834

Downloading blob_12
Downloading blob_13
 Time elapsed 157.00641250610352


 Time elapsed 157.00861024856567
Downloading blob_14
Downloading blob_15
 Time elapsed 186.48069405555725


 Time elapsed 186.48396348953247
Downloading blob_16

 Time elapsed 211.79796981811523
Downloading blob_17
 Time elapsed 215.32360196113586

Downloading blob_18

 Time elapsed 250.21507668495178
Downloading blob_19
 Time elapsed 250.97174954414368

Downlo

Downloading blob_154

 Time elapsed 2361.2061851024628
Downloading blob_155

 Time elapsed 2361.2262032032013
Downloading blob_156
 Time elapsed 2397.323609828949

Downloading blob_157
 Time elapsed 2397.6095044612885

Downloading blob_158
 Time elapsed 2437.9579672813416

Downloading blob_159
 Time elapsed 2438.4182205200195

Downloading blob_160
Downloading blob_161
 Time elapsed 2478.596015930176

 Time elapsed 2478.596228122711

Downloading blob_162
Downloading blob_163

 Time elapsed 2517.4469513893127

 Time elapsed 2517.4471757411957
Downloading blob_170
 Time elapsed 2660.594094514847

Downloading blob_171

 Time elapsed 2665.8793823719025
Downloading blob_172
 Time elapsed 2707.1163244247437

Downloading blob_173
 Time elapsed 2707.4052295684814

Downloading blob_174
 Time elapsed 2746.5386533737183

Downloading blob_175
 Time elapsed 2747.177332639694

Downloading blob_176Downloading blob_177


 Time elapsed 2785.433651447296

 Time elapsed 2785.4358189105988
Downloading blob

Downloading blob_317

 Time elapsed 5543.642785549164
Downloading blob_318
 Time elapsed 5582.784359693527

Downloading blob_319
 Time elapsed 5583.658168792725

Downloading blob_320
 Time elapsed 5625.355860233307

Downloading blob_321
 Time elapsed 5625.375597953796

Downloading blob_322
Downloading blob_323
 Time elapsed 5668.981666564941


 Time elapsed 5668.983967781067
Downloading blob_324
 Time elapsed 5709.166360616684

Downloading blob_325

 Time elapsed 5710.0905838012695
Downloading blob_326
 Time elapsed 5747.624959230423

Downloading blob_327

 Time elapsed 5748.207949876785
Downloading blob_328
 Time elapsed 5787.35852265358

Downloading blob_329

 Time elapsed 5787.80326628685
Downloading blob_330
 Time elapsed 5825.390530824661

Downloading blob_331
 Time elapsed 5825.686336040497

Downloading blob_332
Downloading blob_333
 Time elapsed 5865.504799127579

 Time elapsed 5865.504916906357

Downloading blob_334
Downloading blob_335

 Time elapsed 5903.931147813797

 Time e

Downloading blob_476
Downloading blob_477
 Time elapsed 8691.850664377213


 Time elapsed 8691.851590633392
Downloading blob_478
 Time elapsed 8731.438796758652

Downloading blob_479
 Time elapsed 8731.724051952362

Downloading blob_480
 Time elapsed 8770.60246014595

Downloading blob_481
 Time elapsed 8770.622926473618

Downloading blob_482
Downloading blob_483

 Time elapsed 8805.744594097137

 Time elapsed 8805.744791507721
Downloading blob_484
Downloading blob_485
 Time elapsed 8842.26822400093


 Time elapsed 8842.268498182297
Downloading blob_486
 Time elapsed 8885.855516195297

Downloading blob_487
 Time elapsed 8888.863206863403

Downloading blob_488
Downloading blob_489

 Time elapsed 8930.342457294464

 Time elapsed 8930.342804670334
Downloading blob_490
 Time elapsed 8971.417881250381

Downloading blob_491
 Time elapsed 8973.560017347336

Downloading blob_492
 Time elapsed 9010.416852235794

Downloading blob_493
 Time elapsed 9012.366620063782

Downloading blob_494
 Time ela

Downloading blob_633

 Time elapsed 11760.971068382263
Downloading blob_634Downloading blob_635

 Time elapsed 11796.722533464432

 Time elapsed 11796.722680330276

Downloading blob_636
 Time elapsed 11826.319087982178

Downloading blob_637
 Time elapsed 11830.037066936493

Downloading blob_638
 Time elapsed 11866.998615264893

Downloading blob_639
 Time elapsed 11867.25579380989

Downloading blob_640
 Time elapsed 11907.919927835464

Downloading blob_641

 Time elapsed 11909.128427028656
Downloading blob_642
Downloading blob_643

 Time elapsed 11953.703551054

 Time elapsed 11953.703670978546
Downloading blob_644
Downloading blob_645
 Time elapsed 11999.783445358276

 Time elapsed 11999.783560037613

Downloading blob_646
 Time elapsed 12039.106375455856

Downloading blob_647
 Time elapsed 12039.791391849518

Downloading blob_648Downloading blob_649

 Time elapsed 12080.833261728287


 Time elapsed 12080.839803934097
Downloading blob_650
 Time elapsed 12120.139984369278

Downloading bl

Downloading blob_784
 Time elapsed 14834.618936777115

Downloading blob_785
 Time elapsed 14835.225970506668

Downloading blob_786
 Time elapsed 14872.77053141594

Downloading blob_787
 Time elapsed 14873.366738796234

Downloading blob_788
Downloading blob_789
 Time elapsed 14912.959205389023


 Time elapsed 14912.959699630737
Downloading blob_790
 Time elapsed 14954.18170285225

Downloading blob_791
 Time elapsed 14954.365981340408

Downloading blob_792
 Time elapsed 14992.468943119049

Downloading blob_793

 Time elapsed 14993.882733106613
Downloading blob_794
 Time elapsed 15030.570506811142

Downloading blob_795
 Time elapsed 15031.521943569183

Downloading blob_796
 Time elapsed 15070.604353427887

Downloading blob_797

 Time elapsed 15071.566086053848

 Time elapsed 15108.717267990112

 Time elapsed 15108.717420101166
Total time taken 15108.721873998642


In [None]:
# VALIDATION
# Listing of every blob under the validation/ prefix
blobs = list(storage_client.list_blobs(bucket_name, prefix='validation/'))

start = time.time()

# Lazily built (blob, index, split) work items, numbered from 0 in listing order
thread_iterable = ((blob, blob_idx, 'validation') for blob_idx, blob in enumerate(blobs))

downloaded_blobs = []
with concurr.ThreadPoolExecutor(max_workers=2) as executor:
    # Transpose the work items into three parallel argument streams for map()
    results = executor.map(download_process_save_1_blob, *zip(*thread_iterable))
    # Results are consumed in submission order; report elapsed time after each one
    for r in results:
        print(f'\n Time elapsed {time.time() - start}')
        downloaded_blobs.append(r)

end = time.time()
print(f'Total time taken {end - start}')


In [2]:
# USED TO LOOK AT BLOB AGAIN

# Connect to Cloud Storage under the configured project
storage_client = storage.Client(project=project)
# Fetch the bucket object for the configured bucket
bucket = storage_client.get_bucket(bucket_name)
# Materialise the listing of every blob under the validation/ prefix
blobs = list(storage_client.list_blobs(bucket_name, prefix='validation/'))

# Number of blobs found under the validation/ prefix
n_blobs = len(blobs)
print(f'Total number of blobs is {n_blobs}')




Total number of blobs is 202


In [28]:
# Download the first listed blob here so the cell runs on a fresh kernel;
# previously `blob_fname` only existed if an earlier, now commented-out line
# had been executed.
blob_fname = download_blob(blobs[0], 0)
dataset = tf.data.TFRecordDataset(blob_fname, compression_type='')
for data in dataset:
    frame = open_dataset.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    # Only print for frames whose no_label_zones field is non-empty.
    if frame.no_label_zones:
        print(frame.no_label_zones)


In [26]:
# NOTE(review): this cell held only the incomplete expression `frame.`, which
# is a syntax error. `no_label_zones` is a guess taken from the previous
# cell — confirm which attribute was meant to be inspected.
frame.no_label_zones

[]