# main.ipynb

## 1. Environment verification

In [1]:
import getpass
import os
import sys

import tensorflow as tf

# 1.1 Use Colab or other machine
if 'google.colab' in sys.modules:
    print('Running on CoLab')
    use_colab = True
else:
    print('Not running on CoLab')
    use_colab = False

# 1.2 Use GPU or not
gpus = tf.config.list_physical_devices('GPU')

if len(gpus) > 0:
    print('GPU detected')
    use_gpu = True
    for i, gpu in enumerate(gpus):
        print(f"GPU {i} - Name: {gpu.name}, Type: {gpu.device_type}")
    !nvidia-smi
else:
    print('No GPU detected')
    use_gpu = False

del gpus

# 1.3 Get current path
current_path = os.getcwd()
print(f'Current path: {current_path}')

Running on CoLab
No GPU detected
Current path: /content


## 2. Set up your src package

In [2]:
%%time
if use_colab and not (os.path.exists('/content/requirements.txt') and os.path.exists('/content/src')):
    !ACCESS_TOKEN="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"&&\
    BRANCHE_NAME="feature/dvc-pipeline"&&\
    git clone -b ${BRANCHE_NAME} https://${ACCESS_TOKEN}@github.com/tc-huang/waymo-project.git
    !mv /content/waymo-project/src /content/src
    !mv /content/waymo-project/env/requirements_docker.txt /content/requirements.txt
    !rm -r /content/waymo-project
    !pip install -r /content/requirements.txt
    if use_gpu:
        pass
        # !git clone https://github.com/rapidsai/rapidsai-csp-utils.git
        # !python rapidsai-csp-utils/colab/pip-install.py
    print("Need to restart runtime!!!")

from waymo_open_dataset import v2

CPU times: user 585 ms, sys: 20.6 ms, total: 606 ms
Wall time: 608 ms


#### Network speed

In [3]:
# Install speedtest-cli first; presumably `network.measure_network_speed`
# relies on it -- TODO confirm against src/utils/network.py.
!pip install speedtest-cli
from src.utils import network
# Reports the download and upload speed (see the cell output).
network.measure_network_speed()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Download Speed: 3218.31 Mbps
Upload Speed: 547.50 Mbps


#### Memory size

In [4]:
# Report total, available and used memory (see the cell output) via the
# project's helper module.
from src.utils import memory
memory.show_memory_info()

Total memory size: 12.68 GB
Available memory size: 11.48 GB
Used memory size: 0.88 GB


## 3. Load data from Google Cloud Storage by dask dataframe

### Columns of camera_image

camera_image: CameraImageComponent
- string                                   key.segment_context_name
- int64                                    key.frame_timestamp_micros
- int8                                     key.camera_name
- binary                                   [CameraImageComponent].image (336693 bytes)
- fixed_size_list<item: double>[16]        [CameraImageComponent].pose.transform
- float                                    [CameraImageComponent].velocity.linear_velocity.x
- float                                    [CameraImageComponent].velocity.linear_velocity.y
- float                                    [CameraImageComponent].velocity.linear_velocity.z
- double                                   [CameraImageComponent].velocity.angular_velocity.x
- double                                   [CameraImageComponent].velocity.angular_velocity.y
- double                                   [CameraImageComponent].velocity.angular_velocity.z
- double                                   [CameraImageComponent].pose_timestamp
- double                                   [CameraImageComponent].rolling_shutter_params.shutter
- double                                   [CameraImageComponent].rolling_shutter_params.camera_trigger_time
- double                                   [CameraImageComponent].rolling_shutter_params.camera_readout_done_time

### Columns of camera_box

camera_box: CameraBoxComponent
- string                                   key.segment_context_name
- int64                                    key.frame_timestamp_micros
- int8                                     key.camera_name
- string                                   key.camera_object_id
- double                                   [CameraBoxComponent].box.center.x
- double                                   [CameraBoxComponent].box.center.y
- double                                   [CameraBoxComponent].box.size.x
- double                                   [CameraBoxComponent].box.size.y
- int8                                     [CameraBoxComponent].type
- int8                                     [CameraBoxComponent].difficulty_level.detection
- int8                                     [CameraBoxComponent].difficulty_level.tracking

### Columns of camera_calibration

camera_calibration: CameraCalibrationComponent
- string                                   key.segment_context_name
- int8                                     key.camera_name
- double                                   [CameraCalibrationComponent].intrinsic.f_u
- double                                   [CameraCalibrationComponent].intrinsic.f_v
- double                                   [CameraCalibrationComponent].intrinsic.c_u
- double                                   [CameraCalibrationComponent].intrinsic.c_v
- double                                   [CameraCalibrationComponent].intrinsic.k1
- double                                   [CameraCalibrationComponent].intrinsic.k2
- double                                   [CameraCalibrationComponent].intrinsic.p1
- double                                   [CameraCalibrationComponent].intrinsic.p2
- double                                   [CameraCalibrationComponent].intrinsic.k3
- fixed_size_list<item: double>[16]        [CameraCalibrationComponent].extrinsic.transform
- int32                                    [CameraCalibrationComponent].width
- int32                                    [CameraCalibrationComponent].height
- int8                                     [CameraCalibrationComponent].rolling_shutter_direction

### Columns of stats

stats: StatsComponent
- string                                   key.segment_context_name
- int64                                    key.frame_timestamp_micros
- string                                   [StatsComponent].time_of_day
- string                                   [StatsComponent].location
- string                                   [StatsComponent].weather
- list<item: int8>                         [StatsComponent].lidar_object_counts.types
- list<item: int32>                        [StatsComponent].lidar_object_counts.counts
- list<item: int8>                         [StatsComponent].camera_object_counts.types
- list<item: int32>                        [StatsComponent].camera_object_counts.counts

In [5]:
from src.data import load_gcs_parquet
# 3.1 Get GCSFileSystem
# `use_colab` is passed so the helper can pick the authentication method (the
# cell output reports Colab authentication when running on Colab).
# NOTE(review): despite its class-like name, `GCSFileSystem` is a variable that
# holds the object returned by the helper; it is passed to every `get_df` call.
GCSFileSystem = load_gcs_parquet.get_GCSFileSystem(use_colab)

Running in Google Colab => Use colab authentication


In [6]:
# Columns to read for each component.
# Set a column list to None to load every column of that component.

# Key columns shared by the camera_image and camera_box components.
_frame_key_columns = [
    'key.segment_context_name',
    'key.frame_timestamp_micros',
    'key.camera_name',
]

columns_camera_image = [
    *_frame_key_columns,
    '[CameraImageComponent].image',
]

columns_camera_box = [
    *_frame_key_columns,
    'key.camera_object_id',
    '[CameraBoxComponent].box.center.x',
    '[CameraBoxComponent].box.center.y',
    '[CameraBoxComponent].box.size.x',
    '[CameraBoxComponent].box.size.y',
    '[CameraBoxComponent].type',
    '[CameraBoxComponent].difficulty_level.detection',
    '[CameraBoxComponent].difficulty_level.tracking',
]
#* columns_camera_calibration = None
#* columns_stats = None

In [7]:
from dask import diagnostics
# Register a global dask progress bar so the loads below report their progress.
diagnostics.ProgressBar().register()
# Define the size of each dask dataframe partition
partition_size = None # 128MB is the default size
# partition_size = '64MB'

# Only the two validation loads (3.6 and 3.7) are active. The lines prefixed
# with `#*` are kept as ready-to-enable examples; the calibration/stats ones
# also require `columns_camera_calibration` / `columns_stats`, which are
# commented out in the column-definition cell.

# 3.2 Loading data waymo_open_dataset_v_2_0_0/training/camera_image/*.parquet from GCS
# Wall time: 2min 24s
#* %time df_training_camera_image = load_gcs_parquet.get_df('training', 'camera_image', GCSFileSystem, columns=columns_camera_image, partition_size=partition_size)

# 3.3 Loading data waymo_open_dataset_v_2_0_0/training/camera_box/*.parquet from GCS
# Wall time: 1.69 s
#* %time df_training_camera_box = load_gcs_parquet.get_df('training', 'camera_box', GCSFileSystem, columns=columns_camera_box, partition_size=partition_size)

# 3.4 Loading data waymo_open_dataset_v_2_0_0/training/camera_calibration/*.parquet from GCS
# Wall time: 1.85 s
#* %time df_training_camera_calibration = load_gcs_parquet.get_df('training', 'camera_calibration', GCSFileSystem, columns=columns_camera_calibration, partition_size=partition_size)

# 3.5 Loading data waymo_open_dataset_v_2_0_0/training/stats/*.parquet from GCS
# Wall time: 1.8 s
#* %time df_training_stats = load_gcs_parquet.get_df('training', 'stats', GCSFileSystem, columns=columns_stats, partition_size=partition_size)

# 3.6 Loading data waymo_open_dataset_v_2_0_0/validation/camera_image/*.parquet from GCS
# (used downstream as `df_validation_camera_image`)
%time df_validation_camera_image = load_gcs_parquet.get_df('validation', 'camera_image', GCSFileSystem, columns=columns_camera_image, partition_size=partition_size)

# 3.7 Loading data waymo_open_dataset_v_2_0_0/validation/camera_box/*.parquet from GCS
# (used downstream to build the ground-truth submission)
%time df_validation_camera_box = load_gcs_parquet.get_df('validation', 'camera_box', GCSFileSystem, columns=columns_camera_box, partition_size=partition_size)

# 3.8 Loading data waymo_open_dataset_v_2_0_0/validation/camera_calibration/*.parquet from GCS
#* %time df_validation_camera_calibration = load_gcs_parquet.get_df('validation', 'camera_calibration', GCSFileSystem, columns=columns_camera_calibration, partition_size=partition_size)

# 3.9 Loading data waymo_open_dataset_v_2_0_0/validation/stats/*.parquet from GCS
#* %time df_validation_stats = load_gcs_parquet.get_df('validation', 'stats', GCSFileSystem, partition_size=partition_size)

Loading data waymo_open_dataset_v_2_0_0/validation/camera_image/*.parquet from GCS...
[########################################] | 100% Completed | 32.88 s
CPU times: user 5.1 s, sys: 502 ms, total: 5.6 s
Wall time: 34.8 s
Loading data waymo_open_dataset_v_2_0_0/validation/camera_box/*.parquet from GCS...
CPU times: user 37.2 ms, sys: 3.17 ms, total: 40.4 ms
Wall time: 224 ms


## 4. Training

## 5. Create Submission
reference: [Waymo Open Dataset 3D Camera-Only Detection Tutorial](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial_camera_only.ipynb)

In [None]:
# from src.submit import format

# # Prepare predictions. Please modify accordingly to process your inference results.
# context_names = ['1305342127382455702_3720_000_3740_000']

# frame_timestamps = {
#     # Please make sure that the timestamps match frame.timestamp_micros.
#     '1305342127382455702_3720_000_3740_000': [1511019682029265, 1511019682129243]
# }

# prediction_objects = {}
# for context_name in context_names:
#   prediction_objects[context_name] = {}
#   for timestamp in frame_timestamps[context_name]:
#     # Create objects based on inference results
#     prediction_objects[context_name][timestamp] = format.make_inference_objects(
#         context_name=context_name,
#         timestamp=timestamp,
#         boxes=np.random.rand(3, 4),
#         classes=np.random.randint(low=1, high=4, size=(3,)),
#         scores=np.random.rand(3,),
#         camera_names=np.random.randint(low=1, high=5, size=(3,))
#     )

In [None]:
# context_name = context_names[0]
# timestamp = frame_timestamps[context_name][0]
# print(prediction_objects[context_name][timestamp][0])

In [None]:
# test_info = format.test_submit_info()
# print(test_info)

In [None]:
# submission_file_base='./MySubmission'

# format.pack_to_submission(
#     submission_file_base=submission_file_base,
#     prediction_objects=prediction_objects,
#     **test_info 
# )
# print(f'Then you can upload {submission_file_base}.tar.gz to the challenge website.')

## Make a ground truth validation answer for testing the submission format

In [None]:
from src.submit import format as format_
import numpy as np

def make_ground_truth_answer(partition_dataframe):
    """Turn ground-truth camera boxes into per-frame inference objects.

    Each row of ``partition_dataframe`` is converted into one object with
    ``format_.make_inference_objects`` using a fixed score of 1.0, so the
    ground truth can be packed like a prediction.

    Args:
        partition_dataframe: DataFrame providing the columns
            ``key.segment_context_name``, ``key.frame_timestamp_micros``,
            ``key.camera_name``, ``[CameraBoxComponent].type`` and the four
            ``[CameraBoxComponent].box.*`` columns read below.

    Returns:
        Nested dict ``{context_name: {timestamp: [object, ...]}}``; empty when
        the dataframe has no rows.
    """
    # Order matters: center x, center y, size x, size y.
    box_columns = [
        '[CameraBoxComponent].box.center.x',
        '[CameraBoxComponent].box.center.y',
        '[CameraBoxComponent].box.size.x',
        '[CameraBoxComponent].box.size.y',
    ]
    answers = {}

    for _, record in partition_dataframe.iterrows():
        segment_name = record['key.segment_context_name']
        frame_timestamp = record['key.frame_timestamp_micros']

        # The helper takes batched arrays and returns a list, so wrap the single
        # row into one-element arrays and take the only object back out.
        ground_truth_object = format_.make_inference_objects(
            context_name=segment_name,
            timestamp=frame_timestamp,
            boxes=np.array([[record[column] for column in box_columns]]),
            classes=np.array([record['[CameraBoxComponent].type']]),
            scores=np.array([1.0]),
            camera_names=np.array([record['key.camera_name']]),
        )[0]

        per_frame = answers.setdefault(segment_name, {})
        per_frame.setdefault(frame_timestamp, []).append(ground_truth_object)

    return answers

In [None]:
# Apply `make_ground_truth_answer` to every partition of the validation
# camera_box dataframe, then materialise the result (timed).
# NOTE(review): `make_ground_truth_answer` returns a plain dict per partition,
# not a DataFrame -- confirm what `answers.compute()` yields when there is more
# than one partition and that `pack_to_submission` accepts it. TODO confirm.
answers = df_validation_camera_box.map_partitions(make_ground_truth_answer)
%time t = answers.compute()

In [None]:
# Submission metadata provided by the project helper; printed for inspection.
test_info = format_.test_submit_info()
print(test_info)

# Output location (without extension): /content on Colab, /data elsewhere.
submission_file_base = (
    '/content/submit_testing_validation_v0'
    if use_colab
    else '/data/submit_testing_validation_v0'
)

# Pack the ground-truth "predictions" computed above into a single shard.
format_.pack_to_submission(
    submission_file_base=submission_file_base,
    prediction_objects=t,
    num_submission_shards=1,
    **test_info,
)

print(f'Then you can upload {submission_file_base}.tar.gz to the challenge website.')

## 6. Compute Metrics
- reference: 
    - [Waymo Open Dataset 3D Camera-Only Detection Tutorial](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial_camera_only.ipynb)
    - [2D Detection](https://waymo.com/intl/en_us/open/challenges/2020/2d-detection/)
        - Metrics:
            - Primary metric: Average Precision (AP): 
                - ∫p(r)dr where p(r)is the PR curve
            - IoU Overlap Threshold:
                - Vehicle 0.7, Pedestrian 0.5, Cyclist 0.5, Sign 0.5
            - Sensor Names:
                - C: All cameras
                - I: Invalid
            - Label Difficulty Breakdown:
                - Each ground truth label is categorized into different difficulty levels (two levels for now):
                    - LEVEL_1, if not marked as LEVEL_2 in the released data.
                    - LEVEL_2, if marked as LEVEL_2 in the released data. When evaluating, LEVEL_2 metrics are computed by considering both LEVEL_1 and LEVEL_2 ground truth.
            - Metric Breakdown:
                - The following metric breakdowns are supported:
                    - OBJECT_TYPE: Breakdown by object type ("ALL_NS" refers to all objects except signs: Vehicle, Cyclist, and Pedestrian)
                    - RANGE: Breakdown by the distance between object center and vehicle frame origin. [0, 35m), [35m, 50m), [50m, +inf)


In [None]:
from waymo_open_dataset.metrics.python import wod_detection_evaluator
from src.evaluation import metrics

# Build the Waymo detection evaluator from the project's metrics configuration.
config = metrics.get_config()
evaluator = wod_detection_evaluator.WODDetectionEvaluator(config=config)
# print(evaluator._config)

# Intended usage (sketch only, not executed; `predictor` is not defined here):
# for _ in range(num_evals):
#     for _ in range(num_batches_per_eval):
#         predictions, groundtruths = predictor.predict(...)  # pop a batch.
#         evaluator.update_state(groundtruths, predictions)
# evaluator.result()  # finish one full eval and reset states.

## Test evaluation with fake files

In [None]:
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import metrics_pb2
from waymo_open_dataset.metrics.ops import py_metrics_ops
from waymo_open_dataset.metrics.python import config_util_py as config_util
import tensorflow as tf

def compute_let_detection_metrics(prediction_frame_id,
                                  prediction_bbox,
                                  prediction_type,
                                  prediction_score,
                                  ground_truth_frame_id,
                                  ground_truth_bbox,
                                  ground_truth_type,
                                  ground_truth_difficulty,
                                  recall_at_precision=None,
                                  name_filter=None,
                                  config=None):
  """Returns dict of metric name to metric values.

  Notation:
    * M: number of predicted boxes.
    * D: number of box dimensions. The number of box dimensions can be one of
         the following:
           4: Used for boxes with type TYPE_AA_2D (center_x, center_y, length,
              width)
           5: Used for boxes with type TYPE_2D (center_x, center_y, length,
              width, heading).
           7: Used for boxes with type TYPE_3D (center_x, center_y, center_z,
              length, width, height, heading).
    * N: number of ground truth boxes.

  Args:
    prediction_frame_id: [M] int64 tensor that identifies frame for each
      prediction.
    prediction_bbox: [M, D] tensor encoding the predicted bounding boxes.
    prediction_type: [M] tensor encoding the object type of each prediction.
    prediction_score: [M] tensor encoding the score of each prediction.
    ground_truth_frame_id: [N] int64 tensor that identifies frame for each
      ground truth.
    ground_truth_bbox: [N, D] tensor encoding the ground truth bounding boxes.
    ground_truth_type: [N] tensor encoding the object type of each ground truth.
    ground_truth_difficulty: [N] tensor encoding the difficulty level of each
      ground truth.
    recall_at_precision: Currently unused by this function; accepted so that
      existing callers keep working.
    name_filter: Optional substring. When given, only breakdowns whose name
      contains it are included in the result.
    config: The metrics config defined in protos/metrics.proto. When None
      (default), `metrics.get_config()` is called at call time.

  Returns:
    A dictionary of metric names to metrics values. Keys have the form
    '<breakdown name>/LET-mAP'.
  """
  # Resolve the default lazily: a call in the signature would be evaluated once
  # when the function is defined, and the same config object would then be
  # shared by every call.
  if config is None:
    config = metrics.get_config()

  num_ground_truths = tf.shape(ground_truth_bbox)[0]
  num_predictions = tf.shape(prediction_bbox)[0]
  # The op requires these inputs; all-zero placeholders are supplied for them.
  ground_truth_speed = tf.zeros((num_ground_truths, 2), tf.float32)
  prediction_overlap_nlz = tf.zeros((num_predictions,), tf.bool)

  config_str = config.SerializeToString()
  # Only `ap` is reported below; `aph`/`apl` stay bound for the commented-out
  # metrics further down.
  ap, aph, apl, _, _, _, _ = py_metrics_ops.detection_metrics(
      prediction_frame_id=tf.cast(prediction_frame_id, tf.int64),
      prediction_bbox=tf.cast(prediction_bbox, tf.float32),
      prediction_type=tf.cast(prediction_type, tf.uint8),
      prediction_score=tf.cast(prediction_score, tf.float32),
      prediction_overlap_nlz=prediction_overlap_nlz,
      ground_truth_frame_id=tf.cast(ground_truth_frame_id, tf.int64),
      ground_truth_bbox=tf.cast(ground_truth_bbox, tf.float32),
      ground_truth_type=tf.cast(ground_truth_type, tf.uint8),
      ground_truth_difficulty=tf.cast(ground_truth_difficulty, tf.uint8),
      ground_truth_speed=ground_truth_speed,
      config=config_str)
  breakdown_names = config_util.get_breakdown_names_from_config(config)
  metric_values = {}
  for i, name in enumerate(breakdown_names):
    if name_filter is not None and name_filter not in name:
      continue
    # Average Precision
    metric_values['{}/LET-mAP'.format(name)] = ap[i]
    # Average Precision Weighted by Heading
    # metric_values['{}/LET-mAPH'.format(name)] = aph[i]
    # metric_values['{}/LET-mAPL'.format(name)] = apl[i]

  return metric_values


def parse_metrics_objects_binary_files(ground_truths_path, predictions_path):
  """Parses two serialized `metrics_pb2.Objects` files into stacked tensors.

  Ground truths are skipped when `most_visible_camera_name` is empty or when
  `num_lidar_points_in_box` is 0; an UNKNOWN detection difficulty is replaced
  by LEVEL_2 (in place on the parsed message). Predictions are not filtered.
  Boxes are reduced to (center_x, center_y, length, width).

  Args:
    ground_truths_path: Path of the serialized ground-truth objects file.
    predictions_path: Path of the serialized prediction objects file.

  Returns:
    A dict with the keys prediction_frame_id, prediction_bbox, prediction_type,
    prediction_score, ground_truth_frame_id, ground_truth_bbox,
    ground_truth_type and ground_truth_difficulty, each mapped to the
    `tf.stack` of the collected per-object values.
  """

  def _read_objects(path):
    with tf.io.gfile.GFile(path, 'rb') as stream:
      return metrics_pb2.Objects.FromString(stream.read())

  def _as_2d_box(box):
    # center_z, height and heading are intentionally left out.
    return np.asarray(
        [box.center_x, box.center_y, box.length, box.width], np.float32)

  ground_truth_objects = _read_objects(ground_truths_path)
  predictions_objects = _read_objects(predictions_path)

  gt_frame_ids, gt_boxes, gt_types, gt_difficulties = [], [], [], []
  pred_frame_ids, pred_boxes, pred_types, pred_scores = [], [], [], []

  # Parse and filter ground truths.
  for entry in ground_truth_objects.objects:
    label = entry.object
    # Skip objects outside the cameras' FOV or fully occluded to cameras.
    if not label.most_visible_camera_name or label.num_lidar_points_in_box == 0:
      continue
    # Fill in unknown difficulties.
    if label.detection_difficulty_level == label_pb2.Label.UNKNOWN:
      label.detection_difficulty_level = label_pb2.Label.LEVEL_2
    gt_frame_ids.append(entry.frame_timestamp_micros)
    # Note that `camera_synced_box` is used for evaluation.
    gt_boxes.append(_as_2d_box(label.camera_synced_box))
    gt_types.append(label.type)
    gt_difficulties.append(np.uint8(label.detection_difficulty_level))

  # Parse predictions.
  for entry in predictions_objects.objects:
    pred_frame_ids.append(entry.frame_timestamp_micros)
    pred_boxes.append(_as_2d_box(entry.object.box))
    pred_types.append(entry.object.type)
    pred_scores.append(entry.score)

  collected = {
      'prediction_frame_id': pred_frame_ids,
      'prediction_bbox': pred_boxes,
      'prediction_type': pred_types,
      'prediction_score': pred_scores,
      'ground_truth_frame_id': gt_frame_ids,
      'ground_truth_bbox': gt_boxes,
      'ground_truth_type': gt_types,
      'ground_truth_difficulty': gt_difficulties,
  }
  return {name: tf.stack(values) for name, values in collected.items()}

In [None]:
# Fake ground-truth / prediction files used to exercise the evaluation code.
WAYMO_OPEN_DATASET_DIR = '/data/fake/'
FAKE_GROUND_TRUTHS_BIN = WAYMO_OPEN_DATASET_DIR + 'fake_ground_truths.bin'
FAKE_PREDICTIONS_BIN = WAYMO_OPEN_DATASET_DIR + 'fake_predictions.bin'

eval_dict = parse_metrics_objects_binary_files(
    ground_truths_path=FAKE_GROUND_TRUTHS_BIN,
    predictions_path=FAKE_PREDICTIONS_BIN,
)

# Summarise every parsed field: its name, Python type and shape.
print("eval_dict")
for field_name, field_tensor in eval_dict.items():
  print(f" {field_name}")
  print(f"    type: {type(field_tensor)}")
  print(f"    shape: {field_tensor.shape}")

In [None]:
# Evaluate the parsed fake data and print one left-aligned line per metric.
metrics_dict = compute_let_detection_metrics(**eval_dict)
for metric_name, metric_value in metrics_dict.items():
  print(f'{metric_name:<55}: {metric_value}')

## Cancel resources

In [None]:
# No cell in this notebook creates `client`, so an unconditional
# `client.close()` raises NameError under Restart & Run All. Close it only when
# a `client` object actually exists in the kernel namespace.
if 'client' in globals():
    client.close()