
Copyright 2022 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [1]:
#@title License
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Instructions for using the PRIME dataset parser

This colab provides the details to parse the
[PRIME](https://arxiv.org/abs/2110.11346) dataset available on Google Cloud Storage: [gs://gresearch/prime](https://console.cloud.google.com/storage/browser/gresearch/prime)

You may download the dataset either by using the Google Cloud Storage web interface or using gsutil:

```
gsutil cp -r gs://gresearch/prime /tmp/prime/
```

The PRIME dataset contains **10** microarchitectural parameters, including the number of compute units, the on-chip memory size, and the number of SIMD units, along with the Runtime (milliseconds) and Chip Area Usage (mm$^2$) for each of the nine applications that we evaluated in our paper.

The data in this dataset were collected using an industry-grade cycle-accurate simulator.

This dataset contains both infeasible and feasible data points, as described in
[PRIME](https://arxiv.org/abs/2110.11346). Summary statistics of the collected
data are presented in the table below (Table 1); the runtime columns are
computed over the feasible data points only.

|                  | # of Infeasible | # of Feasible | Max Runtime (ms) | Min Runtime (ms) | Average Runtime (ms) |
|------------------|-----------------|---------------|------------------|------------------|----------------------|
| **MobileNetEdgeTPU** |          384355 |        115711 |         16352.26 |           252.22 |               529.13 |
| **MobilenetV2**      |          744718 |        255414 |          7398.13 |           191.35 |               375.05 |
| **MobilenetV3**      |          797460 |        202672 |          7001.46 |           405.19 |               993.75 |
| **M4**               |          791984 |        208148 |         35881.35 |           335.59 |               794.33 |
| **M5**               |          698618 |        301514 |         35363.55 |           202.55 |               440.52 |
| **M6**               |          756468 |        243664 |          4236.90 |           127.79 |               301.74 |
| **UNet**             |          449578 |         51128 |        124987.51 |           610.96 |              3681.75 |
| **T-RNN Dec**        |          405607 |         94459 |          4447.74 |           128.05 |               662.44 |
| **T-RNN Enc**        |          410933 |         88880 |          5112.82 |           127.97 |               731.20 |

In [2]:
#@title Listing the data for the studied application
# Shell escape: list the entries stored under the PRIME bucket prefix on
# Google Cloud Storage (needs the `gsutil` CLI to be available).
!gsutil ls gs://gresearch/prime

gs://gresearch/prime/MobileNetEdgeTPU/
gs://gresearch/prime/MobilenetEdgeTPU/
gs://gresearch/prime/MobilenetV2/
gs://gresearch/prime/MobilenetV3/
gs://gresearch/prime/m4/
gs://gresearch/prime/m5/
gs://gresearch/prime/m6/
gs://gresearch/prime/t_rnn_dec/
gs://gresearch/prime/t_rnn_enc/
gs://gresearch/prime/u-net/


In [12]:
#@title Importing the necessary libraries
import tensorflow as tf
import numpy as np

In [6]:
#@title APIs for parsing PRIME datasets
def parse_prime_tfrecords(proto):
  """Decodes one serialized PRIME record into a dict of scalar tensors.

  Args:
    proto: A single serialized example, e.g. one element of a
      `tf.data.TFRecordDataset`.

  Returns:
    A dict mapping 'param_1' ... 'param_10', 'runtime' and 'area' to float32
    scalars, and 'infeasible' to an int64 scalar.
  """
  # Every numeric column shares the same scalar float32 spec, so generate the
  # entries instead of spelling each one out.
  float_columns = [f'param_{idx}' for idx in range(1, 11)] + ['runtime', 'area']
  feature_spec = {
      column: tf.io.FixedLenFeature([], tf.float32) for column in float_columns
  }
  # The feasibility flag is the only integer-valued column.
  feature_spec['infeasible'] = tf.io.FixedLenFeature([], tf.int64)
  return tf.io.parse_single_example(proto, feature_spec)

In [10]:
#@title Parsing the dataset for the studied application
model_name = 'm4' #@param ["MobilenetEdgeTPU", "MobilenetV2", "MobilenetV3", "m4", "m5", "m6", "t_rnn_dec", "t_rnn_enc", "u-net"]
# Collect every TFRecord file stored under the selected application's directory.
filenames = tf.io.gfile.glob(f'gs://gresearch/prime/{model_name}/*.tfrecord')
# Fail fast with an actionable message: an empty match (wrong model name, no
# access to the bucket) would otherwise surface later as an unrelated error.
if not filenames:
  raise FileNotFoundError(
      f'No .tfrecord files found under gs://gresearch/prime/{model_name}/')
# Read from up to 64 files concurrently.
raw_dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=64)
# Decode records in parallel as well; `map` is deterministic by default, so the
# element order is the same as with a sequential map.
parsed_dataset = raw_dataset.map(
    parse_prime_tfrecords, num_parallel_calls=tf.data.AUTOTUNE)

In [13]:
#@title Reproducing the data in the Table 1
# Counts feasible / infeasible records and summarizes the runtime of the
# feasible ones. Printed as:
#   <#infeasible>,<#feasible>,<max runtime>,<min runtime>,<average runtime>
number_of_infeasibles = 0
number_of_feasibles = 0
latency = []
# Parsing the whole dataset for an application may take 15-20 minutes.
# as_numpy_iterator() yields NumPy scalars, so the per-record comparison below
# is a plain NumPy check instead of an eager TensorFlow op.
for p in parsed_dataset.as_numpy_iterator():
  if p['infeasible'] == 0:
    number_of_feasibles += 1
    latency.append(p['runtime'])
  else:
    number_of_infeasibles += 1

if latency:
  # Keep float32 so the statistics match the precision stored in the records.
  runtimes = np.asarray(latency, dtype=np.float32)
  print(f'{number_of_infeasibles},{number_of_feasibles},{runtimes.max()},{runtimes.min()},{runtimes.mean()}')
else:
  # No feasible record: max()/min() of an empty sequence would raise and throw
  # away the counts gathered by the (long) pass above, so report NaN instead.
  print(f'{number_of_infeasibles},{number_of_feasibles},nan,nan,nan')

791984,208148,35881.3515625,335.59100341796875,794.33349609375
