Skip to content

Commit

Permalink
Modules with no inlining done have empty feature values and raise an IndexError in _get_feature_info(). This commit fixes that issue.
Browse files Browse the repository at this point in the history
  • Loading branch information
kshiteejm committed Mar 5, 2022
1 parent cad0a9d commit ef77833
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions compiler_opt/tools/sparse_bucket_generator.py
Expand Up @@ -26,6 +26,7 @@

from absl import app
from absl import flags
from absl import logging

import numpy as np
import tensorflow as tf
Expand Down Expand Up @@ -126,12 +127,21 @@ def _generate_vocab(feature_values_arrays, feature_name):

def main(_) -> None:
"""Generate num_buckets quantiles for each feature."""
tf.io.gfile.makedirs(FLAGS.output_dir)
dataset = tf.data.Dataset.list_files(FLAGS.input)
dataset = tf.data.TFRecordDataset(dataset)

sequence_features = {}
for raw_example in dataset.take(1):
sequence_features = _get_feature_info(raw_example)
for raw_example in dataset:
try:
sequence_features = _get_feature_info(raw_example)
logging.info('Found valid sequence_features dict: %s', sequence_features)
break
except Exception: # pylint: disable=broad-except
# modules with no inlining done have empty feature values and
# raise an IndexError.
# continue until an inlined module with non-empty feature values is found.
continue

parser_fn = create_tfrecord_parser_fn(sequence_features)
dataset = dataset.map(parser_fn, num_parallel_calls=tf.data.AUTOTUNE)
Expand Down

0 comments on commit ef77833

Please sign in to comment.