updates to imagenet_predictions:
- add placer dependencies to BUILD to run with cns data
- restructure workflow so the test data is actually used
- some small cleanup

PiperOrigin-RevId: 291010804
Sara Hooker authored and Copybara-Service committed Jan 22, 2020
1 parent fb3f2d8 commit cdc1496
Showing 3 changed files with 90 additions and 58 deletions.
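
The core restructuring is in imagenet_predictions.py below: main() now branches on test_small_sample up front, so a test run actually exercises the synthetic-data path instead of globbing real shards. A minimal sketch of the new control flow, with predict_one_shard and list_shards as hypothetical stand-ins for the predict-then-write-CSV body and tf.io.gfile.glob (neither name is in the repo):

import os

def run_predictions(test_small_sample, data_directory, output_path,
                    predict_one_shard, list_shards):
  # Sketch only; predict_one_shard and list_shards are hypothetical helpers.
  if test_small_sample:
    # Test mode: one pass over a small synthetic sample, no shard loop.
    predict_one_shard(filename=[], output_dir=output_path, num_eval_images=10)
    return
  # Full mode: one predictions dataframe per tfrecord shard.
  for shard in sorted(list_shards(os.path.join(data_directory, "val*"))):
    dest_dir = os.path.join(output_path, "imagenet", "predictions_dataframe",
                            "eval", os.path.basename(shard))
    predict_one_shard(filename=shard, output_dir=dest_dir)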
pruning_identified_exemplars/pie_dataset_gen/imagenet_predictions.py (75 additions, 48 deletions)
@@ -38,17 +38,18 @@
 flags.DEFINE_string("data_directory", "",
                     "The location of the tfrecords used for training.")
 flags.DEFINE_integer("batch_size", 1, "Batch size for creating new dataset.")
-flags.DEFINE_string("output_path", "",
+flags.DEFINE_string("output_path", "/tmp/output/",
                     "Directory path to save the csv data to.")
 flags.DEFINE_string("ckpt_dir", "", "Ckpt to extract predictions from.")
 flags.DEFINE_enum("mode", "eval", ("eval", "train"),
                   "Mode designated as train or eval.")
 flags.DEFINE_float("label_smoothing", 0.1,
                    "Relax confidence in the labels by (1-label_smoothing).")
 # set this flag to true to do a test run of this code with synthetic data
-flags.DEFINE_bool("test_small_sample", False,
+flags.DEFINE_bool("test_small_sample", True,
                   "Boolean for whether to test internally.")
 
+
 imagenet_params = {
     "num_eval_images": 50000,
     "num_label_classes": 1000,
@@ -70,7 +71,8 @@ def predictions_from_checkpoint_dir(directory_path, filename, params,
     global_step: Training Step at which eval metrics were stored.
 
   Returns:
-    Pandas dataframe with predictions for all images on specified shard.
+    When run on full dataset (test_small_sample=False) returns a pandas
+    dataframe with predictions for all images on specified shard.
 
   Raises:
     ValueError when checkpoint is not stored in the correct format.
@@ -104,67 +106,92 @@ def predictions_from_checkpoint_dir(directory_path, filename, params,
 
   predictions = model_utils.initiate_task_helper(
       ckpt_directory=ckpt_directory, model_params=params)
-  df = pd.DataFrame.from_records(list(predictions))
-  df["exp"] = split[8]
-  df["split"] = split[9]
-  df["filename"] = filename
-  df["pruning_method"] = split[11]
-  df["fraction_pruned"] = split[12]
-  df["start_pruning_step"] = split[13]
-  df["end_pruning_step"] = split[14]
-  df["pruning_frequency"] = split[15]
-  df["global_step"] = global_step
-  return df
+  if not FLAGS.test_small_sample:
+    df = pd.DataFrame.from_records(list(predictions))
+    df["exp"] = split[8]
+    df["split"] = split[9]
+    df["filename"] = filename
+    df["pruning_method"] = split[11]
+    df["fraction_pruned"] = split[12]
+    df["start_pruning_step"] = split[13]
+    df["end_pruning_step"] = split[14]
+    df["pruning_frequency"] = split[15]
+    df["global_step"] = global_step
+    return df
 

 def main(argv):
   del argv  # Unused.
 
   if FLAGS.mode == "eval":
-    file_path = "validation*"
+    file_path = "val*"
   else:
     file_path = "train*"
 
-  data_directory = os.path.join(FLAGS.data_directory, file_path)
-  filenames = tf.gfile.Glob(data_directory)
+  if FLAGS.test_small_sample:
+    filenames = []
+    data_directory = FLAGS.data_directory
+  else:
+    data_directory = os.path.join(FLAGS.data_directory, file_path)
 
-  shard_count = 0
-  for filename in sorted(filenames):
-    shard = os.path.basename(filename)
-    dest_dir = os.path.join(FLAGS.output_path, "imagenet",
-                            "predictions_dataframe", FLAGS.mode, shard)
-    if not tf.gfile.IsDirectory(dest_dir):
-      tf.gfile.MkDir(dest_dir)
-
-    params = imagenet_params
-    # shuffle is set to false to prevent output ordering of images
-    update_params = {
-        "sloppy_shuffle": False,
-        "num_cores": 8,
-        "base_learning_rate": 0.1,
-        "weight_decay": 1e-4,
-        "lr_schedule": [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)],
-        "momentum": 0.9,
-        "data_format": "channels_last",
-        "output_dir": dest_dir,
-        "label_smoothing": FLAGS.label_smoothing,
-    }
-    params.update(update_params)
-    global_step = int(os.path.basename(FLAGS.ckpt_dir).split("-")[1])
+    filenames = tf.io.gfile.glob(data_directory)
+
+  ckpt_directory = FLAGS.ckpt_dir
+
+  params = imagenet_params
+  # shuffle is set to false to prevent output ordering of images
+  update_params = {
+      "mode": FLAGS.mode,
+      "sloppy_shuffle": False,
+      "num_cores": 8,
+      "base_learning_rate": 0.1,
+      "weight_decay": 1e-4,
+      "lr_schedule": [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)],
+      "momentum": 0.9,
+      "data_format": "channels_last",
+      "label_smoothing": FLAGS.label_smoothing,
+  }
+  params.update(update_params)
+  global_step = int(os.path.basename(ckpt_directory).split("-")[1])
+
+  if FLAGS.test_small_sample:
+    logging.info(
+        "test_small_sample is set to True, ignoring data directory.")
+    params["output_dir"] = FLAGS.output_path
+    params["num_eval_images"] = 10
     df = predictions_from_checkpoint_dir(
         directory_path=data_directory,
-        filename=filename,
+        filename=filenames,
         params=params,
-        ckpt_directory=FLAGS.ckpt_dir,
+        ckpt_directory=ckpt_directory,
         global_step=global_step)
-    timestamp = str(time.time())
-    output_path = os.path.join(dest_dir,
-                               "predictions_dataframe_{}.csv".format(timestamp))
-    with tf.gfile.Open(output_path, "w") as f:
-      df.to_csv(f)
-    shard_count += 1
-  logging("number of shards processed: ", shard_count)
+    logging.info("testing workflow complete")
+  else:
+    shard_count = 0
+    for filename in sorted(filenames):
+      shard = os.path.basename(filename)
+      dest_dir = os.path.join(FLAGS.output_path, "imagenet",
+                              "predictions_dataframe", FLAGS.mode, shard)
+      if not tf.gfile.IsDirectory(dest_dir):
+        tf.gfile.MkDir(dest_dir)
+
+      params["output_dir"] = dest_dir
+
+      df = predictions_from_checkpoint_dir(
+          directory_path=data_directory,
+          filename=filename,
+          params=params,
+          ckpt_directory=ckpt_directory,
+          global_step=global_step)
+      timestamp = str(time.time())
+      output_path = os.path.join(
+          dest_dir, "predictions_dataframe_{}.csv".format(timestamp))
+      with tf.gfile.Open(output_path, "w") as f:
+        df.to_csv(f)
+      shard_count += 1
+    print("number of shards processed: ", shard_count)


 if __name__ == "__main__":
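
Two pieces of string parsing above are easy to misread: the global step is recovered from the checkpoint directory's basename, and the metadata columns in predictions_from_checkpoint_dir come from fixed path components. A self-contained illustration, assuming a "model.ckpt-<step>" basename and a toy path with the layout implied by the split[8]..split[15] indices (the real directory scheme is internal and not shown in this commit):

import os

ckpt_dir = "/tmp/ckpts/model.ckpt-32000"  # hypothetical checkpoint path
global_step = int(os.path.basename(ckpt_dir).split("-")[1])
assert global_step == 32000

# Toy path; components 8-15 would carry exp, split, (unused), pruning_method,
# fraction_pruned, start_pruning_step, end_pruning_step, pruning_frequency.
ckpt_directory = "/a/b/c/d/e/f/g/my_exp/eval/x/random/0.5/10000/20000/500"
split = ckpt_directory.split("/")
assert split[8] == "my_exp" and split[11] == "random" and split[15] == "500"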
pruning_identified_exemplars/utils/data_input.py (5 additions, 5 deletions)
@@ -32,7 +32,6 @@ def parser_tfrecords(serialized_example):
   """Parses a single tf.Example into image and label tensors."""
   data = {}
   if params['test_small_sample']:
-    print('test small sample')
     data['image_raw'] = serialized_example
     data['label'] = tf.constant(0, tf.int32)
     data['human_label'] = tf.constant('human_label', tf.string)
@@ -57,16 +56,17 @@ def parser_tfrecords(serialized_example):
         tf.reshape(features['image/class/label'], shape=[]),
         dtype=tf.int32) - 1
     image = tf.image.decode_jpeg(features['image/encoded'], 3)
-    # training is set to false in prediction mode
-    image = preprocessing_helper.preprocess_image(
-        image=image, image_size=224, is_training=False)
 
     human_label = tf.cast(
         tf.reshape(features['image/class/text'], shape=[]), dtype=tf.string)
-    print('human_label')
+    if params['task'] == 'imagenet_training':
+      image = preprocessing_helper.preprocess_image(
+          image=image, image_size=224, is_training=True)
+    else:
+      # training is set to false in prediction mode
+      image = preprocessing_helper.preprocess_image(
+          image=image, image_size=224, is_training=False)
 
     if params['task'] == 'pie_dataset_gen':
       data['image_raw'] = image_bytes
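
The data_input.py hunks drop two stray print calls and make preprocessing task-dependent instead of hard-coding is_training=False. A minimal sketch of the decision, assuming only the keyword signature of preprocessing_helper.preprocess_image visible in the diff:

def preprocess_for_task(image, task, preprocess_image):
  # preprocess_image stands in for preprocessing_helper.preprocess_image.
  # Only the ImageNet training task augments; every prediction task uses
  # deterministic eval preprocessing so outputs are reproducible.
  is_training = task == 'imagenet_training'
  return preprocess_image(image=image, image_size=224, is_training=is_training)

Folding the branch into a boolean is behaviorally the same as the two-branch form in the diff; the diff keeps the branches explicit, which leaves room for the comment on the prediction path.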
pruning_identified_exemplars/utils/model_utils.py (10 additions, 5 deletions)
@@ -99,12 +99,17 @@ def model_fn_w_pruning(features, labels, mode, params):
     A EstimatorSpec for the model
   """
 
-  images = features["image_raw"]
-  labels = features["label"]
+  task = params["task"]
 
-  if params["task"] in [
-      "pie_dataset_gen", "imagenet_predictions", "robustness_imagenet_c",
-      "robustness_imagenet_a", "ckpt_prediction"
+  if task in ["pie_dataset_gen", "imagenet_predictions"]:
+    images = features[0]
+    labels = features[1]
+  else:
+    images = features
+
+  if task in [
+      "pie_dataset_gen", "robustness_imagenet_c", "robustness_imagenet_a",
+      "ckpt_prediction"
   ]:
     human_labels = features["human_label"]
 
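
The model_utils.py hunk makes feature unpacking task-dependent. A sketch of the dispatch, under the assumption (implied by the diff) that the two prediction pipelines now yield (image, label) tuples while the remaining tasks pass the image tensor through directly:

def unpack_features(features, labels, task):
  # Prediction tasks batch image and label together as a tuple.
  if task in ("pie_dataset_gen", "imagenet_predictions"):
    return features[0], features[1]
  # Other tasks receive the image tensor as features; labels arrive via the
  # Estimator's separate labels argument.
  return features, labels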
