# Simple TFRecord validation

This notebook helps visualize simple stats about a TFRecords dataset to make sure it's formatted properly.

In [1]:
import os
import os.path as op
import tensorflow as tf
import tensorflow_data_validation as tfdv

# Define stats_options to only look at classes. Don't try to load raw image data here
stats_options = tfdv.StatsOptions(feature_whitelist=['image/object/class/text'])

#tfr_fpaths = [op.join(os.environ['DATA_DIR'], 'divot_detect', 'rocks_sample', 'data_rocks_rocks_v2.record')]
#tfr_fpaths = [op.join(os.environ['DATA_DIR'], 'divot_detect', 'craters_sample', 'craters_moon_20200706.record')]
tfr_fpaths = [op.join(os.environ['DATA_DIR'], 'divot_detect', 'craters_sample', 'harish_labels_v3', 'lroc-nac', 'tf.record')]


In [2]:
def print_example_info(example_dict):
    """Print out some high level information about a TFRecord example"""
    
    
    print(f'\nImage file name: {example_dict["image/filename"].bytes_list.value[0]}')
    print(f'Beginning of image byte str: {example_dict["image/encoded"].bytes_list.value[0][:20]}')
    print(f'Image dimensions (w x h): {example_dict["image/width"].int64_list.value} x '
          f'{example_dict["image/height"].int64_list.value}')

    print(f'Class labels: {example_dict["image/object/class/text"].bytes_list}')

In [5]:
# Loop over each TFRecord filenames
for tfr_fpath in tfr_fpaths:
    print(f'Analyzing {tfr_fpath}')

    # Compute and visualize stats about number of unique classes 
    stats = tfdv.generate_statistics_from_tfrecord(data_location=tfr_fpath, stats_options=stats_options)
    raw_dataset = tf.data.TFRecordDataset(tfr_fpath)

    # This will automatically plot
    viz = tfdv.visualize_statistics(stats)
    
    # Get and print examples from the TFRecord dataset
    tf_iterator = tf.compat.v1.io.tf_record_iterator(tfr_fpath)
    example_dicts = [dict(tf.train.Example.FromString(ex).features.feature) 
                     for ex in tf_iterator]
    for example_dict in example_dicts:
        print_example_info(example_dict)


Analyzing /Users/wronk/Data/divot_detect/craters_sample/harish_labels_v3/lroc-nac/tf.record



Image file name: b'./images/_static_NAC_ROI_ALPHNSUSLOA_E129S3581_tile_19600_5250_0_0.png'
Beginning of image byte str: b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00'
Image dimensions (w x h): [350] x [350]
Class labels: value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
value: "crater"
v

In [6]:
example_dict.keys()

dict_keys(['image/filename', 'image/object/bbox/xmax', 'image/object/mask', 'image/width', 'image/format', 'image/object/class/label', 'image/height', 'image/object/class/text', 'image/object/bbox/ymin', 'image/source_id', 'image/encoded', 'image/object/bbox/ymax', 'image/object/bbox/xmin'])

In [7]:
example_dict

{'image/filename': bytes_list {
   value: "./images/_static_M1348265832LE_tile_0_700_0_0.png"
 },
 'image/object/bbox/xmax': float_list {
   value: 0.09428571164608002
   value: 0.16285714507102966
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.1542857140302658
   value: 0.1542857140302658
   value: 0.1542857140302658
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
  

In [8]:
example_dict

{'image/filename': bytes_list {
   value: "./images/_static_M1348265832LE_tile_0_700_0_0.png"
 },
 'image/object/bbox/xmax': float_list {
   value: 0.09428571164608002
   value: 0.16285714507102966
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.25999999046325684
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.27714285254478455
   value: 0.1542857140302658
   value: 0.1542857140302658
   value: 0.1542857140302658
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
   value: 0.2314285784959793
  

TypeError: object of type 'FloatList' has no len()