# Simple TFRecord validation

This notebook helps visualize simple stats about a TFRecords dataset to make sure it's formatted properly.

In [1]:
import os
import os.path as op
import tensorflow as tf
import tensorflow_data_validation as tfdv

# Define stats_options to only look at classes. Don't try to load raw image data here
stats_options = tfdv.StatsOptions(feature_whitelist=['image/object/class/text'])

#tfr_fpaths = [op.join(os.environ['DATA_DIR'], 'divot_detect', 'rocks_sample', 'data_rocks_rocks_v2.record')]
tfr_fpaths = [op.join(os.environ['DATA_DIR'], 'divot_detect', 'craters_sample', 'craters_moon_20200701.record')]

In [2]:
# Loop over each TFRecord filenames
for tfr_fpath in tfr_fpaths:
    print(f'Analyzing {tfr_fpath}')

    # Compute and visualize stats about number of unique classes 
    stats = tfdv.generate_statistics_from_tfrecord(data_location=tfr_fpath, stats_options=stats_options)
    raw_dataset = tf.data.TFRecordDataset(tfr_fpath)

    # This will automatically plot
    viz = tfdv.visualize_statistics(stats)
    
    # Get and print one example from the TFRecord dataset
    tf_iterator = tf.python_io.tf_record_iterator(tfr_fpath)
    example = tf_iterator.__next__()
    example_text = tf.train.Example.FromString(example)
    
    print('First TF example:')
    print(example_text.features)



Analyzing /Users/wronk/Data/divot_detect/craters_sample/craters_moon_20200701.record




Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


First TF example:
feature {
  key: "image/encoded"
  value {
    bytes_list {
      value: "\377\330\377\340\000\020JFIF\000\001\001\000\000\001\000\001\000\000\377\333\000C\000\010\006\006\007\006\005\010\007\007\007\t\t\010\n\014\024\r\014\013\013\014\031\022\023\017\024\035\032\037\036\035\032\034\034 $.\' \",#\034\034(7),01444\037\'9=82<.342\377\300\000\013\010\003\350\003\350\001\001\021\000\377\304\000\037\000\000\001\005\001\001\001\001\001\001\000\000\000\000\000\000\000\000\001\002\003\004\005\006\007\010\t\n\013\377\304\000\265\020\000\002\001\003\003\002\004\003\005\005\004\004\000\000\001}\001\002\003\000\004\021\005\022!1A\006\023Qa\007\"q\0242\201\221\241\010#B\261\301\025R\321\360$3br\202\t\n\026\027\030\031\032%&\'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz\203\204\205\206\207\210\211\212\222\223\224\225\226\227\230\231\232\242\243\244\245\246\247\250\251\252\262\263\264\265\266\267\270\271\272\302\303\304\305\306\307\310\311\312\322\323\324\325\326\327\330\331\332\341\3