In [1]:
# https://medium.com/mostly-ai/tensorflow-records-what-they-are-and-how-to-use-them-c46bc4bbb564

In [2]:
import os

import tensorflow as tf

In [3]:
# Example record from the article: a single row in which two features
# ('Movie' and 'Movie Ratings') each hold several values.
# Multi-valued features like these are not required for
# supervised-learning datasets.
data = {
    'Age': 29,                                             # one integer
    'Movie': ['The Shawshank Redemption', 'Fight Club'],   # several strings
    'Movie Ratings': [9.0, 9.7],                           # several floats
    'Suggestion': 'Inception',                             # one string
    'Suggestion Purchased': 1.0,                           # one float
    'Purchase Price': 9.99,                                # one float
}

print(data)

{'Age': 29, 'Movie': ['The Shawshank Redemption', 'Fight Club'], 'Movie Ratings': [9.0, 9.7], 'Suggestion': 'Inception', 'Suggestion Purchased': 1.0, 'Purchase Price': 9.99}


In [4]:
# Convert the `data` dictionary into a tf.train.Example protocol buffer.
# Every entry becomes a tf.train.Feature wrapping exactly one typed list
# (int64_list, float_list or bytes_list).


def _int64_feature(integers):
    """Wrap a list of ints in an int64_list Feature."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=integers))


def _float_feature(numbers):
    """Wrap a list of floats in a float_list Feature."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=numbers))


def _bytes_feature(strings):
    """Wrap a list of str in a bytes_list Feature, UTF-8 encoding each one."""
    encoded = [text.encode('utf-8') for text in strings]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded))


# scalar entries are wrapped in a one-element list; list entries are passed as-is
feature_map = {
    'Age': _int64_feature([data['Age']]),
    'Movie': _bytes_feature(data['Movie']),
    'Movie Ratings': _float_feature(data['Movie Ratings']),
    'Suggestion': _bytes_feature([data['Suggestion']]),
    'Suggestion Purchased': _float_feature([data['Suggestion Purchased']]),
    'Purchase Price': _float_feature([data['Purchase Price']]),
}

example = tf.train.Example(features=tf.train.Features(feature=feature_map))

print(example)

features {
  feature {
    key: "Age"
    value {
      int64_list {
        value: 29
      }
    }
  }
  feature {
    key: "Movie"
    value {
      bytes_list {
        value: "The Shawshank Redemption"
        value: "Fight Club"
      }
    }
  }
  feature {
    key: "Movie Ratings"
    value {
      float_list {
        value: 9.0
        value: 9.699999809265137
      }
    }
  }
  feature {
    key: "Purchase Price"
    value {
      float_list {
        value: 9.989999771118164
      }
    }
  }
  feature {
    key: "Suggestion"
    value {
      bytes_list {
        value: "Inception"
      }
    }
  }
  feature {
    key: "Suggestion Purchased"
    value {
      float_list {
        value: 1.0
      }
    }
  }
}



In [6]:
# Write the serialized example to a TFRecord file.
# Create the output directory up front: opening the writer fails when the
# folder is missing, which breaks this cell on a fresh checkout.
tfrecord_path = '../data/tfrecord/customer_1.tfrecord'
os.makedirs(os.path.dirname(tfrecord_path), exist_ok=True)

with tf.python_io.TFRecordWriter(tfrecord_path) as writer:
    writer.write(example.SerializeToString())

In [7]:
# Read the TFRecord file back and print the decoded features.
# NOTE: the queue-based input API used below is deprecated; the warnings
# emitted by this cell recommend tf.data.TFRecordDataset instead.
# ToDo: the following line might need to be changed when running a script
sess = tf.InteractiveSession()

# read TFRecord file
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(['../data/tfrecord/customer_1.tfrecord'])

# every execution of this op pulls the next serialized record from the queue
_, serialized_example = reader.read(filename_queue)

# define features
read_features = {
    'Age': tf.FixedLenFeature([], dtype=tf.int64),
    # the number of movies the suggestion is based on may vary
    'Movie': tf.VarLenFeature(dtype=tf.string),
    # and so does the number of ratings assigned to the movies
    'Movie Ratings': tf.VarLenFeature(dtype=tf.float32),
    'Suggestion': tf.FixedLenFeature([], dtype=tf.string),
    'Suggestion Purchased': tf.FixedLenFeature([], dtype=tf.float32),
    'Purchase Price': tf.FixedLenFeature([], dtype=tf.float32)}

# extract features from serialized data
read_data = tf.parse_single_example(serialized=serialized_example,
                                    features=read_features)

# important! many tf.train functions use tf.train.QueueRunner,
# so we need to start it before we read
tf.train.start_queue_runners(sess)

# Evaluate all features in ONE session run. Calling tensor.eval() once per
# feature re-executes the read op every time, so each printed feature would
# come from a different record as soon as the file holds more than one example.
parsed_example = sess.run(read_data)

# Print features
for name, value in parsed_example.items():
    print('{}: {}'.format(name, value))

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To constru