# Reading TFRecords


Our TFRecords have 3 fields:
1. `user_review` - all of the user's reviews, concatenated and converted to integers using the word_2_idx dictionary
2. `item_review` - all of the item's reviews, concatenated and converted to integers using the word_2_idx dictionary
3. `rating` - the rating that the user gave the item

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [46]:
def get_pad_fn(max_len, fill_val):
    """Build a function that forces a 1-D tensor to exactly ``max_len`` elements.

    Args:
        max_len: Length of the tensor returned by the generated function.
        fill_val: Value appended when the input has fewer than ``max_len``
            elements. It is converted to an int64 constant.

    Returns:
        A function taking a 1-D tensor and returning its first ``max_len``
        elements, right-padded with ``fill_val`` when the input is shorter.
    """
    def pad_and_slice(tensor):
        filler = tf.constant(fill_val, tf.int64)
        # Appending max_len fill values guarantees at least max_len elements,
        # whatever the input length is (including zero).
        extended = tf.pad(tensor, paddings=[[0, max_len]], constant_values=filler)
        # Keep only the leading max_len elements: long inputs are truncated,
        # short inputs retain just enough of the padding.
        return tf.slice(extended, begin=[0], size=[max_len])
    return pad_and_slice

def get_parse_fn(pad_fn):
    """Build a function that decodes one serialized example into model inputs.

    Args:
        pad_fn: Callable applied to the "user_review" and "item_review"
            tensors after parsing (e.g. the function returned by
            ``get_pad_fn``).

    Returns:
        A function mapping a serialized record to the tuple
        ``(user_review, item_review, rating)``, where both reviews have been
        passed through ``pad_fn`` and ``rating`` is returned as parsed.
    """
    # Both review features are variable-length int64 sequences; the rating is
    # a single float32 value.
    feature_spec = {
        "user_review": tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        "item_review": tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        "rating": tf.FixedLenFeature([1], tf.float32),
    }

    def parse_fn(record):
        example = tf.parse_single_example(record, feature_spec)
        user_review = pad_fn(example["user_review"])
        item_review = pad_fn(example["item_review"])
        return user_review, item_review, example["rating"]
    return parse_fn

In [47]:
# Stream the serialized examples from the training TFRecord file.
dataset = tf.data.TFRecordDataset("data/train.tfrecords")
# Decode every record; both review sequences are padded/truncated to 400
# entries, using -1 as the padding value.
dataset = dataset.map(get_parse_fn(get_pad_fn(400, -1)))
dataset = dataset.batch(16)
iterator = dataset.make_one_shot_iterator()
# data_point is the (user_review, item_review, rating) tuple for one batch.
data_point = iterator.get_next()
# Evaluate inside a context manager so the session and the resources it holds
# are released as soon as the batch has been fetched, instead of leaking an
# anonymous session that is never closed.
with tf.Session() as sess:
    user_review_batch = sess.run(data_point[0])
# Leave the value as the final expression so the notebook still displays it.
user_review_batch

array([[32238, 16868, 56211, ...,    -1,    -1,    -1],
       [ 9094, 17991, 47869, ...,    -1,    -1,    -1],
       [32238, 50351,  9094, ...,    -1,    -1,    -1],
       ...,
       [51169, 48041, 29945, ...,    -1,    -1,    -1],
       [49920, 19882, 37611, ..., 47629, 35047, 12302],
       [ 5663, 15128,  8785, ...,    -1,    -1,    -1]])