In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras


# Environment report, one value per line: whether TensorFlow reports a GPU
# as available, the TensorFlow version, and the Python version info.
print(tf.test.is_gpu_available(), tf.__version__, sys.version_info, sep="\n")

# Name and version of each library this notebook relies on.
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

True
2.0.0
sys.version_info(major=3, minor=7, micro=7, releaselevel='final', serial=0)
matplotlib 3.2.2
numpy 1.18.5
pandas 1.0.5
sklearn 0.21.2
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


In [3]:
# TFRecord file format
# --> tf.train.Example
#   -> tf.train.Features -> {"key": tf.train.Feature}
#         -> tf.train.Feature -> tf.train.BytesList/FloatList/Int64List

# Book titles are UTF-8 encoded to bytes before being wrapped in a BytesList.
favorites_books = [title.encode("utf-8") for title in ("machine learning", "cc150")]
favorites_books_bytelist = tf.train.BytesList(value=favorites_books)

# Float values that back the "hours" feature.
hours_floatlist = tf.train.FloatList(value=[15.5, 9.5, 7.0, 8.0])
print(hours_floatlist)

# Single integer that backs the "age" feature.
age_int64list = tf.train.Int64List(value=[42])
print(age_int64list)

# Wrap each typed list in a tf.train.Feature and register it under its key.
features = tf.train.Features(
    feature={
        "favorite_books": tf.train.Feature(bytes_list=favorites_books_bytelist),
        "hours": tf.train.Feature(float_list=hours_floatlist),
        "age": tf.train.Feature(int64_list=age_int64list),
    }
)
print(features)

value: 15.5
value: 9.5
value: 7.0
value: 8.0

value: 42

feature {
  key: "age"
  value {
    int64_list {
      value: 42
    }
  }
}
feature {
  key: "favorite_books"
  value {
    bytes_list {
      value: "machine learning"
      value: "cc150"
    }
  }
}
feature {
  key: "hours"
  value {
    float_list {
      value: 15.5
      value: 9.5
      value: 7.0
      value: 8.0
    }
  }
}



In [None]:
# Wrap the feature map in a tf.train.Example message and show it.
example = tf.train.Example(features=features)
print(example)

In [None]:
# Serialize the Example message to its byte-string form and show the raw bytes.
serialize_exmaple = example.SerializeToString()
print(serialize_exmaple)

In [None]:
# Write the serialized Example three times into a TFRecord file under output_dir.
output_dir = "tfrecord_basic"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it does nothing
# when the directory is already present and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): "rfrecords" looks like a typo for "tfrecords"; left unchanged
# so the path of the file on disk stays the same.
filename = "test.rfrecords"
filename_fullpath = os.path.join(output_dir, filename)
with tf.io.TFRecordWriter(filename_fullpath) as writer:
    for i in range(3):
        writer.write(serialize_exmaple)

In [None]:
# Open the TFRecord file at filename_fullpath as a dataset of records.
dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])

In [None]:
# Print every record yielded by the dataset; nothing is parsed at this point.
for serialized_record in dataset:
    print(serialized_record)

In [None]:
# Parsing spec: the key and dtype of every feature to extract from a record.
# The two VarLenFeature entries come back as sparse tensors (hence the
# to_dense call below); "age" is declared as a fixed-length feature of shape [].
expected_features = {
    "favorite_books": tf.io.VarLenFeature(dtype=tf.string),
    "hours": tf.io.VarLenFeature(dtype=tf.float32),
    "age": tf.io.FixedLenFeature([], dtype=tf.int64),
}
dataset = tf.data.TFRecordDataset([filename_fullpath])
for serialize_exmaple_tensor in dataset:
    # Stored under its own name so the tf.train.Example bound to `example`
    # in an earlier cell is not overwritten by a dict of tensors, which would
    # break a re-run of the serialization cell.
    parsed_example = tf.io.parse_single_example(
        serialize_exmaple_tensor,
        expected_features,
    )
    # Densify the sparse string tensor, then decode each title back to str.
    books = tf.sparse.to_dense(parsed_example["favorite_books"], default_value=b"")
    for book in books:
        print(book.numpy().decode("utf-8"))
    

In [None]:
# Save the same serialized Example three times as a compressed TFRecord file.
# NOTE(review): the path gets a ".zip" suffix although the compression type
# is GZIP; the suffix is kept as-is.
filename_fullpath_zip = filename_fullpath + ".zip"
options = tf.io.TFRecordOptions(compression_type="GZIP")

with tf.io.TFRecordWriter(filename_fullpath_zip, options=options) as writer:
    for i in range(3):
        writer.write(serialize_exmaple)

In [None]:
# Read the GZIP-compressed TFRecord file back and print each record's book titles.
dataset_zip = tf.data.TFRecordDataset([filename_fullpath_zip], compression_type="GZIP")
for serialize_exmaple_tensor in dataset_zip:
    # Stored under its own name so the tf.train.Example bound to `example`
    # in an earlier cell is not overwritten by a dict of tensors, which would
    # break a re-run of the serialization cell.
    parsed_example = tf.io.parse_single_example(
        serialize_exmaple_tensor,
        expected_features,
    )
    # Densify the sparse string tensor, then decode each title back to str.
    books = tf.sparse.to_dense(parsed_example["favorite_books"], default_value=b"")
    for book in books:
        print(book.numpy().decode("utf-8"))
    