In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

## 4.1 处理CIFAR-10数据集

In [4]:
# CIFAR-10 has ten label classes.
NUM_CLASSES = 10

# Record layout: the label occupies LABEL_BYTES, the image the rest.
LABEL_BYTES = 1
IMAGE_DEPTH = 3
IMAGE_SIZE = 32

# One image is 32 * 32 * 3 = 3072 bytes.
IMAGE_BYTES = IMAGE_SIZE * IMAGE_SIZE * IMAGE_DEPTH

In [7]:
def read_cifar10(data_file, batch_size):
    """Build a queue-based input pipeline yielding shuffled CIFAR-10 batches.

    Every record read from the files is ``LABEL_BYTES + IMAGE_BYTES`` bytes
    long: the label comes first, followed by the image stored depth-major
    (channel, row, column).

    Parameters:
      data_file: Path or glob pattern of the CIFAR-10 data file(s).
      batch_size: Number of examples per batch.

    Returns:
      images: float32 tensor shaped
        [batch_size, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH].
      labels: float32 one-hot tensor shaped [batch_size, NUM_CLASSES].

    Raises:
      ValueError: If no file matches ``data_file``.

    Note:
      The returned tensors are fed by queue runners registered on the default
      graph; start them (``tf.train.start_queue_runners``) in the session
      before evaluating the tensors, otherwise evaluation blocks.
    """
    # One record = label bytes + image bytes (1 + 3072 = 3073 for CIFAR-10).
    record_bytes = LABEL_BYTES + IMAGE_BYTES

    # Expand the pattern into a concrete list of files and fail early with a
    # clear message when nothing matches.
    data_files = tf.gfile.Glob(data_file)
    if not data_files:
        raise ValueError('No CIFAR-10 data files match: %r' % (data_file,))

    # Filename queue; shuffle=True randomizes the file order within each epoch.
    file_queue = tf.train.string_input_producer(data_files, shuffle=True)
    # Reader that pulls fixed-size records from the files in the queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, value = reader.read(file_queue)

    # Decode the raw record into bytes, then split it into label and image.
    record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
    # The label sits at the start of the record: begin at offset 0, take
    # LABEL_BYTES elements (tf.slice needs both begin and size).
    label = tf.cast(tf.slice(record, [0], [LABEL_BYTES]), tf.int32)
    # The image bytes follow the label and are stored depth-major.
    depth_major = tf.reshape(
        tf.slice(record, [LABEL_BYTES], [IMAGE_BYTES]),
        [IMAGE_DEPTH, IMAGE_SIZE, IMAGE_SIZE])
    # Convert to height * width * depth.
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

    # Shuffling example queue that decouples reading from batching.
    example_queue = tf.RandomShuffleQueue(
        capacity=16 * batch_size,
        min_after_dequeue=8 * batch_size,
        dtypes=[tf.float32, tf.int32],
        shapes=[[IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH], [1]])
    num_threads = 4

    # Enqueue op for single examples; run by num_threads parallel threads
    # registered through a QueueRunner on the default graph.
    example_enqueue_op = example_queue.enqueue([image, label])
    tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
        example_queue, [example_enqueue_op] * num_threads))

    # Dequeue a batch and turn the integer labels into one-hot rows by
    # scattering 1.0 at every (row index, class id) coordinate.
    images, labels = example_queue.dequeue_many(batch_size)
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(
        tf.concat(values=[indices, labels], axis=1),
        [batch_size, NUM_CLASSES], 1.0, 0.0)

    # Sanity-check the static shapes of the batch tensors.
    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    assert images.get_shape()[-1] == 3
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == NUM_CLASSES

    return images, labels

In [None]:
BATCH_SIZE = 24
x = tf.placeholder(tf.float32,[BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH],'x')
# NOTE(review): read_cifar10 returns float32 one-hot labels shaped
# [BATCH_SIZE, NUM_CLASSES], which does not match this int32 [BATCH_SIZE, 1]
# placeholder -- confirm which label format the model expects.
y = tf.placeholder(tf.int32, [BATCH_SIZE, 1],'y')

# NOTE(review): read_cifar10 reads its input as raw fixed-length records, so a
# compressed archive will not decode into valid images. Point this at the
# extracted data file(s) instead -- TODO confirm the correct local path.
CIFAR10_DATA_FILE = '/Users/yanghao/github/data/dl_data/cifar-10-python.tar.gz'

# Build the input pipeline before opening the session so every op and queue
# runner already exists in the graph when the session starts.
batch_xs, batch_ys = read_cifar10(CIFAR10_DATA_FILE, BATCH_SIZE)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The batch tensors are fed by queue-runner threads; without starting them
    # any attempt to evaluate a batch would block forever.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # No train_op is defined in the cells shown so far, so fetch one batch
        # to verify the pipeline instead of running a training step.
        images_value, labels_value = sess.run([batch_xs, batch_ys])
    finally:
        # Always stop and join the reader threads, even if the fetch fails.
        coord.request_stop()
        coord.join(threads)

# Show the shapes of the fetched batch as the cell output.
images_value.shape, labels_value.shape