In [2]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np

mnist_path = './mnist.npz'
def load_mnist():
    
    path = mnist_path #放置mnist.py的目录。注意斜杠
    f = np.load(path)
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']
    f.close()
    return ((x_train, y_train), (x_test, y_test))
(x_train, y_train), (x_test, y_test) = load_mnist()
x_train = np.reshape(x_train,[-1,784])
x_test = np.reshape(x_test,[-1,784])
print(x_train.shape)


(60000, 784)


In [4]:
class MINIST():
    def __init__(self,x,y,num,need_shuffle):
        sample_images = []
        sample_lables = []
        for i in range(num):
            sample_images.append(x[i])
            sample_lables.append(y[i])
        sample_images = np.vstack(sample_images)
        sample_lables = np.hstack(sample_lables)
        self.sample_images = sample_images
        self.sample_lables = sample_lables
        print(self.sample_images.shape)
        print(self.sample_lables.shape)
        self.num_examples = self.sample_images.shape[0]
        print(self.num_examples)
        self.need_shuffle = need_shuffle
        self.indicator = 0
        if self.need_shuffle:
            self.shuffle()
    
    def shuffle(self):
        p = np.random.permutation(self.num_examples)
        self.sample_images = self.sample_images[p]
        self.sample_lables = self.sample_lables[p]
    
    def next_batch(self,batch_size):
        end_indicator = self.indicator + batch_size
        if end_indicator > self.num_examples:
            if self.need_shuffle:
                self.shuffle()
                self.indicator = 0
                end_indicator = self.indicator + batch_size
            else:
                raise Exception("have no more examples")
        
        if end_indicator > self.num_examples:
            raise("the batch_size is too large")
        
        batch_date = self.sample_images[self.indicator:end_indicator]
        batch_labels = self.sample_lables[self.indicator:end_indicator]
        self._indicator = end_indicator
        return (batch_date , batch_labels)
       
    
train_data = MINIST(x_train,y_train,1000,True)



(1000, 784)
(1000,)
1000


In [26]:
if __name__ == '__main__':
    # 读入数据。
    with tf.name_scope("input"):
        # 训练图像的占位符。
        x = tf.placeholder(tf.float32, [None, 784])
        # 训练图像对应分类（标签）的占位符。
        y = tf.placeholder(tf.int64, [None])
        # 因为卷积要求输入的是4维数据，因此对形状进行转换。
        # NHWC(默认)   NCHW
        # N number样本的数量
        # H height图像的高度
        # W width图像的宽度v    
        # C channel图像的通道数
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # 卷积层1。
    with tf.name_scope("conv_layer1"):
        # 定义权重。（w就是滑动窗口）
        # 5, 5, 1, 32  =>  滑动窗口的高度，滑动窗口的宽度，输入通道数，输出通道数。
        w = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1), name="w")
        # 定义偏置。
        b = tf.Variable(tf.constant(0.0, shape=[32]), name="b")
        # 进行卷积计算。
        # strides=[1, 1, 1, 1] 步幅。针对输入的NHWC定义的增量。
        # padding： SAME 与VALID。SAME，只要滑动窗口不全移除输入区域就可以。
        # VALID，滑动窗口必须完全在输入区域之内。
        conv = tf.nn.bias_add(tf.nn.conv2d(x_image, w, strides=[1, 1, 1, 1], padding='SAME'), b, name="conv")
        # 使用激活函数进行激活。
        activation = tf.nn.relu(conv)
        # 池化操作。
        # ksize：池化的窗口。
        pool = tf.nn.max_pool(activation, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # 卷积层2。
    with tf.name_scope("conv_layer2"):
        w = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.0, shape=[64]), name="b")
        conv = tf.nn.bias_add(tf.nn.conv2d(pool, w, strides=[1, 1, 1, 1], padding='SAME'), b, name="conv")
        activation = tf.nn.relu(conv)
        pool = tf.nn.max_pool(activation, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        
    # 全连接层1。
    with tf.name_scope("full_layer1"):
        # 7 * 7 * 64
        # 原始图像是28 * 28，经过卷积与激励后，没有改变，经过2 * 2池化后，变成 14 * 14。
        # 第一层卷积之后结果为14 * 14，经过第二层卷积与激励后，没有改变，经过2 * 2池化后，变成 7 * 7。
        # 第二层卷积之后，我们图像的形状为  NHWC  =>  [N, 7, 7, 64]
        # 4维变成2二维，将后面三维拉伸成为1维。  =》  [N, 7 * 7 * 64]
        w = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.0, shape=[1024]), name="b")
        # 将第二层卷积之后的结果转换成二维结构。
        pool = tf.reshape(pool, [-1, 7 * 7 * 64])
        activation = tf.nn.relu(tf.matmul(pool, w) + b)
        # 执行dropout（随机丢弃）
        keep_prob = tf.placeholder(tf.float32)
        # 进行随机丢弃，keep_prob指定神经元的保留率。
        drop = tf.nn.dropout(activation, keep_prob)
        
    # 全连接层2。
    with tf.name_scope("full_layer2"):
        w = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.0, shape=[10]), name="b")
        logits = tf.matmul(drop, w) + b
    
    # 损失值与准确率计算层。
    with tf.name_scope("compute"):                   
        # 计算损失值。
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
        tf.summary.scalar('loss',loss)
        
        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
        # 计算准确率
        predict  = tf.argmax(logits ,1)
        correct = tf.equal(predict,y)
        
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar("accuracy",accuracy)
        merge = tf.summary.merge_all()

LOG_DIR = '.'
out_dir = os.path.join(LOG_DIR,'sun')
if not os.path.exists():
    mkdir(out_dir)

        
   

    
    batch_size = 32
    with tf.Session() as sess:
        # 对全局变量进行初始化。
        sess.run(tf.global_variables_initializer())
        # 可以尝试更大的次数，可以将准确率提升到99%以上。
        for i in range(1, 100):
            train_writer = tf.summary.FileWriter('log/train',sess.graph)
            test_writer = tf.summary.FileWriter('log/tets')
            
            batch_data , batch_labels = train_data.sample_images,train_data.sample_lables
            # 每100步报告一次在验证集上的准确度
            merge_op = [accuracy]
            should_merge = ((i+1)% 10 ==0)
            if should_merge:
                merge_op.append(merge)
            merge_op_result = sess.run(merge_op,
                    feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.0})
                
                print("step {i}, training accuracy {train_accuracy * 100:.2f}%")
            train_writer.add_summery(merge_op_result[-1],i+1)


step {i}, training accuracy {train_accuracy * 100:.2f}%
step {i}, training accuracy {train_accuracy * 100:.2f}%
step {i}, training accuracy {train_accuracy * 100:.2f}%
step {i}, training accuracy {train_accuracy * 100:.2f}%


In [21]:
arr = np.array([[1,2,10],[4,5,6],[0,8,9]])
print(arr)
z = np.argmax(arr,0)
print(z)

[[ 1  2 10]
 [ 4  5  6]
 [ 0  8  9]]
[1 2 0]
