In [None]:
#训练神经网络的过程大概可以分为以下三个步骤：
# 1 定义神经网络的结构和前向传播的输出结果
# 2 定义损失函数以及选择反响传播优化的算法
# 3 生成会话并且在训练数据上反复运行反向传播优化算法。

In [39]:
import tensorflow as tf
from numpy.random import RandomState
print(tf.__version__)

1.12.0


In [40]:
#定义训练数据的大小
batch_size = 64

#定义神经网络的参数
w1 = tf.Variable(tf.random_normal((2, 3), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((3, 1), stddev=1, seed=1))

#在shape的一个维度使用None可以方便使用不同的batch大小。
#在训练时需要把数据分成比较小的batch，但是在测试时，可以一次性使用全部的数据。
#当数据集比较小时，这样比较方便测试，但数据集比较大时，将大量数据放入一个batch可能会导致内存溢出。
x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

#定义神经网络的前向传播过程。
#a = tf.matmul(x, w1)
#y = tf.matmul(a, w2)

#tf.nn.relu 为非线性激活函数
a = tf.nn.relu(tf.matmul(x, w1), name=None)
y = tf.nn.relu(tf.matmul(a, w2), name=None)

#定义损失函数和反响传播的算法。
y = tf.sigmoid(y)
cross_entropy = -tf.reduce_mean(
    y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))
    +(1-y) * tf.log(tf.clip_by_value(1-y, 1e-10, 1.0))
)
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

In [41]:
#通过随机数生成一个模拟数据集。
rdm = RandomState(1)
dataset_size = 12800000

X = rdm.rand(dataset_size, 2)
#定义规则来给出样本的标签，在这里所有x1+x2<1的样例都被认为是正样本。其他为负样本。
#大部分解决分类问题的神经网络都会采用0和1的表示方法来表示正样本和负样本。
Y = [[int(x1+x2 < 1)] for (x1, x2) in X]


In [42]:
#创建一个会话来运行tensorflow程序。
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    
    #初始化变量
    sess.run(init_op)
    
    #训练前的神经网络的参数
    print(sess.run(w1))
    print(sess.run(w2))
    
    #设定训练的轮数。
    STEPS = 100
    for i in range(STEPS):
        #每次选取batch_size个样本进行训练。
        start = (i * batch_size) % dataset_size
        end = min(start+batch_size, dataset_size)
        
        #通过选取的样本训练神经网络并更新参数。
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 1000 == 0:
            #每隔一段时间计算在所有数据上的交叉熵并输出。
            total_cross_entropy = sess.run(cross_entropy, feed_dict={x: X, y_: Y})
            print(i, total_cross_entropy)
    #在训练后神经网络的参数值。
    print(sess.run(w1))
    print(sess.run(w2))

[[-0.8113182   1.4845988   0.06532937]
 [-2.4427042   0.0992484   0.5912243 ]]
[[-0.8113182 ]
 [ 1.4845988 ]
 [ 0.06532937]]
0 0.5186172
[[-0.8113182   1.5861262   0.18124548]
 [-2.4427042   0.20038475  0.7067059 ]]
[[-0.8113182 ]
 [ 1.5871149 ]
 [ 0.17046751]]


In [60]:
import tensorflow as tf
from numpy.random import RandomState

batch_size = 8

x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

w1 = tf.Variable(tf.random_normal((2, 1), stddev=1, seed=1))

y = tf.matmul(x, w1)

loss_less = 10
loss_more = 1

loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * loss_more, (y_ - y) * loss_less))

train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

rdm = RandomState(1)
dataset_size = 128

X = rdm.rand(dataset_size, 2)
Y = [[x1 + x2 + rdm.rand()/10.0-0.05] for (x1, x2) in X]

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 5000
    for i in range(STEPS):
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
    print(sess.run(w1))

[[1.0193471]
 [1.0428091]]


In [61]:
#TFLearn


In [11]:
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
 
import tflearn.datasets.mnist as mnist

trainX, trainY, testX, testY = mnist.load_data(data_dir="E:\mnist", one_hot=True)
# 将图像数据resize成卷积卷积神经网络输入的格式。
trainX = trainX.reshape([-1, 28, 28, 1])
testX = testX.reshape([-1, 28, 28, 1])
 
# 构建神经网络。
net = input_data(shape=[None, 28, 28, 1], name='input')
net = conv_2d(net, 32, 5, activation='relu')
net = max_pool_2d(net, 2)
net = conv_2d(net, 64, 5, activation='relu')
net = max_pool_2d(net, 2)
net = fully_connected(net, 500, activation='relu')
net = fully_connected(net, 10, activation='softmax')
# 定义学习任务。指定优化器为sgd，学习率为0.01，损失函数为交叉熵。
net = regression(net, optimizer='sgd', learning_rate=0.01, loss='categorical_crossentropy')

Extracting E:\mnist\train-images-idx3-ubyte.gz
Extracting E:\mnist\train-labels-idx1-ubyte.gz
Extracting E:\mnist\t10k-images-idx3-ubyte.gz
Extracting E:\mnist\t10k-labels-idx1-ubyte.gz


In [12]:
import time
Timestamp1 = time.clock()
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=20,
          validation_set=([testX, testY]),
          show_metric=True)
Timestamp2 = time.clock()
print("Total Time used:", Timestamp2 - Timestamp1)

IndexError: list index out of range

In [7]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras import backend as K

num_classes = 10
img_rows, img_cols = 28, 28
 
# 通过Keras封装好的API加载MNIST数据。其中trainX就是一个60000 * 28 * 28的数组，
# trainY是每一张图片对应的数字。
(trainX, trainY), (testX, testY) = mnist.load_data(path='E:\mnist\mnist.npz')

# 根据对图像编码的格式要求来设置输入层的格式。
if K.image_data_format() == 'channels_first':
    trainX = trainX.reshape(trainX.shape[0], 1, img_rows, img_cols)
    testX = testX.reshape(testX.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    trainX = trainX.reshape(trainX.shape[0], img_rows, img_cols, 1)
    testX = testX.reshape(testX.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    
trainX = trainX.astype('float32')
testX = testX.astype('float32')
trainX /= 255.0
testX /= 255.0
 
# 将标准答案转化为需要的格式（one-hot编码）。
trainY = keras.utils.to_categorical(trainY, num_classes)
testY = keras.utils.to_categorical(testY, num_classes)

In [8]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
 
# 定义损失函数、优化函数和评测方法。
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(),
              metrics=['accuracy'])

In [9]:
model.fit(trainX, trainY,
          batch_size=128,
          epochs=10,
          validation_data=(testX, testY))
 
# 在测试数据上计算准确率。
score = model.evaluate(testX, testY)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.05965520703401417
Test accuracy: 0.9813
