In [1]:
%tensorflow_version 2.x  #选择tensorflow 2.0
import tensorflow as tf
tf.__version__

`%tensorflow_version` only switches the major version: `1.x` or `2.x`.
You set: `2.x  #选择tensorflow 2.0`. This will be interpreted as: `2.x`.


TensorFlow 2.x selected.


'2.0.0-rc2'

In [2]:
print("GPU Available ", tf.test.is_gpu_available)

GPU Available  <function is_gpu_available at 0x7ff0a476f400>


In [3]:
!nvidia-smi  #gpu信息

Tue Oct  8 13:57:12 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.40       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [4]:
A = tf.constant([[1,2],[3,4]])
B = tf.constant([[5,6],[7,8]])
C = tf.matmul(A,B)
print(C)

tf.Tensor(
[[19 22]
 [43 50]], shape=(2, 2), dtype=int32)


In [5]:
import numpy as np
random_float = tf.random.uniform(shape=())
zero_vector = tf.zeros(shape=(2))

print(A.shape)
print(A.dtype)
print(A.numpy())

(2, 2)
<dtype: 'int32'>
[[1 2]
 [3 4]]


In [0]:
D = tf.add(A,B)

In [8]:
x = tf.Variable(initial_value=3.)
with tf.GradientTape() as tape:   # 在tf.GradientTape()的上下文内，所有计算步骤都会被记录以用于求导
    y = tf.square(x)
y_grad = tape.gradient(y,x)     # 计算y=x^2关于x在x=3的导数
print([y,y_grad])

[<tf.Tensor: id=23, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=27, shape=(), dtype=float32, numpy=6.0>]


In [9]:
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)
with tf.GradientTape() as tape:
    L = 0.5 * tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
w_grad, b_grad = tape.gradient(L, [w, b])        # 计算L(w,b)关于w,b的偏导数
print([L.numpy(), w_grad.numpy(), b_grad.numpy()])

[62.5, array([[35.],
       [50.]], dtype=float32), 15.0]




---
# **基础示例：线性回归**


In [0]:
X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)

X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())  #正则化x
y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())

In [11]:
# numpy 版本
a, b = 0, 0

num_epoch = 10000
learning_rate = 1e-3
for e in range(num_epoch):
    # 手动计算损失函数关于自变量（模型参数）的梯度
    y_pred = a * X + b
    grad_a, grad_b = (y_pred - y).dot(X), (y_pred - y).sum()

    # 更新参数
    a, b = a - learning_rate * grad_a, b - learning_rate * grad_b

print(a, b)

0.9763702027872221 0.057564988311377796


In [14]:
# tf2.0 版本
X = tf.constant(X)
y = tf.constant(y)

a = tf.Variable(initial_value=0.)
b = tf.Variable(initial_value=0.)
variables = [a,b]

num_epoch = 10000
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)     #tf.keras.optimizer
for e in range(num_epoch):
    # 使用tf.GradientTape()记录损失函数的梯度信息
    with tf.GradientTape() as tape:  
        y_pred = a*X + b   #以后会用model()类代替, tf.keras.Model, tf.keras.layers
        loss = 0.5*tf.reduce_sum(tf.square(y_pred-y))    # tf.keras.losses
    # TensorFlow自动计算损失函数关于自变量（模型参数）的梯度
    grads = tape.gradient(loss, variables)
    # TensorFlow自动根据梯度更新参数
    optimizer.apply_gradients(grads_and_vars=zip(grads,variables))

print(a,b)
#模型的评估 tf.keras.metrics

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.97637> <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.057565063>


In [0]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()     # Python 2 下使用 super(MyModel, self).__init__()
        # 此处添加初始化代码（包含 call 方法中会用到的层），例如
        # layer1 = tf.keras.layers.BuiltInLayer(...)
        # layer2 = MyCustomLayer(...)

    def call(self, input):
        # 此处添加模型调用的代码（处理输入并返回输出），例如
        # x = layer1(input)
        # output = layer2(x)
        return output

    # 还可以添加自定义的方法

![Keras模型类定义示意图](https://tf.wiki/_images/model.png)

In [16]:
# 通过模型类的方式改写前一个代码
X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
y = tf.constant([[10.0], [20.0]])

class Linear(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(  #全连接层
            units=1,   #units ：输出张量的维度
            activation=None,  #activation：激活函数，默认为无激活函数。常用的激活函数包括tf.nn.relu、tf.nn.tanh和tf.nn.sigmoid
            kernel_initializer=tf.zeros_initializer(),  # W kernel_initializer 、 bias_initializer ：权重矩阵kernel和偏置向量bias两个变量的初始化器。默认为tf.glorot_uniform_initializer。设置为tf.zeros_initializer表示将两个变量均初始化为全0
            bias_initializer=tf.zeros_initializer()   # b use_bias：是否加入偏置向量bias,默认为True 
        )

    def call(self,input):
        output = self.dense(input)
        return output

# 以下代码结构与前节类似
model = Linear()   #实例化网络Linear
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)  #确定optimizer优化器
for i in range(100):
    with tf.GradientTape() as tape:      #打开运算记录
        y_pred = model(X)      # 调用模型 y_pred = model(X) 而不是显式写出 y_pred = a * X + b
        loss = tf.reduce_mean(tf.square(y_pred - y))
    grads = tape.gradient(loss, model.variables)    # 使用 model.variables 这一属性直接获得模型中的所有变量
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
print(model.variables)

[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[0.40784496],
       [1.191065  ],
       [1.9742855 ]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.78322077], dtype=float32)>]




---
# **基础示例：多层感知机（MLP）**


多层感知机或多层全连接神经网络 流程：
1. 使用 **tf.keras.datasets** 获得数据集并预处理

2. 使用 **tf.keras.Model** 和 **tf.keras.layers** 构建模型

3. 构建模型训练流程，使用 **tf.keras.losses** 计算损失函数，并使用 **tf.keras.optimizer** 优化模型

4. 构建模型评估流程，使用 **tf.keras.metrics** 计算评估指标

## 数据获取及预处理： **tf.keras.datasets**

In [0]:
class MNISTLoader():
    def __init__(self):
        mnist=tf.keras.datasets.mnist
        (self.train_data,self.train_label),(self.test_data,self.test_label)=mnist.load_data()
        # MNIST中的图像默认为uint8（0-255的数字）。以下代码将其归一化到0-1之间的浮点数，并在最后增加一维作为颜色通道
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]  np.expand_dims(axis=-1)在最后增加一个维度
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self,batch_size):
       # 从数据集中随机取出batch_size个元素并返回
        index = np.random.randint(0,np.shape(self.train_data)[0],batch_size)
        return self.train_data[index,:],self.train_label[index] 

## 模型的构建： tf.keras.Model 和 tf.keras.layers

In [0]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten() # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100,activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self,inputs):          # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)       # [batch_size, 100]
        x = self.dense2(x)
        output = tf.nn.softmax(x)    # [batch_size, 10]
        return output

## 模型的训练： tf.keras.losses 和 tf.keras.optimizer

先定义模型超参数：


In [0]:
num_epochs = 5
batch_size = 50
learning_rate = 0.001

实例化模型和数据读取类，并实例化一个 tf.keras.optimizer 的优化器（这里使用常用的 Adam 优化器）：

In [21]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


然后迭代进行以下步骤：

1. 从 DataLoader 中随机取一批训练数据；

2. 将这批数据送入模型，计算出模型的预测值；

3. 将模型预测值与真实值进行比较，计算损失函数（loss）。这里使用 tf.keras.losses 中的交叉熵函数作为损失函数；

4. 计算损失函数关于模型变量的导数；

5. 将求出的导数值传入优化器，使用优化器的 apply_gradients 方法更新模型参数以最小化损失函数。

In [22]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X,y=data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y,y_pred=y_pred)  #sparse_categorical_crossentropy可直接传入标签，不用one-hot
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index,loss.numpy()))
    grads = tape.gradient(loss,model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads,model.variables))

batch 0: loss 2.250197
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
batch 1: loss 2.288867
batch 2: loss 2.222971
batch 3: loss 2.150999
batch 4: loss 1.906977
batch 5: loss 2.006940
batch 6: loss 1.782133
batch 7: loss 1.877084
batch 8: loss 1.842059
batch 9: loss 1.605330
batch 10: loss 1.647323
batch 11: loss 1.551296
batch 12: loss 1.536149
batch 13: loss 1.481353
batch 14: loss 1.503345
batch 15: loss 1.419806
batch 16: loss 1.449546
batch 17: loss 1.210968
batch 18: loss 1.265529
batch 19: loss 1.255735
batch 20: loss 1.198387
batch 21: loss 1.225810
batch 22: loss 0.989654
batch 23: loss 0.973528
batch 24: loss 0.984867
batch 25: loss 0.937330
batch 26: loss 1.048070
batch 27: loss 0.929489
batch 28: loss 0.896890
batch 29: loss 0.941883
batch 30: loss 0.878296
batch 31: loss 0.837311
batch 32: loss 0.770499
batch 33: loss 0.804016
batch 34: loss 0.700481
batch 35: loss 0.727899
batch 36: loss 0.695580
batch 37: loss 0.989866
batc

## 模型的评估： tf.keras.metrics

最后，我们使用测试集评估模型的性能。这里，我们使用 **tf.keras.metrics** 中的 **SparseCategoricalAccuracy** 评估器来评估模型在测试集上的性能，该评估器能够对模型预测的结果与真实结果进行比较，并输出预测正确的样本数占总样本数的比例。我们迭代测试数据集，每次通过 **update_state()** 方法向评估器输入两个参数： y_pred 和 y_true ，即模型预测出的结果和真实结果。评估器具有内部变量来保存当前评估指标相关的参数数值（例如当前已传入的累计样本数和当前预测正确的样本数）。迭代结束后，我们使用 result() 方法输出最终的评估指标值（预测正确的样本数占总样本数的比例）。

In [24]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data//batch_size)
for batch_index in range(num_batches):
    start_index,end_index = batch_index*batch_size,(batch_index+1)*batch_size
    y_pred = model.predict(data_loader.test_data[start_index:end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index:end_index],y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())   

test accuracy: 0.972300
