In [1]:
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


In [2]:
tf.enable_eager_execution()  # Turn on eager execution (TensorFlow's dynamic-graph mode)

# Tensorflow 基础

## 张量（tensor）
- tensor 有两个属性：shape 和 dtype

In [7]:
# Scalar tensors: add two integer constants with tf.add.
a = tf.constant(1)
b = tf.constant(2)
c = tf.add(a, b)
print(c)

# 2x2 integer matrices: matrix product of A and B with tf.matmul.
A = tf.constant([[1,2], [3,4]])
B = tf.constant([[5,6], [7,8]])
C = tf.matmul(A, B)
print(C)

tf.Tensor(3, shape=(), dtype=int32)


## 变量（variable）
- 变量同样具有形状（shape）和类型（dtype）属性
- 使用变量需要有一个初始化过程，可以通过在 tf.get_variable() 中指定 initializer 参数来指定所使用的初始化器
- 变量与普通张量的一个重要区别是其默认能够被TensorFlow的自动求导机制所求导，因此往往被用于定义机器学习模型的参数

In [None]:
# Variable initialization: create a variable named 'x' with shape [1],
# using a constant initializer with value 3.0.
x = tf.get_variable('x', shape=[1], initializer=tf.constant_initializer(3.))

In [9]:
# Differentiate a single-variable function.
# tf.GradientTape() is an automatic-differentiation recorder: the variables and
# computation steps used inside it are recorded automatically.
# `x` is the variable created in the variable-initialization cell.
with tf.GradientTape() as tape:  # every computation step inside the tf.GradientTape() context is recorded for differentiation
    y = tf.square(x)
y_grad = tape.gradient(y, x)  # derivative of y with respect to x

In [20]:
# Partial derivatives of a multivariate function L(w, b).
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])
w = tf.get_variable(
    'w',
    shape=[2,1],
    initializer=tf.constant_initializer([[1.], [2.]]),
)
b = tf.get_variable(
    'b',
    shape=[1],
    initializer=tf.constant_initializer([1.]),
)
with tf.GradientTape() as tape:
    # L = 0.5 * sum((X w + b - y)^2)
    residual = tf.matmul(X, w) + b - y
    L = 0.5 * tf.reduce_sum(tf.square(residual))
# Partial derivatives of L with respect to w and b, in that order.
partials = tape.gradient(L, [w, b])
w_grad, b_grad = partials
print([t.numpy() for t in (L, w_grad, b_grad)])

[62.5, array([[35.],
       [50.]], dtype=float32), array([15.], dtype=float32)]


## 基础示例：线性回归

In [21]:
# NOTE(review): X and y are not defined in this cell. If the cells run in order they are
# the tensors left over from the partial-derivative example above — confirm that this is
# the intended regression data.
X = tf.constant(X)
y = tf.constant(y)

In [22]:
# Scalar (shape=[]) float32 parameters a and b, both created with tf.zeros_initializer.
# Note: the names `a` and `b` were already bound by earlier cells and are rebound here.
a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
variables = [a, b]  # parameter list handed to tape.gradient / apply_gradients in the training loop

In [26]:
num_epoch = 10000
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
for e in range(num_epoch):
    # Record only the forward pass (prediction and loss) on the tape.
    with tf.GradientTape() as tape:
        y_pred = a * X + b
        loss = 0.5 * tf.reduce_sum(tf.square(y_pred - y))
    # Gradients of the loss with respect to the model parameters, computed after
    # leaving the tape context (the same layout the other training loops in this
    # notebook use).
    grads = tape.gradient(loss, variables)
    # Let the optimizer update the parameters from those gradients.
    optimizer.apply_gradients(grads_and_vars=zip(grads, variables))

# TensorFlow 模型

## 线性回归

In [50]:
# Toy data for the Keras linear model: two samples with three features each,
# and one target value per sample.
X = tf.constant([[1., 2., 3.], [4., 5., 6.]])
y = tf.constant([[10.], [20.]])

class Linear(tf.keras.Model):
    """Linear model: a single Dense layer with one output unit and zero-initialized kernel and bias."""

    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(
            units=1,
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer(),
        )

    def call(self, input):
        """Apply the dense layer to `input` and return the result."""
        return self.dense(input)

model = Linear()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
for step in range(100):
    # Forward pass and mean-squared-error loss, recorded on the tape.
    with tf.GradientTape() as tape:
        y_pred = model(X)
        squared_error = tf.square(y_pred - y)
        loss = tf.reduce_mean(squared_error)
    # Read the model's variables after the forward pass, then take one
    # gradient-descent step on them.
    params = model.variables
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(grads_and_vars=zip(grads, params))
print(model.variables)

## 多层感知机（MLP，深度前馈网络）

In [16]:
# DataLoader: reads the MNIST dataset and serves random training batches.
class DataLoader():
    """Holds the MNIST train/test images and labels and samples training batches."""

    def __init__(self):
        """Load MNIST and keep images as loaded; labels are converted to int32 arrays."""
        dataset = tf.contrib.learn.datasets.load_dataset('mnist')
        train_split = dataset.train
        self.train_data = train_split.images
        self.train_labels = np.asarray(train_split.labels, dtype=np.int32)
        test_split = dataset.test
        self.eval_data = test_split.images
        self.eval_labels = np.asarray(test_split.labels, dtype=np.int32)

    def get_batch(self, batch_size):
        """Return `(images, labels)` for `batch_size` randomly chosen training samples.

        Row indices are drawn with np.random.randint, so the same sample may
        appear more than once in a batch.
        """
        num_train = np.shape(self.train_data)[0]
        picks = np.random.randint(0, num_train, batch_size)
        batch_images = self.train_data[picks, :]
        batch_labels = self.train_labels[picks]
        return batch_images, batch_labels

# Model definition
class MLP(tf.keras.Model):
    """Multilayer perceptron: a 100-unit ReLU hidden layer followed by a 10-unit output layer."""

    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)  # hidden layer with 100 units
        self.dense2 = tf.keras.layers.Dense(units=10)  # output layer

    def call(self, inputs):
        """Return the logits for each sample in `inputs`."""
        x = self.dense1(inputs)
        x = self.dense2(x)
        return x

    def predict(self, inputs):
        """Return, for each sample, the index of its largest logit (the predicted digit).

        NOTE(review): this method name shadows the `predict` inherited from
        tf.keras.Model — confirm that the override is intentional.
        """
        logits = self(inputs)
        return tf.argmax(logits, axis=-1)

In [36]:

# Model hyperparameters
num_batches = 1000
batch_size = 50
learning_rate = 1e-3

# Instantiate the model, the data loader and the optimizer
model = MLP()
data_loader = DataLoader()
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Each iteration performs the following steps:
#   1. draw a random batch of training data from the DataLoader;
#   2. feed the batch to the model to obtain its predictions;
#   3. compare the predictions with the true labels to compute the loss;
#   4. compute the derivatives of the loss with respect to the model variables;
#   5. let the optimizer update the model parameters to reduce the loss.

for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    batch_images = tf.convert_to_tensor(X)
    with tf.GradientTape() as tape:
        y_logit_pred = model(batch_images)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
    params = model.variables
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(grads_and_vars=zip(grads, params))

# Model evaluation: fraction of evaluation samples whose predicted label equals the true label.
num_eval_samples = np.shape(data_loader.eval_labels)[0]
y_pred = model.predict(data_loader.eval_data).numpy()
# np.sum counts the matches in one vectorized call instead of iterating element by element.
print("test accuracy: {}".format(np.sum(y_pred == data_loader.eval_labels) / num_eval_samples))

Extracting MNIST-data\train-images-idx3-ubyte.gz
Extracting MNIST-data\train-labels-idx1-ubyte.gz
Extracting MNIST-data\t10k-images-idx3-ubyte.gz
Extracting MNIST-data\t10k-labels-idx1-ubyte.gz


In [31]:
# Number of evaluation samples (same statement as in the training/evaluation cell).
num_eval_samples = np.shape(data_loader.eval_labels)[0]

In [40]:
# Predicted labels for the evaluation set as a NumPy array (same statement as in the training/evaluation cell).
y_pred = model.predict(data_loader.eval_data).numpy()

dd <__main__.MLP object at 0x00000202834252E8>


In [32]:
# Accuracy = matching predictions / number of evaluation samples; np.sum counts the matches in one vectorized call.
print("test accuracy: {}".format(np.sum(y_pred == data_loader.eval_labels) / num_eval_samples))

test accuracy: 0.9464


In [35]:
# Display the predicted labels.
y_pred

array([7, 2, 1, ..., 4, 5, 6], dtype=int64)

In [45]:
# Convert the evaluation images to a tensor and display it.
tf.convert_to_tensor(data_loader.eval_data)

<tf.Tensor: id=192680, shape=(10000, 784), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [46]:
# Model outputs (logits) for the evaluation set, as a NumPy array.
# Note: this rebinds `a`, which an earlier cell used for a regression parameter.
a = model(data_loader.eval_data).numpy()

In [47]:
# Index of the largest value along the last axis — the same computation MLP.predict performs on the logits.
tf.argmax(a, axis=-1)

<tf.Tensor: id=192719, shape=(10000,), dtype=int64, numpy=array([7, 2, 1, ..., 4, 5, 6], dtype=int64)>

In [49]:
# Shape of the logits array.
a.shape

(10000, 10)