<a href="https://colab.research.google.com/github/henryji96/TensorFlow-2.x-Tutorials/blob/master/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E4%B8%8ETensorFlow%E5%85%A5%E9%97%A8%E5%AE%9E%E6%88%98-%E6%BA%90%E7%A0%81%E5%92%8CPPT/lesson13-%E5%89%8D%E5%90%91%E4%BC%A0%E6%92%AD%EF%BC%88%E5%BC%A0%E9%87%8F%EF%BC%89-%E5%AE%9E%E6%88%98/forward.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Ask Colab for the TensorFlow 2.x runtime.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best-effort: any exception raised above is ignored so the notebook
  # continues with whatever TensorFlow is already installed.
  pass

TensorFlow 2.x selected.


In [0]:
import os

# TF_CPP_MIN_LOG_LEVEL filters TensorFlow's native (C++) log output:
#   '0' - show all messages (default)
#   '1' - hide INFO
#   '2' - hide INFO and WARNING
#   '3' - hide INFO, WARNING and ERROR
# It is set before TensorFlow is imported so the value is already in place
# when the native runtime first initializes its logging.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

In [0]:
# Show which TensorFlow version the kernel imported; as the last expression
# of the cell, its value is rendered as the cell output.
tf.__version__

'2.0.0-beta1'

In [0]:
# Report GPU availability, then insist that the first GPU is visible:
# the cell aborts with SystemError when '/device:GPU:0' is not found.
gpu_available = tf.test.is_gpu_available()
print("Is there a GPU available: ", gpu_available)

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Is there a GPU available:  True
Found GPU at: /device:GPU:0


In [0]:
# NOTE(review): the object returned by tf.device(...) is discarded here (it is
# not used in a `with` block), so this line presumably does not pin the ops in
# later cells to the GPU -- confirm, and wrap the relevant ops in
# `with tf.device(...):` if explicit placement is wanted.
tf.device('device:GPU:0')

## 1 load mnist

### 1.1 load data

In [0]:
# Load MNIST; it is downloaded and parsed automatically when no cached copy exists.
# Only the training split is kept:
#   x: [60k, 28, 28] images
#   y: [60k] labels
train_split, _ = datasets.mnist.load_data()
x, y = train_split

# Show the container type and element dtype of both arrays.
for array in (x, y):
  print(type(array), array.dtype)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8


### 1.2 numpy to tensor & normalize x

In [0]:
# Convert the NumPy arrays to tensors and rescale the pixels: [0, 255] => [0, 1.].
# Note: x is reassigned from itself, so running this cell twice rescales it twice.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = x / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

# Type and dtype after conversion.
for tensor in (x, y):
  print(type(tensor), tensor.dtype)

print(x.shape, y.shape)

# Value range of the images and of the labels.
for tensor in (x, y):
  print(tf.reduce_min(tensor), tf.reduce_max(tensor))

<class 'tensorflow.python.framework.ops.EagerTensor'> <dtype: 'float32'>
<class 'tensorflow.python.framework.ops.EagerTensor'> <dtype: 'int32'>
(60000, 28, 28) (60000,)
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)


### 1.3 tensor to Dataset instance

In [0]:
# Wrap the (image, label) tensors in a Dataset and serve them in batches of 128.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(128)

# Pull a single batch through an iterator to inspect its shapes.
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

batch: (128, 28, 28) (128,)


## 2 feed forward

In [0]:
def _dense_params(dim_in, dim_out, stddev=0.1):
  """Create the trainable parameters of one fully connected layer.

  Returns a (weight, bias) pair: weight is [dim_in, dim_out] drawn from a
  truncated normal distribution, bias is [dim_out] filled with zeros.
  stddev defaults to 0.1 because stddev=1 made the gradients explode.
  """
  weight = tf.Variable(tf.random.truncated_normal([dim_in, dim_out], stddev=stddev))
  bias = tf.Variable(tf.zeros([dim_out]))
  return weight, bias


# Network layout: [b, 784] => [b, 256] => [b, 128] => [b, 10]
w1, b1 = _dense_params(784, 256)
w2, b2 = _dense_params(256, 128)
w3, b3 = _dense_params(128, 10)

In [0]:
lr = 1e-3

# Trainable parameters, in the same order as the gradients returned below.
params = [w1, b1, w2, b2, w3, b3]

for epoch in range(2): # iterate over the whole dataset
  # Batch-local names are used on purpose: binding the loop targets to `x`/`y`
  # would overwrite the notebook-level dataset tensors with the last batch and
  # break any earlier cell that is re-run afterwards.
  for step, (x_batch, y_batch) in enumerate(train_db): # iterate over one batch
    # Flatten each image: [b, 28, 28] => [b, 28*28]
    x_flat = tf.reshape(x_batch, [-1, 28*28])

    # Ops that take part in the gradient computation are recorded on the tape
    # (tf.Variable objects are watched).
    with tf.GradientTape() as tape:
      # [b, 784] @ [784, 256] + [256] => [b, 256]; the bias is broadcast over the batch
      h1 = tf.nn.relu(x_flat@w1 + b1)
      # [b, 256] => [b, 128]
      h2 = tf.nn.relu(h1@w2 + b2)
      # [b, 128] => [b, 10]
      out = h2@w3 + b3

      # Mean squared error against the one-hot labels.
      # y_batch: [b] => [b, 10]
      y_onehot = tf.one_hot(y_batch, 10)
      loss = tf.reduce_mean(tf.square(y_onehot - out)) # scalar

    # compute gradients
    grads = tape.gradient(loss, params)

    # `w = w - lr * grad` would yield a plain Tensor that the tape can no longer
    # watch; assign_sub updates each Variable in place and keeps its reference.
    for param, grad in zip(params, grads):
      param.assign_sub(lr * grad)

    if step % 100 == 0:
      print("epoch: {}, step: {}, training loss: {}".format(epoch, step, float(loss))) # float(loss): tensor to scalar

epoch: 0, step: 0, training loss: 0.5016995668411255
epoch: 0, step: 100, training loss: 0.20781607925891876
epoch: 0, step: 200, training loss: 0.17083518207073212
epoch: 0, step: 300, training loss: 0.16126517951488495
epoch: 0, step: 400, training loss: 0.15797080099582672
epoch: 1, step: 0, training loss: 0.16217073798179626
epoch: 1, step: 100, training loss: 0.1532837152481079
epoch: 1, step: 200, training loss: 0.13839606940746307
epoch: 1, step: 300, training loss: 0.13728100061416626
epoch: 1, step: 400, training loss: 0.136625736951828
