<a href="https://colab.research.google.com/github/ailunguo/Test/blob/main/%E9%9D%A2%E5%90%91%E7%A0%94%E7%A9%B6%E4%BA%BA%E5%91%98Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import keras

## 张量

In [2]:
# Constant tensor: build an immutable tensor from a nested Python list.
x = tf.constant([[5, 2], [1, 3]])
x

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[5, 2],
       [1, 3]], dtype=int32)>

In [3]:
# Retrieve the tensor's value as a NumPy array.
x.numpy()

array([[5, 2],
       [1, 3]], dtype=int32)

In [4]:
# A tensor exposes its element type and its shape as attributes.
print("dtype:", x.dtype)
print("shape:", x.shape)

dtype: <dtype: 'int32'>
shape: (2, 2)


In [5]:
# Create tensors of a given shape filled with ones and with zeros.
print(tf.ones(shape=(2, 1)))
print(tf.zeros(shape=(2, 1)))

tf.Tensor(
[[1.]
 [1.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[0.]
 [0.]], shape=(2, 1), dtype=float32)


In [6]:
# Create random constant tensors. `x` is first bound to a normally distributed
# sample and then rebound to integers drawn uniformly from [0, 10).
x = tf.random.normal((2, 2), mean=0.0, stddev=1.0)

x = tf.random.uniform((2, 2), minval=0, maxval=10, dtype=tf.int32)

## 变量

In [7]:
# A Variable is a special tensor used to store mutable state
# (for example, the weights of a neural network).
initial_value = tf.random.normal(shape=(2, 2))
a = tf.Variable(initial_value)
print(a)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 0.8556467 , -0.9760387 ],
       [ 0.11634047,  0.917724  ]], dtype=float32)>


In [9]:
# A Variable is updated in place with .assign(value), .assign_add(increment)
# or .assign_sub(decrement).
new_value = tf.random.normal(shape=(2, 2))
a.assign(new_value)
# Compare the whole tensors at once rather than looping over hard-coded indices;
# raises InvalidArgumentError if any element differs.
tf.debugging.assert_equal(a, new_value)

added_value = tf.random.normal(shape=(2, 2))
a.assign_add(added_value)
tf.debugging.assert_equal(a, new_value + added_value)

## TensorFlow中的数学运算

In [15]:
# Element-wise math on two random 2x2 tensors.
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

c = tf.add(a, b)  # same operation as `a + b`
d = tf.square(c)
e = tf.exp(d)

In [23]:
# Differentiable: a GradientTape records the operations run inside its context
# so that gradients can be requested afterwards.
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

with tf.GradientTape() as tape:
  # `a` is a plain tensor here, so the tape must be told explicitly to track it.
  tape.watch(a)
  c = tf.sqrt(tf.square(a) + tf.square(b))

# Request the gradient after recording has finished; only the forward
# computation needs to live inside the `with` block.
dc_da = tape.gradient(c, a)
print(dc_da)

tf.Tensor(
[[ 0.6820381   0.9994569 ]
 [-0.67598087 -0.59693843]], shape=(2, 2), dtype=float32)


In [25]:
# Wrap `a` in a Variable: unlike the plain-tensor case, no tape.watch() call is
# made below, yet the gradient with respect to `a` is still available.
a = tf.Variable(a)

with tf.GradientTape() as tape:
  c = tf.sqrt(tf.square(a) + tf.square(b))

# Compute the gradient once the forward pass has been recorded.
dc_da = tape.gradient(c, a)
print(dc_da)

tf.Tensor(
[[ 0.6820381   0.9994569 ]
 [-0.67598087 -0.59693843]], shape=(2, 2), dtype=float32)


In [26]:
# Compute higher-order derivatives by nesting tapes.
with tf.GradientTape() as outer_tape:
  with tf.GradientTape() as tape:
    c = tf.sqrt(tf.square(a) + tf.square(b))
  # The first-order gradient must be computed while outer_tape is still
  # recording, otherwise it could not be differentiated a second time.
  dc_da = tape.gradient(c, a)

# The second-order gradient itself does not need to be recorded.
d2c_d2a = outer_tape.gradient(dc_da, a)
print(d2c_d2a)

tf.Tensor(
[[0.3676372  0.00111532]
 [1.05185    0.55841327]], shape=(2, 2), dtype=float32)


## Keras层

In [30]:
class Linear(keras.layers.Layer):
  """Affine layer computing ``y = x @ w + b``.

  Both weights are created directly in the constructor, so the input
  dimension has to be supplied up front.

  Args:
    units: Size of the output's last axis.
    input_dim: Size of the input's last axis.
  """

  def __init__(self, units=32, input_dim=32):
    super().__init__()
    kernel_shape = (input_dim, units)
    bias_shape = (units,)
    # `w` is registered before `b`; keep this order when editing.
    self.w = self.add_weight(
        shape=kernel_shape, initializer='random_normal', trainable=True)
    self.b = self.add_weight(
        shape=bias_shape, initializer='zeros', trainable=True)

  def call(self, inputs):
    """Returns ``inputs @ w + b``."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

In [31]:
# Instantiate the layer for 2 input features and 4 output units.
linear_layer = Linear(units=4, input_dim=2)

# Calling the layer like a function runs its call() method on the input.
y = linear_layer(tf.ones((2, 2)))
assert y.shape == (2, 4)

In [32]:
# Weights assigned as layer attributes are tracked and exposed via `weights`.
assert linear_layer.weights == [linear_layer.w, linear_layer.b]

## 在 build(input_shape) 中创建层权重

In [33]:
class Linear(keras.layers.Layer):
  """Affine layer ``y = x @ w + b`` whose weights are created in build().

  Only the number of output units is given to the constructor; the input
  size is read from the shape passed to build().

  Args:
    units: Size of the output's last axis.
  """

  def __init__(self, units=32):
    super().__init__()
    self.units = units

  def build(self, input_shape):
    """Creates `w` and `b`, sizing `w` from ``input_shape[-1]``."""
    in_features = input_shape[-1]
    self.w = self.add_weight(
        shape=(in_features, self.units),
        initializer='random_normal',
        trainable=True)
    # NOTE(review): the bias uses 'random_normal' here while the earlier
    # Linear definition in this notebook uses 'zeros' -- confirm this is intended.
    self.b = self.add_weight(
        shape=(self.units,),
        initializer='random_normal',
        trainable=True)

  def call(self, inputs):
    """Returns ``inputs @ w + b``."""
    product = tf.matmul(inputs, self.w)
    return product + self.b

linear_layer = Linear(4)
# Nothing has been built yet: the input size is unknown until the first call.
assert not linear_layer.built

y = linear_layer(tf.ones((2, 2)))
# build() of the Linear class above is not run at construction time; it is
# triggered automatically the first time the layer is called.
assert linear_layer.built
assert y.shape == (2, 4)

## 网络层的梯度

In [35]:
# Prepare a dataset: flatten every image to a vector, convert to float32 and
# divide by 255.
(x_train, y_train), _ = keras.datasets.mnist.load_data()
dataset = tf.data.Dataset.from_tensor_slices(
    # Derive the sample count from the data instead of hard-coding 60000.
    (x_train.reshape(len(x_train), -1).astype('float32') / 255, y_train)
)
dataset = dataset.shuffle(buffer_size=1024).batch(64)

# One output unit per class; the input size is inferred on the first call.
linear_layer = Linear(10)

# from_logits=True because the layer returns raw scores (no softmax applied).
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = keras.optimizers.SGD(learning_rate=1e-3)

for step, (x, y) in enumerate(dataset):
  # Record only the forward pass and the loss computation.
  with tf.GradientTape() as tape:
    logits = linear_layer(x)

    loss = loss_fn(y, logits)

  # Gradients of the loss with respect to the layer's trainable weights.
  gradients = tape.gradient(loss, linear_layer.trainable_weights)

  # Apply one optimizer update, pairing each gradient with its weight.
  optimizer.apply_gradients(zip(gradients, linear_layer.trainable_weights))

  # Log the loss every 100 steps.
  if step % 100 == 0:
    print('Step:', step, 'loss:', float(loss))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Step: 0 loss: 2.345146656036377
Step: 100 loss: 2.2982373237609863
Step: 200 loss: 2.1140570640563965
Step: 300 loss: 2.0613176822662354
Step: 400 loss: 1.939256191253662
Step: 500 loss: 1.8842382431030273
Step: 600 loss: 1.7714017629623413
Step: 700 loss: 1.7904040813446045
Step: 800 loss: 1.6409804821014404
Step: 900 loss: 1.6009747982025146


## 可训练和不可训练的权重

In [36]:
# A weight created with trainable=False is non-trainable. A layer exposes the
# two groups through its trainable_weights and non_trainable_weights properties.
class ComputeSum(keras.layers.Layer):
  """Keeps a running total of the inputs, summed over axis 0.

  The total is stored in a non-trainable weight and keeps growing with every
  call of the layer.

  Args:
    input_dim: Length of the stored total vector.
  """

  def __init__(self, input_dim):
    super().__init__()
    self.total = self.add_weight(
        shape=(input_dim,),
        initializer='zeros',
        trainable=False)

  def call(self, inputs):
    """Adds the axis-0 sum of `inputs` to the total and returns the total."""
    batch_sum = tf.reduce_sum(inputs, axis=0)
    self.total.assign_add(batch_sum)
    return self.total

my_sum = ComputeSum(2)
x = tf.ones((2, 2))

# Each call adds the column sums of `x` to the layer's stored total.
y = my_sum(x)
print(y.numpy())

# `total` was created with trainable=False, so it is tracked as a weight but
# only shows up in the non-trainable group.
assert my_sum.weights == [my_sum.total]
assert my_sum.non_trainable_weights == [my_sum.total]
assert my_sum.trainable_weights == []

[2. 2.]
