In [None]:

## 激活函数导数

### Sigmoid函数导数

Sigmoid函数表达式：$$\sigma(x) = \frac{1}{1 + e^{-x}}$$
Sigmoid函数的导数表达式：$$\frac{d}{dx} \sigma(x) = \sigma(x)\,\bigl(1-\sigma(x)\bigr)$$

In [44]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers,datasets,Sequential,models

# Show the installed TensorFlow version so the run can be reproduced.
print(tf.__version__)


2.3.0


In [51]:
# @tf.function
def load_data(batch_sz=128, shuffle_buffer=1000):
    """Load Fashion-MNIST and wrap both splits in batched ``tf.data`` pipelines.

    Args:
        batch_sz: Number of examples per batch, used for both the training
            and the test pipeline. Defaults to 128.
        shuffle_buffer: Buffer size passed to ``Dataset.shuffle`` on the
            training pipeline only. Defaults to 1000. A buffer smaller than
            the dataset only shuffles approximately; pass the dataset size
            for a full uniform shuffle.

    Returns:
        A ``(db, db_test)`` tuple: the training dataset (preprocessed,
        shuffled, batched) and the test dataset (preprocessed and batched,
        never shuffled).
    """
    (x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()

    def pre_process(x, y):
        """Cast images to float32 divided by 255 and labels to int32."""
        x = tf.cast(x, tf.float32) / 255.
        y = tf.cast(y, tf.int32)
        return x, y

    db = tf.data.Dataset.from_tensor_slices((x, y))
    db = db.map(pre_process).shuffle(shuffle_buffer).batch(batch_sz)

    # The evaluation split keeps its original order: no shuffle step here.
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.map(pre_process).batch(batch_sz)

    return db, db_test

In [52]:
# @tf.function
def build_model():
    """Create and build the fully connected classifier used in this notebook.

    The network is four ReLU-activated ``Dense`` layers (256, 128, 64 and 32
    units) followed by a 10-unit ``Dense`` layer with no activation, so the
    model outputs raw logits. It is built for inputs of shape
    ``[None, 28*28]``.

    Returns:
        The built ``Sequential`` model.
    """
    hidden_widths = (256, 128, 64, 32)

    # Hidden stack first, then the linear output layer.
    stack = [layers.Dense(width, activation=tf.nn.relu) for width in hidden_widths]
    stack.append(layers.Dense(10))

    net = Sequential(stack)
    net.build(input_shape=[None, 28*28])
    return net

In [53]:
# @tf.function
def train(db,model,epochs=5,lr=0.001):
    """Train ``model`` on ``db`` with hand-written gradient descent.

    Each batch is reshaped to ``[-1, 28*28]``, the labels are one-hot encoded
    with depth 10, and the mean categorical cross-entropy (computed from
    logits) is minimised by subtracting ``lr * gradient`` from every
    trainable variable.

    Args:
        db: Iterable yielding ``(x, y)`` batches.
        model: Callable model exposing ``trainable_variables``; its output is
            treated as logits.
        epochs: Number of full passes over ``db``. Defaults to 5.
        lr: Step size of the gradient-descent update. Defaults to 0.001,
            the value that was previously hard-coded.

    Returns:
        None. The model's variables are updated in place and the loss is
        printed every 100 steps.
    """
    for epoch in range(epochs):
        for step,(x,y) in enumerate(db):
            x = tf.reshape(x,[-1,28*28])
            # The targets do not depend on any trainable variable, so they
            # are built outside the tape.
            y_hot = tf.one_hot(y,depth=10)

            with tf.GradientTape() as tape:
                logits = model(x)
                loss_entropy = tf.reduce_mean(tf.losses.categorical_crossentropy(y_hot,logits,from_logits=True))

            params = model.trainable_variables
            grads = tape.gradient(loss_entropy,params)
            # Plain SGD step, applied in place: p <- p - lr * grad.
            for p,grad_p in zip(params,grads):
                p.assign_sub(lr*grad_p)

            if step % 100 == 0:
                print(epoch,step," : ",loss_entropy)
    


In [54]:
# Build the training and test dataset pipelines.
db,db_test = load_data()
# Create the model and print its layer / parameter summary.
model = build_model()
model.summary()
# Run the training loop with its default number of epochs.
train(db,model)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 256)               200960    
_________________________________________________________________
dense_41 (Dense)             (None, 128)               32896     
_________________________________________________________________
dense_42 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_43 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_44 (Dense)             (None, 10)                330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
0 0  :  tf.Tensor(2.3248749, shape=(), dtype=float32)
0 100  :  tf.Tensor(2.2754993, shape=(), dtype=float32)
0 20