In [34]:
import tensorflow as tf
from   tensorflow.keras import datasets, layers, optimizers

In [35]:
(x, y),(x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [36]:
x.shape, y.shape

((60000, 28, 28), (60000,))

In [37]:
x.min(), x.max(), x.mean()

(0, 255, 33.318421449829934)

In [38]:
 x_test.shape, y_test.shape

((10000, 28, 28), (10000,))

In [39]:
y[:4]

array([5, 0, 4, 1], dtype=uint8)

In [40]:
y_onehot = tf.one_hot(y, depth=10)
y_onehot[:4] 

<tf.Tensor: shape=(4, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

In [41]:
(x, y), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [42]:
x.shape, y.shape, x_test.shape, y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [43]:
x.min(), x.max()

(0, 255)

In [44]:
y[:4]

array([[6],
       [9],
       [9],
       [4]], dtype=uint8)

In [45]:
db = tf.data.Dataset.from_tensor_slices(x_test)
db

<TensorSliceDataset shapes: (32, 32, 3), types: tf.uint8>

In [46]:
next(iter(db)).shape

TensorShape([32, 32, 3])

In [47]:
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))

In [48]:
next(iter(db))[0].shape

TensorShape([32, 32, 3])

In [49]:
next(iter(db))[1].shape

TensorShape([1])

In [50]:
db = db.shuffle(10000)
db

<ShuffleDataset shapes: ((32, 32, 3), (1,)), types: (tf.uint8, tf.uint8)>

## 数据预处理

In [51]:
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32)/255.
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

In [52]:
db2 = db.map(preprocess)

In [53]:
res = next(iter(db2))

In [54]:
res[0].shape, res[1].shape

(TensorShape([32, 32, 3]), TensorShape([1, 10]))

In [55]:
res[1][:2]

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)>

In [56]:
db3 = db2.batch(32)
res = next(iter(db3))
res[0].shape, res[1].shape

(TensorShape([32, 32, 32, 3]), TensorShape([32, 1, 10]))

In [57]:
def mnist_dataset():
    (x, y), (x_val, y_val) = datasets.fashion_mnist.load_data()
    y = tf.one_hot(y, depth=10)
    y_val = tf.one_hot(y_val, depth=10)
    
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    ds = ds.map(preprocess)
    ds = ds.shuffle(60000).batch(100)
    
    ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    ds_val = ds_val.map(preprocess)
    ds_val = ds_val.shuffle(10000).batch(100)
    return ds, ds_val

# mnist_tensor

In [58]:
import tensorflow as tf
from tensorflow.keras import datasets
import os

In [59]:
tf.__version__

'2.4.1'

In [60]:
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32)/255.
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    
    return x, y

In [61]:
(x, y),(x_test, y_test) = datasets.mnist.load_data()
print('x:', x.shape, 'y:', y.shape, 'x_test:',x_test.shape, 'y_test:',y_test.shape)

x: (60000, 28, 28) y: (60000,) x_test: (10000, 28, 28) y_test: (10000,)


从输入张量 x 和 y 创建一个数据集对象 train_db。这个数据集对象在机器学习中常用于数据的批处理、数据加载和数据集管理。通过这种方式，可以将 x 和 y 数据对应地组织成样本，方便后续训练模型时使用

In [62]:
train_db = tf.data.Dataset.from_tensor_slices((x, y))

这段代码是针对一个训练数据集进行操作的。具体来说，该代码将训练数据集进行随机重排(shuffle)，然后按批次大小为128进行分组(batch)，接着对每个样本应用预处理函数(preprocess)，最后重复(repeat)这个过程30次，以便将整个数据集多次用于训练。这种操作通常用于机器学习模型的训练过程。

In [63]:
train_db = train_db.shuffle(60000).batch(128).map(preprocess).repeat(30)

In [65]:
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(10000).batch(128).map(preprocess)

这行代码的意思是从`train_db`数据集中获取下一个元素，并将其拆分为`x`和`y`两个变量。在这种情况下，`train_db`可能是一个可迭代的数据集，比如一个数据加载器，每次迭代会返回一个包含输入数据`x`和标签数据`y`的元素。通过使用`next`函数和`iter`方法，代码提取了下一个元素，并将其分配给`x`和`y`这两个变量。

请确保在运行这行代码之前，`train_db`已经被正确定义和初始化，并且包含了您希望处理的数据。如果`train_db`是一个自定义的数据集或数据加载器，您可能需要先检查数据的格式和内容，以确保代码能正确地从中获取`x`和`y`。


In [66]:
x, y = next(iter(train_db))

In [67]:
print('train sample:', x.shape, y.shape)

train sample: (128, 28, 28) (128, 10)


w1,b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)),tf.Variable(tf.zeros([512]))
这段代码使用 TensorFlow 库创建了两个变量 `w1` 和 `b1`，其中 `w1` 是一个形状为 `[784, 512]` 的张量（即一个矩阵），通过 `tf.random.truncated_normal([784, 512], stddev=0.1)` 函数生成具有截断正态分布的随机值，标准差为 0.1。而 `b1` 则是一个形状为 `[512]` 的张量（即一个向量），通过 `tf.zeros([512])` 函数生成全部为零的张量。这通常在神经网络中用于初始化权重和偏差参数。

如果你想训练一个神经网络，这段代码可能被用于初始化隐藏层的权重 `w1` 和偏置 `b1`。


In [75]:
def main():
    lr = 1e-3
    
    w1,b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)),tf.Variable(tf.zeros([512]))
    
    w2,b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)),tf.Variable(tf.zeros([256]))
    
    w3,b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)),tf.Variable(tf.zeros([10]))
    
    for step, (x,y) in enumerate(train_db):
        
        x = tf.reshape(x, (-1, 784))
        
        with tf.GradientTape() as tape:
            
            h1 = tf.nn.relu(x@w1+b1)
            
            h2 = tf.nn.relu(h1@w2+b2)
            
            out = h2@w3+b3
            
            loss = tf.square(y, out)
            loss = tf.reduce_mean(loss, axis=1)
            
            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
            
            for p, g in zip([w1, b1, w2, b2, w3, b3], grads):
                p.assign_sub(lr * g)
                
            if step % 100 == 0:
                print(step, 'loss:', float(loss))
                
            if step % 500 == 0:
                total, total_correct = 0., 0
                
                for step, (x,y) in enumerate(test_db):
                    h1 = tf.nn.relu(x @ w1 + b1)
                    h2 = tf.nn.relu(h1@w2+b2)
                    out = h2@w3+b3
                    
                    pred = tf.argmax(out, axis=1)
                    
                    y = tf.argmax(y, axis=1)
                    
                    correct = tf.equal(pred, y)
                    
                    total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
                    total += x.shape[0]
                    
                print(step, 'Evaluate ACC:', total_correct/total )
                    
                    


In [76]:
# main()

2024-07-11 16:59:37.081372: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10


TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

# 全链接层

In [86]:
import tensorflow as tf
from tensorflow import keras

x = tf.random.normal([2, 3])

model = keras.Sequential([
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2)
])

model.build(input_shape=[None, 3])
model.summary()

for p in model.trainable_variables:
    print(p.name, p.shape)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_13 (Dense)             (None, 2)                 6         
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 6         
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________
dense_12/kernel:0 (3, 2)
dense_12/bias:0 (2,)
dense_13/kernel:0 (2, 2)
dense_13/bias:0 (2,)
dense_14/kernel:0 (2, 2)
dense_14/bias:0 (2,)


# 输出方式

In [88]:
a = tf.linspace(-6, 6, 10)
a

<tf.Tensor: shape=(10,), dtype=float64, numpy=
array([-6.        , -4.66666667, -3.33333333, -2.        , -0.66666667,
        0.66666667,  2.        ,  3.33333333,  4.66666667,  6.        ])>

In [89]:
tf.sigmoid(a)

<tf.Tensor: shape=(10,), dtype=float64, numpy=
array([0.00247262, 0.00931596, 0.0344452 , 0.11920292, 0.33924363,
       0.66075637, 0.88079708, 0.9655548 , 0.99068404, 0.99752738])>

In [92]:
x = tf.random.normal([1, 28, 28])*5
tf.reduce_min(x), tf.reduce_max(x)

(<tf.Tensor: shape=(), dtype=float32, numpy=-14.9226>,
 <tf.Tensor: shape=(), dtype=float32, numpy=15.413094>)

In [93]:
x = tf.sigmoid(x)
tf.reduce_min(x), tf.reduce_max(x)

(<tf.Tensor: shape=(), dtype=float32, numpy=3.305195e-07>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.99999976>)

## 所有概率之和为1 softmax 

In [96]:
a = tf.linspace(-2, 2, 5)
tf.sigmoid(a)

<tf.Tensor: shape=(5,), dtype=float64, numpy=array([0.11920292, 0.26894142, 0.5       , 0.73105858, 0.88079708])>

In [97]:
tf.nn.softmax(a)

<tf.Tensor: shape=(5,), dtype=float64, numpy=array([0.01165623, 0.03168492, 0.08612854, 0.23412166, 0.63640865])>

In [98]:
logits = tf.random.uniform([1, 10], minval=-2, maxval=2)
logits

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[ 0.14830399, -0.88091946, -0.4907012 , -0.1055913 , -1.2015953 ,
        -0.9553523 , -1.8979688 , -1.6865168 , -1.1679001 , -1.4762025 ]],
      dtype=float32)>

In [100]:
prob = tf.nn.softmax(logits, axis=1)
prob

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.24963708, 0.08919141, 0.13176276, 0.1936617 , 0.0647225 ,
        0.08279369, 0.03225701, 0.03985259, 0.0669405 , 0.04918072]],
      dtype=float32)>

In [101]:
tf.reduce_sum(prob)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

# 损失函数

In [103]:
y = tf.constant([1, 2, 3, 0, 2]) 
y = tf.one_hot(y, depth=4)
y = tf.cast(y, dtype=tf.float32)

In [104]:
out = tf.random.normal([5, 4])

In [106]:
loss1 = tf.reduce_mean(tf.square(y-out))
loss1

<tf.Tensor: shape=(), dtype=float32, numpy=0.8125556>

In [107]:
loss2 = tf.square(tf.norm(y-out))/(5*4)
loss2

<tf.Tensor: shape=(), dtype=float32, numpy=0.8125556>

In [109]:
loss3 = tf.reduce_mean(tf.losses.MSE(y, out))
loss3

<tf.Tensor: shape=(), dtype=float32, numpy=0.8125556>

In [110]:
a = tf.fill([4],0.25)
a*tf.math.log(a)/tf.math.log(2.)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-0.5, -0.5, -0.5, -0.5], dtype=float32)>

In [112]:
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [113]:
a = tf.constant([0.1,0.1,0.1,0.7])
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=1.3567796>

In [114]:
a = tf.constant([0.01,0.01,0.01,0.97])
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.24194068>

## 交叉熵

In [115]:
tf.losses.categorical_crossentropy([0, 1, 0, 0],[0.25,0.25,0.25,0.25])

<tf.Tensor: shape=(), dtype=float32, numpy=1.3862944>

In [118]:
tf.losses.categorical_crossentropy([0, 1, 0, 0],[0.1,0.7,0.1,0.1])

<tf.Tensor: shape=(), dtype=float32, numpy=0.35667497>

In [120]:
tf.losses.categorical_crossentropy([0, 1, 0, 0],[0.1,0.1,0.7,0.1])

<tf.Tensor: shape=(), dtype=float32, numpy=2.3025851>

In [119]:
tf.losses.categorical_crossentropy([0, 1, 0, 0],[0.01,0.07,0.1,0.01])

<tf.Tensor: shape=(), dtype=float32, numpy=0.99852884>

In [122]:
tf.los ses.BinaryCrossentropy()([1], [0.1])

<tf.Tensor: shape=(), dtype=float32, numpy=2.3025842>

In [123]:
tf.losses.binary_crossentropy([1],[0.1])

<tf.Tensor: shape=(), dtype=float32, numpy=2.3025842>

In [124]:
x = tf.random.normal([1, 784])
w = tf.random.normal([784, 2])
b = tf.zeros([2])

In [131]:
logits = x@w+b
logits.shape

TensorShape([1, 2])

In [127]:
prob = tf.math.softmax(logits, axis=1)
prob

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.61015946, 0.38984048]], dtype=float32)>

In [143]:
a = tf.constant([0,1])
a = tf.reshape(a, [1,2])
a

<tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[0, 1]], dtype=int32)>

In [144]:
# 必须设定from_logits=True，默认没有
tf.losses.categorical_crossentropy(a, logits, from_logits=True)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.069153], dtype=float32)>

In [146]:
tf.losses.categorical_crossentropy(a, prob)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.94201756], dtype=float32)>