# 数据加载
+ tf.data.Dataset.from_tensor_slices: 该函数是dataset核心函数之一，给定的张量沿着它们的第一维被切片。此操作保留输入张量的结构，删除每个张量的第一个维，并将其用作数据集维。所有输入张量在其第一维度中必须具有相同的大小
+ tf.random.shuffle(a): 沿第一维随机打乱张量 a 的顺序；若要打乱 Dataset，应使用 dataset.shuffle(buffer_size)

In [7]:
import tensorflow as tf
from tensorflow import keras

In [2]:
# Load the MNIST dataset.
# train: 60k samples // test: 10k samples
(x,y),(x_test,y_test) = keras.datasets.mnist.load_data()
# Labels take the values 0-9, so one-hot encode them with depth 10.
y_onehot = tf.one_hot(y,depth=10)
# Display the shape of the training images.
x.shape

(60000, 28, 28)

In [None]:
# #cifar10数据获取
# #train 50k//test 10k
# (x,y),(x_test,y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
  2121728/170498071 [..............................] - ETA: 37:20

## from_tensor_slices() 

In [8]:
# Build a dataset from the test images only; each element is one image.
db = tf.data.Dataset.from_tensor_slices(x_test)
# next(iter(db)).shape #[28,28]
# Rebuild the dataset from an (images, labels) tuple; each element is then
# an (image, label) pair. This overwrites the `db` created above.
db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
# Shape of the image in the first (image, label) element.
next(iter(db))[0].shape

TensorShape([28, 28])

## shuffle

In [4]:
# Shuffle the (image, label) dataset with a 10000-element buffer.
# Dataset.shuffle returns a new Dataset, so the result must be assigned.
# tf.random.shuffle is the wrong API here: it works on tensors, and assigning
# its result to `db` would break the later `db.map(...)` call.
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db = db.shuffle(10000)

## map:数据预处理 

In [10]:
def preprocessing(x, y):
    """Normalize one image and one-hot encode its label.

    Args:
        x: image tensor; cast to float32 and divided by 255.
        y: label; cast to int32 and one-hot encoded with depth 10.

    Returns:
        A ``(image, label)`` tuple of the converted tensors.
    """
    pixels = tf.cast(x, dtype=tf.float32)
    scaled = pixels / 255  # raw pixel values span 0~255
    label = tf.cast(y, dtype=tf.int32)
    return scaled, tf.one_hot(label, depth=10)

In [11]:
# Apply `preprocessing` to every (image, label) element of `db`.
db2 = db.map(preprocessing)

In [15]:
# Fetch the first element and print the image and label shapes.
# NOTE(review): this inspects `db`, not the mapped `db2`, so the label is
# still a scalar here; iterate `db2` to see the one-hot label shape.
res = next(iter(db))
print(res[0].shape,res[1].shape)

(28, 28) ()


## .batch 
一次得到几张照片

In [14]:
# Group the preprocessed elements into batches of 32.
db3 = db2.batch(32)
# Fetch the first batch and print the batched image and label shapes.
res = next(iter(db3))
print(res[0].shape,res[1].shape)

(32, 28, 28) (32, 10)


In [None]:
# Keep a reference to the iterator itself. The original stored
# next(iter(db3)), which is the first batch (a tuple), and calling next()
# on a tuple raises TypeError.
db_iter = iter(db3)
# Pull batches until the dataset is exhausted; the loop deliberately ends
# with the iterator's end-of-sequence exception, which motivates .repeat().
while True:
    next(db_iter)

## .repeat

In [None]:
# Repeat the dataset indefinitely.
db4 = db3.repeat()
# Repeat the dataset twice (this overwrites the `db4` above).
db4 = db3.repeat(2)

In [None]:
def preprocessing(x, y):
    """Convert an (image, label) pair to the dtypes used for training.

    Args:
        x: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in 0~255).
        y: label tensor; cast to int32.

    Returns:
        A ``(image, label)`` tuple of the converted tensors.
    """
    image = tf.cast(x, dtype=tf.float32)
    target = tf.cast(y, dtype=tf.int32)
    return image / 255, target

def mnist_dataset():
    """Build batched training and validation datasets from Fashion-MNIST.

    Labels are one-hot encoded (depth 10), every element is passed through
    ``preprocessing``, and both datasets are shuffled with a 60000-element
    buffer and grouped into batches of 100.

    Returns:
        A ``(ds, ds_val)`` tuple of ``tf.data.Dataset`` objects.
    """
    # The module is `fashion_mnist`; the original `fasion_mnist` does not exist.
    (x, y), (x_val, y_val) = keras.datasets.fashion_mnist.load_data()
    y = tf.one_hot(y, depth=10)
    # The original `tf.one_hot(y_val.depth=10)` was a syntax error.
    y_val = tf.one_hot(y_val, depth=10)

    ds = tf.data.Dataset.from_tensor_slices((x, y))
    ds = ds.map(preprocessing)
    # Dataset transformations return a new dataset, so the result must be
    # assigned; otherwise the shuffle and batch are silently discarded.
    ds = ds.shuffle(60000).batch(100)
    ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    ds_val = ds_val.map(preprocessing)
    ds_val = ds_val.shuffle(60000).batch(100)
    return ds, ds_val

# 测试张量

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

## 获取数据 并转换类型 
+ with tf.GradientTape() as tape:自动微分API，所有操作记录在tape上(**在with下进行数学操作，并存储在tape中**)
    + tape.gradient()计算梯度 (传入变量列表时，返回的梯度也是列表，依次存放各参数的梯度)
    + Variable.assign_sub(delta):变量原地减去 delta，即 ref = ref - delta，例如 w1.assign_sub(lr * grad)

In [3]:
# x: [60k,28,28] (train), [10k,28,28] (test)
# y: [60k] (train), [10k] (test)
(x, y), (x_test, y_test) = datasets.mnist.load_data()
# Convert the loaded arrays to tensors.
# x: [0~255] ==> [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)


In [5]:
# Build train and test datasets in batches of 128 (no shuffling here).
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
# Pull one training batch to inspect its shapes.
train_iter = iter(train_db)
sample = next(train_iter)
# Display the (images, labels) shapes of that batch.
f'batch: {sample[0].shape,sample[1].shape}'

'batch: (TensorShape([128, 28, 28]), TensorShape([128]))'

## 在图中创建默认节点 

In [11]:
# Three-layer network: [b,784] ==> [b,256] ==> [b,128] ==> [b,10]
# Each layer has a weight of shape [dim_in,dim_out] and a bias of shape [dim_out].
# Weights are drawn from a truncated normal with stddev 0.1; biases start at zero.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

In [7]:
# Learning rate used for the manual gradient-descent updates: 0.001
lr = 1e-3
# Display the value.
lr

0.001

## 模型训练 梯度下降算法

In [15]:
# Train the three-layer network for 30 epochs with manual gradient descent,
# then measure accuracy on the test set at the end of every epoch.
for epoch in range(30):
    for step,(x,y) in enumerate(train_db):
        # enumerate yields (index, item): the step number and the batch.
        # x: [b,28,28] -> [b,28*28]   (b is 128 for a full batch)
        # y: [b]
        
        x = tf.reshape(x,[-1,28*28])
        with tf.GradientTape() as tape:
            # Operations executed inside this block are recorded on the tape
            # so that gradients can be computed from them afterwards.
            # [b,784]@[784,256]+[256] ==> [b,256] + [256] ==> [b,256] + [b,256]
            h1 = x@w1+tf.broadcast_to(b1,[x.shape[0],256])  # h1 = x@w + b, with the bias broadcast explicitly
            h1 = tf.nn.relu(h1)
            # [b,256] ==> [b,128]
            # The bias broadcasts automatically, so no explicit broadcast_to is needed.
            h2 = h1@w2+b2
            h2 = tf.nn.relu(h2)
            # [b,128] ==> [b,10]
            out = h2@w3+b3
            
            # Compute the loss: mean squared error between the one-hot labels
            # and the raw network outputs.
            y_onehot = tf.one_hot(y,depth=10)
            loss = tf.reduce_mean(tf.square(y_onehot-out))
            
        # Compute gradients of the loss with respect to every parameter;
        # grads is ordered like the variable list passed in.
        grads = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])
        # Update rule: w1 = w1 - lr * w1_grad
        # `w1 = w1 - lr * grads[0]` would rebind w1 to a new tensor (not an
        # in-place update), so assign_sub is used to update the Variable in place.
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        
        # Log the training loss every 100 steps.
        if step%100 == 0:
            print(f'epoch:{epoch},step:{step},loss:{float(loss)}')
            
    # Evaluate on the test set: count correct predictions over all batches.
    total_correct,total_num = 0,0
    for step,(x,y) in enumerate(test_db):
        x = tf.reshape(x,[-1,28*28])
        # Forward pass only; no tape is needed for evaluation.
        h1 = tf.nn.relu(x @ w1 + b1)
        h2 = tf.nn.relu(h1 @ w2 + b2)
        out = h2 @ w3 + b3
        
        # out: [b,10] ~ R
        # prob: [b,10] ~ (0,1)
        prob = tf.nn.softmax(out,axis=1)
        # [b,10] -> [b]: index of the most probable class
        pred = tf.argmax(prob,axis=1)
        # Cast to int32 so the prediction can be compared with y.
        pred = tf.cast(pred,dtype=tf.int32)
        
        # y: [b]
        # correct: [b], int32 (1 where the prediction equals the label)
        correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
        correct = tf.reduce_sum(correct)

        total_correct += int(correct)
        total_num += x.shape[0]
    acc = total_correct / total_num
    print(f'test acc: {acc}')          

epoch:0,step:0,loss:0.06983967870473862
epoch:0,step:100,loss:0.08042538911104202
epoch:0,step:200,loss:0.08418109267950058
epoch:0,step:300,loss:0.07312164455652237
epoch:0,step:400,loss:0.07665024697780609
test acc: 0.5933
epoch:1,step:0,loss:0.06795628368854523
epoch:1,step:100,loss:0.07856154441833496
epoch:1,step:200,loss:0.08198012411594391
epoch:1,step:300,loss:0.07137195765972137
epoch:1,step:400,loss:0.07486895471811295
test acc: 0.6082
epoch:2,step:0,loss:0.06624337285757065
epoch:2,step:100,loss:0.07687604427337646
epoch:2,step:200,loss:0.079964280128479
epoch:2,step:300,loss:0.06978429853916168
epoch:2,step:400,loss:0.07327441871166229
test acc: 0.6208
epoch:3,step:0,loss:0.06469006836414337
epoch:3,step:100,loss:0.075334332883358
epoch:3,step:200,loss:0.07811331748962402
epoch:3,step:300,loss:0.06834663450717926
epoch:3,step:400,loss:0.07182338088750839
test acc: 0.6339
epoch:4,step:0,loss:0.06327065080404282
epoch:4,step:100,loss:0.07393507659435272
epoch:4,step:200,loss:

# 简易网络构建

## 全连接层 
+ net = tf.keras.layers.Dense()
+ net.build(input_shape=())

In [16]:
import tensorflow as tf

In [26]:
# Generate random input data.
x = tf.random.normal([4,784])
# Create a fully connected layer with 512 units.
# [4,784] ==> [4,512]
net = tf.keras.layers.Dense(512)
# Calling the layer on an input creates its kernel and bias.
out = net(x)
print(f'net的属性为{net.kernel.shape},net的bias的属性{net.bias.shape}')

net的属性为(784, 512),net的bias的属性(512,)


**如果Dense层尚未构建（既没有在输入上调用过，也没有调用build()），就无法获取kernel、bias等属性，如下代码可见**

In [23]:
# try:
#     net = tf.keras.layers.Dense(10)
#     net.kernel.shape
# except Exception as e:
#     print(e)

'Dense' object has no attribute 'kernel'


In [30]:
# List the layer's variables (kernel and bias).
net.weights

[<tf.Variable 'dense_8/kernel:0' shape=(784, 512) dtype=float32, numpy=
 array([[-0.00380588,  0.05256051,  0.0032768 , ..., -0.0501987 ,
         -0.03521321, -0.02314604],
        [ 0.05012886, -0.02610995,  0.03664938, ...,  0.05627951,
          0.03837027,  0.06231351],
        [ 0.02980496, -0.00142232, -0.04249971, ...,  0.02982023,
         -0.06580639, -0.02434242],
        ...,
        [-0.0073201 , -0.0187096 ,  0.01139774, ..., -0.01875522,
          0.05221038, -0.0283785 ],
        [-0.0045331 , -0.06044547,  0.015029  , ..., -0.04406556,
         -0.0294878 , -0.0392484 ],
        [-0.0401406 , -0.06779751,  0.00740139, ..., -0.01751142,
         -0.04666799,  0.04572296]], dtype=float32)>,
 <tf.Variable 'dense_8/bias:0' shape=(512,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [34]:
# Create the weights explicitly by declaring the input shape with build(),
# instead of calling the layer on actual data.
net = tf.keras.layers.Dense(10)
# None leaves the batch dimension unspecified; the input feature size is 4.
net.build(input_shape=(None,4))
# Display the kernel shape: [input_dim, units].
net.kernel.shape

TensorShape([4, 10])

In [36]:
# Build again, this time with a concrete batch size of 2 and 4 input features.
net.build(input_shape=([2,4]))
# Display the kernel variable.
net.kernel

<tf.Variable 'kernel:0' shape=(4, 10) dtype=float32, numpy=
array([[ 0.01743668,  0.34845185, -0.33038166,  0.08518714,  0.05131823,
         0.51711893,  0.06760114, -0.06391621,  0.41007972,  0.04954326],
       [-0.2527593 ,  0.02567559,  0.44043446,  0.60767686,  0.39334548,
        -0.13640344, -0.4459053 ,  0.16502273,  0.3048514 ,  0.20030874],
       [-0.1994704 ,  0.11549509, -0.07134223, -0.4404649 ,  0.1787523 ,
        -0.05017459,  0.06334054, -0.64084655,  0.59652436, -0.5010998 ],
       [ 0.17165524, -0.0626235 ,  0.5010228 , -0.18125755, -0.4592874 ,
        -0.5692388 , -0.04370642,  0.29609007,  0.2729764 ,  0.10989302]],
      dtype=float32)>

## 多层网络
+ net = keras.Sequential()
+ net.build(input_shape=())
+ net.summary(): Param # 表示该层的参数个数（权重 + 偏置），例如输入3维、输出2维的Dense层为 3*2+2=8

In [37]:
# Random input with 2 samples and 3 features.
x = tf.random.normal([2,3])

# Equivalent layer-by-layer construction of the first layer:
# model = keras.Sequential()
# model.add(keras.layers.Dense(2))
# model.add(keras.layers.Activation('relu'))
# Stack three Dense layers of 2 units each; the first two use ReLU.
model = keras.Sequential([
    keras.layers.Dense(2,activation='relu'),
    keras.layers.Dense(2,activation='relu'),
    keras.layers.Dense(2)
])

# Create the weights for inputs with 3 features and any batch size,
# then print the per-layer summary.
model.build(input_shape=([None,3]))
model.summary()

# Print the name and shape of every trainable variable.
for p in model.trainable_variables:
    print(p.name,p.shape)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             multiple                  8         
_________________________________________________________________
dense_12 (Dense)             multiple                  6         
_________________________________________________________________
dense_13 (Dense)             multiple                  6         
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________
dense_11/kernel:0 (3, 2)
dense_11/bias:0 (2,)
dense_12/kernel:0 (2, 2)
dense_12/bias:0 (2,)
dense_13/kernel:0 (2, 2)
dense_13/bias:0 (2,)


# 输出方式
+ tf.sigmoid() #每个输出值都在(0,1)内，但各输出值的和不保证为1
+ tf.nn.softmax() #输出值的和为1
+ tf.tanh()

# 误差计算
+ tf.losses.MSE(a,b)
+ tf.losses.BinaryCrossentropy()([1],[0.1]):当处理二分类问题时使用该交叉熵损失。对于每一个样本的预测值都应该是浮点型的数值(floating)

## MSE 

In [42]:
import tensorflow as tf
# Integer class labels for 5 samples.
# NOTE(review): label 5 is outside the one-hot depth of 4 used below —
# confirm whether this value is intentional.
y = tf.constant([0,1,5,3,2])
# One-hot encode to shape [5,4] and cast to float32 to match `out`.
y = tf.one_hot(y,depth=4)
y = tf.cast(y,dtype=tf.float32)

# Random stand-in for network outputs, shape [5,4].
out = tf.random.normal([5,4])
# Display the label shape.
y.shape

TensorShape([5, 4])

In [41]:
# MSE, form 1: mean of the element-wise squared differences.
loss1 = tf.reduce_mean(tf.square(y-out))
loss1

<tf.Tensor: id=1619635, shape=(), dtype=float32, numpy=0.5222829>

In [43]:
# MSE, form 2: squared norm of the difference divided by the
# number of elements (5 samples * 4 classes).
loss2 = tf.square(tf.norm(y-out))/( 5 * 4 )
loss2

<tf.Tensor: id=1619655, shape=(), dtype=float32, numpy=1.6566683>

In [44]:
# MSE, form 3: tf.losses.MSE followed by a mean over its result.
loss3 = tf.reduce_mean(tf.losses.MSE(y,out))
loss3

<tf.Tensor: id=1619660, shape=(), dtype=float32, numpy=1.6566683>

## Classification 

In [45]:
# Class-style API: create the loss object, then call it with the
# label [1] and the prediction [0.1].
tf.losses.BinaryCrossentropy()([1],[0.1])

<tf.Tensor: id=1619694, shape=(), dtype=float32, numpy=2.3025842>

In [48]:
# Function-style API applied to the same label [1] and prediction [0.1].
tf.losses.binary_crossentropy([1],[0.1])

<tf.Tensor: id=1619719, shape=(), dtype=float32, numpy=2.3025842>