# tensorflow 实现简单线性回归

In [2]:
# tensorflow environment
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt

# refer to d2l
from d2l import tensorflow as d2l

# using type hint from third party tools
# deprecated since python3.9
# from typing import list

# ensure GPU available
print(tf.config.list_physical_devices("GPU"))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-09-07 00:48:17.518975: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-09-07 00:48:17.708283: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-09-07 00:48:17.709032: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


## 生成数据

In [3]:
def synthetic_data(w:list[float],b:float,num:int):
    """
    creat data
    """
    x = tf.zeros((num,w.shape[0]))
    x += tf.random.normal(shape=x.shape) # noise
 
    y= tf.matmul(x,tf.reshape(w,(-1,1))) + b
    y += tf.random.normal(shape=y.shape,stddev=0.01)
    y = tf.reshape(y,(-1,1))
    return x,y


In [15]:
# weights
w = tf.constant([1,2.1,3],dtype=float)
# bias
b = -1
n = 1000

features,lables=synthetic_data(w,b,n)
print("features:",features[0])
print("labels:",lables[0])

features: tf.Tensor([ 0.36662057 -0.49327502  2.0776083 ], shape=(3,), dtype=float32)
labels: tf.Tensor([4.5478196], shape=(1,), dtype=float32)


## 读取数据

In [19]:
def data_iter(batch_size,features,lables):
    """通过yield生成迭代器"""
    nums=len(features)
    # 随机读取样本
    indices = list(range(nums))
    random.shuffle(indices)
    for i in range(0,nums,batch_size):
        j = tf.constant(indices[i:min(i+batch_size,nums)])
        yield tf.gather(features,j),tf.gather(lables,j)
    pass

In [20]:
batch_size = 10

for x,y in data_iter(batch_size,features,lables):
    print(x,"\n")
    print(y)

tf.Tensor(
[[-0.7405471   0.7202956   0.18167621]
 [-1.32461    -0.09317806  0.9687353 ]
 [ 0.62665     0.03040845  2.8625088 ]
 [-0.48927128 -1.7456135   1.4974964 ]
 [-0.5657871   0.40087074 -0.92298055]
 [-0.65161574 -0.56388754 -0.46855757]
 [ 0.2715458   0.41306075  0.8009919 ]
 [ 0.35448503 -0.9134519  -0.59287906]
 [ 0.49183467 -0.8868478  -1.3228428 ]
 [ 0.7645307  -1.3575522  -1.9427031 ]], shape=(10, 3), dtype=float32) 

tf.Tensor(
[[ 0.32596514]
 [ 0.37364236]
 [ 8.265616  ]
 [-0.6630405 ]
 [-3.4892583 ]
 [-4.2316284 ]
 [ 2.5359395 ]
 [-4.3324466 ]
 [-6.33704   ]
 [-8.901195  ]], shape=(10, 1), dtype=float32)
tf.Tensor(
[[ 0.93169624 -1.0798886   1.8486785 ]
 [ 0.43987164 -0.41366577  0.21507356]
 [ 1.1490284  -0.24420726  0.16442178]
 [-0.8907803   0.44240406  1.086887  ]
 [-0.7990898   1.2862681  -0.9308861 ]
 [-1.078785   -1.0946889  -1.4012882 ]
 [ 0.49507526  0.04211868  0.70695907]
 [ 0.36220074 -0.8213041  -1.5678836 ]
 [-0.27394384  0.4154106  -1.128508  ]
 [ 0.27236

## 建立线性回归模型

In [22]:
# 初始化模型参数
tw = tf.Variable(tf.random.normal(shape=(3,1),mean=0,stddev=0.01),trainable=True)
tb = tf.Variable(tf.zeros(1))

In [23]:
# 定义模型
def liner_regression(x,w,b):
    """线性回归模型"""
    return tf.matmul(x,w)+b

## 选择合适的损失函数和优化器

In [24]:
# 损失函数
def squared_loss(y_hat,y):
    """均方损失"""
    return (y_hat - tf.reshape(y,y_hat.shape)) ** 2 / 2

In [34]:
# 优化算法
def sgd(params, grads,lr,batch_size):
    """小批量随机梯度下降"""
    for param, grad in zip(params, grads):
        param.assign_sub(lr*grad/batch_size)

In [78]:
# 训练及参数


lr = 0.03
num_epochs = 10
net = liner_regression
loss = squared_loss

for epoch in range(num_epochs):
    for _x,_y in data_iter(batch_size=batch_size,features=x,lables=y):
        with tf.GradientTape() as g:
            # print(_x,w)
            l = loss(net(_x,tw,tb),_y)
        dw,db=g.gradient(l,[tw,tb])
        sgd([tw,tb],[dw,db],lr,batch_size)
    train_l = loss(net(features,tw,tb),lables)
    print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')




epoch 1, loss 0.000661
epoch 2, loss 0.000645
epoch 3, loss 0.000631
epoch 4, loss 0.000616
epoch 5, loss 0.000602
epoch 6, loss 0.000588
epoch 7, loss 0.000575
epoch 8, loss 0.000562
epoch 9, loss 0.000550
epoch 10, loss 0.000537


In [80]:
print(f'w的估计误差: {w - tf.reshape(tw, w.shape)}')
print(f'b的估计误差: {b - tb}')

w的估计误差: [0.01305717 0.02177    0.01094007]
b的估计误差: [-1.183543]
