In [1]:
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import numpy as np
import os
import random

In [9]:
def load_data():
    datafile = './dataset/housing.data'
    data = np.fromfile(datafile, sep=' ', dtype=np.float32)
    
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_nums = len(feature_names)
    
    data = data.reshape([data.shape[0] // feature_nums, feature_nums])
    
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                                training_data.sum(axis=0) / training_data.shape[0]
    
    global max_values
    global min_values
    global avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs
    
    for i in range(feature_nums):
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
    
    training_data = data[:offset]
    test_data = data[offset:]
    
    return training_data, test_data

In [10]:
class Regressor(paddle.nn.Layer):
    def __init__(self, 
                num_of_weights):
        
        super(Regressor, self).__init__()
        
        self.num_of_weights = num_of_weights
        # 定义一层全连接层，输入维度是13，输出维度是1
        self.fc = Linear(in_features = self.num_of_weights, out_features=1)
    
    def forward(self, inputs):
        z = self.fc(inputs)
        
        return z

In [11]:
model = Regressor(13)
# 开启模型的训练模式
model.train()

training_data, test_data = load_data()

'''定义优化算法，使用随机梯度下降SGD'''
opt = paddle.optimizer.SGD(learning_rate=1e-2, parameters=model.parameters())

In [12]:
num_of_epochs = 10
batch_size = 10

for epoch_id in range(num_of_epochs):
    N = len(training_data)
    np.random.shuffle(training_data)
    mini_batches = [training_data[k:k+batch_size] for k in range(0, N, batch_size)]
    
    for iter_id, mini_batch in enumerate(mini_batches):
        x = np.array(training_data[:, :-1])
        y = np.array(training_data[:, -1:])
        housing_features = paddle.to_tensor(x)
        prices = paddle.to_tensor(y)
        
        # 前向计算
        predicts = model(housing_features)
        # 计算损失
        loss = F.square_error_cost(predicts, label=prices)
        avg_loss = paddle.mean(loss)
        if iter_id % 20 == 0:
            print('epoch_id:{}, iter_id:{}, loss:{}'.format(epoch_id, iter_id, avg_loss.numpy()))
        
        # 反向传播
        avg_loss.backward()
        # 最小化loss，更新参数
        opt.step()
        # 清除梯度
        opt.clear_grad()

epoch_id:0, iter_id:0, loss:[0.05239948]
epoch_id:0, iter_id:20, loss:[0.05044193]
epoch_id:0, iter_id:40, loss:[0.04870461]
epoch_id:1, iter_id:0, loss:[0.04862281]
epoch_id:1, iter_id:20, loss:[0.04707587]
epoch_id:1, iter_id:40, loss:[0.04567946]
epoch_id:2, iter_id:0, loss:[0.04561313]
epoch_id:2, iter_id:20, loss:[0.04434928]
epoch_id:2, iter_id:40, loss:[0.04319226]
epoch_id:3, iter_id:0, loss:[0.04313693]
epoch_id:3, iter_id:20, loss:[0.04207582]
epoch_id:3, iter_id:40, loss:[0.04109351]
epoch_id:4, iter_id:0, loss:[0.04104628]
epoch_id:4, iter_id:20, loss:[0.04013606]
epoch_id:4, iter_id:40, loss:[0.03928618]
epoch_id:5, iter_id:0, loss:[0.03924514]
epoch_id:5, iter_id:20, loss:[0.03845138]
epoch_id:5, iter_id:40, loss:[0.03770537]
epoch_id:6, iter_id:0, loss:[0.03766922]
epoch_id:6, iter_id:20, loss:[0.03696828]
epoch_id:6, iter_id:40, loss:[0.03630618]
epoch_id:7, iter_id:0, loss:[0.03627402]
epoch_id:7, iter_id:20, loss:[0.03564903]
epoch_id:7, iter_id:40, loss:[0.03505635]


In [13]:
paddle.save(model.state_dict(), './model/LR_model.pdparams')
print('模型保存成功')

模型保存成功


In [14]:
'''测试模型'''
def load_one_example():
    idx = np.random.randint(0, test_data.shape[0])
    idx = -10
    one_data, label = test_data[idx, :-1], test_data[idx, -1]
    one_data = one_data.reshape([1, -1])
    
    return one_data, label

# 参数为保存模型参数的文件地址
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
model.eval()

# 参数为数据集的文件地址
one_data, label = load_one_example()
# 将数据转为动态图的variable格式 
one_data = paddle.to_tensor(one_data)
predict = model(one_data)

# 对结果做反归一化处理
predict = predict * (max_values[-1] - min_values[-1]) + avg_values[-1]
# 对label数据做反归一化处理
label = label * (max_values[-1] - min_values[-1]) + avg_values[-1]

print("Inference result is {}, the corresponding label is {}".format(predict.numpy(), label))


Inference result is [[15.646884]], the corresponding label is 19.700000762939453
