In [1]:
import pandas as pd
import numpy as np
import paddle
def hanming(a,b):
    res = 0
    for i,j in zip(a,b):
        res += (0 if i==j else 1)
    return res

In [2]:
# 导入训练数据
df_train = pd.read_csv("train.csv")
x = np.asarray(df_train.iloc[:,2:]).astype(np.float32)
y = np.array(df_train.iloc[:,1]).astype(np.int8)
# 导入测试数据
df_test = pd.read_csv("test.csv")
x_pred = np.array(df_test.iloc[:,1:])
# 对特征进行归一化
from sklearn.preprocessing import StandardScaler  
scaler = StandardScaler()  
scaler.fit(x)  
x = scaler.transform(x)
x_pred = scaler.transform(x_pred)
# 将训练数据集和测试数据集按照8:2的比例分开
ratio = 0.8
offset = int(x.shape[0] * ratio)
x_train = x[:offset]
y_train = y[:offset]
x_test = x[offset:]
y_test = y[offset:]

In [9]:
import os
import numpy as np
from paddle.io import Dataset

class MyDataset(Dataset):
    """
    步骤一：继承 paddle.io.Dataset 类
    """
    def __init__(self, x, y):
        """
        步骤二：实现 __init__ 函数，初始化数据集，将样本和标签映射到列表中
        """
        super(MyDataset, self).__init__()
        self.data_list = []
        for i,j in zip(x,y):
            self.data_list.append([i,j])

    def __getitem__(self, index):
        """
        步骤三：实现 __getitem__ 函数，定义指定 index 时如何获取数据，并返回单条数据（样本数据、对应的标签）
        """
        feature = self.data_list[index][0]
        label = self.data_list[index][1]
        # 返回图像和对应标签
        return feature, label

    def __len__(self):
        """
        步骤四：实现 __len__ 函数，返回数据集的样本总数
        """
        return len(self.data_list)

train_dataset = MyDataset(x_train,y_train)
test_dataset = MyDataset(x_test,y_test)

In [13]:
n_input = len(x[0])
# MLP模型组网搭建
from paddle import nn
lenet_Sequential = nn.Sequential(
    nn.Linear(13, 1,),
    nn.Tanh(),
    nn.Linear(1, 20),
    nn.Tanh(),
)
# paddle.device.set_device('gpu:0')  # 本地显卡MX150没装CUDA
# 封装模型为一个 model 实例，便于进行后续的训练、评估和推理
model = paddle.Model(lenet_Sequential)
# 为模型训练做准备，设置优化器及其学习率，并将网络的参数传入优化器，设置损失函数和精度计算方式
model.prepare(optimizer=paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()), 
              loss=paddle.nn.L1Loss(), 
              metrics=paddle.metric.Accuracy())
# 启动模型训练，指定训练数据集，设置训练轮次，设置每次数据集计算的批次大小，设置日志格式
model.fit(train_dataset, 
          epochs=1, 
          batch_size=1,
          verbose=2)

The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/1
step  10/404 - loss: 18.5758 - acc: 0.0000e+00 - 6ms/step
step  20/404 - loss: 22.5854 - acc: 0.0000e+00 - 5ms/step
step  30/404 - loss: 23.1856 - acc: 0.0000e+00 - 5ms/step
step  40/404 - loss: 18.8695 - acc: 0.0000e+00 - 4ms/step
step  50/404 - loss: 24.5653 - acc: 0.0000e+00 - 4ms/step




step  60/404 - loss: 24.9710 - acc: 0.0000e+00 - 4ms/step
step  70/404 - loss: 32.6687 - acc: 0.0000e+00 - 4ms/step
step  80/404 - loss: 18.0986 - acc: 0.0125 - 4ms/step
step  90/404 - loss: 28.8937 - acc: 0.0111 - 4ms/step
step 100/404 - loss: 16.3842 - acc: 0.0100 - 4ms/step
step 110/404 - loss: 9.5603 - acc: 0.0091 - 4ms/step
step 120/404 - loss: 28.5860 - acc: 0.0083 - 4ms/step
step 130/404 - loss: 23.1954 - acc: 0.0077 - 4ms/step
step 140/404 - loss: 26.0842 - acc: 0.0143 - 4ms/step
step 150/404 - loss: 24.2726 - acc: 0.0267 - 4ms/step
step 160/404 - loss: 15.4828 - acc: 0.0312 - 4ms/step
step 170/404 - loss: 38.5351 - acc: 0.0294 - 4ms/step
step 180/404 - loss: 22.6023 - acc: 0.0278 - 4ms/step
step 190/404 - loss: 20.7754 - acc: 0.0316 - 4ms/step
step 200/404 - loss: 12.8832 - acc: 0.0350 - 4ms/step
step 210/404 - loss: 22.9332 - acc: 0.0333 - 4ms/step
step 220/404 - loss: 24.0873 - acc: 0.0318 - 4ms/step
step 230/404 - loss: 22.9985 - acc: 0.0304 - 4ms/step
step 240/404 - loss: 

In [11]:
import paddle
paddle.set_default_dtype("float64")

# step1:用高层API定义数据集，无需进行数据处理等，高层API为你一条龙搞定
train_dataset = paddle.text.datasets.UCIHousing(mode='train')
eval_dataset = paddle.text.datasets.UCIHousing(mode='test')

# step2:定义模型
class UCIHousing(paddle.nn.Layer):
    def __init__(self):
        super(UCIHousing, self).__init__()
        self.fc = paddle.nn.Linear(13, 1, None)

    def forward(self, input):
        pred = self.fc(input)
        return pred

# step3:训练模型
model = paddle.Model(UCIHousing())
model.prepare(paddle.optimizer.Adam(parameters=model.parameters()),
              paddle.nn.MSELoss())
model.fit(train_dataset, epochs=5, batch_size=8, verbose=1)


The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.8472
