In [85]:
import math
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fixed seed so the shuffle below (and therefore the train/test split) is the
# same on every Restart & Run All.
SEED = 42

# Input columns, in the order used to build each feature list.
FEATURE_COLUMNS = [
    'fixed acidity', 'volatile acidity', 'citric acid',
    'residual sugar', 'chlorides', 'free sulfur dioxide',
    'total sulfur dioxide', 'density', 'pH',
    'sulphates', 'alcohol',
]
TARGET_COLUMN = 'quality'

# Number of input features; derived from the column list instead of hard-coded.
N = len(FEATURE_COLUMNS)

# Read the data set (semicolon-separated, first row is the header).
data_pd = pd.read_csv('winequality-red.csv', header=0, sep=';')

# Separate inputs from the target: one [features, target] pair per row.
# `list(row)` keeps each feature vector a plain Python list (the model appends
# to it with `+ [1]`) while the elements stay numpy float scalars, exactly as
# row-wise access through `iterrows()` would produce.
feature_rows = data_pd[FEATURE_COLUMNS].to_numpy(dtype=float)
targets = data_pd[TARGET_COLUMN].to_numpy(dtype=float)
data = [[list(row), target] for row, target in zip(feature_rows, targets)]

# Shuffle before splitting so both splits are drawn from the whole file.
random.seed(SEED)
random.shuffle(data)

# Split into training data and test data.
count = len(data)
ratio = 0.8
split_point = int(count * ratio)
tran_data = data[:split_point]
test_data = data[split_point:]
print('There {} datas in total, {} datas used for train, {} used for test'.format(count, split_point, count - split_point))


[[7.8, 0.88, 0.0, 2.6, 0.098, 25.0, 67.0, 0.9968, 3.2, 0.68, 9.8], 5.0]
There 1599 datas in total, 1279 datas used for train, 320 used for test


In [115]:
class LinearRegression:
    """Linear model ``w . x + b`` whose parameters are updated one sample at a time.

    Attributes:
        dim: number of input features.
        w: list of ``dim`` weights, all starting at 0.
        b: bias, starting at 0.
        learningRate: step size applied in ``updateWeights``.
        alpha: multiplier of the ``w . w - 1`` term in ``computeError``; it is
            also adjusted on every call to ``updateWeights``.
    """

    def __init__(self, dim=None, learningRate=0.00005, alpha=1):
        """Initialise the parameters.

        Args:
            dim: number of input features. When omitted, the module-level
                constant ``N`` is used, so ``LinearRegression()`` keeps working
                exactly as before.
            learningRate: step size for every parameter update.
            alpha: starting value of the multiplier on the ``w . w - 1`` term.
        """
        super().__init__()
        self.dim = N if dim is None else dim
        self.w = [0 for _ in range(self.dim)]
        self.b = 0
        self.learningRate = learningRate
        self.alpha = alpha

    def response(self, x):
        """Return the prediction ``f(x; w, b) = w^T x + b`` for one feature vector."""
        y = sum([i * j for i, j in zip(self.w, x)]) + self.b
        return y

    def updateWeights(self, x, iterError):
        """Move every parameter by ``learningRate * iterError``.

        w(t+1)     = w(t) + learningRate * iterError * x
        b(t+1)     = b(t) + learningRate * iterError
        alpha(t+1) = alpha(t) - learningRate * iterError

        Args:
            x: feature vector of the current sample.
            iterError: scalar error signal for the current sample.
        """
        step = self.learningRate * iterError
        # Same evaluation order as `learningRate * iterError * x_j`.
        self.w = [w_j + step * x_j for w_j, x_j in zip(self.w, x)]
        self.b += step
        self.alpha -= step

    def computeError(self, x, y, r):
        """Return the per-sample loss ``1/10 * (w^T x + b - y)^2 + alpha * (w^T w - 1)``.

        With z = (w, b) and A = (x, 1): ``A . z = w^T x + b`` and
        ``z . z - b^2 - 1 = w^T w - 1``, which is how both terms are evaluated.

        NOTE(review): the squared-error term is scaled by a fixed 1/10, not by
        1/(2n) with n the number of samples -- confirm this is intended.

        Args:
            x: feature vector (any iterable of numbers; it is copied to a list,
               so tuples and arrays are accepted as well as lists).
            y: target value.
            r: prediction already computed by the caller. It is not read here;
               the prediction is re-evaluated from ``A . z`` so the result does
               not depend on how ``r`` was produced. Kept for interface
               compatibility.

        Returns:
            The scalar loss for this sample.
        """
        z = self.w + [self.b]
        # list(x) so that a tuple does not raise and an array is extended by one
        # element instead of having 1 added to every entry.
        A = list(x) + [1]
        error1 = 1/10 * (sum([i * j for i, j in zip(A, z)]) - y) ** 2
        error2 = sum([i * j for i, j in zip(z, z)]) - self.b * self.b - 1
        return error1 + self.alpha * error2

    def train(self, tran_data, max_epochs=None, tolerance=70):
        """Fit the model by repeated passes over ``tran_data``.

        Each epoch visits every sample once, updates the parameters after each
        sample, and sums the absolute per-sample loss into ``globalError``.
        Training ends when ``globalError <= tolerance``.

        NOTE(review): the value handed to ``updateWeights`` is the loss from
        ``computeError``, not the residual ``y - prediction`` that a standard
        gradient step would use. It is left as is because changing it alters
        the trained weights -- confirm the intended update rule.

        Args:
            tran_data: sequence of samples, each indexable as
                ``sample[0]`` (feature vector) and ``sample[1]`` (target).
            max_epochs: optional upper bound on the number of epochs. ``None``
                (the default) means no bound.
            tolerance: stop once the epoch's ``globalError`` is at or below this.

        Raises:
            FloatingPointError: if ``globalError`` becomes NaN or infinite; the
                stopping test could then never succeed.
        """
        iteration = 0

        while max_epochs is None or iteration < max_epochs:
            globalError = 0.0
            for sample in tran_data:
                # Prediction for this sample (passed through to computeError).
                r = self.response(sample[0])
                iterError = self.computeError(sample[0], sample[1], r)
                self.updateWeights(sample[0], iterError)
                globalError += abs(iterError)
            iteration += 1
            print(f'Epoch {iteration} finished, globalError is {globalError}')
            if not math.isfinite(globalError):
                raise FloatingPointError(
                    f'training diverged in epoch {iteration}: globalError is {globalError}')
            if globalError <= tolerance:
                print('end training')
                return

        print(f'stopped after {iteration} epochs without reaching tolerance {tolerance}')


In [116]:
# Train the model: create a LinearRegression and fit it to the training split.
# train() prints one line per epoch and returns once its stopping condition on
# the accumulated epoch error is met.
Net = LinearRegression()
Net.train(tran_data)

5.85072950827514
Epoch 1617 finished, globalError is 125.721842529649
Epoch 1618 finished, globalError is 125.59333671613551
Epoch 1619 finished, globalError is 125.46539562267358
Epoch 1620 finished, globalError is 125.33745521813746
Epoch 1621 finished, globalError is 125.20951533571055
Epoch 1622 finished, globalError is 125.08157580693928
Epoch 1623 finished, globalError is 124.95363646170357
Epoch 1624 finished, globalError is 124.82569712819745
Epoch 1625 finished, globalError is 124.6977576329073
Epoch 1626 finished, globalError is 124.56981780059446
Epoch 1627 finished, globalError is 124.44187745426377
Epoch 1628 finished, globalError is 124.31403877106524
Epoch 1629 finished, globalError is 124.18622759386682
Epoch 1630 finished, globalError is 124.05841547350846
Epoch 1631 finished, globalError is 123.93060222592923
Epoch 1632 finished, globalError is 123.80288600311735
Epoch 1633 finished, globalError is 123.6752717614238
Epoch 1634 finished, globalError is 123.547709065185

In [121]:
# Evaluation: accumulate the absolute error between each test target and the
# prediction of the model trained above (`Net`).
total_error = 0
# `sample` (not `data`) so the full data list built in the loading cell is not
# overwritten by the loop variable.
for sample in test_data:
    prediction = Net.response(sample[0])
    total_error += abs(sample[1] - prediction)
print(f'test data number: {len(test_data)} , total test loss is {total_error}')

test data number: 320 , total test loss is 155.0047567921725
