-
Notifications
You must be signed in to change notification settings - Fork 0
/
BayesLinearRegression.py
165 lines (140 loc) · 5.42 KB
/
BayesLinearRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/8/11 16:44
# @Author : WenMin
# @Email : < wenmin593734264@gmial.com >
# @File : BayesLinearRegression.py
# @Software: PyCharm Community Edition
import numpy as np
from math import pi, e
import matplotlib.pyplot as plt
import random
import math, copy
# from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
class BayesLR(object):
def __init__(self):
self.__basis_type = None
self.__order = None
self.__input_x = None
self.__input_y = None
self.__alpha = None
self.__beta = None
self.__m_n = None
self.__s_n = None
@property
def input_x(self):
return self.__input_x
@input_x.setter
def input_x(self, value):
if not isinstance(value, list):
raise ValueError('input_x must be a list or numpy_array')
self.__input_x = value
@property
def input_y(self):
return self.__input_y
@input_y.setter
def input_y(self, value):
if not isinstance(value, list):
raise ValueError('input_x must be a list or numpy_array')
self.__input_y = value
def basis_function(self, basis_type, order):
if not isinstance(basis_type, str):
raise ValueError('basis type must be a list')
self.__basis_type = basis_type
self.__order = order
if self.__basis_type == 'polynomial':
# order = [ord for ord in range(order)]
fi = np.zeros([len(self.__input_x), order])
for i in range(len(self.__input_x)):
for j in range(order):
fi[i, j] = pow(self.__input_x[i], j)
fi = np.mat(fi)
fi_fi = fi.T * fi
eigenvalue, eigenvector = np.linalg.eig(fi_fi)
return eigenvalue, fi_fi, fi
elif self.__basis_type == 'gauss':
pass
elif self.__basis_type == 'sigmoid':
pass
elif self.__basis_type == 'fourier':
pass
else:
raise ValueError('basis type must be: polynomial, gauss, sigmoid or fourier')
def fit(self, test_x, test_y, alpha, beta, basis_type, order):
self.__input_x = test_x
self.__input_y = test_y
eigenvalue, fi_fi, fi = self.basis_function(basis_type, order)
eigenvalue = np.mat(eigenvalue)
fi_fi = np.mat(fi_fi)
alpha_estimate = 0
beta_estimate = 0
m_n = 0
s_n = 0
while abs(alpha - alpha_estimate) > 0.001 or abs(beta - beta_estimate) > 0.001:
s_n = (alpha * np.eye(fi_fi.shape[0], fi_fi.shape[1]) + beta * fi_fi).I
m_n = beta * s_n * fi.T * np.mat(self.__input_y).T
eigenvalue_lamda = beta * eigenvalue
gama = np.mat([float(lamd_i) / (lamd_i + alpha) for lamd_i in np.asarray(eigenvalue_lamda)[0]]).sum()
alpha_estimate = (float(gama) / (m_n.T * m_n))[0, 0]
d = np.mat(self.__input_y).T - fi * m_n
d[:, :] = map(lambda d_: np.power(d_, 2), d[:, :])
beta_estimate = (len(self.__input_x) - gama) * pow(d.sum(), -1)
alpha = copy.copy(alpha_estimate)
beta = copy.copy(beta_estimate)
self.__alpha = alpha_estimate
self.__beta = beta_estimate
self.__m_n = m_n
self.__s_n = s_n
print 'fit done!'
def predict(self, test):
mean = self.__m_n.T * np.mat([pow(test, i) for i in range(self.__order)]).T
# print mean
mean = mean[0, 0]
# variance = (1.0 / self.__beta) + np.mat([pow(test, i) for i in range(self.__order)]) * self.__s_n *\
# np.mat([pow(test, i) for i in range(self.__order)]).T
# variance = variance[0, 0]
# # return pow(2 * pi * variance, -0.5) * pow(e, -(pow((test - mean), 2)/(2*variance)))
# return pow(2 * pi * variance, -0.5) * math.exp(-(pow((test - mean), 2) / (2 * variance)))
return mean
def evaluate(self, test_x, target_y):
if not isinstance(test_x, list):
raise ValueError('input_x must be a list')
if not isinstance(target_y, list):
raise ValueError('target must be a list')
if len(test_x) != len(target_y):
raise ValueError('len of input_x not equal to len of target!')
y_pred = [self.predict(test_x[i]) for i in range(len(test_x))]
loss = np.sqrt(mean_squared_error(target_y, y_pred))
return loss
if __name__ == '__main__':
x = []
y = []
f = open('data.txt').readlines()
for i in f:
x.append(float(i.split('\t')[0])), y.append(float(i.split('\t')[1]))
cc = list(zip(x, y))
# j = random.sample(cc, 150)
j = sorted(cc)
x, y = zip(*j)
x = list(x)
y = list(y)
bayeslr = BayesLR()
bayeslr.fit(x, y, alpha=0.1, beta=9, basis_type='polynomial', order=7)
y_pred = [bayeslr.predict(i) for i in x]
loss = bayeslr.evaluate(x, y)
print loss
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
reg = linear_model.LinearRegression()
# reg = linear_model.ridge_regression(alpha=0.5)
reg.fit(x, y)
y_predction = reg.predict(x)
print("Mean squared error: %.2f" % np.sqrt(mean_squared_error(x, y_predction)))
ax1 = plt.figure()
ax1.add_subplot(1, 1, 1)
plt.scatter(x, y)
plt.plot(x, y_pred)
plt.plot(x, y_predction)
plt.show()