In [50]:
import numpy as np
import random
import matplotlib.pyplot as plt

random.seed(52)
np.random.seed(52)

# 确定线性函数
def true_function(x):
    return  1.3*x + 1 

# 生成合成数据集
def generate_synthetic_dataset(num_samples, noise_std, outlier_prob, outlier_range):
    x = np.linspace(0.2, 10, num_samples)
    x_plus_two = np.round(x + random.uniform(0, 0.5), 2)
    x_minus_two = np.round(x - random.uniform(0, 0.5), 2)
    y = np.round(true_function(x) + np.random.normal(0, noise_std, num_samples), 2)
    
    num_outliers = int(num_samples * outlier_prob)
    outlier_indices = np.random.choice(num_samples, num_outliers, replace=False)
    y[outlier_indices] = np.round(np.random.uniform(outlier_range[0], outlier_range[1], num_outliers), 2)
    
    y_upper = np.round(y + random.uniform(0, 0.8), 2)
    y_lower = np.round(y - random.uniform(0, 0.8), 2)
    
    return x_plus_two, x_minus_two, y_lower, y_upper

# 设定参数
num_samples = 20
noise_std = 1
outlier_prob = 0.1
outlier_range = [5, 15]

# 生成合成数据集
x_plus_two, x_minus_two, y_lower, y_upper = generate_synthetic_dataset(num_samples, noise_std, outlier_prob, outlier_range)

# 可视化数据集
#fig, ax = plt.subplots()

#for i in range(len(x_plus_two)):
#    if i >= num_samples * 0.8:
        # 测试集数据，使用黄色空心圆圈
#        ax.plot(x_minus_two[i], y_lower[i], 'o', color='none', markeredgecolor='blue', markersize=7)
#    else:
        # 训练集数据，使用黑色空心圆圈
#        ax.plot(x_minus_two[i], y_lower[i], 'o', color='none', markeredgecolor='black', markersize=7)

# 添加图例
#train_patch = plt.Line2D([], [], marker='o', color='none', markeredgecolor='black', markersize=7, label='Train Set')
#test_patch = plt.Line2D([], [], marker='o', color='none', markeredgecolor='blue', markersize=7, label='Test Set')
#ax.legend(handles=[train_patch, test_patch])

#plt.xlabel('x')
#plt.ylabel('y')
#plt.xlim(0, 11)
#plt.ylim(0, 14)
#plt.show()

In [51]:
x_plus_two,x_minus_two,y_lower, y_upper

(array([ 0.69,  1.2 ,  1.72,  2.24,  2.75,  3.27,  3.78,  4.3 ,  4.82,
         5.33,  5.85,  6.36,  6.88,  7.39,  7.91,  8.43,  8.94,  9.46,
         9.97, 10.49]),
 array([0.17, 0.69, 1.2 , 1.72, 2.24, 2.75, 3.27, 3.78, 4.3 , 4.81, 5.33,
        5.85, 6.36, 6.88, 7.39, 7.91, 8.43, 8.94, 9.46, 9.97]),
 array([ 1.39,  0.27,  2.45, 10.2 ,  3.57,  4.62,  6.17,  6.22,  5.78,
         8.37,  7.16,  8.55,  9.13, 10.09, 11.23, 10.57, 10.81, 12.04,
        12.09, 11.86]),
 array([ 2.36,  1.24,  3.42, 11.17,  4.54,  5.59,  7.14,  7.19,  6.75,
         9.34,  8.13,  9.52, 10.1 , 11.06, 12.2 , 11.54, 11.78, 13.01,
        13.06, 12.83]))

In [52]:
X = 0.5 * (x_plus_two + x_minus_two)
print("X:", X)
Y = 0.5 * (y_lower +  y_upper)
Y

X: [ 0.43   0.945  1.46   1.98   2.495  3.01   3.525  4.04   4.56   5.07
  5.59   6.105  6.62   7.135  7.65   8.17   8.685  9.2    9.715 10.23 ]


array([ 1.875,  0.755,  2.935, 10.685,  4.055,  5.105,  6.655,  6.705,
        6.265,  8.855,  7.645,  9.035,  9.615, 10.575, 11.715, 11.055,
       11.295, 12.525, 12.575, 12.345])

In [53]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)
x = X_train
y = Y_train
X_test = X_test
X_test, Y_test 

(array([ 8.685,  9.2  ,  9.715, 10.23 ]),
 array([11.295, 12.525, 12.575, 12.345]))

In [65]:
#UESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.01,0.1, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'epsilon': np.arange(1, 2, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 += np.dot(x[i], x[j].T) * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term2 = epsilon * np.sum(a_i + a_i_star)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3 
            
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, epsilon, avg_mse))

for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C,epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 找到最佳参数组合及其对应的均方误差
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params

print('Best Parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 计算总运行时间
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")
# 计算平均每次的时间
average_time = total_time / (len(param_grid['C']) * len(param_grid['epsilon']))
print("Average Time per Run:", average_time, "seconds")

Parameters: C=0.01, epsilon=1.0
MSE: 12.539391726518382
---
Parameters: C=0.01, epsilon=1.1
MSE: 12.539391726494511
---
Parameters: C=0.01, epsilon=1.2000000000000002
MSE: 12.700310489427482
---
Parameters: C=0.01, epsilon=1.3000000000000003
MSE: 12.700310489378927
---
Parameters: C=0.01, epsilon=1.4000000000000004
MSE: 12.700310489361655
---
Parameters: C=0.01, epsilon=1.5000000000000004
MSE: 12.948427392519907
---
Parameters: C=0.01, epsilon=1.6000000000000005
MSE: 12.948427392549057
---
Parameters: C=0.01, epsilon=1.7000000000000006
MSE: 12.948427392620747
---
Parameters: C=0.01, epsilon=1.8000000000000007
MSE: 13.232268142920006
---
Parameters: C=0.01, epsilon=1.9000000000000008
MSE: 13.232268142985262
---
Parameters: C=0.02, epsilon=1.0
MSE: 9.773312922231675
---
Parameters: C=0.02, epsilon=1.1
MSE: 9.773312922229668
---
Parameters: C=0.02, epsilon=1.2000000000000002
MSE: 10.034123350507025
---
Parameters: C=0.02, epsilon=1.3000000000000003
MSE: 10.034123350537415
---
Parameters: 

In [67]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.05, 0.1, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(1.5, 2.5, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            #term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for mu in param_grid['mu']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))

for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 计算总运行时间
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")

Parameters: C=0.05, mu=1.5
MSE: 4.587441217878264
---
Parameters: C=0.05, mu=1.6
MSE: 4.636910949604455
---
Parameters: C=0.05, mu=1.7000000000000002
MSE: 4.684719854552167
---
Parameters: C=0.05, mu=1.8000000000000003
MSE: 4.728390224728395
---
Parameters: C=0.05, mu=1.9000000000000004
MSE: 4.772580473626718
---
Parameters: C=0.05, mu=2.0000000000000004
MSE: 4.811110307068921
---
Parameters: C=0.05, mu=2.1000000000000005
MSE: 4.859607925508415
---
Parameters: C=0.05, mu=2.2000000000000006
MSE: 4.9076954368434205
---
Parameters: C=0.05, mu=2.3000000000000007
MSE: 4.952247446422709
---
Parameters: C=0.05, mu=2.400000000000001
MSE: 4.994808190946374
---
Parameters: C=0.060000000000000005, mu=1.5
MSE: 3.921843039414915
---
Parameters: C=0.060000000000000005, mu=1.6
MSE: 3.9491824263754305
---
Parameters: C=0.060000000000000005, mu=1.7000000000000002
MSE: 3.97551311853403
---
Parameters: C=0.060000000000000005, mu=1.8000000000000003
MSE: 4.00158987938258
---
Parameters: C=0.060000000000000

In [68]:
#UHSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
# C 参数范围为 0.1 到 1.0，步长为 0.1
param_grid = {
    'C': np.arange(0.07, 0.08, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(1.8, 1.9, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            #term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for mu in param_grid['mu']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))

for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 计算总运行时间
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")

Parameters: C=0.07, mu=1.8
MSE: 3.771664212604164
---
Best parameters: C=0.07, mu=1.8
Best MSE: 3.771664212604164
w= 0.9554829370850528 b= 2.7587928364079235 msee= 0.3322490607482322 Rmse= 0.5764104967366852
Total time: 0.04189872741699219 seconds


In [69]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w': np.arange(1.1, 2.1, 0.1),
    'b': np.arange(1.1, 2.1, 0.1)
}

kf = KFold(n_splits=4)
mse_list = []

for w in param_grid['w']:
    for b in param_grid['b']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test =x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            y_pred = w * x_train + b
            mse = mean_squared_error(y_train, y_pred)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((w, b, avg_mse))

for params in mse_list:
    w, b, mse = params
    print('Parameters: w={}, b={}'.format(w, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w, best_b, best_mse = best_params
print('Best parameters: w={}, b={}'.format(best_w, best_b))
print('Best MSE: {}'.format(best_mse))
w = best_w
b = best_b
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: w=1.1, b=1.1
MSE: 4.872794015624996
---
Parameters: w=1.1, b=1.2000000000000002
MSE: 4.629462765624998
---
Parameters: w=1.1, b=1.3000000000000003
MSE: 4.406131515624997
---
Parameters: w=1.1, b=1.4000000000000004
MSE: 4.202800265624997
---
Parameters: w=1.1, b=1.5000000000000004
MSE: 4.019469015624997
---
Parameters: w=1.1, b=1.6000000000000005
MSE: 3.8561377656249975
---
Parameters: w=1.1, b=1.7000000000000006
MSE: 3.712806515624998
---
Parameters: w=1.1, b=1.8000000000000007
MSE: 3.589475265624997
---
Parameters: w=1.1, b=1.9000000000000008
MSE: 3.4861440156249976
---
Parameters: w=1.1, b=2.000000000000001
MSE: 3.402812765624997
---
Parameters: w=1.2000000000000002, b=1.1
MSE: 3.952581999999997
---
Parameters: w=1.2000000000000002, b=1.2000000000000002
MSE: 3.7952319999999977
---
Parameters: w=1.2000000000000002, b=1.3000000000000003
MSE: 3.657881999999997
---
Parameters: w=1.2000000000000002, b=1.4000000000000004
MSE: 3.540531999999998
---
Parameters: w=1.20000000000000

In [70]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w': [1.2],
    'b': [1.9]
}

kf = KFold(n_splits=4)
mse_list = []
start_time = time.time()
for w in param_grid['w']:
    for b in param_grid['b']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test =x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            y_pred = w * x_train + b
            mse = mean_squared_error(y_train, y_pred)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((w, b, avg_mse))

best_params = min(mse_list, key=lambda x: x[-1])
best_w, best_b, best_mse = best_params
print('Best parameters: w={}, b={}'.format(best_w, best_b))
print('Best MSE: {}'.format(best_mse))
w = best_w
b = best_b
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 计算总运行时间
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")

Best parameters: w=1.2, b=1.9
Best MSE: 3.2537819999999993
w= 1.2 b= 1.9 msee= 1.3864510000000012 Rmse= 1.1774765390444097
Total time: 0.00299072265625 seconds


In [71]:
import numpy as np
import random
import matplotlib.pyplot as plt

random.seed(52)
np.random.seed(52)

# 确定线性函数
def true_function(x):
    return  2.1*x + 1 

# 生成合成数据集
def generate_synthetic_dataset(num_samples, noise_std, outlier_prob, outlier_range):
    x = np.linspace(0.2, 10, num_samples)
    x_plus_two = np.round(x + random.uniform(0, 0.5), 2)
    x_minus_two = np.round(x - random.uniform(0, 0.5), 2)
    y = np.round(true_function(x) + np.random.normal(0, noise_std, num_samples), 2)
    
    num_outliers = int(num_samples * outlier_prob)
    outlier_indices = np.random.choice(num_samples, num_outliers, replace=False)
    y[outlier_indices] = np.round(np.random.uniform(outlier_range[0], outlier_range[1], num_outliers), 2)
    
    y_upper = np.round(y + random.uniform(0, 0.8), 2)
    y_lower = np.round(y - random.uniform(0, 0.8), 2)
    
    return x_plus_two, x_minus_two, y_lower, y_upper

# 设定参数
num_samples = 20
noise_std = 1
outlier_prob = 0.1
outlier_range = [5, 15]

# 生成合成数据集
x_plus_two, x_minus_two, y_lower, y_upper = generate_synthetic_dataset(num_samples, noise_std, outlier_prob, outlier_range)

x_plus_two,x_minus_two,y_lower, y_upper

(array([ 0.69,  1.2 ,  1.72,  2.24,  2.75,  3.27,  3.78,  4.3 ,  4.82,
         5.33,  5.85,  6.36,  6.88,  7.39,  7.91,  8.43,  8.94,  9.46,
         9.97, 10.49]),
 array([0.17, 0.69, 1.2 , 1.72, 2.24, 2.75, 3.27, 3.78, 4.3 , 4.81, 5.33,
        5.85, 6.36, 6.88, 7.39, 7.91, 8.43, 8.94, 9.46, 9.97]),
 array([ 1.55,  0.84,  3.44, 10.2 ,  5.38,  6.84,  8.81,  9.27,  9.24,
        12.24, 11.44,  8.55, 14.24, 15.62, 17.17, 16.92, 17.57, 19.21,
        19.68, 19.86]),
 array([ 2.52,  1.81,  4.41, 11.17,  6.35,  7.81,  9.78, 10.24, 10.21,
        13.21, 12.41,  9.52, 15.21, 16.59, 18.14, 17.89, 18.54, 20.18,
        20.65, 20.83]))

In [72]:
X = 0.5 * (x_plus_two + x_minus_two)
print("X:", X)
Y = 0.5 * (y_lower +  y_upper)
Y

X: [ 0.43   0.945  1.46   1.98   2.495  3.01   3.525  4.04   4.56   5.07
  5.59   6.105  6.62   7.135  7.65   8.17   8.685  9.2    9.715 10.23 ]


array([ 2.035,  1.325,  3.925, 10.685,  5.865,  7.325,  9.295,  9.755,
        9.725, 12.725, 11.925,  9.035, 14.725, 16.105, 17.655, 17.405,
       18.055, 19.695, 20.165, 20.345])

In [73]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)
x = X_train
y = Y_train
X_test = X_test
X_test, Y_test 

(array([ 8.685,  9.2  ,  9.715, 10.23 ]),
 array([18.055, 19.695, 20.165, 20.345]))

In [74]:
#UESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.01,0.1, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'epsilon': np.arange(1, 2, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 += np.dot(x[i], x[j].T) * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term2 = epsilon * np.sum(a_i + a_i_star)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3 
            
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, epsilon, avg_mse))

for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C,epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 找到最佳参数组合及其对应的均方误差
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params

print('Best Parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")
# 计算平均每次的时间
average_time = total_time / (len(param_grid['C']) * len(param_grid['epsilon']))
print("Average Time per Run:", average_time, "seconds")

Parameters: C=0.01, epsilon=1.0
MSE: 33.67312504321531
---
Parameters: C=0.01, epsilon=1.1
MSE: 33.89983443503394
---
Parameters: C=0.01, epsilon=1.2000000000000002
MSE: 34.16559526439277
---
Parameters: C=0.01, epsilon=1.3000000000000003
MSE: 34.1655952642015
---
Parameters: C=0.01, epsilon=1.4000000000000004
MSE: 34.16559526414895
---
Parameters: C=0.01, epsilon=1.5000000000000004
MSE: 34.2506298750145
---
Parameters: C=0.01, epsilon=1.6000000000000005
MSE: 34.25062987503162
---
Parameters: C=0.01, epsilon=1.7000000000000006
MSE: 34.24596835362809
---
Parameters: C=0.01, epsilon=1.8000000000000007
MSE: 34.24596835357471
---
Parameters: C=0.01, epsilon=1.9000000000000008
MSE: 34.245968353411335
---
Parameters: C=0.02, epsilon=1.0
MSE: 28.79326543356821
---
Parameters: C=0.02, epsilon=1.1
MSE: 29.204356070382687
---
Parameters: C=0.02, epsilon=1.2000000000000002
MSE: 29.703252894896323
---
Parameters: C=0.02, epsilon=1.3000000000000003
MSE: 29.703252894805033
---
Parameters: C=0.02, ep

In [82]:
#UESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(1,1.5, 0.1),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'epsilon': np.arange(0.5, 1, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 += np.dot(x[i], x[j].T) * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term2 = epsilon * np.sum(a_i + a_i_star)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3 
            
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, epsilon, avg_mse))

for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C,epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
# 找到最佳参数组合及其对应的均方误差
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params

print('Best Parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")
# 计算平均每次的时间
average_time = total_time / (len(param_grid['C']) * len(param_grid['epsilon']))
print("Average Time per Run:", average_time, "seconds")

Parameters: C=1.0, epsilon=0.5
MSE: 1942.058062402422
---
Parameters: C=1.0, epsilon=0.6
MSE: 1929.9446735755894
---
Parameters: C=1.0, epsilon=0.7
MSE: 1833.7227732645374
---
Parameters: C=1.0, epsilon=0.7999999999999999
MSE: 1821.7810440348374
---
Parameters: C=1.0, epsilon=0.8999999999999999
MSE: 1809.925397751782
---
Parameters: C=1.1, epsilon=0.5
MSE: 2369.355540498619
---
Parameters: C=1.1, epsilon=0.6
MSE: 2356.084759157684
---
Parameters: C=1.1, epsilon=0.7
MSE: 2240.1548779044847
---
Parameters: C=1.1, epsilon=0.7999999999999999
MSE: 2227.055894412948
---
Parameters: C=1.1, epsilon=0.8999999999999999
MSE: 2214.0428005279086
---
Parameters: C=1.2000000000000002, epsilon=0.5
MSE: 2839.653695355618
---
Parameters: C=1.2000000000000002, epsilon=0.6
MSE: 2825.0753368270434
---
Parameters: C=1.2000000000000002, epsilon=0.7
MSE: 2687.6034290675143
---
Parameters: C=1.2000000000000002, epsilon=0.7999999999999999
MSE: 2673.347097004231
---
Parameters: C=1.2000000000000002, epsilon=0.89

In [78]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.01, 0.1, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(1.9, 2.1, 0.02),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            #term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
start_time = time.time()
for C in param_grid['C']:
    for mu in param_grid['mu']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))

for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)
end_time = time.time()
total_time = end_time - start_time

print("Total time:", total_time, "seconds")
# 计算平均每次的时间
average_time = total_time / (len(param_grid['C']) * len(param_grid['mu']))
print("Average Time per Run:", average_time, "seconds")

Parameters: C=0.01, mu=1.9
MSE: 33.82759268356553
---
Parameters: C=0.01, mu=1.92
MSE: 33.8319842505327
---
Parameters: C=0.01, mu=1.94
MSE: 33.84029216032785
---
Parameters: C=0.01, mu=1.96
MSE: 33.8430095409469
---
Parameters: C=0.01, mu=1.98
MSE: 33.847916642989794
---
Parameters: C=0.01, mu=2.0
MSE: 33.85294841208099
---
Parameters: C=0.01, mu=2.02
MSE: 33.857097018025314
---
Parameters: C=0.01, mu=2.04
MSE: 33.862556439711724
---
Parameters: C=0.01, mu=2.06
MSE: 33.86712708179477
---
Parameters: C=0.01, mu=2.08
MSE: 33.868517227441316
---
Parameters: C=0.01, mu=2.1
MSE: 33.86812670782584
---
Parameters: C=0.02, mu=1.9
MSE: 29.07747787050191
---
Parameters: C=0.02, mu=1.92
MSE: 29.087059090328108
---
Parameters: C=0.02, mu=1.94
MSE: 29.097606328459218
---
Parameters: C=0.02, mu=1.96
MSE: 29.107828384252507
---
Parameters: C=0.02, mu=1.98
MSE: 29.116798333102736
---
Parameters: C=0.02, mu=2.0
MSE: 29.12587137457058
---
Parameters: C=0.02, mu=2.02
MSE: 29.13528813137187
---
Parameter

In [18]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.09, 0.15, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(1, 2, 0.02),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            #term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
for C in param_grid['C']:
    for mu in param_grid['mu']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))

for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)


Parameters: C=0.09, mu=1.0
MSE: 8.484546674200974
---
Parameters: C=0.09, mu=1.02
MSE: 8.47427046143161
---
Parameters: C=0.09, mu=1.04
MSE: 8.464470429943287
---
Parameters: C=0.09, mu=1.06
MSE: 8.455101424945072
---
Parameters: C=0.09, mu=1.08
MSE: 8.446409663663264
---
Parameters: C=0.09, mu=1.1
MSE: 8.443175503054448
---
Parameters: C=0.09, mu=1.12
MSE: 8.442133459795745
---
Parameters: C=0.09, mu=1.1400000000000001
MSE: 8.444452690037261
---
Parameters: C=0.09, mu=1.1600000000000001
MSE: 8.444306538837932
---
Parameters: C=0.09, mu=1.1800000000000002
MSE: 8.4472918044423
---
Parameters: C=0.09, mu=1.2000000000000002
MSE: 8.450827418266854
---
Parameters: C=0.09, mu=1.2200000000000002
MSE: 8.454139565185688
---
Parameters: C=0.09, mu=1.2400000000000002
MSE: 8.457421531250965
---
Parameters: C=0.09, mu=1.2600000000000002
MSE: 8.460656672896079
---
Parameters: C=0.09, mu=1.2800000000000002
MSE: 8.4632695577317
---
Parameters: C=0.09, mu=1.3000000000000003
MSE: 8.466535043392941
---
P

In [90]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
param_grid = {
    'C': np.arange(0.05, 0.15, 0.01),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(1.0, 1.8, 0.1),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}

kf = KFold(n_splits=4)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    #d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            #term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)
for C in param_grid['C']:
    for mu in param_grid['mu']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w = np.sum(((a_i - a_i_star) * x_train))
            m = sum(y_train - w * x_train)
            b = m / l
            t = w * x_test + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))

for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)


Parameters: C=0.05, mu=1.0
MSE: 17.069347345475776
---
Parameters: C=0.05, mu=1.1
MSE: 17.054959335876532
---
Parameters: C=0.05, mu=1.2000000000000002
MSE: 17.093631608088387
---
Parameters: C=0.05, mu=1.3000000000000003
MSE: 17.130255660354422
---
Parameters: C=0.05, mu=1.4000000000000004
MSE: 17.161839843949558
---
Parameters: C=0.05, mu=1.5000000000000004
MSE: 17.255699274402648
---
Parameters: C=0.05, mu=1.6000000000000005
MSE: 17.350902710849955
---
Parameters: C=0.05, mu=1.7000000000000006
MSE: 17.437395856388385
---
Parameters: C=0.060000000000000005, mu=1.0
MSE: 14.177966782949877
---
Parameters: C=0.060000000000000005, mu=1.1
MSE: 14.152747129613243
---
Parameters: C=0.060000000000000005, mu=1.2000000000000002
MSE: 14.188428667037913
---
Parameters: C=0.060000000000000005, mu=1.3000000000000003
MSE: 14.225430548566804
---
Parameters: C=0.060000000000000005, mu=1.4000000000000004
MSE: 14.256408268762282
---
Parameters: C=0.060000000000000005, mu=1.5000000000000004
MSE: 14.3553

In [92]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w': np.arange(1, 2.1, 0.2),
    'b': np.arange(0.9, 1.7, 0.2)
}

kf = KFold(n_splits=4)
mse_list = []

for w in param_grid['w']:
    for b in param_grid['b']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test =x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            y_pred = w * x_train + b
            mse = mean_squared_error(y_train, y_pred)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((w, b, avg_mse))

for params in mse_list:
    w, b, mse = params
    print('Parameters: w={}, b={}'.format(w, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w, best_b, best_mse = best_params
print('Best parameters: w={}, b={}'.format(best_w, best_b))
print('Best MSE: {}'.format(best_mse))
w = best_w
b = best_b
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: w=1.0, b=0.9
MSE: 30.9805921875
---
Parameters: w=1.0, b=1.1
MSE: 29.11246718750001
---
Parameters: w=1.0, b=1.3000000000000003
MSE: 27.324342187499997
---
Parameters: w=1.0, b=1.5000000000000002
MSE: 25.616217187500006
---
Parameters: w=1.2, b=0.9
MSE: 21.693314500000003
---
Parameters: w=1.2, b=1.1
MSE: 20.169114500000006
---
Parameters: w=1.2, b=1.3000000000000003
MSE: 18.724914500000004
---
Parameters: w=1.2, b=1.5000000000000002
MSE: 17.3607145
---
Parameters: w=1.4, b=0.9
MSE: 14.336956437500005
---
Parameters: w=1.4, b=1.1
MSE: 13.156681437500007
---
Parameters: w=1.4, b=1.3000000000000003
MSE: 12.056406437500005
---
Parameters: w=1.4, b=1.5000000000000002
MSE: 11.036131437500003
---
Parameters: w=1.5999999999999999, b=0.9
MSE: 8.911518000000003
---
Parameters: w=1.5999999999999999, b=1.1
MSE: 8.075168000000003
---
Parameters: w=1.5999999999999999, b=1.3000000000000003
MSE: 7.318818000000002
---
Parameters: w=1.5999999999999999, b=1.5000000000000002
MSE: 6.6424680000

In [97]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w': np.arange(2.1, 3.1, 0.1),
    'b': np.arange(1.1, 1.4, 0.1)
}

kf = KFold(n_splits=4)
mse_list = []

for w in param_grid['w']:
    for b in param_grid['b']:
        kf_mse = []
        for train_index, test_index in kf.split(x):
            x_train, x_test =x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            y_pred = w * x_train + b
            mse = mean_squared_error(y_train, y_pred)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((w, b, avg_mse))

for params in mse_list:
    w, b, mse = params
    print('Parameters: w={}, b={}'.format(w, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w, best_b, best_mse = best_params
print('Best parameters: w={}, b={}'.format(best_w, best_b))
print('Best MSE: {}'.format(best_mse))
w = best_w
b = best_b
t=w * X_test + b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w=', w, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: w=2.1, b=1.1
MSE: 3.8191577656249986
---
Parameters: w=2.1, b=1.2000000000000002
MSE: 3.860889015625
---
Parameters: w=2.1, b=1.3000000000000003
MSE: 3.9226202656250004
---
Parameters: w=2.2, b=1.1
MSE: 4.416145437500001
---
Parameters: w=2.2, b=1.2000000000000002
MSE: 4.543857937500003
---
Parameters: w=2.2, b=1.3000000000000003
MSE: 4.691570437500002
---
Parameters: w=2.3000000000000003, b=1.1
MSE: 5.495863015625004
---
Parameters: w=2.3000000000000003, b=1.2000000000000002
MSE: 5.709556765625002
---
Parameters: w=2.3000000000000003, b=1.3000000000000003
MSE: 5.943250515625004
---
Parameters: w=2.4000000000000004, b=1.1
MSE: 7.058310500000006
---
Parameters: w=2.4000000000000004, b=1.2000000000000002
MSE: 7.357985500000006
---
Parameters: w=2.4000000000000004, b=1.3000000000000003
MSE: 7.677660500000007
---
Parameters: w=2.5000000000000004, b=1.1
MSE: 9.103487890625011
---
Parameters: w=2.5000000000000004, b=1.2000000000000002
MSE: 9.48914414062501
---
Parameters: w=2.500

In [98]:
import re
import numpy as np
import pandas as pd
aqi = pd.read_excel('SH-air-quality.xlsx', encoding='UTF-8', header=0)
print(aqi.head(n=5))

          日期             SO2分指数              NO2分指数                  AQI  \
0 2023-11-19  \mathcal{L}(8,10)  \mathcal{L}(87,89)   \mathcal{L}(87,89)   
1 2023-11-18  \mathcal{L}(8,10)  \mathcal{L}(59,61)   \mathcal{L}(59,61)   
2 2023-11-17   \mathcal{L}(7,9)  \mathcal{L}(47,49)   \mathcal{L}(88,90)   
3 2023-11-16   \mathcal{L}(7,9)  \mathcal{L}(63,65)   \mathcal{L}(88,90)   
4 2023-11-15  \mathcal{L}(9,11)  \mathcal{L}(91,93)  \mathcal{L}(98,100)   

              PM2.5分指数  
0   \mathcal{L}(58,60)  
1   \mathcal{L}(34,36)  
2   \mathcal{L}(39,41)  
3   \mathcal{L}(88,90)  
4  \mathcal{L}(98,100)  


In [99]:
import re
import numpy as np
import pandas as pd
pattern = r"\(([\d.,]+)\)"
def extract_and_calculate(cell):
    match = re.search(pattern, cell)
    if match:
        extracted_data = match.group(1).split(',')
        extracted_data = [float(value) for value in extracted_data]
        return np.mean(extracted_data)
    else:
        return None
df = pd.DataFrame()

# 从原始 DataFrame 中提取数据并计算均值
df['日期'] = aqi['日期']
df['SO2'] = aqi['SO2分指数'].apply(extract_and_calculate)
df['NO2'] = aqi['NO2分指数'].apply(extract_and_calculate)
df['AQI指数'] = aqi['AQI'].apply(extract_and_calculate)
df['PM2.5'] = aqi['PM2.5分指数'].apply(extract_and_calculate)
print(df)

            日期   SO2   NO2  AQI指数  PM2.5
0   2023-11-19   9.0  88.0   88.0   59.0
1   2023-11-18   9.0  60.0   60.0   35.0
2   2023-11-17   8.0  48.0   89.0   40.0
3   2023-11-16   8.0  64.0   89.0   89.0
4   2023-11-15  10.0  92.0   99.0   99.0
..         ...   ...   ...    ...    ...
373 2022-11-23   6.0  42.0   42.0   22.0
374 2022-11-22   6.0  43.0   48.0   48.0
375 2022-11-21   6.0  59.0   59.0   58.0
376 2022-11-20   6.0  48.0   65.0   65.0
377 2022-11-19   6.0  58.0   58.0   39.0

[378 rows x 5 columns]


In [100]:
Y = df['AQI指数']
Y=Y.values
X = df[['PM2.5', 'NO2']]
X=X.values
X,Y

(array([[ 59.,  88.],
        [ 35.,  60.],
        [ 40.,  48.],
        [ 89.,  64.],
        [ 99.,  92.],
        [ 48.,  69.],
        [ 30.,  40.],
        [ 36.,  30.],
        [ 15.,  23.],
        [ 23.,  22.],
        [ 55.,  63.],
        [ 35.,  49.],
        [ 33.,  49.],
        [ 29.,  30.],
        [ 29.,  30.],
        [ 15.,  20.],
        [ 23.,  34.],
        [ 23.,  34.],
        [ 36.,  44.],
        [ 19.,  28.],
        [ 28.,  38.],
        [ 28.,  42.],
        [ 25.,  38.],
        [ 22.,  40.],
        [ 16.,  50.],
        [ 16.,  50.],
        [ 57.,  54.],
        [ 57.,  62.],
        [ 62.,  68.],
        [ 28.,  47.],
        [ 18.,  40.],
        [ 23.,  45.],
        [ 22.,  47.],
        [ 22.,  35.],
        [ 67.,  58.],
        [ 46.,  40.],
        [ 65.,  58.],
        [ 87.,  72.],
        [ 82.,  68.],
        [ 82.,  68.],
        [ 70.,  64.],
        [ 25.,  39.],
        [ 29.,  42.],
        [ 26.,  37.],
        [ 48.,  38.],
        [ 

In [101]:
from sklearn.model_selection import train_test_split, GridSearchCV
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape 

((302, 2), (302,), (76, 2), (76,))

In [102]:
x = X_train
y = Y_train
X_test = X_test

In [103]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(0.00014, 0.00030, 0.00002),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(0.003, 0.007, 0.001),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for mu in param_grid['mu']:  # Replace 'epsilon' with 'mu'
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))


for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=0.00014, mu=0.003
MSE: 563.709970214932
---
Parameters: C=0.00014, mu=0.004
MSE: 563.7100016150102
---
Parameters: C=0.00014, mu=0.005
MSE: 563.7104920287004
---
Parameters: C=0.00014, mu=0.006
MSE: 563.710728109487
---
Parameters: C=0.00015999999999999999, mu=0.003
MSE: 549.0025194811589
---
Parameters: C=0.00015999999999999999, mu=0.004
MSE: 549.0026369285538
---
Parameters: C=0.00015999999999999999, mu=0.005
MSE: 548.999448188363
---
Parameters: C=0.00015999999999999999, mu=0.006
MSE: 549.0001998066133
---
Parameters: C=0.00017999999999999998, mu=0.003
MSE: 539.104504120655
---
Parameters: C=0.00017999999999999998, mu=0.004
MSE: 539.1077085054314
---
Parameters: C=0.00017999999999999998, mu=0.005
MSE: 539.1058721196838
---
Parameters: C=0.00017999999999999998, mu=0.006
MSE: 539.1043446152333
---
Parameters: C=0.00019999999999999998, mu=0.003
MSE: 534.0252657147487
---
Parameters: C=0.00019999999999999998, mu=0.004
MSE: 534.0248590006775
---
Parameters: C=0.000199999999

In [107]:
#### HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(0.00022, 0.00024, 0.00002),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(0.003, 0.004, 0.001),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for mu in param_grid['mu']:  # Replace 'epsilon' with 'mu'
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))


for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=0.00022, mu=0.003
MSE: 533.7609274358989
---
Best parameters: C=0.00022, mu=0.003
Best MSE: 533.7609274358989
w1= 0.6388800101367097 w2= 0.3152050014077179 b= 28.833526182546628 msee= 409.6948889740125 Rmse= 20.240921149345265


In [104]:
#ESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(0.0001, 0.0002, 0.00002),
    'epsilon': np.arange(0.5, 0.58, 0.02)
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = epsilon / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:  
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C,epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C,epsilon, avg_mse))


for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C, epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=0.0001, epsilon=0.5
MSE: 607.5901420648281
---
Parameters: C=0.0001, epsilon=0.52
MSE: 607.586122244416
---
Parameters: C=0.0001, epsilon=0.54
MSE: 607.5809178775511
---
Parameters: C=0.0001, epsilon=0.56
MSE: 607.5844545903485
---
Parameters: C=0.00012, epsilon=0.5
MSE: 583.237869673588
---
Parameters: C=0.00012, epsilon=0.52
MSE: 583.2387980608999
---
Parameters: C=0.00012, epsilon=0.54
MSE: 583.2381981559085
---
Parameters: C=0.00012, epsilon=0.56
MSE: 583.2384525503094
---
Parameters: C=0.00014, epsilon=0.5
MSE: 563.7106762688768
---
Parameters: C=0.00014, epsilon=0.52
MSE: 563.7142932366417
---
Parameters: C=0.00014, epsilon=0.54
MSE: 563.7111418820739
---
Parameters: C=0.00014, epsilon=0.56
MSE: 563.710584600523
---
Parameters: C=0.00015999999999999999, epsilon=0.5
MSE: 548.9996253209476
---
Parameters: C=0.00015999999999999999, epsilon=0.52
MSE: 549.001250166423
---
Parameters: C=0.00015999999999999999, epsilon=0.54
MSE: 548.9999215661242
---
Parameters: C=0.000159

In [105]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w1': np.arange(1.1, 3.1, 0.2),
    'w2': np.arange(0.2, 1.8, 0.2),
    'b': np.arange(2, 20, 2)
}

kf = KFold(n_splits=5)
mse_list = []

for w1 in param_grid['w1']:
    for w2 in param_grid['w2']:
        for b in param_grid['b']:
            kf_mse = []
            for train_index, test_index in kf.split(X):
                x_train, x_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]
                y_pred = w1 * x_train[:, 0] + w2 * x_train[:, 1] + b
                mse = mean_squared_error(y_train, y_pred)
                kf_mse.append(mse)
            avg_mse = np.mean(kf_mse)
            mse_list.append((w1, w2, b, avg_mse))

for params in mse_list:
    w1, w2, b, mse = params
    print('Parameters: w1={}, w2={}, b={}'.format(w1, w2, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w1, best_w2, best_b, best_mse = best_params
print('Best parameters: w1={}, w2={}, b={}'.format(best_w1, best_w2, best_b))
print('Best MSE: {}'.format(best_mse))

# 训练最终模型
w1 = best_w1
w2 = best_w2
b = best_b
final_model = lambda x: w1 * x[:, 0] + w2 * x[:, 1] + b

# 进行预测
y_pred = final_model(X_test)
mse = mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)
print('MSE:', mse)
print('RMSE:', rmse)

Parameters: w1=1.1, w2=0.2, b=2
MSE: 730.6264291303302
---
Parameters: w1=1.1, w2=0.2, b=4
MSE: 681.2077575678097
---
Parameters: w1=1.1, w2=0.2, b=6
MSE: 639.7890860052892
---
Parameters: w1=1.1, w2=0.2, b=8
MSE: 606.3704144427687
---
Parameters: w1=1.1, w2=0.2, b=10
MSE: 580.9517428802483
---
Parameters: w1=1.1, w2=0.2, b=12
MSE: 563.5330713177278
---
Parameters: w1=1.1, w2=0.2, b=14
MSE: 554.1143997552074
---
Parameters: w1=1.1, w2=0.2, b=16
MSE: 552.6957281926868
---
Parameters: w1=1.1, w2=0.2, b=18
MSE: 559.2770566301664
---
Parameters: w1=1.1, w2=0.4, b=2
MSE: 660.2512011015671
---
Parameters: w1=1.1, w2=0.4, b=4
MSE: 641.5330877538084
---
Parameters: w1=1.1, w2=0.4, b=6
MSE: 630.8149744060498
---
Parameters: w1=1.1, w2=0.4, b=8
MSE: 628.0968610582913
---
Parameters: w1=1.1, w2=0.4, b=10
MSE: 633.3787477105326
---
Parameters: w1=1.1, w2=0.4, b=12
MSE: 646.660634362774
---
Parameters: w1=1.1, w2=0.4, b=14
MSE: 667.9425210150154
---
Parameters: w1=1.1, w2=0.4, b=16
MSE: 697.2244076

Parameters: w1=2.6999999999999997, w2=0.8, b=10
MSE: 9145.705101654534
---
Parameters: w1=2.6999999999999997, w2=0.8, b=12
MSE: 9464.280126680216
---
Parameters: w1=2.6999999999999997, w2=0.8, b=14
MSE: 9790.855151705899
---
Parameters: w1=2.6999999999999997, w2=0.8, b=16
MSE: 10125.43017673158
---
Parameters: w1=2.6999999999999997, w2=0.8, b=18
MSE: 10468.00520175726
---
Parameters: w1=2.6999999999999997, w2=1.0, b=2
MSE: 9383.99516053592
---
Parameters: w1=2.6999999999999997, w2=1.0, b=4
MSE: 9701.270743776362
---
Parameters: w1=2.6999999999999997, w2=1.0, b=6
MSE: 10026.546327016806
---
Parameters: w1=2.6999999999999997, w2=1.0, b=8
MSE: 10359.821910257248
---
Parameters: w1=2.6999999999999997, w2=1.0, b=10
MSE: 10701.097493497693
---
Parameters: w1=2.6999999999999997, w2=1.0, b=12
MSE: 11050.373076738137
---
Parameters: w1=2.6999999999999997, w2=1.0, b=14
MSE: 11407.648659978579
---
Parameters: w1=2.6999999999999997, w2=1.0, b=16
MSE: 11772.924243219024
---
Parameters: w1=2.6999999

In [108]:
import re
import numpy as np
import pandas as pd
de = pd.read_excel('my-diabetes-datasets.xlsx', encoding='UTF-8', header=0)
print(de.head(n=5))

                                                 BMI  \
0  \mathcal{L}(-0.0383037934813115,0.161696206518...   
1  \mathcal{L}(-0.151474061238806,0.0485259387611...   
2  \mathcal{L}(-0.0555487866634059,0.144451213336...   
3  \mathcal{L}(-0.111595014505213,0.0884049854947...   
4  \mathcal{L}(-0.136384692204473,0.0636153077955...   

                                                  BP                    TA  
0  \mathcal{L}(-0.0781276450050442,0.121872354994...  \mathcal{L}(150,152)  
1  \mathcal{L}(-0.126327834717352,0.0736721652826...    \mathcal{L}(74,76)  
2  \mathcal{L}(-0.105670610554934,0.0943293894450...  \mathcal{L}(140,142)  
3  \mathcal{L}(-0.136656446798561,0.0633435532014...  \mathcal{L}(205,207)  
4  \mathcal{L}(-0.0781276450050442,0.121872354994...  \mathcal{L}(134,136)  


In [110]:
import re
import numpy as np
import pandas as pd

pattern = r"\\mathcal{L}\(([-\d.,]+),([-.\d,]+)\)"

def extract_and_calculate(cell):
    match = re.search(pattern, cell)
    if match:
        extracted_data = match.group(1).split(',')
        extracted_data = [float(value) for value in extracted_data]

        extracted_data += match.group(2).split(',')
        extracted_data = [float(value) for value in extracted_data]

        return np.mean(extracted_data)
    else:
        return None

df = pd.DataFrame()
df['BMI'] = de['BMI'].apply(extract_and_calculate)
df['BP'] = de['BP'].apply(extract_and_calculate)
df['TA'] = de['TA'].apply(extract_and_calculate)

In [115]:
Y = df['TA']
Y=Y.values
X = df[['BMI', 'BP']]
X=X.values
X,Y

(array([[ 6.16962065e-02,  2.18723550e-02],
        [-5.14740612e-02, -2.63278347e-02],
        [ 4.44512133e-02, -5.67061055e-03],
        [-1.15950145e-02, -3.66564468e-02],
        [-3.63846922e-02,  2.18723550e-02],
        [-4.06959405e-02, -1.94420933e-02],
        [-4.71628129e-02, -1.59992226e-02],
        [-1.89470584e-03,  6.66296740e-02],
        [ 6.16962065e-02, -4.00993175e-02],
        [ 3.90621530e-02, -3.32135761e-02],
        [-8.38084235e-02,  8.10087222e-03],
        [ 1.75059115e-02, -3.32135761e-02],
        [-2.88400077e-02, -9.11348125e-03],
        [-1.89470584e-03,  8.10087222e-03],
        [-2.56065715e-02, -1.25563519e-02],
        [-1.80618869e-02,  8.04011568e-02],
        [ 4.22955892e-02,  4.94153205e-02],
        [ 1.21168511e-02,  5.63010619e-02],
        [-1.05172024e-02, -3.66564468e-02],
        [-1.80618869e-02, -4.00993175e-02],
        [-5.68631216e-02, -4.35421882e-02],
        [-2.23731352e-02,  1.21513083e-03],
        [-4.05032999e-03, -9.113

In [116]:
from sklearn.model_selection import train_test_split, GridSearchCV
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, shuffle=True)
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape 

((331, 2), (331,), (111, 2), (111,))

In [117]:
x = X_train
y = Y_train
X_test = X_test

In [123]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(19, 30, 1),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(0.01, 0.08, 0.01),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for mu in param_grid['mu']:  # Replace 'epsilon' with 'mu'
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))


for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=19, mu=0.01
MSE: 5127.2073545842
---
Parameters: C=19, mu=0.02
MSE: 5127.207355307006
---
Parameters: C=19, mu=0.03
MSE: 5127.207360290716
---
Parameters: C=19, mu=0.04
MSE: 5127.207354077118
---
Parameters: C=19, mu=0.05
MSE: 5127.207353958665
---
Parameters: C=19, mu=0.060000000000000005
MSE: 5127.207353836731
---
Parameters: C=19, mu=0.06999999999999999
MSE: 5127.207354420765
---
Parameters: C=19, mu=0.08
MSE: 5127.207353897373
---
Parameters: C=20, mu=0.01
MSE: 5098.1563867780815
---
Parameters: C=20, mu=0.02
MSE: 5098.156386223971
---
Parameters: C=20, mu=0.03
MSE: 5098.156386828221
---
Parameters: C=20, mu=0.04
MSE: 5098.156384999228
---
Parameters: C=20, mu=0.05
MSE: 5098.156384571243
---
Parameters: C=20, mu=0.060000000000000005
MSE: 5098.156386678325
---
Parameters: C=20, mu=0.06999999999999999
MSE: 5098.156384993009
---
Parameters: C=20, mu=0.08
MSE: 5098.156385128636
---
Parameters: C=21, mu=0.01
MSE: 5069.3894422758385
---
Parameters: C=21, mu=0.02
MSE: 5069.3

In [124]:
#HSVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(29, 35, 1),  # C 参数范围为 0.1 到 1.0，步长为 0.1
    'mu': np.arange(0.05, 0.07, 0.01),  # mu 参数范围为 0.1 到 1.0，步长为 0.1
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, mu):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = mu / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for mu in param_grid['mu']:  # Replace 'epsilon' with 'mu'
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C, mu), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C, mu, avg_mse))


for params in mse_list:
    C, mu, mse = params
    print('Parameters: C={}, mu={}'.format(C, mu))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_mu, best_mse = best_params
print('Best parameters: C={}, mu={}'.format(best_C, best_mu))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=29, mu=0.05
MSE: 4849.478843642033
---
Parameters: C=29, mu=0.060000000000000005
MSE: 4849.478843895095
---
Parameters: C=29, mu=0.07
MSE: 4849.4788452790635
---
Parameters: C=30, mu=0.05
MSE: 4823.2681399300645
---
Parameters: C=30, mu=0.060000000000000005
MSE: 4823.268136577711
---
Parameters: C=30, mu=0.07
MSE: 4823.26813704963
---
Parameters: C=31, mu=0.05
MSE: 4797.341455870086
---
Parameters: C=31, mu=0.060000000000000005
MSE: 4797.34145619207
---
Parameters: C=31, mu=0.07
MSE: 4797.341455701137
---
Parameters: C=32, mu=0.05
MSE: 4771.698791686425
---
Parameters: C=32, mu=0.060000000000000005
MSE: 4771.698799140612
---
Parameters: C=32, mu=0.07
MSE: 4771.698801868099
---
Parameters: C=33, mu=0.05
MSE: 4746.340174980005
---
Parameters: C=33, mu=0.060000000000000005
MSE: 4746.34017238761
---
Parameters: C=33, mu=0.07
MSE: 4746.340174474429
---
Parameters: C=34, mu=0.05
MSE: 4721.2655705255065
---
Parameters: C=34, mu=0.060000000000000005
MSE: 4721.265570572686
---
Par

In [120]:
#ESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(18, 20, 1),
    'epsilon': np.arange(0.1, 0.5, 0.1)
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = epsilon / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:  
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C,epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C,epsilon, avg_mse))


for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C, epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=18, epsilon=0.1
MSE: 5156.5423468592335
---
Parameters: C=18, epsilon=0.2
MSE: 5156.542348553131
---
Parameters: C=18, epsilon=0.30000000000000004
MSE: 5156.542351509226
---
Parameters: C=18, epsilon=0.4
MSE: 5156.54234869499
---
Parameters: C=19, epsilon=0.1
MSE: 5127.207353842046
---
Parameters: C=19, epsilon=0.2
MSE: 5127.207354300854
---
Parameters: C=19, epsilon=0.30000000000000004
MSE: 5127.207353999898
---
Parameters: C=19, epsilon=0.4
MSE: 5127.207354532779
---
Best parameters: C=19, epsilon=0.1
Best MSE: 5127.207353842046
w1= 99.4432527089267 w2= 83.16368964844861 b= 152.54234745678275 msee= 5844.429363126842 Rmse= 76.44886763796336


In [125]:
#ESVR
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from scipy.optimize import minimize
import time
# 定义要搜索的参数范围和步长
param_grid = {
    'C': np.arange(19, 25, 1),
    'epsilon': np.arange(0.05, 0.1, 0.01)
}
# 假设你已经定义了 x 和 y 数据
kf = KFold(n_splits=5)  # 设置 K-折交叉验证的折数
mse_list = []  # 用于记录每个参数组合的均方误差

def fun(a, x, y, C, epsilon):
    a_i = a[:l]
    a_i_star = a[l:]
    term1 = 0
    d = np.dot(x, x.T)
    a = a_i - a_i_star
    for i in range(l):
        for j in range(l):
            #term1 = term1 + x[i] * x[j] * (a_i[i] - a_i_star[i]) * (a_i[j] - a_i_star[j])
            term1 = term1 + d[i, j] * a[i] * a[j]
            term2 = epsilon / (2 * C) * np.sum((a_i + a_i_star) ** 2)
            term3 = np.sum(y * (a_i - a_i_star))
            return 0.5 * term1 + term2 - term3
        
def cons(a):
    a_i = a[:l]
    a_i_star = a[l:]
    return np.sum(a_i - a_i_star)

for C in param_grid['C']:
    for epsilon in param_grid['epsilon']:  
        kf_mse = []
        for train_index, test_index in kf.split(x):
            start_time = time.time()
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            l = len(y_train)
            a = np.random.rand(1, 2 * l)
            cons_constraint = {'type': 'eq', 'fun': cons}
            bounds = [(0, C) for i in range(2 * l)]
            res = minimize(lambda a: fun(a, x_train, y_train, C,epsilon), x0=np.zeros(2 * l), bounds=bounds, constraints=cons_constraint)
            a = res.x
            a_i = a[:l]
            a_i_star = a[l:]
            w1 = np.sum(((a_i - a_i_star) * x_train[:, 0]))
            w2 = np.sum(((a_i - a_i_star) * x_train[:, 1]))
            m = sum(y_train - w1 * x_train[:, 0] - w2 * x_train[:, 1])
            b = m / l
            t = w1 * x_test[:, 0] + w2 * x_test[:, 1] + b
            mse = mean_squared_error(y_test, t)
            kf_mse.append(mse)
        avg_mse = np.mean(kf_mse)
        mse_list.append((C,epsilon, avg_mse))


for params in mse_list:
    C, epsilon, mse = params
    print('Parameters: C={}, epsilon={}'.format(C, epsilon))
    print('MSE: {}'.format(mse))
    print('---')
best_params = min(mse_list, key=lambda x: x[-1])
best_C, best_epsilon, best_mse = best_params
print('Best parameters: C={}, epsilon={}'.format(best_C, best_epsilon))
print('Best MSE: {}'.format(best_mse))
#预测
t=w1*X_test[:, 0]+w2*X_test[:, 1]+b
loss1 = mean_squared_error(Y_test, t)
loss3 = np.sqrt(loss1)
print('w1=', w1, 'w2=', w2, 'b=', b, 'msee=', loss1,'Rmse=', loss3)

Parameters: C=19, epsilon=0.05
MSE: 5127.207353958665
---
Parameters: C=19, epsilon=0.060000000000000005
MSE: 5127.207353836731
---
Parameters: C=19, epsilon=0.07
MSE: 5127.207353983837
---
Parameters: C=19, epsilon=0.08000000000000002
MSE: 5127.207353893626
---
Parameters: C=19, epsilon=0.09000000000000001
MSE: 5127.207354000556
---
Parameters: C=20, epsilon=0.05
MSE: 5098.156384571243
---
Parameters: C=20, epsilon=0.060000000000000005
MSE: 5098.156386678325
---
Parameters: C=20, epsilon=0.07
MSE: 5098.156384993009
---
Parameters: C=20, epsilon=0.08000000000000002
MSE: 5098.156385015989
---
Parameters: C=20, epsilon=0.09000000000000001
MSE: 5098.156385002127
---
Parameters: C=21, epsilon=0.05
MSE: 5069.389442602088
---
Parameters: C=21, epsilon=0.060000000000000005
MSE: 5069.389442352257
---
Parameters: C=21, epsilon=0.07
MSE: 5069.389442397087
---
Parameters: C=21, epsilon=0.08000000000000002
MSE: 5069.389442489764
---
Parameters: C=21, epsilon=0.09000000000000001
MSE: 5069.389442230

In [127]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w1': np.arange(100, 120, 2),
    'w2': np.arange(100, 120, 2),
    'b': np.arange(100, 120, 2)
}

kf = KFold(n_splits=5)
mse_list = []

for w1 in param_grid['w1']:
    for w2 in param_grid['w2']:
        for b in param_grid['b']:
            kf_mse = []
            for train_index, test_index in kf.split(X):
                x_train, x_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]
                y_pred = w1 * x_train[:, 0] + w2 * x_train[:, 1] + b
                mse = mean_squared_error(y_train, y_pred)
                kf_mse.append(mse)
            avg_mse = np.mean(kf_mse)
            mse_list.append((w1, w2, b, avg_mse))

for params in mse_list:
    w1, w2, b, mse = params
    print('Parameters: w1={}, w2={}, b={}'.format(w1, w2, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w1, best_w2, best_b, best_mse = best_params
print('Best parameters: w1={}, w2={}, b={}'.format(best_w1, best_w2, best_b))
print('Best MSE: {}'.format(best_mse))

# 训练最终模型
w1 = best_w1
w2 = best_w2
b = best_b
final_model = lambda x: w1 * x[:, 0] + w2 * x[:, 1] + b

# 进行预测
y_pred = final_model(X_test)
mse = mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)
print('MSE:', mse)
print('RMSE:', rmse)

Parameters: w1=100, w2=100, b=100
MSE: 7958.0076144348905
---
Parameters: w1=100, w2=100, b=102
MSE: 7753.470431074743
---
Parameters: w1=100, w2=100, b=104
MSE: 7556.933247714597
---
Parameters: w1=100, w2=100, b=106
MSE: 7368.3960643544515
---
Parameters: w1=100, w2=100, b=108
MSE: 7187.858880994305
---
Parameters: w1=100, w2=100, b=110
MSE: 7015.321697634157
---
Parameters: w1=100, w2=100, b=112
MSE: 6850.78451427401
---
Parameters: w1=100, w2=100, b=114
MSE: 6694.247330913864
---
Parameters: w1=100, w2=100, b=116
MSE: 6545.710147553718
---
Parameters: w1=100, w2=100, b=118
MSE: 6405.172964193571
---
Parameters: w1=100, w2=102, b=100
MSE: 7952.810802389316
---
Parameters: w1=100, w2=102, b=102
MSE: 7748.273630189731
---
Parameters: w1=100, w2=102, b=104
MSE: 7551.736457990148
---
Parameters: w1=100, w2=102, b=106
MSE: 7363.1992857905625
---
Parameters: w1=100, w2=102, b=108
MSE: 7182.662113590978
---
Parameters: w1=100, w2=102, b=110
MSE: 7010.124941391395
---
Parameters: w1=100, w2

MSE: 6454.180725541968
---
Parameters: w1=114, w2=116, b=118
MSE: 6313.643647106992
---
Parameters: w1=114, w2=118, b=100
MSE: 7861.475436353609
---
Parameters: w1=114, w2=118, b=102
MSE: 7656.938369079195
---
Parameters: w1=114, w2=118, b=104
MSE: 7460.401301804779
---
Parameters: w1=114, w2=118, b=106
MSE: 7271.864234530364
---
Parameters: w1=114, w2=118, b=108
MSE: 7091.327167255951
---
Parameters: w1=114, w2=118, b=110
MSE: 6918.7900999815365
---
Parameters: w1=114, w2=118, b=112
MSE: 6754.253032707122
---
Parameters: w1=114, w2=118, b=114
MSE: 6597.715965432706
---
Parameters: w1=114, w2=118, b=116
MSE: 6449.178898158292
---
Parameters: w1=114, w2=118, b=118
MSE: 6308.6418308838765
---
Parameters: w1=116, w2=100, b=100
MSE: 7899.950487080545
---
Parameters: w1=116, w2=100, b=102
MSE: 7695.413321595454
---
Parameters: w1=116, w2=100, b=104
MSE: 7498.876156110362
---
Parameters: w1=116, w2=100, b=106
MSE: 7310.3389906252705
---
Parameters: w1=116, w2=100, b=108
MSE: 7129.80182514017

In [128]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

param_grid = {
    'w1': np.arange(120, 130, 2),
    'w2': np.arange(120, 130, 2),
    'b': np.arange(120, 130, 2)
}

kf = KFold(n_splits=5)
mse_list = []

for w1 in param_grid['w1']:
    for w2 in param_grid['w2']:
        for b in param_grid['b']:
            kf_mse = []
            for train_index, test_index in kf.split(X):
                x_train, x_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]
                y_pred = w1 * x_train[:, 0] + w2 * x_train[:, 1] + b
                mse = mean_squared_error(y_train, y_pred)
                kf_mse.append(mse)
            avg_mse = np.mean(kf_mse)
            mse_list.append((w1, w2, b, avg_mse))

for params in mse_list:
    w1, w2, b, mse = params
    print('Parameters: w1={}, w2={}, b={}'.format(w1, w2, b))
    print('MSE: {}'.format(mse))
    print('---')

best_params = min(mse_list, key=lambda x: x[-1])
best_w1, best_w2, best_b, best_mse = best_params
print('Best parameters: w1={}, w2={}, b={}'.format(best_w1, best_w2, best_b))
print('Best MSE: {}'.format(best_mse))

# 训练最终模型
w1 = best_w1
w2 = best_w2
b = best_b
final_model = lambda x: w1 * x[:, 0] + w2 * x[:, 1] + b

# 进行预测
y_pred = final_model(X_test)
mse = mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)
print('MSE:', mse)
print('RMSE:', rmse)

Parameters: w1=120, w2=120, b=120
MSE: 6149.808751144858
---
Parameters: w1=120, w2=120, b=122
MSE: 6025.271701734151
---
Parameters: w1=120, w2=120, b=124
MSE: 5908.734652323444
---
Parameters: w1=120, w2=120, b=126
MSE: 5800.197602912737
---
Parameters: w1=120, w2=120, b=128
MSE: 5699.660553502029
---
Parameters: w1=120, w2=122, b=120
MSE: 6144.864615829546
---
Parameters: w1=120, w2=122, b=122
MSE: 6020.3275775794
---
Parameters: w1=120, w2=122, b=124
MSE: 5903.790539329255
---
Parameters: w1=120, w2=122, b=126
MSE: 5795.253501079111
---
Parameters: w1=120, w2=122, b=128
MSE: 5694.716462828966
---
Parameters: w1=120, w2=124, b=120
MSE: 6139.938580304384
---
Parameters: w1=120, w2=124, b=122
MSE: 6015.401553214801
---
Parameters: w1=120, w2=124, b=124
MSE: 5898.864526125218
---
Parameters: w1=120, w2=124, b=126
MSE: 5790.327499035636
---
Parameters: w1=120, w2=124, b=128
MSE: 5689.790471946053
---
Parameters: w1=120, w2=126, b=120
MSE: 6135.030644569376
---
Parameters: w1=120, w2=126

In [130]:
import numpy as np
from scipy.stats import friedmanchisquare

# 模拟三个模型的性能评估数据（使用随机数生成）
mse_model1 = [21.29, 79.59, 1.8, 1.5]  # 线性
mse_model2 = [20.26, 75.18, 0.70, 5.46]  # UE
mse_model3 = [20.24, 72.77, 0.588, 0.85]  # UH

# 将数据组合成二维数组
data = np.array([mse_model1, mse_model2, mse_model3])

# 对每个模型的性能指标进行排名
ranks = np.argsort(np.argsort(data, axis=0), axis=0) + 1

# 执行Friedman检验
statistic, p_value = friedmanchisquare(*ranks)
print("Friedman Test:")
print("Statistic:", statistic)
print("p-value:", p_value)

Friedman Test:
Statistic: 6.5
p-value: 0.03877420783172202


In [131]:
ranks

array([[3, 3, 3, 2],
       [2, 2, 2, 3],
       [1, 1, 1, 1]], dtype=int64)