In [8]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 读取California housing数据集
housing = fetch_california_housing()
X = housing.data
y = housing.target

# 拆分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 标准化特征
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 初始化SGD回归器
sgd_regressor = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)

# 训练模型
sgd_regressor.fit(X_train, y_train)

# 使用交叉验证评估模型
scores = cross_val_score(sgd_regressor, X_train, y_train, cv=5, scoring='r2')
print("Cross-validated R-squared scores:", scores)
print("Mean cross-validated R-squared score:", np.mean(scores))

# 在测试集上评估模型
test_score = sgd_regressor.score(X_test, y_test)
print("Test R-squared score:", test_score)


Cross-validated R-squared scores: [ 6.53463616e-01 -4.39721947e+01  6.12989050e-01 -1.16620158e+01
 -3.09276443e+06]
Mean cross-validated R-squared score: -618563.7590690053
Test R-squared score: 0.5798267665069695


In [10]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler

# 读取California housing数据集
housing = fetch_california_housing()
X = housing.data
y = housing.target

# 拆分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 标准化特征和目标变量
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
X_test = X_scaler.transform(X_test)
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

# 初始化SGD回归器
regressor = SGDRegressor(loss='squared_error', max_iter=1000, tol=1e-3, random_state=42)

# 使用交叉验证评估模型
scores = cross_val_score(regressor, X_train, y_train, cv=5)
print('Cross validation r-squared scores:', scores)
print('Average cross validation r-squared score:', np.mean(scores))

# 训练模型
regressor.fit(X_train, y_train)

# 在测试集上评估模型
test_score = regressor.score(X_test, y_test)
print('Test set r-squared score:', test_score)


Cross validation r-squared scores: [ 6.48287002e-01 -1.99866044e+01  6.12991302e-01 -1.01398360e+01
 -2.66338738e+06]
Average cross validation r-squared score: -532683.2485389799
Test set r-squared score: 0.5782791257511655
