In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from mgwr.gwr import GWR
from mgwr.sel_bw import Sel_BW
import numpy as np


  warn("geopandas not available. Some functionality will be disabled.")


In [6]:
# 加载数据
df = pd.read_csv('housing.csv')
X = df[['longitude', 'latitude', 'housing_median_age', 'total_rooms', 
        'total_bedrooms', 'population', 'households', 'median_income']]
y = df['median_house_value']

# 转换为 NumPy 数组（用于 GWR）
coords = X[['longitude', 'latitude']].values
X_gwr = X.drop(['longitude', 'latitude'], axis=1).values
y_gwr = y.values.reshape(-1, 1)

# 选择带宽
sel_bw = Sel_BW(coords, y_gwr, X_gwr)
bw = sel_bw.search()

# 拟合 GWR 模型
gwr_model = GWR(coords, y_gwr, X_gwr, bw)
gwr_results = gwr_model.fit()

# 获取 GWR 参数作为特征
gwr_features = gwr_results.params


In [7]:
# 将 GWR 生成的特征加入原始特征
X_combined = np.hstack([X_gwr, gwr_features])

# 转换为 PyTorch 张量
X_combined = torch.tensor(X_combined, dtype=torch.float32)
y = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)

# 数据标准化
def standardize(data):
    mean = torch.mean(data, dim=0)
    std = torch.std(data, dim=0)
    return (data - mean) / std, mean, std

X_combined, X_mean, X_std = standardize(X_combined)
y, y_mean, y_std = standardize(y)

# 划分数据集
dataset_size = len(X_combined)
train_size = int(0.7 * dataset_size)
test_size = dataset_size - train_size
indices = torch.randperm(dataset_size)

X_train = X_combined[indices[:train_size]]
y_train = y[indices[:train_size]]
X_test = X_combined[indices[train_size:]]
y_test = y[indices[train_size:]]

# 使用 GPU（如果可用）
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)


In [8]:
class ImprovedNet(nn.Module):
    def __init__(self, input_dim):
        super(ImprovedNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

input_dim = X_train.shape[1]
net = ImprovedNet(input_dim).to(device)


In [9]:
# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# 训练模型
epochs = 2000
for epoch in range(1, epochs + 1):
    net.train()
    optimizer.zero_grad()
    outputs = net(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print(f"Epoch {epoch}/{epochs}, Loss: {loss.item():.4f}")




Epoch 100/2000, Loss: 0.2482
Epoch 200/2000, Loss: 0.2104
Epoch 300/2000, Loss: 0.1985
Epoch 400/2000, Loss: 0.1835
Epoch 500/2000, Loss: 0.1769
Epoch 600/2000, Loss: 0.1684
Epoch 700/2000, Loss: 0.1700
Epoch 800/2000, Loss: 0.1611
Epoch 900/2000, Loss: 0.1549
Epoch 1000/2000, Loss: 0.1485
Epoch 1100/2000, Loss: 0.1469
Epoch 1200/2000, Loss: 0.1423
Epoch 1300/2000, Loss: 0.1411
Epoch 1400/2000, Loss: 0.1371
Epoch 1500/2000, Loss: 0.1374
Epoch 1600/2000, Loss: 0.1323
Epoch 1700/2000, Loss: 0.1328
Epoch 1800/2000, Loss: 0.1310
Epoch 1900/2000, Loss: 0.1255
Epoch 2000/2000, Loss: 0.1277
Standardized Test RMSE: 0.3362
Original Scale Test RMSE: 39569.0938


In [14]:
def calculate_r2(y_true, y_pred):
    ssr = torch.sum((y_true - y_pred) ** 2)  # 残差平方和
    sst = torch.sum((y_true - torch.mean(y_true)) ** 2)  # 总平方和
    r2 = 1 - ssr / sst
    return r2


In [18]:
# 训练集评估
net.eval()
with torch.no_grad():
    y_pred_train = net(X_train)
    train_rmse = calculate_rmse(y_train, y_pred_train)
    train_r2 = calculate_r2(y_train, y_pred_train)

    y_pred_test = net(X_test)
    test_rmse = calculate_rmse(y_test, y_pred_test)
    test_r2 = calculate_r2(y_test, y_pred_test)

# 还原到原始尺度
restored_train_rmse = train_rmse * y_std.to(device)
restored_test_rmse = test_rmse * y_std.to(device)

print(f"Standardized Train RMSE: {train_rmse.item():.4f}")
print(f"Standardized Test RMSE: {test_rmse.item():.4f}")
print(f"Original Scale Train RMSE: {restored_train_rmse.item():.4f}")
print(f"Original Scale Test RMSE: {restored_test_rmse.item():.4f}")
print(f"Train R²: {train_r2.item():.4f}")
print(f"Test R²: {test_r2.item():.4f}")


Standardized Train RMSE: 0.2708
Standardized Test RMSE: 0.3362
Original Scale Train RMSE: 31865.7520
Original Scale Test RMSE: 39569.0938
Train R²: 0.9273
Test R²: 0.8847
