In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import lightgbm as lgb
from lightgbm import log_evaluation

# Load the samples; change this path to the actual input file name.
df = pd.read_csv('训练集、验证集/南海数据小小.csv')

# Separate scalers for the coordinates and for the target, so that each can be
# mapped back to its original units independently later on.
scaler_coords = StandardScaler()  # longitude / latitude
scaler_value = StandardScaler()   # target column 'value'

# Standardize the columns in place: from here on `df` holds the scaled values,
# not the raw ones. Both scalers are fitted on the full data set (before any
# cross-validation split takes place).
df[['longitude', 'latitude']] = scaler_coords.fit_transform(
    df[['longitude', 'latitude']]
)
df['value'] = scaler_value.fit_transform(df[['value']])

# Model inputs (standardized longitude, latitude) and regression target.
X = df[['longitude', 'latitude']].to_numpy()
y = df['value'].to_numpy()

# 10-fold cross-validation; shuffling with a fixed seed keeps the splits
# reproducible between runs.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Out-of-fold buffers: every sample is written exactly once, by the fold in
# which it belongs to the validation part.
predictions = np.zeros_like(y)
actual_values = np.zeros_like(y)

# LightGBM parameters.
params = {
    'objective': 'regression',  # L2 regression task
    'metric': 'mse',            # evaluation metric: mean squared error (logged as "l2")
    'boosting_type': 'gbdt',    # gradient boosted decision trees
    'num_leaves': 31,           # maximum number of leaves per tree
    'learning_rate': 0.01,      # shrinkage rate
    'verbose': -1               # silence LightGBM's default output
}

# Number of boosting rounds. Passed to lgb.train() explicitly instead of being
# placed in `params` as 'n_estimators': in the native API that key is only an
# alias which overrides the function argument (recent LightGBM versions warn
# about it).
num_boost_round = 2000

# Log the validation metric every 100 rounds. The callback does not depend on
# the fold, so it is created once and shared by all folds.
callbacks = [log_evaluation(100)]

# Ten-fold cross-validation loop.
for fold, (train_index, val_index) in enumerate(kf.split(X), start=1):
    print(f"\n正在处理第 {fold} 折...")

    # Split into training and validation parts for this fold.
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Wrap the arrays in LightGBM datasets; the validation set references the
    # training set.
    train_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

    # Train for the full number of rounds (no early stopping is configured;
    # the validation set is only used for the logged metric).
    model = lgb.train(
        params,
        train_data,
        num_boost_round=num_boost_round,
        valid_sets=[val_data],
        callbacks=callbacks,
    )

    # Predict the held-out part. Booster.predict() already falls back to the
    # best iteration when one exists and otherwise uses all trees, so no
    # explicit num_iteration is needed.
    y_pred_val = model.predict(X_val)

    # Store out-of-fold predictions and targets at their original positions.
    predictions[val_index] = y_pred_val
    actual_values[val_index] = y_val

# Undo the standardization so that the exported values are back in the units
# of the input file. The value scaler expects 2-D input, hence the reshape to
# a single column.
coords_original = scaler_coords.inverse_transform(X)
actual_values_original = scaler_value.inverse_transform(actual_values.reshape(-1, 1))
predictions_original = scaler_value.inverse_transform(predictions.reshape(-1, 1))

# Assemble the result table: coordinates rounded to 6 decimals, actual and
# predicted values rounded to 2 decimals.
result_columns = {
    'longitude': coords_original[:, 0].round(6),
    'latitude': coords_original[:, 1].round(6),
    'actual_value': actual_values_original.ravel().round(2),
    'predicted_value': predictions_original.ravel().round(2),
}
df_result = pd.DataFrame(result_columns)

# Write the out-of-fold results to disk (without the index column).
df_result.to_csv('predictions-lightgbm-x.csv', index=False)
print("十折交叉验证完成，结果已保存至 'predictions-lightgbm-x.csv'")



正在处理第 1 折...
[100]	valid_0's l2: 0.317902
[200]	valid_0's l2: 0.137559
[300]	valid_0's l2: 0.080622
[400]	valid_0's l2: 0.0578948




[500]	valid_0's l2: 0.043758
[600]	valid_0's l2: 0.0383636
[700]	valid_0's l2: 0.0346046
[800]	valid_0's l2: 0.0321333
[900]	valid_0's l2: 0.0300714
[1000]	valid_0's l2: 0.0279151
[1100]	valid_0's l2: 0.0267806
[1200]	valid_0's l2: 0.0253911
[1300]	valid_0's l2: 0.0241363
[1400]	valid_0's l2: 0.0227993
[1500]	valid_0's l2: 0.0216578
[1600]	valid_0's l2: 0.0210301
[1700]	valid_0's l2: 0.020584
[1800]	valid_0's l2: 0.0202572
[1900]	valid_0's l2: 0.0198179
[2000]	valid_0's l2: 0.0192042

正在处理第 2 折...
[100]	valid_0's l2: 0.381871
[200]	valid_0's l2: 0.169833
[300]	valid_0's l2: 0.0978375
[400]	valid_0's l2: 0.0657856
[500]	valid_0's l2: 0.0522769
[600]	valid_0's l2: 0.045598
[700]	valid_0's l2: 0.0403896
[800]	valid_0's l2: 0.0367338
[900]	valid_0's l2: 0.0342966
[1000]	valid_0's l2: 0.0322345
[1100]	valid_0's l2: 0.0308507
[1200]	valid_0's l2: 0.0293045
[1300]	valid_0's l2: 0.0279106
[1400]	valid_0's l2: 0.0265607
[1500]	valid_0's l2: 0.0250442
[1600]	valid_0's l2: 0.0240496
[1700]	valid_