<h2>导入包</h2>

In [46]:
import logging
from datetime import datetime
import numpy as np
import pandas as pd
from pandas import Series
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from lightgbm import Dataset


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

<h2>读取数据</h2>

In [47]:
x_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_气象变量数据.csv", encoding="gbk")
y_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_实际功率数据.csv", encoding="gbk")
info_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_基本信息.csv", encoding="gbk")
x_test = pd.read_csv("../data/A榜-测试集_分布式光伏发电预测_气象变量数据.csv", encoding="gbk")
y_test = pd.read_csv("../data/submit_example.csv", encoding="utf-8")
info_test = pd.read_csv("../data/A榜-测试集_分布式光伏发电预测_基本信息.csv", encoding="gbk")

In [48]:
x_train = pd.merge(x_train, info_train[["光伏用户编号", "装机容量(kW)", "经度", "纬度"]], how="left", on="光伏用户编号")
x_train["时间"] = pd.to_datetime(x_train["时间"])
x_test = pd.merge(x_test, info_test[["光伏用户编号", "装机容量(kW)", "经度", "纬度"]], how="left", on="光伏用户编号")
x_test["时间"] = pd.to_datetime(x_test["时间"])

In [49]:
y_train = y_train.set_index(["光伏用户编号", "综合倍率", "时间"]).stack().reset_index().rename(columns={0:"target"})
y_train["level_3"] = y_train["level_3"].apply(lambda x: int(x[1:]))
y_train["时间"] = pd.to_datetime(y_train["时间"])
y_train["时间"] = y_train["时间"] + (y_train["level_3"] - 1) * 15 * pd.Timedelta(1, unit="minutes")
y_train = y_train.drop(columns=["level_3"])

y_test = y_test.set_index(["光伏用户编号", "综合倍率", "时间"]).stack().reset_index().rename(columns={0:"target"})
y_test["level_3"] = y_test["level_3"].apply(lambda x: int(x[1:]))
y_test["时间"] = pd.to_datetime(y_test["时间"])
y_test["时间"] = y_test["时间"] + (y_test["level_3"] - 1) * 15 * pd.Timedelta(1, unit="minutes")
y_test = y_test.drop(columns=["level_3"])

In [50]:
df_train = pd.merge(x_train, y_train, on=["光伏用户编号", "时间"], how="left")
df_test = pd.merge(x_test, y_test, on=["光伏用户编号", "时间"], how="left")

<h2>特征工程</h2>

<h3>时间特征</h3>

In [51]:
df_train["年"] = df_train["时间"].dt.year
df_train["月"] = df_train["时间"].dt.month
df_train["日"] = df_train["时间"].dt.day
df_train["分"] = df_train["时间"].dt.minute // 15 + df_train["时间"].dt.hour * 4
df_train = df_train.drop(columns=["时间"])

df_test["年"] = df_test["时间"].dt.year
df_test["月"] = df_test["时间"].dt.month
df_test["日"] = df_test["时间"].dt.day
df_test["分"] = df_test["时间"].dt.minute // 15 + df_test["时间"].dt.hour * 4
df_test = df_test.drop(columns=["时间"])

<h2>训练模型</h2>

<h3>评测指标</h3>

In [52]:
def score(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    return 1 / (1 + rmse)

<h3>lightgbm模型</h3>

In [53]:
params_lgb = {
    'learning_rate': 0.02,
    'boosting_type': 'gbdt',
    'objective': 'mse',
    'metric': 'mse',
    'num_leaves': 64,
    'verbose': -1,
    'seed': 42,
    'n_jobs': -1,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.9,
    'bagging_freq': 4,
}

In [54]:
model_lgb = dict()
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
for site, df in df_train.groupby("光伏用户编号"):
    model_lgb[site] = []
    df = df.drop(columns=["光伏用户编号"]).astype(np.float32)
    y = df.pop("target")
    df["辐照强度（J/m2） - 1"] = df["辐照强度（J/m2）"].shift(1)
    df["辐照强度（J/m2） - 2"] = df["辐照强度（J/m2）"].shift(2)
    mse = 0
    for fold, (train_index, val_index) in enumerate(kfold.split(df, y)):
        logging.info(f'############ site: {site} fold: {fold} ###########')
        x_train, x_val, y_train, y_val = df.iloc[train_index], df.iloc[val_index], y.iloc[train_index], y.iloc[val_index]
        trainset = Dataset(x_train, y_train)
        valset = Dataset(x_val, y_val)
        model = lgb.train(params_lgb, trainset, valid_sets=[trainset, valset], num_boost_round=5000, callbacks=[lgb.early_stopping(100), lgb.log_evaluation(1000)])
        model.save_model("../models/lgb_%s_%d.txt" % (site, fold))
        model_lgb[site].append(model)
        val_pred = Series(model.predict(x_val, num_iteration=model.best_iteration), index=y_val.index).fillna(0)
        mse += mean_squared_error(y_val.fillna(0), val_pred)
    rmse = np.sqrt(mse)
    score = 1 / (1 + rmse)
    logging.info(f"--------------site: {site} 本地分数 {score}---------------")

2024-03-07 16:06:21,512 : INFO : ############ site: f1 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00803111	valid_1's l2: 0.0191556
[2000]	training's l2: 0.00405073	valid_1's l2: 0.0181388
Early stopping, best iteration is:
[2481]	training's l2: 0.00310367	valid_1's l2: 0.0179793


2024-03-07 16:06:24,730 : INFO : ############ site: f1 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00775778	valid_1's l2: 0.0225184
[2000]	training's l2: 0.00392952	valid_1's l2: 0.0212867
[3000]	training's l2: 0.00230164	valid_1's l2: 0.020864
[4000]	training's l2: 0.00145357	valid_1's l2: 0.0206752
Early stopping, best iteration is:
[3918]	training's l2: 0.00150467	valid_1's l2: 0.0206726


2024-03-07 16:06:29,649 : INFO : ############ site: f1 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00769575	valid_1's l2: 0.0213012
[2000]	training's l2: 0.00392034	valid_1's l2: 0.0203329
Early stopping, best iteration is:
[2432]	training's l2: 0.00308345	valid_1's l2: 0.0201521


2024-03-07 16:06:32,803 : INFO : ############ site: f1 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00802172	valid_1's l2: 0.0205029
[2000]	training's l2: 0.00407982	valid_1's l2: 0.0192664
[3000]	training's l2: 0.00238085	valid_1's l2: 0.0188198
[4000]	training's l2: 0.00150448	valid_1's l2: 0.0186562
Early stopping, best iteration is:
[4133]	training's l2: 0.00141779	valid_1's l2: 0.0186454


2024-03-07 16:06:37,935 : INFO : ############ site: f1 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00789372	valid_1's l2: 0.021259
[2000]	training's l2: 0.00402555	valid_1's l2: 0.0202638
[3000]	training's l2: 0.00232599	valid_1's l2: 0.0198412
[4000]	training's l2: 0.00148755	valid_1's l2: 0.0196441
[5000]	training's l2: 0.000978489	valid_1's l2: 0.0195527
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.000978489	valid_1's l2: 0.0195527


2024-03-07 16:06:44,044 : INFO : --------------site: f1 本地分数 0.7625138165635004---------------
2024-03-07 16:06:44,051 : INFO : ############ site: f2 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0111805	valid_1's l2: 0.0305282
[2000]	training's l2: 0.00568553	valid_1's l2: 0.0287823
[3000]	training's l2: 0.00332686	valid_1's l2: 0.0283334
[4000]	training's l2: 0.00211449	valid_1's l2: 0.0281639
Early stopping, best iteration is:
[4000]	training's l2: 0.00211449	valid_1's l2: 0.0281639


2024-03-07 16:06:49,342 : INFO : ############ site: f2 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0113766	valid_1's l2: 0.0278696
[2000]	training's l2: 0.00583035	valid_1's l2: 0.0264384
Early stopping, best iteration is:
[2326]	training's l2: 0.00485793	valid_1's l2: 0.0262369


2024-03-07 16:06:52,584 : INFO : ############ site: f2 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0116268	valid_1's l2: 0.0271446
[2000]	training's l2: 0.00590123	valid_1's l2: 0.0257751
Early stopping, best iteration is:
[2298]	training's l2: 0.0049838	valid_1's l2: 0.025632


2024-03-07 16:06:56,075 : INFO : ############ site: f2 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0113677	valid_1's l2: 0.0284127
[2000]	training's l2: 0.00575984	valid_1's l2: 0.0271721
Early stopping, best iteration is:
[2611]	training's l2: 0.00410925	valid_1's l2: 0.0269449


2024-03-07 16:06:59,462 : INFO : ############ site: f2 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.01146	valid_1's l2: 0.0285717
[2000]	training's l2: 0.00587998	valid_1's l2: 0.0270704
[3000]	training's l2: 0.00345485	valid_1's l2: 0.0266228
Early stopping, best iteration is:
[3024]	training's l2: 0.00341505	valid_1's l2: 0.0266089


2024-03-07 16:07:03,371 : INFO : --------------site: f2 本地分数 0.7323351796520018---------------
2024-03-07 16:07:03,378 : INFO : ############ site: f3 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0181212	valid_1's l2: 0.044453
[2000]	training's l2: 0.00914514	valid_1's l2: 0.0424191
Early stopping, best iteration is:
[2764]	training's l2: 0.00603464	valid_1's l2: 0.0421553


2024-03-07 16:07:06,909 : INFO : ############ site: f3 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0180546	valid_1's l2: 0.049346
[2000]	training's l2: 0.00920192	valid_1's l2: 0.0466205
[3000]	training's l2: 0.00540619	valid_1's l2: 0.0459584
Early stopping, best iteration is:
[3337]	training's l2: 0.00459261	valid_1's l2: 0.0457892


2024-03-07 16:07:11,207 : INFO : ############ site: f3 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0176881	valid_1's l2: 0.0471162
[2000]	training's l2: 0.00899509	valid_1's l2: 0.0451358
[3000]	training's l2: 0.00523496	valid_1's l2: 0.0443624
Early stopping, best iteration is:
[2985]	training's l2: 0.00527647	valid_1's l2: 0.0443511


2024-03-07 16:07:15,039 : INFO : ############ site: f3 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0182577	valid_1's l2: 0.0466649
[2000]	training's l2: 0.00921837	valid_1's l2: 0.0436151
[3000]	training's l2: 0.00542633	valid_1's l2: 0.0427612
Early stopping, best iteration is:
[3512]	training's l2: 0.00426125	valid_1's l2: 0.0425144


2024-03-07 16:07:19,382 : INFO : ############ site: f3 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0183357	valid_1's l2: 0.0437887
[2000]	training's l2: 0.0093537	valid_1's l2: 0.0412993
[3000]	training's l2: 0.00551557	valid_1's l2: 0.0404918
Early stopping, best iteration is:
[3692]	training's l2: 0.00399797	valid_1's l2: 0.0402864


2024-03-07 16:07:23,981 : INFO : --------------site: f3 本地分数 0.683160486412926---------------
2024-03-07 16:07:23,988 : INFO : ############ site: f4 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00767941	valid_1's l2: 0.0227275
Early stopping, best iteration is:
[1880]	training's l2: 0.00419827	valid_1's l2: 0.0223048


2024-03-07 16:07:26,393 : INFO : ############ site: f4 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00790219	valid_1's l2: 0.0198582
[2000]	training's l2: 0.00409492	valid_1's l2: 0.0193569
Early stopping, best iteration is:
[2504]	training's l2: 0.00310001	valid_1's l2: 0.0192464


2024-03-07 16:07:29,547 : INFO : ############ site: f4 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00800608	valid_1's l2: 0.0199501
[2000]	training's l2: 0.0040629	valid_1's l2: 0.0190758
Early stopping, best iteration is:
[2476]	training's l2: 0.00312344	valid_1's l2: 0.0189403


2024-03-07 16:07:32,768 : INFO : ############ site: f4 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00751255	valid_1's l2: 0.0231869
Early stopping, best iteration is:
[1658]	training's l2: 0.00470729	valid_1's l2: 0.02282


2024-03-07 16:07:34,996 : INFO : ############ site: f4 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0079068	valid_1's l2: 0.0204847
[2000]	training's l2: 0.00407361	valid_1's l2: 0.0196433
Early stopping, best iteration is:
[2197]	training's l2: 0.00364034	valid_1's l2: 0.0195788


2024-03-07 16:07:37,851 : INFO : --------------site: f4 本地分数 0.7571368421931092---------------
2024-03-07 16:07:37,858 : INFO : ############ site: f5 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00898384	valid_1's l2: 0.021173
[2000]	training's l2: 0.00457321	valid_1's l2: 0.0190935
[3000]	training's l2: 0.0027404	valid_1's l2: 0.0185339
[4000]	training's l2: 0.00176736	valid_1's l2: 0.0182759
Early stopping, best iteration is:
[4350]	training's l2: 0.00153393	valid_1's l2: 0.0182168


2024-03-07 16:07:44,688 : INFO : ############ site: f5 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00877117	valid_1's l2: 0.0216919
[2000]	training's l2: 0.00450772	valid_1's l2: 0.0199106
[3000]	training's l2: 0.00270565	valid_1's l2: 0.0193545
Early stopping, best iteration is:
[3827]	training's l2: 0.00187488	valid_1's l2: 0.0191308


2024-03-07 16:07:50,705 : INFO : ############ site: f5 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0086359	valid_1's l2: 0.0223493
[2000]	training's l2: 0.00436198	valid_1's l2: 0.020688
[3000]	training's l2: 0.00260923	valid_1's l2: 0.0202172
[4000]	training's l2: 0.00170026	valid_1's l2: 0.0200412
[5000]	training's l2: 0.00115456	valid_1's l2: 0.0199336
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.00115456	valid_1's l2: 0.0199336


2024-03-07 16:07:58,448 : INFO : ############ site: f5 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00870721	valid_1's l2: 0.0210705
[2000]	training's l2: 0.00443706	valid_1's l2: 0.0194663
[3000]	training's l2: 0.00263467	valid_1's l2: 0.0190188
[4000]	training's l2: 0.00168232	valid_1's l2: 0.0188277
Early stopping, best iteration is:
[3968]	training's l2: 0.00170485	valid_1's l2: 0.0188235


2024-03-07 16:08:04,863 : INFO : ############ site: f5 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00867968	valid_1's l2: 0.0221048
[2000]	training's l2: 0.00453951	valid_1's l2: 0.0203178
[3000]	training's l2: 0.00271998	valid_1's l2: 0.0196476
[4000]	training's l2: 0.00176593	valid_1's l2: 0.0193599
[5000]	training's l2: 0.00118755	valid_1's l2: 0.0192027
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.00118755	valid_1's l2: 0.0192027


2024-03-07 16:08:12,630 : INFO : --------------site: f5 本地分数 0.7641059549916783---------------
2024-03-07 16:08:12,638 : INFO : ############ site: f6 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000380243	valid_1's l2: 0.00942411
[2000]	training's l2: 0.000205585	valid_1's l2: 0.00937284
[3000]	training's l2: 0.000124706	valid_1's l2: 0.00935476
Early stopping, best iteration is:
[3677]	training's l2: 9.27868e-05	valid_1's l2: 0.00934873


2024-03-07 16:08:18,233 : INFO : ############ site: f6 fold: 1 ###########


Training until validation scores don't improve for 100 rounds


2024-03-07 16:08:19,517 : INFO : ############ site: f6 fold: 2 ###########


Early stopping, best iteration is:
[701]	training's l2: 0.00155143	valid_1's l2: 0.00108704
Training until validation scores don't improve for 100 rounds


2024-03-07 16:08:20,435 : INFO : ############ site: f6 fold: 3 ###########


Early stopping, best iteration is:
[464]	training's l2: 0.00193049	valid_1's l2: 0.00124797
Training until validation scores don't improve for 100 rounds


2024-03-07 16:08:21,271 : INFO : ############ site: f6 fold: 4 ###########


Early stopping, best iteration is:
[408]	training's l2: 0.00207968	valid_1's l2: 0.00125839
Training until validation scores don't improve for 100 rounds


2024-03-07 16:08:22,651 : INFO : --------------site: f6 本地分数 0.8942916521754625---------------
2024-03-07 16:08:22,657 : INFO : ############ site: f7 fold: 0 ###########


Early stopping, best iteration is:
[764]	training's l2: 0.00143698	valid_1's l2: 0.00102992
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000505761	valid_1's l2: 0.00126661
[2000]	training's l2: 0.000264138	valid_1's l2: 0.0011948
[3000]	training's l2: 0.000161211	valid_1's l2: 0.00117383
Early stopping, best iteration is:
[3544]	training's l2: 0.00012697	valid_1's l2: 0.00116707


2024-03-07 16:08:28,022 : INFO : ############ site: f7 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000504764	valid_1's l2: 0.0012713
[2000]	training's l2: 0.000264835	valid_1's l2: 0.0012027
[3000]	training's l2: 0.000159256	valid_1's l2: 0.00117685
[4000]	training's l2: 0.000103695	valid_1's l2: 0.00116624
Early stopping, best iteration is:
[4344]	training's l2: 9.04048e-05	valid_1's l2: 0.00116384


2024-03-07 16:08:34,525 : INFO : ############ site: f7 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000504818	valid_1's l2: 0.00123756
[2000]	training's l2: 0.000261131	valid_1's l2: 0.00115918
[3000]	training's l2: 0.000158747	valid_1's l2: 0.00113908
Early stopping, best iteration is:
[3526]	training's l2: 0.000124292	valid_1's l2: 0.00113178


2024-03-07 16:08:39,855 : INFO : ############ site: f7 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000501024	valid_1's l2: 0.00134335
[2000]	training's l2: 0.000256763	valid_1's l2: 0.00127638
[3000]	training's l2: 0.000155834	valid_1's l2: 0.00125443
Early stopping, best iteration is:
[3633]	training's l2: 0.000117702	valid_1's l2: 0.00124782


2024-03-07 16:08:45,351 : INFO : ############ site: f7 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000502082	valid_1's l2: 0.00121278
[2000]	training's l2: 0.000264961	valid_1's l2: 0.00113976
Early stopping, best iteration is:
[2450]	training's l2: 0.000207822	valid_1's l2: 0.00112478


2024-03-07 16:08:49,173 : INFO : --------------site: f7 本地分数 0.9290320477716312---------------
2024-03-07 16:08:49,181 : INFO : ############ site: f8 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0104899	valid_1's l2: 0.0297815
Early stopping, best iteration is:
[1762]	training's l2: 0.00623454	valid_1's l2: 0.0291577


2024-03-07 16:08:51,465 : INFO : ############ site: f8 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0110168	valid_1's l2: 0.0285787
[2000]	training's l2: 0.00568832	valid_1's l2: 0.0274389
Early stopping, best iteration is:
[2270]	training's l2: 0.00489983	valid_1's l2: 0.0272967


2024-03-07 16:08:54,430 : INFO : ############ site: f8 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0110603	valid_1's l2: 0.0266512
Early stopping, best iteration is:
[1648]	training's l2: 0.00705152	valid_1's l2: 0.0260475


2024-03-07 16:08:56,563 : INFO : ############ site: f8 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0106282	valid_1's l2: 0.0300034
Early stopping, best iteration is:
[1506]	training's l2: 0.00740116	valid_1's l2: 0.0292201


2024-03-07 16:08:58,654 : INFO : ############ site: f8 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0108086	valid_1's l2: 0.0294862
[2000]	training's l2: 0.00557813	valid_1's l2: 0.0287236
Early stopping, best iteration is:
[2224]	training's l2: 0.00493717	valid_1's l2: 0.0286183


2024-03-07 16:09:01,633 : INFO : --------------site: f8 本地分数 0.7274736961673484---------------
2024-03-07 16:09:01,643 : INFO : ############ site: f9 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000476966	valid_1's l2: 0.00121215
[2000]	training's l2: 0.000245938	valid_1's l2: 0.00112331
[3000]	training's l2: 0.000148161	valid_1's l2: 0.00109573
[4000]	training's l2: 9.76306e-05	valid_1's l2: 0.00108707
Early stopping, best iteration is:
[4592]	training's l2: 7.6756e-05	valid_1's l2: 0.00108274


2024-03-07 16:09:08,636 : INFO : ############ site: f9 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000499609	valid_1's l2: 0.0011886
[2000]	training's l2: 0.000258536	valid_1's l2: 0.00110328
[3000]	training's l2: 0.0001558	valid_1's l2: 0.00107738
[4000]	training's l2: 0.000101534	valid_1's l2: 0.00106544
Early stopping, best iteration is:
[4453]	training's l2: 8.51112e-05	valid_1's l2: 0.00106288


2024-03-07 16:09:15,231 : INFO : ############ site: f9 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000491022	valid_1's l2: 0.00114674
[2000]	training's l2: 0.000257232	valid_1's l2: 0.00107469
[3000]	training's l2: 0.000156059	valid_1's l2: 0.00105058
[4000]	training's l2: 0.000101486	valid_1's l2: 0.00103882
Early stopping, best iteration is:
[3967]	training's l2: 0.000102753	valid_1's l2: 0.00103855


2024-03-07 16:09:21,152 : INFO : ############ site: f9 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000488141	valid_1's l2: 0.00113801
[2000]	training's l2: 0.000252126	valid_1's l2: 0.00105856
[3000]	training's l2: 0.000152363	valid_1's l2: 0.00103845
[4000]	training's l2: 9.9089e-05	valid_1's l2: 0.00102983
[5000]	training's l2: 6.73442e-05	valid_1's l2: 0.001026
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 6.73442e-05	valid_1's l2: 0.001026


2024-03-07 16:09:28,447 : INFO : ############ site: f9 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000487253	valid_1's l2: 0.00116986
[2000]	training's l2: 0.000253374	valid_1's l2: 0.00109423
[3000]	training's l2: 0.000152495	valid_1's l2: 0.0010716
Early stopping, best iteration is:
[3763]	training's l2: 0.000108871	valid_1's l2: 0.00106339


2024-03-07 16:09:34,065 : INFO : --------------site: f9 本地分数 0.9322972820202412---------------


<h2>预测</h2>

In [55]:
for site, df in df_test.groupby("光伏用户编号"):
    df = df.drop(columns=["光伏用户编号"]).astype(np.float32)
    y = df.pop("target")
    df["辐照强度（J/m2） - 1"] = df["辐照强度（J/m2）"].shift(1)
    df["辐照强度（J/m2） - 2"] = df["辐照强度（J/m2）"].shift(2)
    y_pred = np.zeros((df.shape[0], ))
    for model in model_lgb[site]:
        y_pred += model.predict(df, num_iteration=model.best_iteration)
    y_pred = y_pred / kfold.n_splits
    df_test.loc[df_test["光伏用户编号"] == site, "target"] = y_pred

In [56]:
df_test = df_test[["光伏用户编号", "综合倍率", "年", "月", "日", "分", "target"]]
df_test["时间"] = df_test["年"].astype(str) + "-" + df_test["月"].astype(str) + "-" + df_test["日"].astype(str) + " 0:00"
df_test["分"] = "p" + (df_test["分"] + 1).astype(str)
df_test = df_test.drop(columns=["年", "月", "日"])

In [57]:
result = pd.pivot(df_test, index=["光伏用户编号", "综合倍率", "时间"], columns="分", values="target").reset_index()
result = result[result["综合倍率"].notnull()]
result["综合倍率"] = result["综合倍率"].astype(int)

In [58]:
result.to_csv("../data/%s.csv" % datetime.now().strftime("%Y%m%d_%H%M%S"), encoding="utf-8", index=False)