<h2>导入包</h2>

In [58]:
import logging
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from lightgbm import Dataset


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

<h2>读取数据</h2>

In [59]:
x_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_气象变量数据.csv", encoding="gbk")
y_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_实际功率数据.csv", encoding="gbk")
info_train = pd.read_csv("../data/A榜-训练集_分布式光伏发电预测_基本信息.csv", encoding="gbk")
x_test = pd.read_csv("../data/A榜-测试集_分布式光伏发电预测_气象变量数据.csv", encoding="gbk")
y_test = pd.read_csv("../data/submit_example.csv", encoding="utf-8")
info_test = pd.read_csv("../data/A榜-测试集_分布式光伏发电预测_基本信息.csv", encoding="gbk")

In [60]:
x_train = pd.merge(x_train, info_train[["光伏用户编号", "装机容量(kW)", "经度", "纬度"]], how="left", on="光伏用户编号")
x_train["时间"] = pd.to_datetime(x_train["时间"])
x_test = pd.merge(x_test, info_test[["光伏用户编号", "装机容量(kW)", "经度", "纬度"]], how="left", on="光伏用户编号")
x_test["时间"] = pd.to_datetime(x_test["时间"])

In [61]:
y_train = y_train.set_index(["光伏用户编号", "综合倍率", "时间"]).stack().reset_index().rename(columns={0:"target"})
y_train["level_3"] = y_train["level_3"].apply(lambda x: int(x[1:]))
y_train["时间"] = pd.to_datetime(y_train["时间"])
y_train["时间"] = y_train["时间"] + (y_train["level_3"] - 1) * 15 * pd.Timedelta(1, unit="minutes")
y_train = y_train.drop(columns=["level_3"])

y_test = y_test.set_index(["光伏用户编号", "综合倍率", "时间"]).stack().reset_index().rename(columns={0:"target"})
y_test["level_3"] = y_test["level_3"].apply(lambda x: int(x[1:]))
y_test["时间"] = pd.to_datetime(y_test["时间"])
y_test["时间"] = y_test["时间"] + (y_test["level_3"] - 1) * 15 * pd.Timedelta(1, unit="minutes")
y_test = y_test.drop(columns=["level_3"])

In [62]:
df_train = pd.merge(x_train, y_train, on=["光伏用户编号", "时间"], how="left")
df_test = pd.merge(x_test, y_test, on=["光伏用户编号", "时间"], how="left")

<h2>特征工程</h2>

<h3>时间特征</h3>

In [63]:
df_train["年"] = df_train["时间"].dt.year
df_train["月"] = df_train["时间"].dt.month
df_train["日"] = df_train["时间"].dt.day
df_train["分"] = df_train["时间"].dt.minute // 15 + df_train["时间"].dt.hour * 4
df_train = df_train.drop(columns=["时间"])

df_test["年"] = df_test["时间"].dt.year
df_test["月"] = df_test["时间"].dt.month
df_test["日"] = df_test["时间"].dt.day
df_test["分"] = df_test["时间"].dt.minute // 15 + df_test["时间"].dt.hour * 4
df_test = df_test.drop(columns=["时间"])

<h2>训练模型</h2>

<h3>评测指标</h3>

In [64]:
def score(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    return 1 / (1 + rmse)

<h3>lightgbm模型</h3>

In [65]:
params_lgb = {
    'learning_rate': 0.02,
    'boosting_type': 'gbdt',
    'objective': 'mse',
    'metric': 'mse',
    'num_leaves': 64,
    'verbose': -1,
    'seed': 42,
    'n_jobs': -1,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.9,
    'bagging_freq': 4,
}

In [66]:
model_lgb = dict()
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
for site, df in df_train.groupby("光伏用户编号"):
    model_lgb[site] = []
    df = df.drop(columns=["光伏用户编号"]).astype(np.float32)
    y = df.pop("target")
    df["辐照强度（J/m2） - 1"] = df["辐照强度（J/m2）"].shift(1)
    for fold, (train_index, val_index) in enumerate(kfold.split(df, y)):
        logging.info(f'############ site: {site} fold: {fold} ###########')
        x_train, x_val, y_train, y_val = df.iloc[train_index], df.iloc[val_index], y.iloc[train_index], y.iloc[val_index]
        trainset = Dataset(x_train, y_train)
        valset = Dataset(x_val, y_val)
        model = lgb.train(params_lgb, trainset, valid_sets=[trainset, valset], num_boost_round=5000, callbacks=[lgb.early_stopping(100), lgb.log_evaluation(1000)])
        model.save_model("../models/lgb_%s_%d.txt" % (site, fold))
        model_lgb[site].append(model)

2024-03-07 12:18:21,236 : INFO : ############ site: f1 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00820665	valid_1's l2: 0.0192935
[2000]	training's l2: 0.00422071	valid_1's l2: 0.0184285
[3000]	training's l2: 0.00253238	valid_1's l2: 0.0182106
Early stopping, best iteration is:
[3338]	training's l2: 0.00217332	valid_1's l2: 0.0181561


2024-03-07 12:18:42,064 : INFO : ############ site: f1 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00787395	valid_1's l2: 0.0223177
[2000]	training's l2: 0.00404532	valid_1's l2: 0.0210572
[3000]	training's l2: 0.00242543	valid_1's l2: 0.0206658
[4000]	training's l2: 0.00156003	valid_1's l2: 0.0205088
Early stopping, best iteration is:
[4301]	training's l2: 0.00138204	valid_1's l2: 0.0204609


2024-03-07 12:19:08,597 : INFO : ############ site: f1 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00785752	valid_1's l2: 0.0212818
[2000]	training's l2: 0.00402966	valid_1's l2: 0.0202693
Early stopping, best iteration is:
[2427]	training's l2: 0.00320583	valid_1's l2: 0.0200676


2024-03-07 12:19:23,904 : INFO : ############ site: f1 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00813496	valid_1's l2: 0.0202983
[2000]	training's l2: 0.00425458	valid_1's l2: 0.0190779
[3000]	training's l2: 0.00253769	valid_1's l2: 0.018721
Early stopping, best iteration is:
[3229]	training's l2: 0.00227302	valid_1's l2: 0.018676


2024-03-07 12:19:44,042 : INFO : ############ site: f1 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00803221	valid_1's l2: 0.0212217
[2000]	training's l2: 0.00416171	valid_1's l2: 0.0201222
Early stopping, best iteration is:
[2472]	training's l2: 0.00322399	valid_1's l2: 0.019913


2024-03-07 12:19:59,639 : INFO : ############ site: f2 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0112906	valid_1's l2: 0.0300409
[2000]	training's l2: 0.005865	valid_1's l2: 0.0283253
[3000]	training's l2: 0.00355037	valid_1's l2: 0.0279301
Early stopping, best iteration is:
[3084]	training's l2: 0.00341829	valid_1's l2: 0.0279227


2024-03-07 12:20:18,885 : INFO : ############ site: f2 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0116957	valid_1's l2: 0.0278872
[2000]	training's l2: 0.00600385	valid_1's l2: 0.0265091
[3000]	training's l2: 0.0036457	valid_1's l2: 0.0261744
Early stopping, best iteration is:
[3522]	training's l2: 0.00288923	valid_1's l2: 0.0261023


2024-03-07 12:20:40,762 : INFO : ############ site: f2 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0117736	valid_1's l2: 0.0268406
[2000]	training's l2: 0.0061234	valid_1's l2: 0.0255332
Early stopping, best iteration is:
[2190]	training's l2: 0.00551973	valid_1's l2: 0.0254132


2024-03-07 12:20:54,645 : INFO : ############ site: f2 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0114656	valid_1's l2: 0.0281426
[2000]	training's l2: 0.00600017	valid_1's l2: 0.0269912
Early stopping, best iteration is:
[2672]	training's l2: 0.00422369	valid_1's l2: 0.0267561


2024-03-07 12:21:11,385 : INFO : ############ site: f2 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0115555	valid_1's l2: 0.0283445
[2000]	training's l2: 0.00605398	valid_1's l2: 0.0267632
[3000]	training's l2: 0.00363071	valid_1's l2: 0.0264105
Early stopping, best iteration is:
[3059]	training's l2: 0.00353465	valid_1's l2: 0.0263983


2024-03-07 12:21:30,399 : INFO : ############ site: f3 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0183342	valid_1's l2: 0.0437345
[2000]	training's l2: 0.00942643	valid_1's l2: 0.0419117
Early stopping, best iteration is:
[2537]	training's l2: 0.00706444	valid_1's l2: 0.0416717


2024-03-07 12:21:46,299 : INFO : ############ site: f3 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0180631	valid_1's l2: 0.0484177
[2000]	training's l2: 0.0094236	valid_1's l2: 0.045816
[3000]	training's l2: 0.00568013	valid_1's l2: 0.0452122
Early stopping, best iteration is:
[3252]	training's l2: 0.00505948	valid_1's l2: 0.045114


2024-03-07 12:22:06,447 : INFO : ############ site: f3 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0178434	valid_1's l2: 0.046959
[2000]	training's l2: 0.00926453	valid_1's l2: 0.0448521
Early stopping, best iteration is:
[2637]	training's l2: 0.0065384	valid_1's l2: 0.0443163


2024-03-07 12:22:22,930 : INFO : ############ site: f3 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0183582	valid_1's l2: 0.0463858
[2000]	training's l2: 0.00951857	valid_1's l2: 0.0432279
[3000]	training's l2: 0.00571999	valid_1's l2: 0.0421501
[4000]	training's l2: 0.00371112	valid_1's l2: 0.0417324
Early stopping, best iteration is:
[4188]	training's l2: 0.00344627	valid_1's l2: 0.0416991


2024-03-07 12:22:48,680 : INFO : ############ site: f3 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0184801	valid_1's l2: 0.0428629
[2000]	training's l2: 0.00950914	valid_1's l2: 0.0404742
Early stopping, best iteration is:
[2540]	training's l2: 0.00718152	valid_1's l2: 0.0399974


2024-03-07 12:23:05,014 : INFO : ############ site: f4 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00787481	valid_1's l2: 0.0228083


2024-03-07 12:23:14,973 : INFO : ############ site: f4 fold: 1 ###########


Early stopping, best iteration is:
[1504]	training's l2: 0.00548169	valid_1's l2: 0.0225229
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00808025	valid_1's l2: 0.0196578


2024-03-07 12:23:24,629 : INFO : ############ site: f4 fold: 2 ###########


Early stopping, best iteration is:
[1488]	training's l2: 0.0057536	valid_1's l2: 0.0193067
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00817837	valid_1's l2: 0.0200161
[2000]	training's l2: 0.00425617	valid_1's l2: 0.019258
Early stopping, best iteration is:
[2328]	training's l2: 0.00355685	valid_1's l2: 0.0191605


2024-03-07 12:23:38,796 : INFO : ############ site: f4 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00771104	valid_1's l2: 0.0228328


2024-03-07 12:23:50,080 : INFO : ############ site: f4 fold: 4 ###########


Early stopping, best iteration is:
[1775]	training's l2: 0.00458116	valid_1's l2: 0.0224807
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00810198	valid_1's l2: 0.0202108
Early stopping, best iteration is:
[1756]	training's l2: 0.00484875	valid_1's l2: 0.0195021


2024-03-07 12:24:01,229 : INFO : ############ site: f5 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00911935	valid_1's l2: 0.0207645
[2000]	training's l2: 0.00468931	valid_1's l2: 0.0185909
[3000]	training's l2: 0.0028547	valid_1's l2: 0.0180285
[4000]	training's l2: 0.00187891	valid_1's l2: 0.0178242
Early stopping, best iteration is:
[4112]	training's l2: 0.00179624	valid_1's l2: 0.0177939


2024-03-07 12:24:36,989 : INFO : ############ site: f5 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00907169	valid_1's l2: 0.0213366
[2000]	training's l2: 0.00469958	valid_1's l2: 0.0192362
[3000]	training's l2: 0.00287369	valid_1's l2: 0.0186244
[4000]	training's l2: 0.00189271	valid_1's l2: 0.0183244
[5000]	training's l2: 0.00130762	valid_1's l2: 0.0182122
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.00130762	valid_1's l2: 0.0182122


2024-03-07 12:25:19,156 : INFO : ############ site: f5 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00879392	valid_1's l2: 0.0224644
[2000]	training's l2: 0.0045515	valid_1's l2: 0.0207837
[3000]	training's l2: 0.00276744	valid_1's l2: 0.0202886
Early stopping, best iteration is:
[3166]	training's l2: 0.0025675	valid_1's l2: 0.0202676


2024-03-07 12:25:47,033 : INFO : ############ site: f5 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00880836	valid_1's l2: 0.0211589
[2000]	training's l2: 0.0045402	valid_1's l2: 0.0194569
[3000]	training's l2: 0.00275477	valid_1's l2: 0.0189657
[4000]	training's l2: 0.00179864	valid_1's l2: 0.0187802
[5000]	training's l2: 0.0012435	valid_1's l2: 0.0186599
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.0012435	valid_1's l2: 0.0186599


2024-03-07 12:26:29,124 : INFO : ############ site: f5 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00888643	valid_1's l2: 0.0221422
[2000]	training's l2: 0.00471477	valid_1's l2: 0.0202142
[3000]	training's l2: 0.00288515	valid_1's l2: 0.0195479
[4000]	training's l2: 0.00190593	valid_1's l2: 0.0192787
[5000]	training's l2: 0.00130866	valid_1's l2: 0.0191577
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 0.00130866	valid_1's l2: 0.0191577


2024-03-07 12:27:11,366 : INFO : ############ site: f6 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000385939	valid_1's l2: 0.00941929
[2000]	training's l2: 0.000213283	valid_1's l2: 0.00936576
[3000]	training's l2: 0.000131667	valid_1's l2: 0.00934646
Early stopping, best iteration is:
[2959]	training's l2: 0.000133954	valid_1's l2: 0.00934617


2024-03-07 12:27:36,557 : INFO : ############ site: f6 fold: 1 ###########


Training until validation scores don't improve for 100 rounds


2024-03-07 12:27:43,438 : INFO : ############ site: f6 fold: 2 ###########


Early stopping, best iteration is:
[701]	training's l2: 0.00154611	valid_1's l2: 0.00108005
Training until validation scores don't improve for 100 rounds


2024-03-07 12:27:48,576 : INFO : ############ site: f6 fold: 3 ###########


Early stopping, best iteration is:
[490]	training's l2: 0.00186803	valid_1's l2: 0.00123062
Training until validation scores don't improve for 100 rounds


2024-03-07 12:27:53,048 : INFO : ############ site: f6 fold: 4 ###########


Early stopping, best iteration is:
[409]	training's l2: 0.0020995	valid_1's l2: 0.0012592
Training until validation scores don't improve for 100 rounds


2024-03-07 12:27:59,652 : INFO : ############ site: f7 fold: 0 ###########


Early stopping, best iteration is:
[665]	training's l2: 0.00161033	valid_1's l2: 0.00103778
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.00051771	valid_1's l2: 0.00126851
[2000]	training's l2: 0.000273931	valid_1's l2: 0.00119317
[3000]	training's l2: 0.00016961	valid_1's l2: 0.00117537
Early stopping, best iteration is:
[3564]	training's l2: 0.000132597	valid_1's l2: 0.00116826


2024-03-07 12:28:29,050 : INFO : ############ site: f7 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000514053	valid_1's l2: 0.00127308
[2000]	training's l2: 0.000271988	valid_1's l2: 0.00119776
[3000]	training's l2: 0.000167514	valid_1's l2: 0.00117214
[4000]	training's l2: 0.00011051	valid_1's l2: 0.00116057
Early stopping, best iteration is:
[4143]	training's l2: 0.000104844	valid_1's l2: 0.00115974


2024-03-07 12:29:02,966 : INFO : ############ site: f7 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000512734	valid_1's l2: 0.00122974
[2000]	training's l2: 0.000271499	valid_1's l2: 0.00115256
[3000]	training's l2: 0.000168201	valid_1's l2: 0.00112982
Early stopping, best iteration is:
[3351]	training's l2: 0.000143113	valid_1's l2: 0.00112514


2024-03-07 12:29:30,947 : INFO : ############ site: f7 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000511112	valid_1's l2: 0.00133517
[2000]	training's l2: 0.000266464	valid_1's l2: 0.00125902
[3000]	training's l2: 0.000163082	valid_1's l2: 0.00123828
[4000]	training's l2: 0.000108118	valid_1's l2: 0.00122727
[5000]	training's l2: 7.39985e-05	valid_1's l2: 0.00122212
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 7.39985e-05	valid_1's l2: 0.00122212


2024-03-07 12:30:11,152 : INFO : ############ site: f7 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000510374	valid_1's l2: 0.00121553
[2000]	training's l2: 0.000271606	valid_1's l2: 0.00114539
[3000]	training's l2: 0.000166647	valid_1's l2: 0.00112481
Early stopping, best iteration is:
[3524]	training's l2: 0.000133704	valid_1's l2: 0.00111851


2024-03-07 12:30:40,662 : INFO : ############ site: f8 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0106202	valid_1's l2: 0.0298357


2024-03-07 12:30:49,779 : INFO : ############ site: f8 fold: 1 ###########


Early stopping, best iteration is:
[1437]	training's l2: 0.00779055	valid_1's l2: 0.0294039
Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0111737	valid_1's l2: 0.0284388
[2000]	training's l2: 0.00592997	valid_1's l2: 0.0273717
Early stopping, best iteration is:
[2518]	training's l2: 0.0045222	valid_1's l2: 0.0271151


2024-03-07 12:31:05,526 : INFO : ############ site: f8 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0111622	valid_1's l2: 0.0265065
Early stopping, best iteration is:
[1714]	training's l2: 0.00694763	valid_1's l2: 0.0259482


2024-03-07 12:31:16,499 : INFO : ############ site: f8 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0107551	valid_1's l2: 0.0298965
[2000]	training's l2: 0.0056514	valid_1's l2: 0.0288319
Early stopping, best iteration is:
[2464]	training's l2: 0.00442731	valid_1's l2: 0.028692


2024-03-07 12:31:32,186 : INFO : ############ site: f8 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.0109703	valid_1's l2: 0.0289581
[2000]	training's l2: 0.00576559	valid_1's l2: 0.0283973
Early stopping, best iteration is:
[2412]	training's l2: 0.00467351	valid_1's l2: 0.0283115


2024-03-07 12:31:47,198 : INFO : ############ site: f9 fold: 0 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000483728	valid_1's l2: 0.00119432
[2000]	training's l2: 0.000253289	valid_1's l2: 0.00110226
[3000]	training's l2: 0.000156109	valid_1's l2: 0.00107602
[4000]	training's l2: 0.0001038	valid_1's l2: 0.00106681
[5000]	training's l2: 7.16055e-05	valid_1's l2: 0.00106259
Did not meet early stopping. Best iteration is:
[5000]	training's l2: 7.16055e-05	valid_1's l2: 0.00106259


2024-03-07 12:32:28,368 : INFO : ############ site: f9 fold: 1 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000504408	valid_1's l2: 0.00118034
[2000]	training's l2: 0.000265181	valid_1's l2: 0.00109624
[3000]	training's l2: 0.000162559	valid_1's l2: 0.00106995
Early stopping, best iteration is:
[3591]	training's l2: 0.000126886	valid_1's l2: 0.00106513


2024-03-07 12:32:58,466 : INFO : ############ site: f9 fold: 2 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000496794	valid_1's l2: 0.00113465
[2000]	training's l2: 0.000266079	valid_1's l2: 0.00106204
[3000]	training's l2: 0.000164187	valid_1's l2: 0.00104031
Early stopping, best iteration is:
[3840]	training's l2: 0.000115519	valid_1's l2: 0.00103199


2024-03-07 12:33:30,817 : INFO : ############ site: f9 fold: 3 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000499326	valid_1's l2: 0.00113853
[2000]	training's l2: 0.000261786	valid_1's l2: 0.00105482
[3000]	training's l2: 0.000160151	valid_1's l2: 0.00103599
[4000]	training's l2: 0.000105165	valid_1's l2: 0.00102658
Early stopping, best iteration is:
[4436]	training's l2: 8.8715e-05	valid_1's l2: 0.00102508


2024-03-07 12:34:07,903 : INFO : ############ site: f9 fold: 4 ###########


Training until validation scores don't improve for 100 rounds
[1000]	training's l2: 0.000494027	valid_1's l2: 0.00116099
[2000]	training's l2: 0.000261729	valid_1's l2: 0.00107825
[3000]	training's l2: 0.000160589	valid_1's l2: 0.00105469
[4000]	training's l2: 0.000106139	valid_1's l2: 0.0010466
Early stopping, best iteration is:
[4496]	training's l2: 8.69548e-05	valid_1's l2: 0.00104382


<h2>预测</h2>

In [67]:
for site, df in df_test.groupby("光伏用户编号"):
    df = df.drop(columns=["光伏用户编号"]).astype(np.float32)
    y = df.pop("target")
    df["辐照强度（J/m2） - 1"] = df["辐照强度（J/m2）"].shift(1)
    
    y_pred = np.zeros((df.shape[0], ))
    for model in model_lgb[site]:
        y_pred += model.predict(df, num_iteration=model.best_iteration)
    y_pred = y_pred / kfold.n_splits
    df_test.loc[df_test["光伏用户编号"] == site, "target"] = y_pred

In [68]:
df_test = df_test[["光伏用户编号", "综合倍率", "年", "月", "日", "分", "target"]]
df_test["时间"] = df_test["年"].astype(str) + "-" + df_test["月"].astype(str) + "-" + df_test["日"].astype(str) + " 0:00"
df_test["分"] = "p" + (df_test["分"] + 1).astype(str)
df_test = df_test.drop(columns=["年", "月", "日"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [69]:
result = pd.pivot(df_test, index=["光伏用户编号", "综合倍率", "时间"], columns="分", values="target").reset_index()
result = result[result["综合倍率"].notnull()]
result["综合倍率"] = result["综合倍率"].astype(int)

In [70]:
result.to_csv("../data/%s.csv" % datetime.now().strftime("%Y%m%d_%H%M%S"), encoding="utf-8", index=False)