In [10]:
import sys  
import json
import pandas as pd
import numpy as np
import lightgbm as lgb

sys.path.insert(0, '../')
import gbdt_forecast

In [None]:
params_path = '../params/params_eem2020_wind_example.json'
preprocessed_path = '../data/eem20/preprocessed/eem2020-preprocessed.csv'
output_path = './submission-0.csv'
trial_name = 'trial0'
model = 'lightgbm'
split_name = 'valid'

In [11]:
with open(params_path, 'r', encoding='utf-8') as file:
    params_json = json.loads(file.read())

In [38]:
sites = range(len(params_json['sites']))
splits = range(len(params_json['splits']['valid']))
alpha_q = np.arange(params_json['regression_params']['alpha_range'][0],
                    params_json['regression_params']['alpha_range'][1],
                    params_json['regression_params']['alpha_range'][2])

gbm_split = []
for split in splits:    
    gbm_site = []
    for site in sites:
        gbm_q = {}
        for alpha in alpha_q:
            file_name = '../result/eem20/'+trial_name+'/gbm_model/gbm_model_'+model+'_q_quantile{0:.2f}_split_{1}_site_{2}.txt'.format(alpha, split, site)
            if model == 'lightgbm': 
                gbm = lgb.Booster(model_file=file_name)
            elif model == 'catboost':
                gbm = cb.CatBoostRegressor().load_model(file_name)
            gbm_q['quantile{0:.2f}'.format(alpha)] = gbm
        gbm_site.append(gbm_q)
    gbm_split.append(gbm_site)

In [40]:
df_X = pd.read_csv('../data/eem20/preprocessed/eem2020-preprocessed.csv', index_col=[0,1], header=[0,1])

In [41]:
trial = gbdt_forecast.Trial(params_json)

In [65]:
dfs_y_pred = trial.predict_model_split_site([[df_X['SE1'], df_X['SE2'], df_X['SE3'], df_X['SE4']]], {model: gbm_split})

  0%|          | 0/4 [00:00<?, ?it/s]

Predicting...


100%|██████████| 4/4 [00:02<00:00,  1.94it/s]


In [80]:
df_pred = pd.concat(dfs_y_pred[model][0], axis=1)
df_pred.columns = [f'SE{site_id+1}_quantile{q_id+1}' for q_id in range(len(dfs_y_pred[model][0][0].columns)) for site_id in sites]
df_pred = df_pred.droplevel(level=0, axis=0)

In [81]:
df_pred.to_csv("./submission-0.csv", header=True)

In [82]:
df_pred.head()

Unnamed: 0_level_0,SE1_quantile1,SE2_quantile1,SE3_quantile1,SE4_quantile1,SE1_quantile2,SE2_quantile2,SE3_quantile2,SE4_quantile2,SE1_quantile3,SE2_quantile3,...,SE3_quantile97,SE4_quantile97,SE1_quantile98,SE2_quantile98,SE3_quantile98,SE4_quantile98,SE1_quantile99,SE2_quantile99,SE3_quantile99,SE4_quantile99
valid_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-02 00:00:00,2.625833,8.032666,10.299038,13.524069,15.488036,17.436348,19.230108,20.182856,22.811934,25.310804,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2000-01-02 01:00:00,2.625833,7.494488,10.299038,13.524069,15.488036,17.849544,19.230108,20.182856,21.949742,22.811934,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2000-01-02 02:00:00,2.625833,8.032666,10.546276,13.524069,15.457736,18.021248,19.230108,20.40711,22.678376,22.811934,...,976.472435,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2000-01-02 03:00:00,2.625833,8.032666,10.546276,13.657336,15.116212,17.658164,19.041101,21.853178,22.52437,22.737912,...,949.37762,968.851326,972.809635,993.581721,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2000-01-02 04:00:00,2.625833,7.494488,10.546276,13.657336,15.116212,17.283963,19.041101,22.028306,22.207718,22.292007,...,966.215947,972.412788,992.95946,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
