In [2]:
import os
import sys
import glob

import numpy as np
import pandas as pd

import matplotlib.pylab as plt
import seaborn as sns

from tqdm import tqdm
from itertools import cycle

from scipy import stats
from sklearn.metrics import mean_absolute_error as mae

from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import ensemble
from sklearn import decomposition
from sklearn import tree
from sklearn import svm
import category_encoders as ce

import lightgbm as lgb
import xgboost as xgb
import catboost as cat

# Show every column when a DataFrame is rendered.
pd.set_option("display.max_columns", None)

# Switch to the ggplot style, then keep its colour sequence both as a plain
# list and as an endless iterator over an independent copy of that list.
plt.style.use("ggplot")
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(list(color_pal))

# Silence every warning category for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Directory that holds train.csv, test.csv and sample_submission.csv.
INPUT_PATH = "../input"
# Output directory constant; the visible cells do not reference it
# (the submission file is written to a literal "submissions/" path).
OUTPUT_PATH = "./"

In [5]:
train_df = pd.read_csv(f"{INPUT_PATH}/train.csv")
test_df = pd.read_csv(f"{INPUT_PATH}/test.csv")
ss_df = pd.read_csv(f"{INPUT_PATH}/sample_submission.csv")

# Normalise headers: lower-case and join whitespace-separated words with "_".
train_df.columns = ['_'.join(col.lower().split()) for col in train_df.columns.values]
test_df.columns = ['_'.join(col.lower().split()) for col in test_df.columns.values]

# Drop 29 February rows from the training data
# (presumably so that every year contributes the same number of rows - TODO confirm).
is_leap_day = (train_df['month'] == 2) & (train_df['day'] == 29)
train_df = train_df[~is_leap_day].reset_index(drop=True)

# cloud_type == 10 is treated as missing and replaced by the previous row's value.
# Series.ffill() replaces the deprecated fillna(method="ffill") spelling.
train_df['cloud_type'] = train_df['cloud_type'].replace({10: np.nan}).ffill()

# Keep only the fill_flag codes 0, 1, 3 and 4; every other value becomes 0.
# Vectorised equivalent of the former element-wise apply(lambda ...).
train_df['fill_flag'] = train_df['fill_flag'].where(train_df['fill_flag'].isin([0, 1, 3, 4]), 0)
test_df['fill_flag'] = test_df['fill_flag'].where(test_df['fill_flag'].isin([0, 1, 3, 4]), 0)

# Training rows whose hour is NOT in 1..10 (kept for inspection; unused in the visible cells).
tmp_train_df = train_df[~train_df['hour'].isin(list(range(1, 11)))]

train_df.shape, test_df.shape, ss_df.shape

((175200, 18), (17520, 18), (17520, 3))

In [6]:
# NOTE: despite its name this starts as a copy of the *training* frame.
# Later cells attach the saved test-set predictions to it as new columns and
# filter it by its `year` column, so it needs train_df's rows and columns.
tmp_test_df = train_df.copy()

In [7]:
# Out-of-fold predictions come from two saved runs. Each .npy file wraps a
# pickled dict; entries of the second run overwrite same-key entries of the first.
cat_mr_final_oofs_1 = np.load(
    "weights/cat/MR_CAT_F_good_cv_586_lb_374_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_final_oofs_2 = np.load(
    "weights/cat/MR_CAT_F_good_cv_511_lb_367_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_final_oofs = cat_mr_final_oofs_1  # alias: the merge below updates run 1 in place
cat_mr_final_oofs.update(cat_mr_final_oofs_2)

# Same merge for the test-set predictions.
cat_mr_final_test_1 = np.load(
    "weights/cat/MR_CAT_F_good_cv_586_lb_374_test_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_final_test_2 = np.load(
    "weights/cat/MR_CAT_F_good_cv_511_lb_367_test_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_final_test = cat_mr_final_test_1
cat_mr_final_test.update(cat_mr_final_test_2)

# The dict values are joined along axis 0 and written as three new columns.
train_df[['cat_mr_clearsky_dhi', 'cat_mr_clearsky_dni', 'cat_mr_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_final_oofs.values()]), axis=0
)
tmp_test_df[['cat_mr_clearsky_dhi', 'cat_mr_clearsky_dni', 'cat_mr_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_final_test.values()]), axis=0
)

In [8]:
# Saved prediction dicts (pickled inside .npy files) for the "MR_CAT_V1" cv_547 run.
cat_mr_v1_final_oofs = np.load(
    "weights/cat/MR_CAT_V1_cv_547_lb_363_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_v1_final_test = np.load(
    "weights/cat/MR_CAT_V1_cv_547_lb_363_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 0 and store them as three new columns.
train_df[['cat_mr_v1_clearsky_dhi', 'cat_mr_v1_clearsky_dni', 'cat_mr_v1_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_v1_final_oofs.values()]), axis=0
)
tmp_test_df[['cat_mr_v1_clearsky_dhi', 'cat_mr_v1_clearsky_dni', 'cat_mr_v1_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_v1_final_test.values()]), axis=0
)

In [9]:
# Saved prediction dicts (pickled inside .npy files) for the "MR_CAT_V1" cv_550 run,
# exposed under the cat_mr_v2_* column names.
cat_mr_v2_final_oofs = np.load(
    "weights/cat/MR_CAT_V1_cv_550_lb_371_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_mr_v2_final_test = np.load(
    "weights/cat/MR_CAT_V1_cv_550_lb_371_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 0 and store them as three new columns.
train_df[['cat_mr_v2_clearsky_dhi', 'cat_mr_v2_clearsky_dni', 'cat_mr_v2_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_v2_final_oofs.values()]), axis=0
)
tmp_test_df[['cat_mr_v2_clearsky_dhi', 'cat_mr_v2_clearsky_dni', 'cat_mr_v2_clearsky_ghi']] = np.concatenate(
    np.array([*cat_mr_v2_final_test.values()]), axis=0
)

In [11]:
# Saved prediction dicts (pickled inside .npy files) for the "CAT_F" run.
cat_final_oofs = np.load(
    "weights/cat/CAT_F_cv_546_lb_373_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_final_test = np.load(
    "weights/cat/CAT_F_cv_546_lb_373_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['cat_clearsky_dhi', 'cat_clearsky_dni', 'cat_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['cat_clearsky_dhi', 'cat_clearsky_dni', 'cat_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_test.values()]), axis=1
).transpose()

In [15]:
# Saved prediction dicts (pickled inside .npy files) for the "CAT_V1" run.
cat_final_v1_oofs = np.load(
    "weights/cat/CAT_V1_cv_554_lb_372_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_final_v1_test = np.load(
    "weights/cat/CAT_V1_cv_554_lb_372_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['cat_v1_clearsky_dhi', 'cat_v1_clearsky_dni', 'cat_v1_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_v1_oofs.values()]), axis=1
).transpose()
tmp_test_df[['cat_v1_clearsky_dhi', 'cat_v1_clearsky_dni', 'cat_v1_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_v1_test.values()]), axis=1
).transpose()

In [16]:
# Saved prediction dicts (pickled inside .npy files) for the "CAT_FV2" run.
cat_final_v2_oofs = np.load(
    "weights/cat/CAT_FV2_cv_548_lb_384_oofs_predict_dict.npy", allow_pickle=True
).tolist()
cat_final_v2_test = np.load(
    "weights/cat/CAT_FV2_cv_548_lb_384_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['cat_v2_clearsky_dhi', 'cat_v2_clearsky_dni', 'cat_v2_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_v2_oofs.values()]), axis=1
).transpose()
tmp_test_df[['cat_v2_clearsky_dhi', 'cat_v2_clearsky_dni', 'cat_v2_clearsky_ghi']] = np.concatenate(
    np.array([*cat_final_v2_test.values()]), axis=1
).transpose()

In [18]:
# Saved prediction dicts (pickled inside .npy files) for the "LGBM_F_r_s" run.
lgb_final_oofs = np.load(
    "weights/lgb/LGBM_F_r_s_cv_549_lb_369_oofs_predict_dict.npy", allow_pickle=True
).tolist()
lgb_final_test = np.load(
    "weights/lgb/LGBM_F_r_s_cv_549_lb_369_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['lgb_clearsky_dhi', 'lgb_clearsky_dni', 'lgb_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['lgb_clearsky_dhi', 'lgb_clearsky_dni', 'lgb_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_test.values()]), axis=1
).transpose()

In [19]:
# Saved prediction dicts (pickled inside .npy files) for the "LGBM_V1" run.
lgb_final_v1_oofs = np.load(
    "weights/lgb/LGBM_V1_cv_551_lb_363_oofs_predict_dict.npy", allow_pickle=True
).tolist()
lgb_final_v1_test = np.load(
    "weights/lgb/LGBM_V1_cv_551_lb_363_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['lgb_v1_clearsky_dhi', 'lgb_v1_clearsky_dni', 'lgb_v1_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_v1_oofs.values()]), axis=1
).transpose()
tmp_test_df[['lgb_v1_clearsky_dhi', 'lgb_v1_clearsky_dni', 'lgb_v1_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_v1_test.values()]), axis=1
).transpose()

In [20]:
# Saved prediction dicts (pickled inside .npy files) for the "LGBM_FV2_r_s" run.
lgb_final_v2_oofs = np.load(
    "weights/lgb/LGBM_FV2_r_s_cv_556_lb_374_oofs_predict_dict.npy", allow_pickle=True
).tolist()
lgb_final_v2_test = np.load(
    "weights/lgb/LGBM_FV2_r_s_cv_556_lb_374_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['lgb_v2_clearsky_dhi', 'lgb_v2_clearsky_dni', 'lgb_v2_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_v2_oofs.values()]), axis=1
).transpose()
tmp_test_df[['lgb_v2_clearsky_dhi', 'lgb_v2_clearsky_dni', 'lgb_v2_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_final_v2_test.values()]), axis=1
).transpose()

In [21]:
# Saved prediction dicts (pickled inside .npy files) for the "LGBM_FHP_r_s" run.
lgb_hp_final_oofs = np.load(
    "weights/lgb/LGBM_FHP_r_s_cv_530_lb_351_oofs_predict_dict.npy", allow_pickle=True
).tolist()
lgb_hp_final_test = np.load(
    "weights/lgb/LGBM_FHP_r_s_cv_530_lb_351_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['lgb_hp_clearsky_dhi', 'lgb_hp_clearsky_dni', 'lgb_hp_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_hp_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['lgb_hp_clearsky_dhi', 'lgb_hp_clearsky_dni', 'lgb_hp_clearsky_ghi']] = np.concatenate(
    np.array([*lgb_hp_final_test.values()]), axis=1
).transpose()

In [22]:
# Saved prediction dicts (pickled inside .npy files) for the "XGB_F" run.
xgb_final_oofs = np.load(
    "weights/xgb/XGB_F_cv_555_lb_372_oofs_predict_dict.npy", allow_pickle=True
).tolist()
xgb_final_test = np.load(
    "weights/xgb/XGB_F_cv_555_lb_372_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['xgb_clearsky_dhi', 'xgb_clearsky_dni', 'xgb_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['xgb_clearsky_dhi', 'xgb_clearsky_dni', 'xgb_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_final_test.values()]), axis=1
).transpose()

In [23]:
# Saved prediction dicts (pickled inside .npy files) for the "XGB_V1" run.
xgb_v1_final_oofs = np.load(
    "weights/xgb/XGB_V1_cv_549_lb_362_oofs_predict_dict.npy", allow_pickle=True
).tolist()
xgb_v1_final_test = np.load(
    "weights/xgb/XGB_V1_cv_549_lb_362_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['xgb_v1_clearsky_dhi', 'xgb_v1_clearsky_dni', 'xgb_v1_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_v1_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['xgb_v1_clearsky_dhi', 'xgb_v1_clearsky_dni', 'xgb_v1_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_v1_final_test.values()]), axis=1
).transpose()

In [24]:
# Saved prediction dicts (pickled inside .npy files) exposed as the xgb_v2_* columns.
# NOTE(review): the files live under weights/xgb/ but are named "CAT_V2_..." -
# confirm they really hold the XGBoost v2 predictions and not a CatBoost run.
xgb_v2_final_oofs = np.load(
    "weights/xgb/CAT_V2_cv_553_lb_368_oofs_predict_dict.npy", allow_pickle=True
).tolist()
xgb_v2_final_test = np.load(
    "weights/xgb/CAT_V2_cv_553_lb_368_test_predict_dict.npy", allow_pickle=True
).tolist()

# Join the dict values along axis 1, then transpose so each target becomes a column.
train_df[['xgb_v2_clearsky_dhi', 'xgb_v2_clearsky_dni', 'xgb_v2_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_v2_final_oofs.values()]), axis=1
).transpose()
tmp_test_df[['xgb_v2_clearsky_dhi', 'xgb_v2_clearsky_dni', 'xgb_v2_clearsky_ghi']] = np.concatenate(
    np.array([*xgb_v2_final_test.values()]), axis=1
).transpose()

In [26]:
def run_lgb(train_df, test_df, train_cols, target_cols, year, lb_tail_rows=5256):
    """Fit one linear stacking model per target, holding out a single year.

    Despite its name, this function fits ``linear_model.LinearRegression``
    (no LightGBM model is trained here). Rows of ``train_df`` whose ``year``
    equals ``year`` form the validation fold; all other rows are used for
    fitting.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Must contain a ``year`` column, every column in ``train_cols`` and
        every column in ``target_cols``.
    test_df : pandas.DataFrame
        Must contain every column in ``train_cols``.
    train_cols : list of str
        Feature columns fed to the regression.
    target_cols : list of str
        Targets; one independent model is fitted per entry.
    year : int
        Year held out for validation.
    lb_tail_rows : int, default 5256
        Number of trailing validation rows used for the "LB MSE" score.

    Returns
    -------
    score : float
        Mean validation MSE over ``target_cols``.
    lb_score : float
        Mean MSE over the last ``lb_tail_rows`` validation rows, averaged
        over ``target_cols``.
    oofs_predict_dict : dict
        ``f"{target_col}_{year}"`` -> validation predictions.
    test_predict_dict : dict
        ``f"{target_col}_{year}"`` -> predictions for ``test_df``.
    """
    oofs_predict_dict = {}
    test_predict_dict = {}

    oofs_scores_dict = {}
    oofs_lb_scores_dict = {}

    X_test = test_df[train_cols]

    # The fold split does not depend on the target, so build it once instead
    # of re-filtering the frame four times per target.
    is_valid = train_df["year"] == year
    X_train = train_df.loc[~is_valid, train_cols]
    X_valid = train_df.loc[is_valid, train_cols]

    for target_col in target_cols:
        print("="*100)
        print("YEAR : ", year)
        print("Target :", target_col)
        print("_"*100)

        y_train = train_df.loc[~is_valid, target_col]
        y_valid = train_df.loc[is_valid, target_col]

        print("Trian :", X_train.shape, y_train.shape)
        print("Valid :", X_valid.shape, y_valid.shape)

        model = linear_model.LinearRegression()
        model.fit(X_train, y_train)

        valid_predict = model.predict(X_valid)
        test_predict = model.predict(X_test)

        key = f'{target_col}_{year}'
        oofs_predict_dict[key] = valid_predict
        test_predict_dict[key] = test_predict

        # "TMP MSE": both vectors are zero-padded to len(X_test) before scoring,
        # so the fold's squared error is averaged over the test length rather
        # than the fold length. Requires len(X_test) >= len(X_valid).
        tmp_preds = np.zeros(len(X_test))
        tmp_true = np.zeros(len(X_test))
        tmp_preds[:len(valid_predict)] = valid_predict
        tmp_true[:len(y_valid)] = y_valid

        tmp_score = metrics.mean_squared_error(tmp_true, tmp_preds)
        print('TMP MSE', tmp_score)

        score = metrics.mean_squared_error(y_valid, valid_predict)
        oofs_scores_dict[key] = score
        print('MSE :', score)

        # Score only the trailing rows of the fold; .iloc makes the positional
        # slice explicit instead of relying on Series.__getitem__ semantics.
        lb_score = metrics.mean_squared_error(
            y_valid.iloc[-lb_tail_rows:], valid_predict[-lb_tail_rows:]
        )
        oofs_lb_scores_dict[key] = lb_score
        print('LB MSE :', lb_score)

    score = np.mean(list(oofs_scores_dict.values()))
    print("OOFS MSE MEAN", score)

    lb_score = np.mean(list(oofs_lb_scores_dict.values()))
    print("OOFS LB MSE MEAN", lb_score)

    return score, lb_score, oofs_predict_dict, test_predict_dict

In [27]:
def run_lgbm_years(train_df, test_df, train_cols, target_cols,
                   years=range(2009, 2019), skip_years=(2011,)):
    """Run ``run_lgb`` once per validation year and aggregate the results.

    Parameters
    ----------
    train_df, test_df, train_cols, target_cols
        Passed through unchanged to ``run_lgb``.
    years : iterable of int, default ``range(2009, 2019)``
        Candidate validation years, visited in order.
    skip_years : container of int, default ``(2011,)``
        Years that are not used as a validation fold.

    Returns
    -------
    score_mean : float
        Mean over years of the per-year score returned by ``run_lgb`` (MSE).
    lb_score_mean : float
        Mean over years of the per-year "LB" score returned by ``run_lgb``.
    oofs_predict_dict_final : dict
        year -> array built from that year's validation predictions
        (one row per target, in ``target_cols`` order).
    test_predict_dict_final : dict
        year -> array built from that year's test predictions
        (one row per target, in ``target_cols`` order).
    """
    oofs_predict_dict_final = {}
    test_predict_dict_final = {}
    score_dict = {}
    lb_score_dict = {}

    for year in years:
        if year in skip_years:
            continue

        print("=*"*100)
        print("Year :", year)
        print("-*"*100)
        score, lb_score, oofs_predict_dict, test_predict_dict = run_lgb(
            train_df, test_df, train_cols, target_cols, year
        )

        score_dict[year] = score
        lb_score_dict[year] = lb_score
        oofs_predict_dict_final[year] = np.array(list(oofs_predict_dict.values()))
        test_predict_dict_final[year] = np.array(list(test_predict_dict.values()))

    # run_lgb reports mean squared error, so label the summary as MSE
    # (it was previously printed as "MAE").
    score_mean = np.mean(list(score_dict.values()))
    print("FINAL MSE", score_mean)

    lb_score_mean = np.mean(list(lb_score_dict.values()))
    print("FINAL LB MSE", lb_score_mean)

    return score_mean, lb_score_mean, oofs_predict_dict_final, test_predict_dict_final

In [28]:
def normalize_scaler(train_df, test_df, cols):
    """Standardise ``cols`` in both frames using statistics fitted on ``train_df``.

    Both frames are modified in place (the scaled values overwrite ``cols``)
    and are also returned as ``(train_df, test_df)``.
    """
    scaler = preprocessing.StandardScaler().fit(train_df[cols])
    # Train first, then test - same order as the fit/transform sequence implies.
    for frame in (train_df, test_df):
        frame[cols] = scaler.transform(frame[cols])
    return train_df, test_df

In [29]:
# The frames had 18 columns right after loading (see the shape printed above),
# so everything from position 18 onward is a prediction column added by the cells above.
train_df.columns[18:]

Index(['cat_mr_clearsky_dhi', 'cat_mr_clearsky_dni', 'cat_mr_clearsky_ghi',
       'cat_mr_v1_clearsky_dhi', 'cat_mr_v1_clearsky_dni',
       'cat_mr_v1_clearsky_ghi', 'cat_mr_v2_clearsky_dhi',
       'cat_mr_v2_clearsky_dni', 'cat_mr_v2_clearsky_ghi', 'cat_clearsky_dhi',
       'cat_clearsky_dni', 'cat_clearsky_ghi', 'cat_v1_clearsky_dhi',
       'cat_v1_clearsky_dni', 'cat_v1_clearsky_ghi', 'cat_v2_clearsky_dhi',
       'cat_v2_clearsky_dni', 'cat_v2_clearsky_ghi', 'lgb_clearsky_dhi',
       'lgb_clearsky_dni', 'lgb_clearsky_ghi', 'lgb_v1_clearsky_dhi',
       'lgb_v1_clearsky_dni', 'lgb_v1_clearsky_ghi', 'lgb_v2_clearsky_dhi',
       'lgb_v2_clearsky_dni', 'lgb_v2_clearsky_ghi', 'lgb_hp_clearsky_dhi',
       'lgb_hp_clearsky_dni', 'lgb_hp_clearsky_ghi', 'xgb_clearsky_dhi',
       'xgb_clearsky_dni', 'xgb_clearsky_ghi', 'xgb_v1_clearsky_dhi',
       'xgb_v1_clearsky_dni', 'xgb_v1_clearsky_ghi', 'xgb_v2_clearsky_dhi',
       'xgb_v2_clearsky_dni', 'xgb_v2_clearsky_ghi'],
      dtype='

In [30]:
# Columns that are never used as stacking features.
drop_cols = ['year', 'clearsky_dhi', 'clearsky_dni', 'clearsky_ghi', 'fill_flag']

# The three quantities to predict.
target_cols = ['clearsky_dhi', 'clearsky_dni', 'clearsky_ghi']

# Base models whose predictions feed the stacker. Comment a prefix out to
# drop that model's three prediction columns from the feature set.
stack_model_prefixes = [
    'cat_mr', 'cat_mr_v1', 'cat_mr_v2',
    'cat', 'cat_v1', 'cat_v2',
    'lgb', 'lgb_v1', 'lgb_v2',
    'xgb', 'xgb_v1', 'xgb_v2',
    # 'lgb_hp',  # columns exist in the frame but are left out of the stack
]

# One feature per (model, target) pair, model-major: <prefix>_clearsky_{dhi,dni,ghi}.
# (The earlier "all columns except drop_cols" assignment was a dead store that
# this list immediately overwrote, so it has been removed.)
train_cols = [
    f'{prefix}_{target}'
    for prefix in stack_model_prefixes
    for target in target_cols
]

# Per-target feature subsets; only referenced by commented-out experiments
# in the visible cells.
TRAIN_COLS = {
    "clearsky_dhi" : [
         'cat_mr_clearsky_dhi',
         'cat_clearsky_dhi',
         'lgb_clearsky_dhi',
    ],

    "clearsky_dni" : [
         'cat_mr_clearsky_dni',
         'cat_clearsky_dni',
         'lgb_clearsky_dni',
    ],

    "clearsky_ghi" : [
         'cat_mr_clearsky_ghi',
         'cat_clearsky_ghi',
         'lgb_clearsky_ghi',
    ]
}

In [31]:
# Remove every 2011 row from both stacking frames and renumber the index from 0.
d_train_df = train_df.loc[train_df['year'].ne(2011)].reset_index(drop=True)
d_test_df = tmp_test_df.loc[tmp_test_df['year'].ne(2011)].reset_index(drop=True)

d_train_df.shape, d_test_df.shape

((157680, 57), (157680, 57))

In [32]:
# Fit the stacker once per held-out year on the 2011-free frames; keeps the mean
# scores plus the per-year validation and test predictions (dicts keyed by year).
score, lb_score, oofs_predict_dict_final, test_predict_dict_final = run_lgbm_years(d_train_df, d_test_df, train_cols, target_cols)

=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
Year : 2009
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
YEAR :  2009
Target : clearsky_dhi
____________________________________________________________________________________________________
Trian : (140160, 36) (140160,)
Valid : (17520, 36) (17520,)
TMP MSE 24.152635105248077
MSE : 217.3737159472327
LB MSE : 84.20345351478105
YEAR :  2009
Target : clearsky_dni
____________________________________________________________________________________________________
Trian : (140160, 36) (140160,)
Valid : (17520, 36) (17520,)
TMP MSE 135.3606979372188
MSE : 1218.246281434969
LB MSE : 826.3536604885472
YEAR :  2009
Target : clearsky_ghi
____

In [35]:
pred_cols = ['clearsky_dhi', 'clearsky_dni', 'clearsky_ghi']

# Average over the per-year stacker predictions, then transpose to (rows, targets).
stacked_test = np.array(list(test_predict_dict_final.values())).mean(axis=0).T

# The rows are consecutive blocks of len(test_df) rows each; average the blocks.
# -1 lets NumPy infer the block count instead of hard-coding 9 (or 10 with 2011 kept).
test_mean = stacked_test.reshape(-1, len(test_df), len(pred_cols)).mean(axis=0)

test_df[pred_cols] = test_mean

# Zero the predictions where the solar zenith angle exceeds 93.
# BUG FIX: the previous chained form `test_df.loc[mask][cols] = 0` wrote into a
# temporary copy, so this rule was silently never applied.
high_zenith = test_df['solar_zenith_angle'] > 93
test_df.loc[high_zenith, pred_cols] = 0

# Zero the predictions for hours 1..10.
test_df.loc[test_df['hour'].isin(list(range(1, 11))), pred_cols] = 0

# Negative predictions are clipped to 0.
test_df[pred_cols] = test_df[pred_cols].clip(lower=0)

ss_df[['Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI']] = test_df[pred_cols]

# Make sure the target directory exists so the final write cannot fail on a fresh checkout.
os.makedirs("submissions", exist_ok=True)
ss_df.to_csv("submissions/cat_mr_lgb_drop2011_stack_v5.csv", index=False)