In [None]:
!pip install pycaret[full]

In [15]:
from pycaret.regression import *

import pickle
import warnings
import pandas as pd
import random
import os
import numpy as np
import torch
import torchvision

In [8]:
def seed_everything(seed):
    """Seed every random number generator this notebook has in scope.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch, and exports ``PYTHONHASHSEED`` to the environment.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomization is fixed when the interpreter starts, so this value
    # only takes effect in processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # torch is imported by this notebook but was previously left unseeded.
    torch.manual_seed(seed)


seed_everything(42)  # fix the seed for the whole notebook

In [10]:
def lg_nrmse(gt, preds, n_targets=14, n_weighted=8, weight=1.2):
    """Weighted sum of per-target normalized RMSE values.

    For each of the first ``n_targets`` columns the RMSE between ``gt`` and
    ``preds`` is divided by the mean absolute value of that ``gt`` column.
    The first ``n_weighted`` columns are multiplied by ``weight`` and the
    remaining columns count once. With the defaults this is
    ``1.2 * sum(nrmse[0:8]) + sum(nrmse[8:14])``.

    Args:
        gt: 2-D array-like of ground-truth targets, one column per target.
        preds: 2-D array-like of predictions with the same row count as ``gt``.
        n_targets: Number of leading columns to score; later columns are ignored.
        n_weighted: Number of leading targets that receive ``weight``.
        weight: Multiplier applied to the first ``n_weighted`` targets.

    Returns:
        The weighted NRMSE sum as a NumPy float.

    Raises:
        IndexError: If either input is not 2-D or has fewer than ``n_targets`` columns.
        ValueError: If ``gt`` and ``preds`` have different numbers of rows.
    """
    gt = np.asarray(gt)
    preds = np.asarray(preds)
    for label, arr in (("gt", gt), ("preds", preds)):
        if arr.ndim != 2 or arr.shape[1] < n_targets:
            raise IndexError(
                f"{label} must be 2-D with at least {n_targets} columns, got shape {arr.shape}"
            )

    # Slice before casting so trailing non-numeric columns cannot break the conversion.
    gt = gt[:, :n_targets].astype(float)
    preds = preds[:, :n_targets].astype(float)
    if gt.shape[0] != preds.shape[0]:
        raise ValueError(
            f"gt and preds must have the same number of rows, got {gt.shape[0]} and {preds.shape[0]}"
        )

    # RMSE is computed with NumPy so the function does not depend on a
    # `metrics` name that this notebook never imports explicitly.
    rmse = np.sqrt(np.mean((gt - preds) ** 2, axis=0))
    nrmse = rmse / np.mean(np.abs(gt), axis=0)
    return weight * np.sum(nrmse[:n_weighted]) + 1.0 * np.sum(nrmse[n_weighted:])
def _single_target_nrmse(gt, preds):
    """Return RMSE(gt, preds) divided by the mean absolute value of gt."""
    gt = np.asarray(gt, dtype=float)
    preds = np.asarray(preds, dtype=float)
    # Treat 1-D input as one column so (n,) against (n, 1) cannot silently
    # broadcast into an (n, n) difference matrix.
    if gt.ndim == 1:
        gt = gt.reshape(-1, 1)
    if preds.ndim == 1:
        preds = preds.reshape(-1, 1)
    if gt.shape != preds.shape:
        raise ValueError(
            f"gt and preds must have the same shape, got {gt.shape} and {preds.shape}"
        )
    # 2-D input is scored per column and the column RMSEs are averaged.
    rmse = np.mean(np.sqrt(np.mean((gt - preds) ** 2, axis=0)))
    return rmse / np.mean(np.abs(gt))


def lg_nrmse_12(gt, preds):
    """Normalized RMSE scaled by 1.2.

    Computes RMSE between ``gt`` and ``preds``, divides it by the mean
    absolute value of ``gt`` and multiplies the result by 1.2.

    Args:
        gt: Array-like of ground-truth values.
        preds: Array-like of predictions with the same shape as ``gt``.

    Returns:
        ``1.2 * RMSE / mean(|gt|)`` as a NumPy float.

    Raises:
        ValueError: If ``gt`` and ``preds`` differ in shape.
    """
    return _single_target_nrmse(gt, preds) * 1.2


def lg_nrmse_10(gt, preds):
    """Normalized RMSE with no extra weighting.

    Computes RMSE between ``gt`` and ``preds`` and divides it by the mean
    absolute value of ``gt``.

    Args:
        gt: Array-like of ground-truth values.
        preds: Array-like of predictions with the same shape as ``gt``.

    Returns:
        ``RMSE / mean(|gt|)`` as a NumPy float.

    Raises:
        ValueError: If ``gt`` and ``preds`` differ in shape.
    """
    return _single_target_nrmse(gt, preds)

In [13]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
# Load the training table from train.csv in the current working directory.
train_df = pd.read_csv('train.csv')

In [17]:
# Split the training table by column name. The regex is unanchored, so any
# column whose name contains the letter is selected.
train_x = train_df.filter(regex='X') # Input: X features
train_y = train_df.filter(regex='Y') # Output: Y features (targets)

# Test inputs: every column of test.csv except the 'ID' column.
# NOTE(review): presumably the remaining columns match train_x - confirm.
test_x = pd.read_csv('./test.csv').drop(columns=['ID'])

In [18]:
# Submission template; each target column is overwritten with predictions below.
submit = pd.read_csv('./sample_submission.csv')

# Per-target model collections, pickled once the loop has finished.
best_models_dic = {}
tuned_best_models_dic = {}

# Each target column gets its own experiment:
# compare -> tune -> bag -> blend -> finalize -> predict.
for col in train_y.columns:
    # Training frame for this target: all X features plus the one Y column.
    train_tmp = pd.concat([train_x, train_y[col]], axis=1)

    reg = setup(
        data=train_tmp,
        target=col,
        session_id=42,
        use_gpu=True,
        normalize=True,
        train_size=0.8,
        silent=True,
    )

    # Keep the three best candidates ranked by RMSE under 5-fold CV.
    best_models = compare_models(sort='RMSE', n_select=3, fold=5)
    best_models_dic[col] = best_models

    # Tune each selected candidate for RMSE.
    tuned_best_models = [
        tune_model(model, optimize='RMSE', early_stopping=True, fold=5)
        for model in best_models
    ]
    tuned_best_models_dic[col] = tuned_best_models

    # Wrap every tuned model in a bagging ensemble, then blend the ensembles.
    bagged_models = [
        ensemble_model(tuned, method='Bagging') for tuned in tuned_best_models
    ]
    blender = blend_models(estimator_list=bagged_models, optimize='RMSE', fold=5)

    # Finalize the blended model and predict on the test inputs.
    final_model = finalize_model(blender)
    pred = predict_model(final_model, data=test_x)

    # Store the predictions in the submission frame and in a per-target CSV.
    submit[col] = pred['Label']
    pred['Label'].to_csv(f'{col}_pred.csv', index=False)

submit.to_csv('pycaret_tune_bag_ens.csv', index=False)

# Persist the selected and tuned model lists.
for pickle_path, model_dic in (
    ('pycaret_tune_bag_ens_bm.pickle', best_models_dic),
    ('pycaret_tune_bag_ens_tbm.pickle', tuned_best_models_dic),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(model_dic, file, protocol=pickle.HIGHEST_PROTOCOL)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.4958,0.4033,0.635,0.0779,0.0232,0.0188
1,0.4869,0.3931,0.627,0.0898,0.0229,0.0185
2,0.4857,0.388,0.6229,0.0893,0.0227,0.0184
3,0.4838,0.3917,0.6258,0.0804,0.0228,0.0184
4,0.4843,0.3862,0.6214,0.0915,0.0227,0.0184
Mean,0.4873,0.3924,0.6264,0.0858,0.0229,0.0185
Std,0.0044,0.0059,0.0047,0.0055,0.0002,0.0002




In [30]:
# Rebuild the submission from the per-target prediction files:
# the ID column first, then Y_01 ... Y_14 in order.
id_column = pd.read_csv("./sample_submission.csv")["ID"]
target_frames = [pd.read_csv("Y_%02d_pred.csv"%i) for i in range(1,15)]

# Concatenate all columns in one call rather than growing the frame per target.
pycaret_sample_submit = pd.concat([id_column, *target_frames], axis=1)
pycaret_sample_submit.columns = [
    'ID',
    'Y_01', 'Y_02', 'Y_03', 'Y_04', 'Y_05', 'Y_06', 'Y_07',
    'Y_08', 'Y_09', 'Y_10', 'Y_11', 'Y_12', 'Y_13', 'Y_14',
]
pycaret_sample_submit.to_csv("pycaret_sample_submit.csv",index=False)
pycaret_sample_submit

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.409730,1.172698,1.089530,13.798788,31.320076,16.572851,3.142963,-26.168288,-26.191247,-22.278453,24.430099,-26.117312,-26.101221,-26.143502
1,TEST_00002,1.446926,1.194214,1.111366,13.592058,30.955498,16.517858,3.145595,-26.168065,-26.173588,-22.279146,24.374033,-26.111013,-26.105807,-26.104747
2,TEST_00003,1.419968,1.137048,1.094225,14.833265,31.822797,16.737695,3.062893,-25.945240,-25.932582,-22.159965,24.556532,-25.907577,-25.880051,-25.883685
3,TEST_00004,1.425591,1.139525,1.048436,14.950840,32.519808,16.942631,3.044718,-25.734597,-25.759496,-21.947959,24.825821,-25.693470,-25.692414,-25.713051
4,TEST_00005,1.331761,1.004513,0.953266,14.853179,31.698418,16.799474,3.120202,-25.852467,-25.864392,-22.250719,24.664609,-25.801416,-25.802786,-25.793909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.269808,0.964473,0.989906,13.254155,31.287604,16.660730,3.159219,-26.475499,-26.472326,-22.741711,24.366498,-26.404711,-26.403818,-26.412686
39604,TEST_39605,1.255943,0.916197,0.961712,13.736527,31.390949,16.683605,3.171051,-26.429021,-26.429576,-22.723891,24.401349,-26.365643,-26.356077,-26.367564
39605,TEST_39606,1.246997,0.925742,0.962441,13.239913,31.219130,16.619165,3.172664,-26.513285,-26.495953,-22.748871,24.286994,-26.446237,-26.443042,-26.462309
39606,TEST_39607,1.233771,0.888382,0.936463,13.391755,31.282205,16.643478,3.191145,-26.473753,-26.465405,-22.757936,24.383439,-26.420387,-26.407569,-26.428093


In [31]:
# Same assembly as the unrounded submission, with the values rounded to
# two decimals before writing.
id_column = pd.read_csv("./sample_submission.csv")["ID"]
target_frames = [pd.read_csv("Y_%02d_pred.csv"%i) for i in range(1,15)]

# Concatenate all columns in one call rather than growing the frame per target.
pycaret_sample_submit = pd.concat([id_column, *target_frames], axis=1)
pycaret_sample_submit.columns = [
    'ID',
    'Y_01', 'Y_02', 'Y_03', 'Y_04', 'Y_05', 'Y_06', 'Y_07',
    'Y_08', 'Y_09', 'Y_10', 'Y_11', 'Y_12', 'Y_13', 'Y_14',
]
pycaret_sample_submit = pycaret_sample_submit.round(2)
pycaret_sample_submit.to_csv("pycaret_sample_submit_ROUND(2).csv",index=False)
pycaret_sample_submit

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.41,1.17,1.09,13.80,31.32,16.57,3.14,-26.17,-26.19,-22.28,24.43,-26.12,-26.10,-26.14
1,TEST_00002,1.45,1.19,1.11,13.59,30.96,16.52,3.15,-26.17,-26.17,-22.28,24.37,-26.11,-26.11,-26.10
2,TEST_00003,1.42,1.14,1.09,14.83,31.82,16.74,3.06,-25.95,-25.93,-22.16,24.56,-25.91,-25.88,-25.88
3,TEST_00004,1.43,1.14,1.05,14.95,32.52,16.94,3.04,-25.73,-25.76,-21.95,24.83,-25.69,-25.69,-25.71
4,TEST_00005,1.33,1.00,0.95,14.85,31.70,16.80,3.12,-25.85,-25.86,-22.25,24.66,-25.80,-25.80,-25.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.27,0.96,0.99,13.25,31.29,16.66,3.16,-26.48,-26.47,-22.74,24.37,-26.40,-26.40,-26.41
39604,TEST_39605,1.26,0.92,0.96,13.74,31.39,16.68,3.17,-26.43,-26.43,-22.72,24.40,-26.37,-26.36,-26.37
39605,TEST_39606,1.25,0.93,0.96,13.24,31.22,16.62,3.17,-26.51,-26.50,-22.75,24.29,-26.45,-26.44,-26.46
39606,TEST_39607,1.23,0.89,0.94,13.39,31.28,16.64,3.19,-26.47,-26.47,-22.76,24.38,-26.42,-26.41,-26.43
