### import

In [2]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import optuna
import lightgbm as lgb

### Fixed RandomSeed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

### Data Load

In [4]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [4]:
train.head()

Unnamed: 0,ID,생산년도,모델출시년도,브랜드,차량모델명,판매도시,판매구역,주행거리,배기량,압축천연가스(CNG),경유,가솔린,하이브리드,액화석유가스(LPG),가격
0,TRAIN_00000,2018,2014,skoda,fabia,KAT,SLA,85231,999,0,0,1,0,0,51.74
1,TRAIN_00001,2010,2006,toyota,auris,RKO,SWI,135000,1598,0,0,1,0,0,41.47
2,TRAIN_00002,2002,2002,mercedes-benz,clk-klasa,GNI,WIE,255223,1796,0,0,1,0,0,17.81
3,TRAIN_00003,2006,2001,nissan,x-trail,EHX,WIE,238000,2184,0,1,0,0,0,18.2
4,TRAIN_00004,2007,2007,fiat,bravo,OSW,MAL,251000,1910,0,1,0,0,0,17.55


### Split

In [5]:
train_x = train.drop(['ID', '가격'], axis = 1)
train_y = train['가격']

test_x = test.drop('ID', axis = 1)

### Data Processing

In [6]:
ordinal_features = ['브랜드', '차량모델명', '판매도시', '판매구역']

for feature in ordinal_features:
    le = LabelEncoder()
    le = le.fit(train_x[feature])
    train_x[feature] = le.transform(train_x[feature])

    # train데이터에서 존재하지 않았던 값이 test 데이터에 존재할 수도 있습니다.
    # 따라서 test 데이터를 바로 변형시키지 않고 고유값을 확인후 test 데이터를 변환합니다.
    # Data Leakage를 발생시키지 않기 위함이니, 반드시 주의해주세요.
    for label in np.unique(test_x[feature]):
        if label not in le.classes_:
            le.classes_ = np.append(le.classes_, label)
    test_x[feature] = le.transform(test_x[feature])

### Train-Validation Split

In [7]:
X_train, X_val, y_train, y_val = train_test_split(train_x, train_y, test_size=0.2, random_state=42)

### LightGBM 데이터셋으로 변환

In [8]:
train_data = lgb.Dataset(X_train, label=y_train)

### Objective Function for Optuna

In [9]:
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'mean_absolute_error',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 10, 200),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'verbose': 0,
        'random_state': 42
    }

    # LightGBM 모델 학습
    model = lgb.train(params, train_data, num_boost_round=100, valid_sets=[train_data], early_stopping_rounds=10, verbose_eval=False)

    # 검증 데이터에 대한 예측 수행
    y_pred = model.predict(X_val)

    # MAE 계산
    mae = mean_absolute_error(y_val, y_pred)
    return mae

### Optuna를 사용한 하이퍼파라미터 튜닝

In [10]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[I 2023-06-13 02:10:02,848] A new study created in memory with name: no-name-36e2b59e-4e23-4697-97ff-b23b8984531c


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:03,680] Trial 0 finished with value: 7.005217065279506 and parameters: {'learning_rate': 0.23814700058746416, 'num_leaves': 82, 'feature_fraction': 0.6144363932068045, 'bagging_fraction': 0.15959120815834577, 'bagging_freq': 8}. Best is trial 0 with value: 7.005217065279506.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:04,355] Trial 1 finished with value: 6.639242525638719 and parameters: {'learning_rate': 0.09298980086357593, 'num_leaves': 38, 'feature_fraction': 0.5879477241609797, 'bagging_fraction': 0.8083029887432317, 'bagging_freq': 4}. Best is trial 1 with value: 6.639242525638719.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:05,190] Trial 2 finished with value: 6.220406227208393 and parameters: {'learning_rate': 0.251310043839845, 'num_leaves': 109, 'feature_fraction': 0.9865115155487136, 'bagging_fraction': 0.7492593436655943, 'bagging_freq': 7}. Best is trial 2 with value: 6.220406227208393.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:06,328] Trial 3 finished with value: 6.542214188012067 and parameters: {'learning_rate': 0.051404919834148785, 'num_leaves': 118, 'feature_fraction': 0.47197057495844086, 'bagging_fraction': 0.39183813060932604, 'bagging_freq': 5}. Best is trial 2 with value: 6.220406227208393.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:07,479] Trial 4 finished with value: 6.53226875898096 and parameters: {'learning_rate': 0.056364060400165894, 'num_leaves': 83, 'feature_fraction': 0.5887155400030812, 'bagging_fraction': 0.6398382809206645, 'bagging_freq': 6}. Best is trial 2 with value: 6.220406227208393.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:08,292] Trial 5 finished with value: 6.606356571835494 and parameters: {'learning_rate': 0.0957621954048358, 'num_leaves': 172, 'feature_fraction': 0.23952451867269778, 'bagging_fraction': 0.5453122345807484, 'bagging_freq': 8}. Best is trial 2 with value: 6.220406227208393.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:09,070] Trial 6 finished with value: 6.26424341614426 and parameters: {'learning_rate': 0.22184371788894813, 'num_leaves': 179, 'feature_fraction': 0.8638264179933606, 'bagging_fraction': 0.5986736106532866, 'bagging_freq': 10}. Best is trial 2 with value: 6.220406227208393.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:09,405] Trial 7 finished with value: 6.948297029819033 and parameters: {'learning_rate': 0.07248434859670734, 'num_leaves': 33, 'feature_fraction': 0.6497914553892513, 'bagging_fraction': 0.6965021146114072, 'bagging_freq': 8}. Best is trial 2 with value: 6.220406227208393.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:10,300] Trial 8 finished with value: 6.166518590569599 and parameters: {'learning_rate': 0.2256376610722448, 'num_leaves': 161, 'feature_fraction': 0.3323687000608009, 'bagging_fraction': 0.92163435379245, 'bagging_freq': 4}. Best is trial 8 with value: 6.166518590569599.
[I 2023-06-13 02:10:10,497] Trial 9 finished with value: 7.338257835371472 and parameters: {'learning_rate': 0.25936415577122396, 'num_leaves': 15, 'feature_fraction': 0.4664246076787719, 'bagging_fraction': 0.1011979970000148, 'bagging_freq': 8}. Best is trial 8 with value: 6.166518590569599.


You can set `force_col_wise=true` to remove the overhead.




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:11,223] Trial 10 finished with value: 7.824342298871974 and parameters: {'learning_rate': 0.17141406760768424, 'num_leaves': 146, 'feature_fraction': 0.12999152543683487, 'bagging_fraction': 0.9619782373194846, 'bagging_freq': 1}. Best is trial 8 with value: 6.166518590569599.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:12,428] Trial 11 finished with value: 6.175767756901818 and parameters: {'learning_rate': 0.28587363171263686, 'num_leaves': 134, 'feature_fraction': 0.9865959601356089, 'bagging_fraction': 0.9828699137508425, 'bagging_freq': 3}. Best is trial 8 with value: 6.166518590569599.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:13,866] Trial 12 finished with value: 6.225204459469729 and parameters: {'learning_rate': 0.29779866042119013, 'num_leaves': 198, 'feature_fraction': 0.8224722218164864, 'bagging_fraction': 0.9845703717120815, 'bagging_freq': 2}. Best is trial 8 with value: 6.166518590569599.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:15,672] Trial 13 finished with value: 6.160705066845903 and parameters: {'learning_rate': 0.2965426401898389, 'num_leaves': 144, 'feature_fraction': 0.3443133847758629, 'bagging_fraction': 0.8768466563706184, 'bagging_freq': 3}. Best is trial 13 with value: 6.160705066845903.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:17,063] Trial 14 finished with value: 6.198127682331521 and parameters: {'learning_rate': 0.20242900206030973, 'num_leaves': 154, 'feature_fraction': 0.31416372155058386, 'bagging_fraction': 0.8059326426366245, 'bagging_freq': 4}. Best is trial 13 with value: 6.160705066845903.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:18,606] Trial 15 finished with value: 6.223122684443719 and parameters: {'learning_rate': 0.19026922021424605, 'num_leaves': 165, 'feature_fraction': 0.3394927548583815, 'bagging_fraction': 0.8833466489931701, 'bagging_freq': 2}. Best is trial 13 with value: 6.160705066845903.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:19,542] Trial 16 finished with value: 8.077745610427302 and parameters: {'learning_rate': 0.14734450773590646, 'num_leaves': 130, 'feature_fraction': 0.11978529506844623, 'bagging_fraction': 0.8680565835235172, 'bagging_freq': 4}. Best is trial 13 with value: 6.160705066845903.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.






[I 2023-06-13 02:10:20,859] Trial 17 finished with value: 6.102948074811875 and parameters: {'learning_rate': 0.2687338445800219, 'num_leaves': 91, 'feature_fraction': 0.3806798367948581, 'bagging_fraction': 0.884150967005413, 'bagging_freq': 1}. Best is trial 17 with value: 6.102948074811875.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:22,014] Trial 18 finished with value: 6.1777905510761935 and parameters: {'learning_rate': 0.2732421217659805, 'num_leaves': 88, 'feature_fraction': 0.4545267585741852, 'bagging_fraction': 0.8188205899406797, 'bagging_freq': 1}. Best is trial 17 with value: 6.102948074811875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:22,912] Trial 19 finished with value: 6.228475176737969 and parameters: {'learning_rate': 0.29127894137339966, 'num_leaves': 63, 'feature_fraction': 0.4025933343656285, 'bagging_fraction': 0.7147997128703748, 'bagging_freq': 2}. Best is trial 17 with value: 6.102948074811875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:24,398] Trial 20 finished with value: 6.324570300488992 and parameters: {'learning_rate': 0.2616577050759644, 'num_leaves': 64, 'feature_fraction': 0.22415061817279208, 'bagging_fraction': 0.8866554168741031, 'bagging_freq': 3}. Best is trial 17 with value: 6.102948074811875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:25,898] Trial 21 finished with value: 6.0194127883532875 and parameters: {'learning_rate': 0.22737065045291863, 'num_leaves': 196, 'feature_fraction': 0.3464657920169083, 'bagging_fraction': 0.9184133292453341, 'bagging_freq': 3}. Best is trial 21 with value: 6.0194127883532875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:27,360] Trial 22 finished with value: 6.270728619184793 and parameters: {'learning_rate': 0.29893298413053715, 'num_leaves': 198, 'feature_fraction': 0.24402990535197044, 'bagging_fraction': 0.9145951558225762, 'bagging_freq': 1}. Best is trial 21 with value: 6.0194127883532875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:28,102] Trial 23 finished with value: 6.054980496147606 and parameters: {'learning_rate': 0.26215706423555846, 'num_leaves': 95, 'feature_fraction': 0.35150543573024007, 'bagging_fraction': 0.9928420265420334, 'bagging_freq': 3}. Best is trial 21 with value: 6.0194127883532875.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:29,115] Trial 24 finished with value: 6.128073629215321 and parameters: {'learning_rate': 0.25028731289814704, 'num_leaves': 97, 'feature_fraction': 0.5103540764906311, 'bagging_fraction': 0.9979709974983251, 'bagging_freq': 2}. Best is trial 21 with value: 6.0194127883532875.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:29,909] Trial 25 finished with value: 6.006767144792921 and parameters: {'learning_rate': 0.22743734344098757, 'num_leaves': 117, 'feature_fraction': 0.41237358233998067, 'bagging_fraction': 0.9945029644392124, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:30,748] Trial 26 finished with value: 6.134463475934117 and parameters: {'learning_rate': 0.21359277054209685, 'num_leaves': 62, 'feature_fraction': 0.42847767194116854, 'bagging_fraction': 0.9827607248592762, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:33,101] Trial 27 finished with value: 6.088060412169043 and parameters: {'learning_rate': 0.23670143433253577, 'num_leaves': 114, 'feature_fraction': 0.5222133146691929, 'bagging_fraction': 0.9385443718470265, 'bagging_freq': 6}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:35,074] Trial 28 finished with value: 6.160961417521282 and parameters: {'learning_rate': 0.1998397562511965, 'num_leaves': 182, 'feature_fraction': 0.28944354457311156, 'bagging_fraction': 0.9984878279349163, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:36,069] Trial 29 finished with value: 6.042655936015539 and parameters: {'learning_rate': 0.2371903358668207, 'num_leaves': 130, 'feature_fraction': 0.3966862197021421, 'bagging_fraction': 0.8087715768501351, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:36,761] Trial 30 finished with value: 6.1194862815767745 and parameters: {'learning_rate': 0.23424423906907763, 'num_leaves': 122, 'feature_fraction': 0.4068961327856666, 'bagging_fraction': 0.7801198518804362, 'bagging_freq': 6}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:37,260] Trial 31 finished with value: 6.141941038501318 and parameters: {'learning_rate': 0.24570923304810502, 'num_leaves': 73, 'feature_fraction': 0.3927047996741421, 'bagging_fraction': 0.8386255207516907, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:37,879] Trial 32 finished with value: 6.204147619827184 and parameters: {'learning_rate': 0.231831498877376, 'num_leaves': 105, 'feature_fraction': 0.2848141054795372, 'bagging_fraction': 0.9300052804388034, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:38,705] Trial 33 finished with value: 6.173771920270874 and parameters: {'learning_rate': 0.2756109376203776, 'num_leaves': 103, 'feature_fraction': 0.5279219233874344, 'bagging_fraction': 0.8167643552626689, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:40,119] Trial 34 finished with value: 6.0392046637411045 and parameters: {'learning_rate': 0.24687694122857756, 'num_leaves': 134, 'feature_fraction': 0.3728884597473338, 'bagging_fraction': 0.9162422478719892, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:41,205] Trial 35 finished with value: 6.169207853917732 and parameters: {'learning_rate': 0.24495637184198318, 'num_leaves': 134, 'feature_fraction': 0.45481296061956655, 'bagging_fraction': 0.754455995014513, 'bagging_freq': 7}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:41,880] Trial 36 finished with value: 6.0811717599560415 and parameters: {'learning_rate': 0.21461718349214165, 'num_leaves': 121, 'feature_fraction': 0.5633394811637061, 'bagging_fraction': 0.8413504486997581, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:42,628] Trial 37 finished with value: 6.069354974014229 and parameters: {'learning_rate': 0.18125452483212, 'num_leaves': 144, 'feature_fraction': 0.4916912699778474, 'bagging_fraction': 0.931500619031155, 'bagging_freq': 7}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:43,443] Trial 38 finished with value: 6.134671918417704 and parameters: {'learning_rate': 0.21463241470792468, 'num_leaves': 183, 'feature_fraction': 0.4321984984992699, 'bagging_fraction': 0.7626968510095803, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:44,328] Trial 39 finished with value: 6.223346930330772 and parameters: {'learning_rate': 0.24703963933430992, 'num_leaves': 156, 'feature_fraction': 0.6090616575048565, 'bagging_fraction': 0.7167501101895616, 'bagging_freq': 7}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:45,144] Trial 40 finished with value: 6.099350469740805 and parameters: {'learning_rate': 0.22759546038315046, 'num_leaves': 169, 'feature_fraction': 0.3622451739539915, 'bagging_fraction': 0.6766431574387048, 'bagging_freq': 10}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:45,799] Trial 41 finished with value: 6.018098768057871 and parameters: {'learning_rate': 0.25703823142571486, 'num_leaves': 114, 'feature_fraction': 0.37599245106929263, 'bagging_fraction': 0.9339386683145516, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:46,446] Trial 42 finished with value: 6.0399172704703865 and parameters: {'learning_rate': 0.2799599877688494, 'num_leaves': 111, 'feature_fraction': 0.38283637272203996, 'bagging_fraction': 0.9325511094978183, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:47,052] Trial 43 finished with value: 6.181379342181953 and parameters: {'learning_rate': 0.2779221027242524, 'num_leaves': 78, 'feature_fraction': 0.47935187278832736, 'bagging_fraction': 0.9417280237692249, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:47,710] Trial 44 finished with value: 6.172730624141576 and parameters: {'learning_rate': 0.2589996301910952, 'num_leaves': 111, 'feature_fraction': 0.30327703651648036, 'bagging_fraction': 0.9123814947789162, 'bagging_freq': 6}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:48,548] Trial 45 finished with value: 6.175294605978538 and parameters: {'learning_rate': 0.28280522903760874, 'num_leaves': 119, 'feature_fraction': 0.4290269220546876, 'bagging_fraction': 0.9509879319115232, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:49,178] Trial 46 finished with value: 6.083272738031561 and parameters: {'learning_rate': 0.2552096008981824, 'num_leaves': 107, 'feature_fraction': 0.3728510834271027, 'bagging_fraction': 0.8547894082911545, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:49,485] Trial 47 finished with value: 6.459451551819264 and parameters: {'learning_rate': 0.2701596356556408, 'num_leaves': 46, 'feature_fraction': 0.24955269905251543, 'bagging_fraction': 0.9008273285419541, 'bagging_freq': 6}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.






[I 2023-06-13 02:10:49,978] Trial 48 finished with value: 7.375850700777744 and parameters: {'learning_rate': 0.28005077205267787, 'num_leaves': 143, 'feature_fraction': 0.18917801442214532, 'bagging_fraction': 0.9520347951667381, 'bagging_freq': 9}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.






[I 2023-06-13 02:10:50,734] Trial 49 finished with value: 6.182184399482975 and parameters: {'learning_rate': 0.2192016412011243, 'num_leaves': 135, 'feature_fraction': 0.3238133518784798, 'bagging_fraction': 0.8568653454627597, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.






[I 2023-06-13 02:10:51,394] Trial 50 finished with value: 6.236845767566434 and parameters: {'learning_rate': 0.25050874237603243, 'num_leaves': 122, 'feature_fraction': 0.3620866680020715, 'bagging_fraction': 0.5613096042739419, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:52,018] Trial 51 finished with value: 6.083285784172063 and parameters: {'learning_rate': 0.23830083453074286, 'num_leaves': 130, 'feature_fraction': 0.4007695647192234, 'bagging_fraction': 0.7908940705459064, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:52,547] Trial 52 finished with value: 6.185861881706121 and parameters: {'learning_rate': 0.22982533732784627, 'num_leaves': 100, 'feature_fraction': 0.32660494305222587, 'bagging_fraction': 0.9061247796569567, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:53,256] Trial 53 finished with value: 6.137717509802435 and parameters: {'learning_rate': 0.2642026496512409, 'num_leaves': 151, 'feature_fraction': 0.44271571841841195, 'bagging_fraction': 0.9649293492678027, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:53,897] Trial 54 finished with value: 6.194832698016786 and parameters: {'learning_rate': 0.2879784316832939, 'num_leaves': 113, 'feature_fraction': 0.48656773895675975, 'bagging_fraction': 0.8782497511742243, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:54,368] Trial 55 finished with value: 6.052799371523025 and parameters: {'learning_rate': 0.20203267546477682, 'num_leaves': 86, 'feature_fraction': 0.3830587874487531, 'bagging_fraction': 0.8271498892373357, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:54,978] Trial 56 finished with value: 6.13716227867953 and parameters: {'learning_rate': 0.23915658833236883, 'num_leaves': 127, 'feature_fraction': 0.2877239976054235, 'bagging_fraction': 0.9625368636547663, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:55,781] Trial 57 finished with value: 6.07558346851293 and parameters: {'learning_rate': 0.2681959314705922, 'num_leaves': 141, 'feature_fraction': 0.3510693339773877, 'bagging_fraction': 0.9151525402800534, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:56,771] Trial 58 finished with value: 6.046840172461395 and parameters: {'learning_rate': 0.2518372979231156, 'num_leaves': 175, 'feature_fraction': 0.41693504236837575, 'bagging_fraction': 0.8708407421141903, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:57,545] Trial 59 finished with value: 6.095172023556727 and parameters: {'learning_rate': 0.22465135714852896, 'num_leaves': 138, 'feature_fraction': 0.4603531147792271, 'bagging_fraction': 0.8002273052384464, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:10:58,052] Trial 60 finished with value: 6.175918879332123 and parameters: {'learning_rate': 0.2983361226230735, 'num_leaves': 95, 'feature_fraction': 0.3257911778739732, 'bagging_fraction': 0.9648651518556681, 'bagging_freq': 6}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:58,782] Trial 61 finished with value: 6.071211097290904 and parameters: {'learning_rate': 0.25549981012789075, 'num_leaves': 190, 'feature_fraction': 0.4146082181780025, 'bagging_fraction': 0.8622527055363225, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:10:59,450] Trial 62 finished with value: 6.0446960762303865 and parameters: {'learning_rate': 0.243712379707286, 'num_leaves': 174, 'feature_fraction': 0.3874380203062247, 'bagging_fraction': 0.8913993271768311, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:00,330] Trial 63 finished with value: 6.049140693266202 and parameters: {'learning_rate': 0.24096234695822885, 'num_leaves': 190, 'feature_fraction': 0.37635773769318104, 'bagging_fraction': 0.8964098866892978, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:01,934] Trial 64 finished with value: 6.1401305071139 and parameters: {'learning_rate': 0.26919647093288906, 'num_leaves': 200, 'feature_fraction': 0.34316394390387317, 'bagging_fraction': 0.9973039471599158, 'bagging_freq': 4}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:03,586] Trial 65 finished with value: 6.009342481796697 and parameters: {'learning_rate': 0.22127458386893364, 'num_leaves': 159, 'feature_fraction': 0.3941828958689975, 'bagging_fraction': 0.9299773433927967, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:06,740] Trial 66 finished with value: 6.150885718160546 and parameters: {'learning_rate': 0.22180458908032413, 'num_leaves': 160, 'feature_fraction': 0.2715922796638637, 'bagging_fraction': 0.9290145634142831, 'bagging_freq': 1}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:08,265] Trial 67 finished with value: 6.043726555767153 and parameters: {'learning_rate': 0.2084919493063629, 'num_leaves': 149, 'feature_fraction': 0.4474248794929704, 'bagging_fraction': 0.9760664641307226, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:11,757] Trial 68 finished with value: 6.198672635874464 and parameters: {'learning_rate': 0.22842324197981298, 'num_leaves': 130, 'feature_fraction': 0.3090662390042419, 'bagging_fraction': 0.8352634973756251, 'bagging_freq': 5}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:13,566] Trial 69 finished with value: 6.025288557862022 and parameters: {'learning_rate': 0.2594895208196688, 'num_leaves': 125, 'feature_fraction': 0.39924357107374775, 'bagging_fraction': 0.9435932133872282, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:15,408] Trial 70 finished with value: 6.036601598201787 and parameters: {'learning_rate': 0.25816645283204354, 'num_leaves': 114, 'feature_fraction': 0.3535481455570722, 'bagging_fraction': 0.9375519083633344, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:16,733] Trial 71 finished with value: 6.075971348147128 and parameters: {'learning_rate': 0.2873292933676767, 'num_leaves': 117, 'feature_fraction': 0.34957746072805435, 'bagging_fraction': 0.9424149416422656, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:17,948] Trial 72 finished with value: 6.020117018516099 and parameters: {'learning_rate': 0.26095349415883395, 'num_leaves': 106, 'feature_fraction': 0.4215580026997262, 'bagging_fraction': 0.9752563223515381, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:18,966] Trial 73 finished with value: 6.057780285649062 and parameters: {'learning_rate': 0.2603274767486469, 'num_leaves': 126, 'feature_fraction': 0.42006162674237335, 'bagging_fraction': 0.9989729986434472, 'bagging_freq': 1}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:19,852] Trial 74 finished with value: 6.0983493392843835 and parameters: {'learning_rate': 0.25032707548081456, 'num_leaves': 100, 'feature_fraction': 0.4619824851844049, 'bagging_fraction': 0.9521890953651369, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:20,751] Trial 75 finished with value: 6.122445944079982 and parameters: {'learning_rate': 0.26486097284957816, 'num_leaves': 108, 'feature_fraction': 0.49288393985138496, 'bagging_fraction': 0.9747490802248883, 'bagging_freq': 1}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:21,673] Trial 76 finished with value: 6.19829301619676 and parameters: {'learning_rate': 0.2737080354099215, 'num_leaves': 91, 'feature_fraction': 0.30967590957392055, 'bagging_fraction': 0.9130258614018943, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:22,734] Trial 77 finished with value: 6.028530825980461 and parameters: {'learning_rate': 0.23272476004455067, 'num_leaves': 116, 'feature_fraction': 0.4064026005539024, 'bagging_fraction': 0.9737105036778004, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:23,670] Trial 78 finished with value: 6.092380351931349 and parameters: {'learning_rate': 0.23643293069824486, 'num_leaves': 81, 'feature_fraction': 0.43340158279179464, 'bagging_fraction': 0.9699882486390936, 'bagging_freq': 3}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:24,780] Trial 79 finished with value: 6.033971147989018 and parameters: {'learning_rate': 0.22073577830671057, 'num_leaves': 115, 'feature_fraction': 0.4025119017141532, 'bagging_fraction': 0.8867714359215016, 'bagging_freq': 2}. Best is trial 25 with value: 6.006767144792921.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:25,815] Trial 80 finished with value: 5.996730306488689 and parameters: {'learning_rate': 0.196964601256542, 'num_leaves': 102, 'feature_fraction': 0.40679967020045177, 'bagging_fraction': 0.8801603093743194, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:26,752] Trial 81 finished with value: 6.021562466339619 and parameters: {'learning_rate': 0.20864479850112774, 'num_leaves': 104, 'feature_fraction': 0.4046398050918808, 'bagging_fraction': 0.8911212919956674, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:27,721] Trial 82 finished with value: 6.074809195447529 and parameters: {'learning_rate': 0.19462725607370818, 'num_leaves': 103, 'feature_fraction': 0.46124235551807946, 'bagging_fraction': 0.9762109012856024, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:28,595] Trial 83 finished with value: 6.020267323734797 and parameters: {'learning_rate': 0.21519736451163168, 'num_leaves': 94, 'feature_fraction': 0.4068528198053685, 'bagging_fraction': 0.9191564289413104, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:29,486] Trial 84 finished with value: 6.120976201998152 and parameters: {'learning_rate': 0.21177443044979266, 'num_leaves': 73, 'feature_fraction': 0.44278586188705066, 'bagging_fraction': 0.8596620957288551, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:30,681] Trial 85 finished with value: 6.055727171204862 and parameters: {'learning_rate': 0.2075592090376441, 'num_leaves': 93, 'feature_fraction': 0.5021424711350004, 'bagging_fraction': 0.9203382177536724, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:31,592] Trial 86 finished with value: 6.043431946749918 and parameters: {'learning_rate': 0.18793333187389452, 'num_leaves': 87, 'feature_fraction': 0.37014970802392355, 'bagging_fraction': 0.8924849032119768, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:32,109] Trial 87 finished with value: 6.989509649866141 and parameters: {'learning_rate': 0.21989508485545936, 'num_leaves': 14, 'feature_fraction': 0.47518563036870054, 'bagging_fraction': 0.8393242005623833, 'bagging_freq': 4}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:33,013] Trial 88 finished with value: 6.1029731821664495 and parameters: {'learning_rate': 0.20143182354796169, 'num_leaves': 99, 'feature_fraction': 0.5313736946223961, 'bagging_fraction': 0.9462079786417218, 'bagging_freq': 4}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:34,163] Trial 89 finished with value: 6.194050958703286 and parameters: {'learning_rate': 0.22741128895326557, 'num_leaves': 105, 'feature_fraction': 0.3366608671091822, 'bagging_fraction': 0.9996897263907735, 'bagging_freq': 2}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:35,385] Trial 90 finished with value: 6.034731172220845 and parameters: {'learning_rate': 0.2074110636960622, 'num_leaves': 164, 'feature_fraction': 0.4266101563272363, 'bagging_fraction': 0.8794340017837181, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:36,463] Trial 91 finished with value: 6.018186344492767 and parameters: {'learning_rate': 0.23387392021588305, 'num_leaves': 124, 'feature_fraction': 0.3983920146459674, 'bagging_fraction': 0.9570030434089255, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:37,541] Trial 92 finished with value: 6.03463778916027 and parameters: {'learning_rate': 0.21840688215959048, 'num_leaves': 124, 'feature_fraction': 0.3935824199584885, 'bagging_fraction': 0.9259116042527029, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:38,511] Trial 93 finished with value: 6.0403731547248904 and parameters: {'learning_rate': 0.2341035050697014, 'num_leaves': 110, 'feature_fraction': 0.3641720958421661, 'bagging_fraction': 0.901722303154326, 'bagging_freq': 2}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:39,504] Trial 94 finished with value: 6.005438347515468 and parameters: {'learning_rate': 0.2142569955533495, 'num_leaves': 120, 'feature_fraction': 0.4080929675194175, 'bagging_fraction': 0.9517706923199584, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:40,461] Trial 95 finished with value: 6.10742826135311 and parameters: {'learning_rate': 0.21405066153673033, 'num_leaves': 97, 'feature_fraction': 0.4406641262891954, 'bagging_fraction': 0.959233751332022, 'bagging_freq': 4}. Best is trial 80 with value: 5.996730306488689.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2023-06-13 02:11:41,768] Trial 96 finished with value: 6.174521701171291 and parameters: {'learning_rate': 0.22513110660262353, 'num_leaves': 119, 'feature_fraction': 0.3359746946349825, 'bagging_fraction': 0.9148891606222573, 'bagging_freq': 3}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:42,683] Trial 97 finished with value: 6.0481086969374065 and parameters: {'learning_rate': 0.1949924508265502, 'num_leaves': 104, 'feature_fraction': 0.4708273189480902, 'bagging_fraction': 0.9844197540781021, 'bagging_freq': 4}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:43,636] Trial 98 finished with value: 6.076232882952512 and parameters: {'learning_rate': 0.24501124920407916, 'num_leaves': 84, 'feature_fraction': 0.41469225112044783, 'bagging_fraction': 0.8680469143573057, 'bagging_freq': 2}. Best is trial 80 with value: 5.996730306488689.


You can set `force_col_wise=true` to remove the overhead.


[I 2023-06-13 02:11:44,448] Trial 99 finished with value: 6.06537715647223 and parameters: {'learning_rate': 0.1777239223301375, 'num_leaves': 77, 'feature_fraction': 0.37358054475155206, 'bagging_fraction': 0.9556920414437668, 'bagging_freq': 2}. Best is trial 80 with value: 5.996730306488689.


### 최적의 하이퍼파라미터 출력

In [12]:
print("Best Hyperparameters:", study.best_params)

Best Hyperparameters: {'learning_rate': 0.196964601256542, 'num_leaves': 102, 'feature_fraction': 0.40679967020045177, 'bagging_fraction': 0.8801603093743194, 'bagging_freq': 3}


### 최적의 모델로 재학습

In [13]:
best_model = lgb.train({**study.best_params, 'verbose': 0}, train_data, num_boost_round=100)

You can set `force_col_wise=true` to remove the overhead.


### 검증 데이터에 대한 예측 수행 및 평가 지표 출력

In [14]:
y_pred = best_model.predict(X_val)

mae = mean_absolute_error(y_val, y_pred)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 6.061292161637324


### Inference & Submit

In [15]:
preds = best_model.predict(test_x)

In [16]:
submission = pd.read_csv('sample_submission.csv')
submission['가격'] = preds
submission.to_csv('./lightgbm_with_optuna_submit.csv', index = False)