In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 글꼴깨짐 방지
import matplotlib
import matplotlib.font_manager as fm

import warnings

# 경고 메시지를 무시하고 숨기거나
warnings.filterwarnings(action='ignore')

fm.get_fontconfig_fonts()
font_location = 'C:/Windows/Fonts/malgun.ttf' # For Windows
font_name = fm.FontProperties(fname=font_location).get_name()
matplotlib.rc('font', family=font_name)

In [46]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
0,C2483,900,아파트,경상북도,국민임대,39.72,134,38.0,A,15667000,103680,0.0,3.0,1425.0,1015.0
1,C2483,900,아파트,경상북도,국민임대,39.72,15,38.0,A,15667000,103680,0.0,3.0,1425.0,1015.0
2,C2483,900,아파트,경상북도,국민임대,51.93,385,38.0,A,27304000,184330,0.0,3.0,1425.0,1015.0
3,C2483,900,아파트,경상북도,국민임대,51.93,15,38.0,A,27304000,184330,0.0,3.0,1425.0,1015.0
4,C2483,900,아파트,경상북도,국민임대,51.93,41,38.0,A,27304000,184330,0.0,3.0,1425.0,1015.0


In [47]:
def preprocessing(df):
    # 오류 단지코드가 존재하는 행들을  사전에 제거
    df_error =  ['C1095', 'C2051', 'C1218', 'C1894', 'C2483', 'C1502', 'C1988']
    #df_error =  ['C2335', 'C1327']
    df = df[~df['단지코드'].isin(df_error)].reset_index(drop=True)
    df.rename(columns = {'도보 10분거리 내 지하철역 수(환승노선 수 반영)':'지하철','도보 10분거리 내 버스정류장 수':'버스'},inplace=True)
    df.drop(columns=['임대보증금','임대료','자격유형','임대건물구분'],axis = 1,inplace=True)
    지역_비율 = (df.groupby(['지역'])['총세대수'].count())/(df.groupby(['지역'])['총세대수'].count().sum())*100
    지역_비율=지역_비율.reset_index(name='지역_비율')
    공급유형_비율 = (df.groupby(['공급유형'])['총세대수'].count())/(df.groupby(['공급유형'])['총세대수'].count().sum())*100
    공급유형_비율=공급유형_비율.reset_index(name='공급유형_비율')
    df = pd.merge(df,지역_비율, on='지역')
    df = pd.merge(df,공급유형_비율, on='공급유형')
    df.drop(columns=['지역','공급유형','단지코드'],axis = 1,inplace=True)
    df=df.dropna(axis=0)
    df = df[['총세대수', '전용면적', '전용면적별세대수', '공가수', '지하철', '버스', '단지내주차면수', '공급유형_비율',
       '지역_비율', '등록차량수']]
    return df

In [48]:
df = preprocessing(df)
df

Unnamed: 0,총세대수,전용면적,전용면적별세대수,공가수,지하철,버스,단지내주차면수,공급유형_비율,지역_비율,등록차량수
0,545,33.48,276,17.0,0.0,3.0,624.0,59.944751,12.741713,205.0
1,545,39.60,60,17.0,0.0,3.0,624.0,59.944751,12.741713,205.0
2,545,39.60,20,17.0,0.0,3.0,624.0,59.944751,12.741713,205.0
3,545,46.90,38,17.0,0.0,3.0,624.0,59.944751,12.741713,205.0
4,545,46.90,19,17.0,0.0,3.0,624.0,59.944751,12.741713,205.0
...,...,...,...,...,...,...,...,...,...,...
2891,370,51.99,80,9.0,0.0,3.0,1590.0,0.310773,2.209945,1595.0
2892,370,59.93,142,9.0,0.0,3.0,1590.0,0.310773,2.209945,1595.0
2893,790,51.32,126,13.0,0.0,3.0,673.0,0.310773,2.209945,645.0
2894,790,59.88,49,13.0,0.0,3.0,673.0,0.310773,2.209945,645.0


In [49]:
#Splitting the data into train and test split
from sklearn.model_selection import train_test_split
X = df.iloc[:, 1:-1]
y = df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
import lightgbm as lgb

model = lgb.LGBMRegressor()
# model.fit(X_train, y_train)
# pred = model.predict(X_test)

In [39]:
# 스케일링없이
from time import time
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score

start = time()
model.fit(X_train, y_train)
train_time = time() - start
start = time()
y_pred = model.predict(X_test)
predict_time = time()-start    
print(model)
print("\tTraining time: %0.3fs" % train_time)
print("\tPrediction time: %0.3fs" % predict_time)
print("\tExplained variance:", explained_variance_score(y_test, y_pred))
print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
print("\tR2 score:", r2_score(y_test, y_pred))
print()

LGBMRegressor()
	Training time: 0.204s
	Prediction time: 0.002s
	Explained variance: 0.9749516183438984
	Mean absolute error: 41.81102536265501
	R2 score: 0.9749361079985354



In [40]:
# 기본 스케일링
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc_X_train = sc.fit_transform(X_train)
sc_X_test = sc.transform(X_test)

In [41]:
import lightgbm as lgb

sc_model = lgb.LGBMRegressor()
model.fit(sc_X_train, y_train)
sc_pred = model.predict(sc_X_test)

In [42]:
# 스케일링한 모델 평가 mse 0.8% 잡았음
from time import time
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score

start = time()
model.fit(sc_X_train, y_train)
train_time = time() - start
start = time()
y_pred = model.predict(sc_X_test)
predict_time = time()-start    
print(sc_model)
print("\tsc_Training time: %0.3fs" % train_time)
print("\tsc_Prediction time: %0.3fs" % predict_time)
print("\tsc_Explained variance:", explained_variance_score(y_test, y_pred))
print("\tsc_Mean absolute error:", mean_absolute_error(y_test, y_pred))
print("\tsc_R2 score:", r2_score(y_test, y_pred))
print()

LGBMRegressor()
	sc_Training time: 0.196s
	sc_Prediction time: 0.001s
	sc_Explained variance: 0.9756247864193484
	sc_Mean absolute error: 41.057764299546015
	sc_R2 score: 0.9755941987556696



In [43]:
# 핵심 파라미터들 넣기
params = {'learning_rate': (0.01,0.1), 
          'max_depth': [16, 12, 8, 4],
          'boosting': ['gbdt'],
          'application':['mean_absolute_error'],
          'min_data_in_leaf':[20],
          'objective': ['regression'], 
          'metric': ['mae'],
          'max_cat_group' :[64],
          'is_training_metric': [True], 
          'num_leaves': [144], 
          'feature_fraction': [0.9,0.8], 
          'bagging_fraction': [0.7,0.6 ],
          'bagging_freq': [5]}

In [44]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(sc_model,params, verbose=10)
sc_model = grid.fit(X,y)
print(sc_model.best_params_,'\n')
print(sc_model.best_estimator_,'\n')

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5; 1/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 1/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.495 total time=   0.1s
[CV 2/5; 1/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 1/32] END application=mean_absolute_error, bagging_fraction=0.7, baggi

[CV 3/5; 2/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 2/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.576 total time=   0.0s
[CV 4/5; 2/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 2/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_me

[CV 1/5; 4/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 4/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.466 total time=   0.0s
[CV 2/5; 4/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 4/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metri

[CV 3/5; 5/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 5/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.519 total time=   0.0s
[CV 4/5; 5/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 5/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metri

[CV 5/5; 6/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.680 total time=   0.0s
[CV 1/5; 7/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 7/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.283 total time=   0.0s
[CV 2/5; 7/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_

[CV 1/5; 9/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 9/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.482 total time=   0.0s
[CV 2/5; 9/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 9/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_me

[CV 3/5; 10/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.571 total time=   0.0s
[CV 4/5; 10/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 10/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.384 total time=   0.0s
[CV 5/5; 10/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt,

[CV 3/5; 12/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 12/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.548 total time=   0.0s
[CV 4/5; 12/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 12/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_m

[CV 5/5; 13/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.720 total time=   0.0s
[CV 1/5; 14/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 14/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.281 total time=   0.0s
[CV 2/5; 14/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, fe

[CV 4/5; 15/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.758 total time=   0.0s
[CV 5/5; 15/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 15/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.711 total time=   0.0s
[CV 1/5; 16/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, featu

[CV 2/5; 17/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 17/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.552 total time=   0.0s
[CV 3/5; 17/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 17/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_trainin

[CV 5/5; 18/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 18/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.498 total time=   0.0s
[CV 1/5; 19/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 19/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training

[CV 5/5; 20/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.494 total time=   0.0s
[CV 1/5; 21/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 21/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.290 total time=   0.0s
[CV 2/5; 21/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 4/5; 22/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.734 total time=   0.0s
[CV 5/5; 22/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 22/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.656 total time=   0.0s
[CV 1/5; 23/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 2/5; 24/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 24/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.870 total time=   0.0s
[CV 3/5; 24/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 24/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metr

[CV 4/5; 25/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 25/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.383 total time=   0.0s
[CV 5/5; 25/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 25/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_trainin

[CV 2/5; 27/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 27/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.524 total time=   0.0s
[CV 3/5; 27/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 27/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_m

[CV 5/5; 28/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.455 total time=   0.0s
[CV 1/5; 29/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 29/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.359 total time=   0.0s
[CV 2/5; 29/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 4/5; 30/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 30/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.717 total time=   0.1s
[CV 5/5; 30/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 30/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_m

[CV 2/5; 32/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 32/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.869 total time=   0.0s
[CV 3/5; 32/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 32/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metr

In [25]:
sc_params = sc_model.best_params_
sc_params

{'application': 'mean_absolute_error',
 'bagging_fraction': 0.7,
 'bagging_freq': 5,
 'boosting': 'gbdt',
 'feature_fraction': 0.9,
 'is_training_metric': True,
 'learning_rate': 0.1,
 'max_cat_group': 64,
 'max_depth': 4,
 'metric': 'mae',
 'min_data_in_leaf': 20,
 'num_leaves': 144,
 'objective': 'regression'}

In [26]:
scores_df = pd.DataFrame(grid.cv_results_)
scores_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_application,param_bagging_fraction,param_bagging_freq,param_boosting,param_feature_fraction,param_is_training_metric,...,param_objective,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.080596,0.005776,0.0018,0.0003998518,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.494998,0.571094,0.577334,0.421873,0.492435,0.511547,0.05754,21
1,0.085,0.010881,0.0016,0.0004897069,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.49505,0.571253,0.576249,0.421943,0.492434,0.511386,0.057302,22
2,0.0776,0.006499,0.0016,0.0004898236,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.501767,0.569563,0.578369,0.42014,0.4938,0.512728,0.057602,20
3,0.0274,0.0008,0.0012,0.0003999711,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.466115,0.540497,0.578185,0.413308,0.486955,0.497012,0.057519,24
4,0.0856,0.010092,0.002,4.623108e-07,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.243705,0.838101,0.518701,0.733744,0.703299,0.60751,0.209057,14
5,0.0708,0.003868,0.0014,0.000489843,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.280928,0.832156,0.499802,0.751801,0.68002,0.608941,0.197378,12
6,0.0524,0.004964,0.0018,0.000399971,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.283349,0.833079,0.494515,0.753674,0.699054,0.612734,0.199167,11
7,0.0258,0.003059,0.0018,0.000400019,mean_absolute_error,0.7,5,gbdt,0.9,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.247001,0.861418,0.613742,0.783915,0.75391,0.651997,0.217776,1
8,0.0918,0.005635,0.0016,0.0004899988,mean_absolute_error,0.7,5,gbdt,0.8,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.482148,0.531077,0.571788,0.384081,0.43555,0.480929,0.066649,28
9,0.0862,0.002561,0.0016,0.0004898235,mean_absolute_error,0.7,5,gbdt,0.8,True,...,regression,"{'application': 'mean_absolute_error', 'baggin...",0.482037,0.530168,0.570818,0.384439,0.435733,0.480639,0.066119,29


In [27]:
grid.best_score_

0.6519970381060498

In [28]:
import lightgbm as lgb

In [29]:
LGBMRegressor_model = lgb.LGBMRegressor(application= 'mean_absolute_error',
 bagging_fraction= 0.7,
 bagging_freq= 5,
 boosting= 'gbdt',
 feature_fraction= 0.9,
 is_training_metric= True,
 learning_rate= 0.1,
 max_cat_group= 64,
 max_depth= 4,
 metric= 'mae',
 min_data_in_leaf= 20,
 num_leaves= 144,
 objective= 'regression')
LGBMRegressor_model.fit(sc_X_train, y_train)
pred = LGBMRegressor_model.predict(sc_X_test)

In [30]:
from sklearn.metrics import accuracy_score, mean_absolute_error
mean_absolute_error(y_test, pred)

75.10527601421106

In [50]:
# 로버츠 스케일링(아웃라이어 최소화)
from sklearn.preprocessing import RobustScaler
rc = RobustScaler()
rc_X_train = rc.fit_transform(X_train)
rc_X_test = rc.transform(X_test)

In [51]:
import lightgbm as lgb

rc_model = lgb.LGBMRegressor()
model.fit(rc_X_train, y_train)
rc_pred = model.predict(rc_X_test)

In [59]:
# 로버츠스케일링한 모델평가 기준모델보다 mae는 0.7정도,R2 score는 0.006가량 높음
from time import time
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score

start = time()
model.fit(rc_X_train, y_train)
train_time = time() - start
start = time()
y_pred = model.predict(rc_X_test)
predict_time = time()-start    
print('rc_model')
print("\tsc_Training time: %0.3fs" % train_time)
print("\tsc_Prediction time: %0.3fs" % predict_time)
print("\tsc_Explained variance:", explained_variance_score(y_test, y_pred))
print("\tsc_Mean absolute error:", mean_absolute_error(y_test, y_pred))
print("\tsc_R2 score:", r2_score(y_test, y_pred))
print()

rc_model
	sc_Training time: 0.191s
	sc_Prediction time: 0.001s
	sc_Explained variance: 0.9755147798730136
	sc_Mean absolute error: 41.1481772945113
	sc_R2 score: 0.9754875971297458



In [55]:
# best params 찾기
from sklearn.model_selection import GridSearchCV

rc_grid = GridSearchCV(rc_model,params, verbose=10)
rc_model = grid.fit(X,y)
print(rc_model.best_params_,'\n')
print(rc_model.best_estimator_,'\n')


Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5; 1/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 1/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.495 total time=   0.1s
[CV 2/5; 1/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 1/32] END application=mean_absolute_error, bagging_fraction=0.7, baggi

[CV 2/5; 2/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.571 total time=   0.0s
[CV 3/5; 2/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 2/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.576 total time=   0.0s
[CV 4/5; 2/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, fea

[CV 1/5; 4/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 4/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.466 total time=   0.0s
[CV 2/5; 4/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 4/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metri

[CV 3/5; 5/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 5/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.519 total time=   0.0s
[CV 4/5; 5/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 5/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metri

[CV 1/5; 7/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.283 total time=   0.0s
[CV 2/5; 7/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 7/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.833 total time=   0.0s
[CV 3/5; 7/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_f

[CV 1/5; 9/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.482 total time=   0.0s
[CV 2/5; 9/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 9/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.531 total time=   0.0s
[CV 3/5; 9/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, fea

[CV 4/5; 10/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.384 total time=   0.0s
[CV 5/5; 10/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 10/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.436 total time=   0.0s
[CV 1/5; 11/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt,

[CV 5/5; 12/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.437 total time=   0.0s
[CV 1/5; 13/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 13/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.307 total time=   0.0s
[CV 2/5; 13/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, fe

[CV 4/5; 14/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.761 total time=   0.0s
[CV 5/5; 14/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 14/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.716 total time=   0.0s
[CV 1/5; 15/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, fe

[CV 2/5; 16/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 16/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.860 total time=   0.0s
[CV 3/5; 16/32] START application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 16/32] END application=mean_absolute_error, bagging_fraction=0.7, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metr

[CV 4/5; 17/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.422 total time=   0.0s
[CV 5/5; 17/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 17/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.499 total time=   0.0s
[CV 1/5; 18/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt,

[CV 3/5; 19/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.583 total time=   0.0s
[CV 4/5; 19/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 19/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.421 total time=   0.0s
[CV 5/5; 19/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 1/5; 21/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 21/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.290 total time=   0.0s
[CV 2/5; 21/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 21/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_m

[CV 3/5; 22/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.495 total time=   0.0s
[CV 4/5; 22/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 22/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.734 total time=   0.0s
[CV 5/5; 22/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 3/5; 24/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 24/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.632 total time=   0.0s
[CV 4/5; 24/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=4, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 4/5; 24/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.9, is_training_metr

[CV 4/5; 25/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.383 total time=   0.0s
[CV 5/5; 25/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 25/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.449 total time=   0.0s
[CV 1/5; 26/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt,

[CV 2/5; 27/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.524 total time=   0.0s
[CV 3/5; 27/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 3/5; 27/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.01, max_cat_group=64, max_depth=8, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.566 total time=   0.0s
[CV 4/5; 27/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

[CV 1/5; 29/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 1/5; 29/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.359 total time=   0.0s
[CV 2/5; 29/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=16, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 2/5; 29/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_m

[CV 4/5; 30/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.717 total time=   0.0s
[CV 5/5; 30/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression
[CV 5/5; 30/32] END application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, feature_fraction=0.8, is_training_metric=True, learning_rate=0.1, max_cat_group=64, max_depth=12, metric=mae, min_data_in_leaf=20, num_leaves=144, objective=regression;, score=0.696 total time=   0.0s
[CV 1/5; 31/32] START application=mean_absolute_error, bagging_fraction=0.6, bagging_freq=5, boosting=gbdt, fe

{'application': 'mean_absolute_error', 'bagging_fraction': 0.7, 'bagging_freq': 5, 'boosting': 'gbdt', 'feature_fraction': 0.9, 'is_training_metric': True, 'learning_rate': 0.1, 'max_cat_group': 64, 'max_depth': 4, 'metric': 'mae', 'min_data_in_leaf': 20, 'num_leaves': 144, 'objective': 'regression'} 

LGBMRegressor(application='mean_absolute_error', bagging_fraction=0.7,
              bagging_freq=5, boosting='gbdt', feature_fraction=0.9,
              is_training_metric=True, max_cat_group=64, max_depth=4,
              metric='mae', min_data_in_leaf=20, num_leaves=144,
              objective='regression') 



In [56]:
# 똑같이 나옴
rc_params = rc_model.best_params_
rc_params

{'application': 'mean_absolute_error',
 'bagging_fraction': 0.7,
 'bagging_freq': 5,
 'boosting': 'gbdt',
 'feature_fraction': 0.9,
 'is_training_metric': True,
 'learning_rate': 0.1,
 'max_cat_group': 64,
 'max_depth': 4,
 'metric': 'mae',
 'min_data_in_leaf': 20,
 'num_leaves': 144,
 'objective': 'regression'}

In [None]:
# 결론 기존 모델에서 sc한 모델이 가장 높은 정확도를 지녔다 