## 1. 라이브러리 import 및 설정

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from hyperopt import STATUS_OK, Trials, hp, space_eval, tpe, fmin, pyll
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from pathlib import Path
import warnings

In [3]:
pd.set_option('max_columns', 100)
pd.set_option("display.precision", 4)
warnings.simplefilter('ignore')

## 2. 경로 지정

In [4]:
data_dir = Path('.\\raw_data\\')
feature_dir = Path('.\\feature\\')
val_dir = Path('.\\val\\')
tst_dir = Path('.\\tst\\')
sub_dir = Path('.\\sub\\')

trn_file = data_dir / 'train.csv'
tst_file = data_dir / 'test.csv'
sample_file = data_dir / 'sample_submission.csv'

target_col = 'class'
n_fold = 5
n_class = 3
seed = 42

## 3. 학습 데이터 로드

In [5]:
trn = pd.read_csv(trn_file, index_col=0)

## 4. 시험 데이터 로드

In [6]:
tst = pd.read_csv(tst_file, index_col=0)

## 5. 학습/시험 데이터 결합

In [7]:
df = pd.concat([trn, tst], axis=0)

## 6. 결측값 처리

In [8]:
df.fillna(-1, inplace=True)

## 7. 피쳐 생성

In [9]:
df['d_dered_u'] = df['dered_u'] - df['u']
df['d_dered_g'] = df['dered_g'] - df['g']
df['d_dered_r'] = df['dered_r'] - df['r']
df['d_dered_i'] = df['dered_i'] - df['i']
df['d_dered_z'] = df['dered_z'] - df['z']

df['d_dered_rg'] = df['dered_r'] - df['dered_g']
df['d_dered_ig'] = df['dered_i'] - df['dered_g']
df['d_dered_zg'] = df['dered_z'] - df['dered_g']
df['d_dered_ri'] = df['dered_r'] - df['dered_i']
df['d_dered_rz'] = df['dered_r'] - df['dered_z']
df['d_dered_iz'] = df['dered_i'] - df['dered_z']

df['d_ug'] = df['u'] - df['g']
df['d_gr'] = df['g'] - df['r']
df['d_ri'] = df['r'] - df['i']
df['d_iz'] = df['i'] - df['z']

df['d_obs_det'] = df['nObserve'] - df['nDetect']

## 8. 피쳐 삭제

In [10]:
df.drop(['airmass_u', 'airmass_g', 'airmass_r', 'airmass_i', 'nDetect', 'z', 'g', 'r', 'i', 'd_dered_rg', 'd_dered_ri'
        ], 
        axis=1, inplace=True)

## 9. 새로운 학습 데이터 파일 저장

In [11]:
df.to_csv(feature_dir / 'feature.csv')

## 10. polyLGCV

### 10-1) 경로 설정

In [12]:
feature_name = 'polyLRCV'
feature_file = feature_dir / f'{feature_name}.csv'

p_val_file = val_dir / f'{feature_name}.val.csv'
p_tst_file = tst_dir / f'{feature_name}.tst.csv'

### 10-2) 피쳐 열기

In [13]:
df = pd.read_csv(feature_dir / 'feature.csv', index_col=0)

### 10-3) Polynomial 피쳐 만들기

In [14]:
scaler = StandardScaler()
poly = PolynomialFeatures(2)
X = poly.fit_transform(scaler.fit_transform(df.drop([target_col], axis=1)))
feature_names = poly.get_feature_names(df.columns)
feature_names

['1',
 'u',
 'redshift',
 'dered_u',
 'dered_g',
 'dered_r',
 'dered_i',
 'dered_z',
 'nObserve',
 'airmass_z',
 'class',
 'd_dered_u',
 'd_dered_g',
 'd_dered_r',
 'd_dered_i',
 'd_dered_z',
 'd_dered_ig',
 'd_dered_zg',
 'd_dered_rz',
 'd_dered_iz',
 'd_ug',
 'd_gr',
 'd_ri',
 'd_iz',
 'u^2',
 'u redshift',
 'u dered_u',
 'u dered_g',
 'u dered_r',
 'u dered_i',
 'u dered_z',
 'u nObserve',
 'u airmass_z',
 'u class',
 'u d_dered_u',
 'u d_dered_g',
 'u d_dered_r',
 'u d_dered_i',
 'u d_dered_z',
 'u d_dered_ig',
 'u d_dered_zg',
 'u d_dered_rz',
 'u d_dered_iz',
 'u d_ug',
 'u d_gr',
 'u d_ri',
 'u d_iz',
 'redshift^2',
 'redshift dered_u',
 'redshift dered_g',
 'redshift dered_r',
 'redshift dered_i',
 'redshift dered_z',
 'redshift nObserve',
 'redshift airmass_z',
 'redshift class',
 'redshift d_dered_u',
 'redshift d_dered_g',
 'redshift d_dered_r',
 'redshift d_dered_i',
 'redshift d_dered_z',
 'redshift d_dered_ig',
 'redshift d_dered_zg',
 'redshift d_dered_rz',
 'redshift d_

In [15]:
df_poly = pd.DataFrame(data=X, columns=feature_names, index=df.index)
df_poly[target_col] = df[target_col]
df_poly.head()
df_poly.to_csv(feature_file)
del df_poly, df

### 10-4) 만든 피쳐를 열기

In [16]:
df = pd.read_csv(feature_file, index_col=0)

### 10-5) 피쳐 분리

In [17]:
y = df[target_col].values[:320000]
df.drop(target_col, axis=1, inplace=True)
trn = df.iloc[:320000].values
tst = df.iloc[320000:].values
feature_name = df.columns.tolist()
print(y.shape, trn.shape, tst.shape)

(320000,) (320000, 299) (80000, 299)


### 10-6) 학습 및 앙상블 파일 생성

In [18]:
seed = 42
cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)

In [19]:
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))
for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    clf = LogisticRegression(multi_class='multinomial')
    clf.fit(trn[i_trn], y[i_trn])
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    p_tst += clf.predict_proba(tst) / n_fold

training model for CV #1
training model for CV #2
training model for CV #3
training model for CV #4
training model for CV #5


In [20]:
#셔플때문에 할 때마다 정확도가 달라집니다.
#제출시 정확도는 88.4031%였습니다.
print(f'{accuracy_score(y, np.argmax(p_val, axis=1)) * 100:.4f}%')

88.1884%


In [21]:
np.savetxt(p_val_file, p_val, fmt='%.6f', delimiter=',')
np.savetxt(p_tst_file, p_tst, fmt='%.6f', delimiter=',')

## 11. RFCV

### 11-1) 경로 설정

In [22]:
feature_name = 'RFCV'
feature_file = feature_dir / f'{feature_name}.csv'

p_val_file = val_dir / f'{feature_name}.val.csv'
p_tst_file = tst_dir / f'{feature_name}.tst.csv'

### 11-2) 피쳐 열기

In [23]:
df = pd.read_csv(feature_dir / 'feature.csv', index_col=0)

### 11-3) 피쳐 분리

In [24]:
y = df[target_col].values[:320000]
df.drop(target_col, axis=1, inplace=True)
trn = df.iloc[:320000].values
tst = df.iloc[320000:].values
feature_name = df.columns.tolist()
print(y.shape, trn.shape, tst.shape)

(320000,) (320000, 23) (80000, 23)


### 11-4) 하이퍼 파라미터 튜닝

In [25]:
X_trn, X_val, y_trn, y_val = train_test_split(trn, y, test_size=.2, random_state=seed)

In [26]:
params = {
    'n_jobs'                    : -1,
    'random_state'              : seed,
    'verbose'                   : 0,
    'warm_start'                : False,  #False로 해야 비교가 되지...
    
    
    #'bootstrap'                 : True,
    #'oob_score'                 : hp.choice('oob_score', [True, False]),
    'bootstrap'                : False,
    
    #class_weight               : None
    
    #'min_weight_fraction_leaf' : .0
    #'min_samples_split'        : 2
    # 'max_leaf_nodes'          : None,
    # 'min_impurity_decrease'   : .0,     : 불순도(?)감소가 주어진 값 이상이 되어야 분리됨,
    # 'min_impurity_split'      : None,      : early-stopping을 위한 threshold
    #'ccp_alpha'                : .0,
    #'max_samples'              : None,
}

#oob_score는 bootstrap이 true일 때만 가능
space = {
    'criterion'                 : hp.choice('criterion', ['entropy', 'gini']),
    'max_features'              : hp.choice('max_features', ['sqrt', 'log2', None]), 
    'n_estimators'              : hp.choice('n_estimators', np.arange(50, 300, dtype=int)),
    #overfitting 방지
    'min_samples_leaf'          : hp.choice('min_samples_leaf', np.arange(1, 30, dtype=int)),
    'max_depth'                 : hp.quniform('max_depth', 10, 100, 10)
}

### 최적화된 파라미터는 아래와 같습니다.

def objective(hyperparams):
    model = RandomForestClassifier(**params, **hyperparams)
    model.fit(X=X_trn, y=y_trn)
    score = accuracy_score(y_val, model.predict(X_val))

    return {'loss': -score, 'status': STATUS_OK, 'model': model}

trials = Trials()
best = fmin(fn=objective, space=space, trials=trials,
            algo=tpe.suggest, max_evals=100, verbose=1)
best

'criterion': 1,
 'max_depth': 80.0,
 'max_features': 1,
 'min_samples_leaf': 1,
 'n_estimators': 139

### 11-5) 학습 및 앙상블 파일 생성

In [27]:
cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)

In [28]:
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))

for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    clf = RandomForestClassifier(**params, criterion= 'gini',
                                 max_depth= 80.0,
                                 max_features= 'log2',
                                 min_samples_leaf= 1,
                                 n_estimators= 139)
    clf.fit(trn[i_trn], y[i_trn])
    
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    p_tst += clf.predict_proba(tst) / n_fold

training model for CV #1
training model for CV #2
training model for CV #3
training model for CV #4
training model for CV #5


In [29]:
#셔플때문에 할 때마다 정확도가 달라집니다.
#제출시 정확도는 93.3969%였습니다.
print(f'{accuracy_score(y, np.argmax(p_val, axis=1)) * 100:.4f}%')

93.3969%


In [30]:
np.savetxt(p_val_file, p_val, fmt='%.6f', delimiter=',')
np.savetxt(p_tst_file, p_tst, fmt='%.6f', delimiter=',')

## 12. NNCV

### 12-1) 경로 설정

In [31]:
feature_name = 'NNCV'
feature_file = feature_dir / f'{feature_name}.csv'

p_val_file = val_dir / f'{feature_name}.val.csv'
p_tst_file = tst_dir / f'{feature_name}.tst.csv'

### 12-2) 피쳐 열기

In [32]:
df = pd.read_csv(feature_dir / 'feature.csv', index_col=0)

### 12-3) 피쳐 분리

In [33]:
y = df[target_col].values[:320000]
df.drop(target_col, axis=1, inplace=True)
trn = df.iloc[:320000].values
tst = df.iloc[320000:].values
feature_name = df.columns.tolist()
print(y.shape, trn.shape, tst.shape)

(320000,) (320000, 23) (80000, 23)


### 12-4) 하이퍼 파라미터 튜닝

In [34]:
X_trn, X_val, y_trn, y_val = train_test_split(trn, y, test_size=.2, random_state=seed)

In [35]:
params = {
    'solver'                   : 'adam',
    'learning_rate'            : 'adaptive',
    'early_stopping'           : True,
    'n_iter_no_change'         : 10,
    'tol'                      : 1e-4,
    'shuffle'                  : True,
    'random_state'             : seed,
    'warm_start'               : False,
    'alpha'                    : 1e-4,
}

space = {
    'activation'               : hp.choice('activation', ['identity', 'logistic', 'tanh', 'relu']),
    'hidden_layer_sizes'        : hp.choice('hidden_layer_sizes', [(12, 8, 4),
                                                                (24, 16, 8), (48, 32, 16), (96, 64, 32), (100, )]),
    'learning_rate_init'       : hp.quniform('learning_rate_init', 0.001, 0.01, 0.0003),
    'max_iter'                 : hp.choice('max_iter', np.arange(200, 1000, dtype=int)),
    'momentum'                 : hp.quniform('momentum', 0.05, 0.95, 0.05),
    'nesterovs_momentum'       : hp.choice('nesterovs_momentum', [True, False]),
    'validation_fraction'      : hp.choice('validation_fraction', [.1, .2, .3]),
    'beta_1'                   : hp.quniform('beta_1', 0.8, 0.99, 0.01),
    'beta_2'                   : hp.quniform('beta_2', 0.9, 0.9999, 0.0004),
    'epsilon'                  : hp.choice('epsilon', [1e-9, 1e-8, 1e-7]),
}

In [36]:
#튜닝 코드: 이를 통해 구한 최적화된 파라미터는 아래 제시되어 있습니다.
# def objective(hyperparams):
#     model = MLPClassifier(**params, **hyperparams)
#     model.fit(X=X_trn, y=y_trn)
#     score = accuracy_score(y_val, model.predict(X_val))

#     return {'loss': -score, 'status': STATUS_OK, 'model': model}

# trials = Trials()
# best = fmin(fn=objective, space=space, trials=trials,
#             algo=tpe.suggest, max_evals=30, verbose=1)
# best

### 최적화된 파라미터는 아래와 같습니다.

'activation': 2,
 'beta_1': 0.84,
 'beta_2': 0.9388000000000001,
 'epsilon': 1,
 'hidden_layer_sizes': 3,
 'learning_rate_init': 0.0009,
 'max_iter': 92,
 'momentum': 0.7000000000000001,
 'nesterovs_momentum': 1,
 'validation_fraction': 1

### 12-5. 학습 및 앙상블 파일 생성

In [37]:
cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)

In [38]:
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))

for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    clf = MLPClassifier(**params, activation= 'tanh', beta_1 = 0.84, beta_2= 0.9388000000000001, epsilon= 1e-8, hidden_layer_sizes= (96, 64, 32), learning_rate_init= 0.0009, max_iter= 92, momentum= 0.7000000000000001, nesterovs_momentum= False, validation_fraction = 0.2)
    clf.fit(trn[i_trn], y[i_trn])
    
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    p_tst += clf.predict_proba(tst) / n_fold

training model for CV #1
training model for CV #2
training model for CV #3
training model for CV #4
training model for CV #5


In [39]:
#셔플때문에 할 때마다 정확도가 달라집니다.
#제출시 정확도는 93.0859%였습니다.
print(f'{accuracy_score(y, np.argmax(p_val, axis=1)) * 100:.4f}%')

93.1356%


In [40]:
np.savetxt(p_val_file, p_val, fmt='%.6f', delimiter=',')
np.savetxt(p_tst_file, p_tst, fmt='%.6f', delimiter=',')

## 13. LGBMCV

### 13-1) 경로 설정

In [41]:
feature_name = 'LGBMCV'
feature_file = feature_dir / f'{feature_name}.csv'

p_val_file = val_dir / f'{feature_name}.val.csv'
p_tst_file = tst_dir / f'{feature_name}.tst.csv'

### 13-2) 피쳐 열기

In [42]:
df = pd.read_csv(feature_dir / 'feature.csv', index_col=0)

### 13-3) 피쳐 분리

In [43]:
y = df[target_col].values[:320000]
df.drop(target_col, axis=1, inplace=True)
trn = df.iloc[:320000].values
tst = df.iloc[320000:].values
feature_name = df.columns.tolist()
print(y.shape, trn.shape, tst.shape)

(320000,) (320000, 23) (80000, 23)


### 13-4) 하이퍼 파라미터 튜닝

In [44]:
#하이퍼 파라미터 튜닝 코드 요

### 13-5) 학습 및 앙상블 파일 생성

In [45]:
cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)

In [46]:
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))
for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    clf = lgb.LGBMClassifier( objective='multiclass',
                        n_estimators=1000,
                         subsample_freq=1,
                         max_depth = 5,
                         bagging_fraction=0.5,
                         colsample_bytree=0.6000000000000001,
                         num_leaves=28,
                         learning_rate=0.03413112591083719,
                         min_child_samples=50,
                         subsample=.75,
                         
                         random_state=seed,
                         n_jobs=-1)
    clf.fit(trn[i_trn], y[i_trn],
            eval_set=[(trn[i_val], y[i_val])],
            eval_metric='multiclass',
            early_stopping_rounds=10)
    
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    p_tst += clf.predict_proba(tst) / n_fold

training model for CV #1
[1]	valid_0's multi_logloss: 0.944508
Training until validation scores don't improve for 10 rounds
[2]	valid_0's multi_logloss: 0.907805
[3]	valid_0's multi_logloss: 0.870278
[4]	valid_0's multi_logloss: 0.840037
[5]	valid_0's multi_logloss: 0.808393
[6]	valid_0's multi_logloss: 0.780402
[7]	valid_0's multi_logloss: 0.758798
[8]	valid_0's multi_logloss: 0.727799
[9]	valid_0's multi_logloss: 0.701214
[10]	valid_0's multi_logloss: 0.675174
[11]	valid_0's multi_logloss: 0.655044
[12]	valid_0's multi_logloss: 0.634239
[13]	valid_0's multi_logloss: 0.613636
[14]	valid_0's multi_logloss: 0.594034
[15]	valid_0's multi_logloss: 0.573818
[16]	valid_0's multi_logloss: 0.55827
[17]	valid_0's multi_logloss: 0.541173
[18]	valid_0's multi_logloss: 0.526896
[19]	valid_0's multi_logloss: 0.510885
[20]	valid_0's multi_logloss: 0.49624
[21]	valid_0's multi_logloss: 0.481554
[22]	valid_0's multi_logloss: 0.467715
[23]	valid_0's multi_logloss: 0.454476
[24]	valid_0's multi_logloss

[212]	valid_0's multi_logloss: 0.16449
[213]	valid_0's multi_logloss: 0.164425
[214]	valid_0's multi_logloss: 0.164337
[215]	valid_0's multi_logloss: 0.164288
[216]	valid_0's multi_logloss: 0.164204
[217]	valid_0's multi_logloss: 0.164126
[218]	valid_0's multi_logloss: 0.164034
[219]	valid_0's multi_logloss: 0.163936
[220]	valid_0's multi_logloss: 0.163897
[221]	valid_0's multi_logloss: 0.163835
[222]	valid_0's multi_logloss: 0.163757
[223]	valid_0's multi_logloss: 0.163706
[224]	valid_0's multi_logloss: 0.163663
[225]	valid_0's multi_logloss: 0.163583
[226]	valid_0's multi_logloss: 0.163516
[227]	valid_0's multi_logloss: 0.163479
[228]	valid_0's multi_logloss: 0.16342
[229]	valid_0's multi_logloss: 0.16338
[230]	valid_0's multi_logloss: 0.163308
[231]	valid_0's multi_logloss: 0.16326
[232]	valid_0's multi_logloss: 0.163197
[233]	valid_0's multi_logloss: 0.163117
[234]	valid_0's multi_logloss: 0.163063
[235]	valid_0's multi_logloss: 0.163043
[236]	valid_0's multi_logloss: 0.162995
[237

[417]	valid_0's multi_logloss: 0.157903
[418]	valid_0's multi_logloss: 0.157896
[419]	valid_0's multi_logloss: 0.157886
[420]	valid_0's multi_logloss: 0.157875
[421]	valid_0's multi_logloss: 0.157866
[422]	valid_0's multi_logloss: 0.157852
[423]	valid_0's multi_logloss: 0.157848
[424]	valid_0's multi_logloss: 0.157839
[425]	valid_0's multi_logloss: 0.157812
[426]	valid_0's multi_logloss: 0.157793
[427]	valid_0's multi_logloss: 0.157784
[428]	valid_0's multi_logloss: 0.157769
[429]	valid_0's multi_logloss: 0.157744
[430]	valid_0's multi_logloss: 0.157745
[431]	valid_0's multi_logloss: 0.157738
[432]	valid_0's multi_logloss: 0.157736
[433]	valid_0's multi_logloss: 0.157743
[434]	valid_0's multi_logloss: 0.157723
[435]	valid_0's multi_logloss: 0.157705
[436]	valid_0's multi_logloss: 0.157692
[437]	valid_0's multi_logloss: 0.157689
[438]	valid_0's multi_logloss: 0.157685
[439]	valid_0's multi_logloss: 0.157674
[440]	valid_0's multi_logloss: 0.157674
[441]	valid_0's multi_logloss: 0.157659


[632]	valid_0's multi_logloss: 0.156329
[633]	valid_0's multi_logloss: 0.156331
[634]	valid_0's multi_logloss: 0.156333
[635]	valid_0's multi_logloss: 0.156327
[636]	valid_0's multi_logloss: 0.156317
[637]	valid_0's multi_logloss: 0.156314
[638]	valid_0's multi_logloss: 0.156309
[639]	valid_0's multi_logloss: 0.15631
[640]	valid_0's multi_logloss: 0.156315
[641]	valid_0's multi_logloss: 0.156291
[642]	valid_0's multi_logloss: 0.156273
[643]	valid_0's multi_logloss: 0.156261
[644]	valid_0's multi_logloss: 0.156252
[645]	valid_0's multi_logloss: 0.156245
[646]	valid_0's multi_logloss: 0.156243
[647]	valid_0's multi_logloss: 0.15623
[648]	valid_0's multi_logloss: 0.156228
[649]	valid_0's multi_logloss: 0.156229
[650]	valid_0's multi_logloss: 0.156229
[651]	valid_0's multi_logloss: 0.156225
[652]	valid_0's multi_logloss: 0.156221
[653]	valid_0's multi_logloss: 0.156219
[654]	valid_0's multi_logloss: 0.156214
[655]	valid_0's multi_logloss: 0.156201
[656]	valid_0's multi_logloss: 0.156198
[6

[111]	valid_0's multi_logloss: 0.189028
[112]	valid_0's multi_logloss: 0.18834
[113]	valid_0's multi_logloss: 0.187753
[114]	valid_0's multi_logloss: 0.187234
[115]	valid_0's multi_logloss: 0.186683
[116]	valid_0's multi_logloss: 0.186158
[117]	valid_0's multi_logloss: 0.185691
[118]	valid_0's multi_logloss: 0.18527
[119]	valid_0's multi_logloss: 0.184797
[120]	valid_0's multi_logloss: 0.18439
[121]	valid_0's multi_logloss: 0.183874
[122]	valid_0's multi_logloss: 0.183307
[123]	valid_0's multi_logloss: 0.182918
[124]	valid_0's multi_logloss: 0.182524
[125]	valid_0's multi_logloss: 0.182055
[126]	valid_0's multi_logloss: 0.181749
[127]	valid_0's multi_logloss: 0.181357
[128]	valid_0's multi_logloss: 0.180919
[129]	valid_0's multi_logloss: 0.180663
[130]	valid_0's multi_logloss: 0.180228
[131]	valid_0's multi_logloss: 0.179874
[132]	valid_0's multi_logloss: 0.179634
[133]	valid_0's multi_logloss: 0.17931
[134]	valid_0's multi_logloss: 0.178976
[135]	valid_0's multi_logloss: 0.178641
[136

[322]	valid_0's multi_logloss: 0.160653
[323]	valid_0's multi_logloss: 0.160635
[324]	valid_0's multi_logloss: 0.160595
[325]	valid_0's multi_logloss: 0.160579
[326]	valid_0's multi_logloss: 0.160557
[327]	valid_0's multi_logloss: 0.160532
[328]	valid_0's multi_logloss: 0.160516
[329]	valid_0's multi_logloss: 0.160487
[330]	valid_0's multi_logloss: 0.160461
[331]	valid_0's multi_logloss: 0.160439
[332]	valid_0's multi_logloss: 0.160439
[333]	valid_0's multi_logloss: 0.160432
[334]	valid_0's multi_logloss: 0.160393
[335]	valid_0's multi_logloss: 0.160375
[336]	valid_0's multi_logloss: 0.160319
[337]	valid_0's multi_logloss: 0.160281
[338]	valid_0's multi_logloss: 0.160275
[339]	valid_0's multi_logloss: 0.160262
[340]	valid_0's multi_logloss: 0.16025
[341]	valid_0's multi_logloss: 0.160241
[342]	valid_0's multi_logloss: 0.160231
[343]	valid_0's multi_logloss: 0.160196
[344]	valid_0's multi_logloss: 0.160177
[345]	valid_0's multi_logloss: 0.160123
[346]	valid_0's multi_logloss: 0.160106
[

[530]	valid_0's multi_logloss: 0.157933
[531]	valid_0's multi_logloss: 0.157929
[532]	valid_0's multi_logloss: 0.157915
[533]	valid_0's multi_logloss: 0.157919
[534]	valid_0's multi_logloss: 0.157913
[535]	valid_0's multi_logloss: 0.157923
[536]	valid_0's multi_logloss: 0.157907
[537]	valid_0's multi_logloss: 0.157904
[538]	valid_0's multi_logloss: 0.1579
[539]	valid_0's multi_logloss: 0.157895
[540]	valid_0's multi_logloss: 0.157879
[541]	valid_0's multi_logloss: 0.157875
[542]	valid_0's multi_logloss: 0.157877
[543]	valid_0's multi_logloss: 0.157868
[544]	valid_0's multi_logloss: 0.157855
[545]	valid_0's multi_logloss: 0.157834
[546]	valid_0's multi_logloss: 0.157833
[547]	valid_0's multi_logloss: 0.157832
[548]	valid_0's multi_logloss: 0.157831
[549]	valid_0's multi_logloss: 0.157824
[550]	valid_0's multi_logloss: 0.157814
[551]	valid_0's multi_logloss: 0.157808
[552]	valid_0's multi_logloss: 0.157802
[553]	valid_0's multi_logloss: 0.157785
[554]	valid_0's multi_logloss: 0.157784
[5

[742]	valid_0's multi_logloss: 0.157151
[743]	valid_0's multi_logloss: 0.157153
[744]	valid_0's multi_logloss: 0.157149
[745]	valid_0's multi_logloss: 0.157147
[746]	valid_0's multi_logloss: 0.15714
[747]	valid_0's multi_logloss: 0.157138
[748]	valid_0's multi_logloss: 0.157136
[749]	valid_0's multi_logloss: 0.157131
[750]	valid_0's multi_logloss: 0.157119
[751]	valid_0's multi_logloss: 0.157113
[752]	valid_0's multi_logloss: 0.157109
[753]	valid_0's multi_logloss: 0.157116
[754]	valid_0's multi_logloss: 0.157111
[755]	valid_0's multi_logloss: 0.157109
[756]	valid_0's multi_logloss: 0.157111
[757]	valid_0's multi_logloss: 0.157117
[758]	valid_0's multi_logloss: 0.157115
[759]	valid_0's multi_logloss: 0.157119
[760]	valid_0's multi_logloss: 0.157113
[761]	valid_0's multi_logloss: 0.157113
[762]	valid_0's multi_logloss: 0.157113
Early stopping, best iteration is:
[752]	valid_0's multi_logloss: 0.157109
training model for CV #3
[1]	valid_0's multi_logloss: 0.9446
Training until validation

[182]	valid_0's multi_logloss: 0.17037
[183]	valid_0's multi_logloss: 0.170258
[184]	valid_0's multi_logloss: 0.170123
[185]	valid_0's multi_logloss: 0.169984
[186]	valid_0's multi_logloss: 0.169833
[187]	valid_0's multi_logloss: 0.16971
[188]	valid_0's multi_logloss: 0.16959
[189]	valid_0's multi_logloss: 0.169515
[190]	valid_0's multi_logloss: 0.169388
[191]	valid_0's multi_logloss: 0.169252
[192]	valid_0's multi_logloss: 0.169122
[193]	valid_0's multi_logloss: 0.168986
[194]	valid_0's multi_logloss: 0.168894
[195]	valid_0's multi_logloss: 0.168766
[196]	valid_0's multi_logloss: 0.168659
[197]	valid_0's multi_logloss: 0.168558
[198]	valid_0's multi_logloss: 0.168464
[199]	valid_0's multi_logloss: 0.168367
[200]	valid_0's multi_logloss: 0.1683
[201]	valid_0's multi_logloss: 0.168176
[202]	valid_0's multi_logloss: 0.168116
[203]	valid_0's multi_logloss: 0.168002
[204]	valid_0's multi_logloss: 0.16788
[205]	valid_0's multi_logloss: 0.167779
[206]	valid_0's multi_logloss: 0.167699
[207]	

[387]	valid_0's multi_logloss: 0.160313
[388]	valid_0's multi_logloss: 0.160298
[389]	valid_0's multi_logloss: 0.160285
[390]	valid_0's multi_logloss: 0.160269
[391]	valid_0's multi_logloss: 0.160252
[392]	valid_0's multi_logloss: 0.160232
[393]	valid_0's multi_logloss: 0.160217
[394]	valid_0's multi_logloss: 0.1602
[395]	valid_0's multi_logloss: 0.160191
[396]	valid_0's multi_logloss: 0.160186
[397]	valid_0's multi_logloss: 0.160178
[398]	valid_0's multi_logloss: 0.160151
[399]	valid_0's multi_logloss: 0.16015
[400]	valid_0's multi_logloss: 0.160125
[401]	valid_0's multi_logloss: 0.160115
[402]	valid_0's multi_logloss: 0.160099
[403]	valid_0's multi_logloss: 0.160092
[404]	valid_0's multi_logloss: 0.16008
[405]	valid_0's multi_logloss: 0.160076
[406]	valid_0's multi_logloss: 0.160065
[407]	valid_0's multi_logloss: 0.160044
[408]	valid_0's multi_logloss: 0.160032
[409]	valid_0's multi_logloss: 0.160014
[410]	valid_0's multi_logloss: 0.160013
[411]	valid_0's multi_logloss: 0.160006
[412

[594]	valid_0's multi_logloss: 0.158678
[595]	valid_0's multi_logloss: 0.158687
[596]	valid_0's multi_logloss: 0.158687
[597]	valid_0's multi_logloss: 0.158689
[598]	valid_0's multi_logloss: 0.158689
[599]	valid_0's multi_logloss: 0.158673
[600]	valid_0's multi_logloss: 0.158664
[601]	valid_0's multi_logloss: 0.15866
[602]	valid_0's multi_logloss: 0.158656
[603]	valid_0's multi_logloss: 0.158644
[604]	valid_0's multi_logloss: 0.158629
[605]	valid_0's multi_logloss: 0.158619
[606]	valid_0's multi_logloss: 0.158617
[607]	valid_0's multi_logloss: 0.158612
[608]	valid_0's multi_logloss: 0.158601
[609]	valid_0's multi_logloss: 0.158596
[610]	valid_0's multi_logloss: 0.158602
[611]	valid_0's multi_logloss: 0.158596
[612]	valid_0's multi_logloss: 0.15859
[613]	valid_0's multi_logloss: 0.158594
[614]	valid_0's multi_logloss: 0.158588
[615]	valid_0's multi_logloss: 0.158587
[616]	valid_0's multi_logloss: 0.158594
[617]	valid_0's multi_logloss: 0.158591
[618]	valid_0's multi_logloss: 0.15859
[61

[55]	valid_0's multi_logloss: 0.265609
[56]	valid_0's multi_logloss: 0.262274
[57]	valid_0's multi_logloss: 0.259232
[58]	valid_0's multi_logloss: 0.256158
[59]	valid_0's multi_logloss: 0.254009
[60]	valid_0's multi_logloss: 0.251554
[61]	valid_0's multi_logloss: 0.249092
[62]	valid_0's multi_logloss: 0.247582
[63]	valid_0's multi_logloss: 0.245615
[64]	valid_0's multi_logloss: 0.243557
[65]	valid_0's multi_logloss: 0.241066
[66]	valid_0's multi_logloss: 0.238738
[67]	valid_0's multi_logloss: 0.237575
[68]	valid_0's multi_logloss: 0.236236
[69]	valid_0's multi_logloss: 0.234383
[70]	valid_0's multi_logloss: 0.232481
[71]	valid_0's multi_logloss: 0.230342
[72]	valid_0's multi_logloss: 0.228406
[73]	valid_0's multi_logloss: 0.226519
[74]	valid_0's multi_logloss: 0.224991
[75]	valid_0's multi_logloss: 0.223579
[76]	valid_0's multi_logloss: 0.221788
[77]	valid_0's multi_logloss: 0.220435
[78]	valid_0's multi_logloss: 0.218828
[79]	valid_0's multi_logloss: 0.217378
[80]	valid_0's multi_logl

[264]	valid_0's multi_logloss: 0.162598
[265]	valid_0's multi_logloss: 0.162566
[266]	valid_0's multi_logloss: 0.162495
[267]	valid_0's multi_logloss: 0.162464
[268]	valid_0's multi_logloss: 0.162444
[269]	valid_0's multi_logloss: 0.162388
[270]	valid_0's multi_logloss: 0.162317
[271]	valid_0's multi_logloss: 0.162278
[272]	valid_0's multi_logloss: 0.162221
[273]	valid_0's multi_logloss: 0.162202
[274]	valid_0's multi_logloss: 0.162174
[275]	valid_0's multi_logloss: 0.162156
[276]	valid_0's multi_logloss: 0.162152
[277]	valid_0's multi_logloss: 0.162084
[278]	valid_0's multi_logloss: 0.162045
[279]	valid_0's multi_logloss: 0.162025
[280]	valid_0's multi_logloss: 0.161987
[281]	valid_0's multi_logloss: 0.161958
[282]	valid_0's multi_logloss: 0.161923
[283]	valid_0's multi_logloss: 0.161905
[284]	valid_0's multi_logloss: 0.161859
[285]	valid_0's multi_logloss: 0.161836
[286]	valid_0's multi_logloss: 0.161795
[287]	valid_0's multi_logloss: 0.161737
[288]	valid_0's multi_logloss: 0.161707


[477]	valid_0's multi_logloss: 0.158373
[478]	valid_0's multi_logloss: 0.158367
[479]	valid_0's multi_logloss: 0.158352
[480]	valid_0's multi_logloss: 0.158339
[481]	valid_0's multi_logloss: 0.158331
[482]	valid_0's multi_logloss: 0.15832
[483]	valid_0's multi_logloss: 0.158306
[484]	valid_0's multi_logloss: 0.158294
[485]	valid_0's multi_logloss: 0.15828
[486]	valid_0's multi_logloss: 0.158275
[487]	valid_0's multi_logloss: 0.158276
[488]	valid_0's multi_logloss: 0.158258
[489]	valid_0's multi_logloss: 0.15826
[490]	valid_0's multi_logloss: 0.158254
[491]	valid_0's multi_logloss: 0.158247
[492]	valid_0's multi_logloss: 0.158244
[493]	valid_0's multi_logloss: 0.158229
[494]	valid_0's multi_logloss: 0.158219
[495]	valid_0's multi_logloss: 0.158211
[496]	valid_0's multi_logloss: 0.158204
[497]	valid_0's multi_logloss: 0.158195
[498]	valid_0's multi_logloss: 0.158189
[499]	valid_0's multi_logloss: 0.15819
[500]	valid_0's multi_logloss: 0.158181
[501]	valid_0's multi_logloss: 0.158161
[502

[684]	valid_0's multi_logloss: 0.157212
[685]	valid_0's multi_logloss: 0.157208
[686]	valid_0's multi_logloss: 0.157203
[687]	valid_0's multi_logloss: 0.157192
[688]	valid_0's multi_logloss: 0.15719
[689]	valid_0's multi_logloss: 0.15719
[690]	valid_0's multi_logloss: 0.157193
[691]	valid_0's multi_logloss: 0.15718
[692]	valid_0's multi_logloss: 0.157181
[693]	valid_0's multi_logloss: 0.15719
[694]	valid_0's multi_logloss: 0.157187
[695]	valid_0's multi_logloss: 0.157181
[696]	valid_0's multi_logloss: 0.157179
[697]	valid_0's multi_logloss: 0.157169
[698]	valid_0's multi_logloss: 0.157155
[699]	valid_0's multi_logloss: 0.157148
[700]	valid_0's multi_logloss: 0.157141
[701]	valid_0's multi_logloss: 0.157143
[702]	valid_0's multi_logloss: 0.157136
[703]	valid_0's multi_logloss: 0.157127
[704]	valid_0's multi_logloss: 0.157136
[705]	valid_0's multi_logloss: 0.157134
[706]	valid_0's multi_logloss: 0.157144
[707]	valid_0's multi_logloss: 0.15714
[708]	valid_0's multi_logloss: 0.157124
[709]

[121]	valid_0's multi_logloss: 0.182643
[122]	valid_0's multi_logloss: 0.182102
[123]	valid_0's multi_logloss: 0.181767
[124]	valid_0's multi_logloss: 0.181375
[125]	valid_0's multi_logloss: 0.180969
[126]	valid_0's multi_logloss: 0.180605
[127]	valid_0's multi_logloss: 0.180131
[128]	valid_0's multi_logloss: 0.179731
[129]	valid_0's multi_logloss: 0.179477
[130]	valid_0's multi_logloss: 0.179064
[131]	valid_0's multi_logloss: 0.178709
[132]	valid_0's multi_logloss: 0.178432
[133]	valid_0's multi_logloss: 0.178072
[134]	valid_0's multi_logloss: 0.177805
[135]	valid_0's multi_logloss: 0.177447
[136]	valid_0's multi_logloss: 0.177172
[137]	valid_0's multi_logloss: 0.176847
[138]	valid_0's multi_logloss: 0.176504
[139]	valid_0's multi_logloss: 0.176274
[140]	valid_0's multi_logloss: 0.176009
[141]	valid_0's multi_logloss: 0.175789
[142]	valid_0's multi_logloss: 0.175528
[143]	valid_0's multi_logloss: 0.17523
[144]	valid_0's multi_logloss: 0.174996
[145]	valid_0's multi_logloss: 0.174797
[

[336]	valid_0's multi_logloss: 0.158864
[337]	valid_0's multi_logloss: 0.158844
[338]	valid_0's multi_logloss: 0.15884
[339]	valid_0's multi_logloss: 0.158823
[340]	valid_0's multi_logloss: 0.158811
[341]	valid_0's multi_logloss: 0.158803
[342]	valid_0's multi_logloss: 0.158774
[343]	valid_0's multi_logloss: 0.158759
[344]	valid_0's multi_logloss: 0.158749
[345]	valid_0's multi_logloss: 0.158699
[346]	valid_0's multi_logloss: 0.158673
[347]	valid_0's multi_logloss: 0.158663
[348]	valid_0's multi_logloss: 0.158636
[349]	valid_0's multi_logloss: 0.158632
[350]	valid_0's multi_logloss: 0.158613
[351]	valid_0's multi_logloss: 0.158581
[352]	valid_0's multi_logloss: 0.15857
[353]	valid_0's multi_logloss: 0.158557
[354]	valid_0's multi_logloss: 0.158506
[355]	valid_0's multi_logloss: 0.158476
[356]	valid_0's multi_logloss: 0.15847
[357]	valid_0's multi_logloss: 0.158468
[358]	valid_0's multi_logloss: 0.158448
[359]	valid_0's multi_logloss: 0.158423
[360]	valid_0's multi_logloss: 0.158401
[36

[543]	valid_0's multi_logloss: 0.156611
[544]	valid_0's multi_logloss: 0.156601
[545]	valid_0's multi_logloss: 0.156599
[546]	valid_0's multi_logloss: 0.156594
[547]	valid_0's multi_logloss: 0.156592
[548]	valid_0's multi_logloss: 0.156586
[549]	valid_0's multi_logloss: 0.15658
[550]	valid_0's multi_logloss: 0.156578
[551]	valid_0's multi_logloss: 0.15658
[552]	valid_0's multi_logloss: 0.156564
[553]	valid_0's multi_logloss: 0.156561
[554]	valid_0's multi_logloss: 0.156557
[555]	valid_0's multi_logloss: 0.156555
[556]	valid_0's multi_logloss: 0.156548
[557]	valid_0's multi_logloss: 0.156536
[558]	valid_0's multi_logloss: 0.156532
[559]	valid_0's multi_logloss: 0.156529
[560]	valid_0's multi_logloss: 0.156528
[561]	valid_0's multi_logloss: 0.156519
[562]	valid_0's multi_logloss: 0.156509
[563]	valid_0's multi_logloss: 0.15651
[564]	valid_0's multi_logloss: 0.156499
[565]	valid_0's multi_logloss: 0.156496
[566]	valid_0's multi_logloss: 0.15649
[567]	valid_0's multi_logloss: 0.156486
[568

[751]	valid_0's multi_logloss: 0.155879
[752]	valid_0's multi_logloss: 0.155882
[753]	valid_0's multi_logloss: 0.155881
[754]	valid_0's multi_logloss: 0.155875
[755]	valid_0's multi_logloss: 0.155865
[756]	valid_0's multi_logloss: 0.155869
[757]	valid_0's multi_logloss: 0.155862
[758]	valid_0's multi_logloss: 0.155857
[759]	valid_0's multi_logloss: 0.155854
[760]	valid_0's multi_logloss: 0.155852
[761]	valid_0's multi_logloss: 0.155852
[762]	valid_0's multi_logloss: 0.15585
[763]	valid_0's multi_logloss: 0.155846
[764]	valid_0's multi_logloss: 0.155832
[765]	valid_0's multi_logloss: 0.155843
[766]	valid_0's multi_logloss: 0.15585
[767]	valid_0's multi_logloss: 0.155849
[768]	valid_0's multi_logloss: 0.155844
[769]	valid_0's multi_logloss: 0.155851
[770]	valid_0's multi_logloss: 0.155847
[771]	valid_0's multi_logloss: 0.155837
[772]	valid_0's multi_logloss: 0.155837
[773]	valid_0's multi_logloss: 0.155835
[774]	valid_0's multi_logloss: 0.155825
[775]	valid_0's multi_logloss: 0.155836
[7

In [47]:
#셔플때문에 할 때마다 정확도가 달라집니다.
#제출시 정확도는 93.3934%였습니다.
print(f'{accuracy_score(y, np.argmax(p_val, axis=1)) * 100:.4f}%')

93.4144%


In [48]:
np.savetxt(p_val_file, p_val, fmt='%.6f', delimiter=',')
np.savetxt(p_tst_file, p_tst, fmt='%.6f', delimiter=',')

## 14. 최종 Staking

### 14-1) 경로 설정

In [49]:
feature_name = 'final'
feature_file = feature_dir / f'{feature_name}.csv'

p_val_file = val_dir / f'{feature_name}.val.csv'
p_tst_file = tst_dir / f'{feature_name}.tst.csv'
sub_file = sub_dir / f'{feature_name}.csv'

### 14-2) 스태킹

In [51]:
model_names = ['polyLRCV', 'RFCV', 'LGBMCV','NNCV']
trn = []
tst = []
feature_names = []
for model in model_names:
    trn.append(np.loadtxt(val_dir / f'{model}.val.csv', delimiter=','))
    tst.append(np.loadtxt(tst_dir / f'{model}.tst.csv', delimiter=','))
    feature_names += [f'{model}_class0', f'{model}_class1', f'{model}_class2']
    
trn = np.hstack(trn)
tst = np.hstack(tst)
feature_names

['polyLRCV_class0',
 'polyLRCV_class1',
 'polyLRCV_class2',
 'RFCV_class0',
 'RFCV_class1',
 'RFCV_class2',
 'LGBMCV_class0',
 'LGBMCV_class1',
 'LGBMCV_class2',
 'NNCV_class0',
 'NNCV_class1',
 'NNCV_class2']

In [52]:
y = pd.read_csv(trn_file, index_col=0, usecols=['id', target_col]).values.flatten()

In [53]:
X_trn, X_val, y_trn, y_val = train_test_split(trn, y, test_size=.2, random_state=seed)

### 14-3) 하이퍼 파라미터 튜닝

In [54]:
# params = {
#     'objective'             : 'multiclass', 
#     'n_estimators'          : 1000,
#     'subsample_freq'        : 1,
#     'random_state'          : 42,
#     'n_jobs'                : -1,
#     'bagging_fraction'      : 0.5,
# }

# space = {
#     "learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.3)),
#     "num_leaves": hp.choice("num_leaves", [15, 31, 63, 127]),
#     "colsample_bytree": hp.quniform("colsample_bytree", .5, .9, 0.1),
#     "subsample": hp.quniform("subsample", .5, .9, 0.1),
#     "min_child_samples": hp.choice('min_child_samples', [10, 25, 50, 100])
#  }

In [55]:
# def objective(hyperparams):
#     model = lgb.LGBMClassifier(**params, **hyperparams)
#     model.fit(X=X_trn, y=y_trn,
#               eval_set=[(X_val, y_val)],
#               eval_metric="multi_logloss",
#               early_stopping_rounds=10,
#               verbose=False)
#     score = model.best_score_["valid_0"]["multi_logloss"]

#     return {'loss': score, 'status': STATUS_OK, 'model': model}

# trials = Trials()
# best = fmin(fn=objective, space=space, trials=trials,
#             algo=tpe.suggest, max_evals=10, verbose=1)

# hyperparams = space_eval(space, best)
# n_best = trials.best_trial['result']['model'].best_iteration_

### 최적의 파라미터는 아래와 같습니다.

params = {
    'objective'             : 'multiclass', 
    'n_estimators'          : 1000,
    'subsample_freq'        : 1,
    'random_state'          : 42,
    'n_jobs'                : -1,
    'bagging_fraction'      : 0.5,
    
    'colsample_bytree'      : 0.8,
    'learning_rate'         : 0.013114188498150362,
    'min_child_samples'     : 50,
    'num_leaves'            : 15,
    'subsample'             : 0.7000000000000001
}

### 14-4) 최종 학습

In [56]:
cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)

In [57]:
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))
for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    clf = lgb.LGBMClassifier(
                            objective             = 'multiclass', 
                            n_estimators          = 1000,
                            subsample_freq        = 1,
                            random_state          = 42,
                            n_jobs                = -1,
                            bagging_fraction      = 0.5,

                            colsample_bytree      = 0.8,
                            learning_rate         = 0.013114188498150362,
                            min_child_samples     = 50,
                            num_leaves            = 15,
                            subsample             = 0.7000000000000001)
    
    clf.fit(trn[i_trn], y[i_trn],
            eval_set=[(trn[i_val], y[i_val])],
            eval_metric='multiclass',
            early_stopping_rounds=10)
    
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    p_tst += clf.predict_proba(tst) / n_fold

training model for CV #1
[1]	valid_0's multi_logloss: 0.965062
Training until validation scores don't improve for 10 rounds
[2]	valid_0's multi_logloss: 0.944928
[3]	valid_0's multi_logloss: 0.925555
[4]	valid_0's multi_logloss: 0.906825
[5]	valid_0's multi_logloss: 0.888784
[6]	valid_0's multi_logloss: 0.871354
[7]	valid_0's multi_logloss: 0.854499
[8]	valid_0's multi_logloss: 0.838194
[9]	valid_0's multi_logloss: 0.82238
[10]	valid_0's multi_logloss: 0.807088
[11]	valid_0's multi_logloss: 0.792253
[12]	valid_0's multi_logloss: 0.777868
[13]	valid_0's multi_logloss: 0.763924
[14]	valid_0's multi_logloss: 0.750368
[15]	valid_0's multi_logloss: 0.737223
[16]	valid_0's multi_logloss: 0.724441
[17]	valid_0's multi_logloss: 0.71201
[18]	valid_0's multi_logloss: 0.699928
[19]	valid_0's multi_logloss: 0.688169
[20]	valid_0's multi_logloss: 0.676717
[21]	valid_0's multi_logloss: 0.665586
[22]	valid_0's multi_logloss: 0.654753
[23]	valid_0's multi_logloss: 0.644196
[24]	valid_0's multi_logloss

[213]	valid_0's multi_logloss: 0.170803
[214]	valid_0's multi_logloss: 0.170524
[215]	valid_0's multi_logloss: 0.170247
[216]	valid_0's multi_logloss: 0.169974
[217]	valid_0's multi_logloss: 0.169708
[218]	valid_0's multi_logloss: 0.169446
[219]	valid_0's multi_logloss: 0.169186
[220]	valid_0's multi_logloss: 0.16893
[221]	valid_0's multi_logloss: 0.168682
[222]	valid_0's multi_logloss: 0.168436
[223]	valid_0's multi_logloss: 0.168194
[224]	valid_0's multi_logloss: 0.167957
[225]	valid_0's multi_logloss: 0.16772
[226]	valid_0's multi_logloss: 0.167486
[227]	valid_0's multi_logloss: 0.167258
[228]	valid_0's multi_logloss: 0.167038
[229]	valid_0's multi_logloss: 0.166817
[230]	valid_0's multi_logloss: 0.166602
[231]	valid_0's multi_logloss: 0.166385
[232]	valid_0's multi_logloss: 0.166168
[233]	valid_0's multi_logloss: 0.16596
[234]	valid_0's multi_logloss: 0.165757
[235]	valid_0's multi_logloss: 0.165553
[236]	valid_0's multi_logloss: 0.165353
[237]	valid_0's multi_logloss: 0.165159
[23

[423]	valid_0's multi_logloss: 0.15234
[424]	valid_0's multi_logloss: 0.152319
[425]	valid_0's multi_logloss: 0.152304
[426]	valid_0's multi_logloss: 0.15229
[427]	valid_0's multi_logloss: 0.152274
[428]	valid_0's multi_logloss: 0.152258
[429]	valid_0's multi_logloss: 0.152242
[430]	valid_0's multi_logloss: 0.152227
[431]	valid_0's multi_logloss: 0.152212
[432]	valid_0's multi_logloss: 0.152196
[433]	valid_0's multi_logloss: 0.152177
[434]	valid_0's multi_logloss: 0.15216
[435]	valid_0's multi_logloss: 0.152146
[436]	valid_0's multi_logloss: 0.152132
[437]	valid_0's multi_logloss: 0.152117
[438]	valid_0's multi_logloss: 0.152104
[439]	valid_0's multi_logloss: 0.152091
[440]	valid_0's multi_logloss: 0.152076
[441]	valid_0's multi_logloss: 0.15206
[442]	valid_0's multi_logloss: 0.152044
[443]	valid_0's multi_logloss: 0.152035
[444]	valid_0's multi_logloss: 0.152022
[445]	valid_0's multi_logloss: 0.15201
[446]	valid_0's multi_logloss: 0.151995
[447]	valid_0's multi_logloss: 0.151982
[448]

[639]	valid_0's multi_logloss: 0.151019
[640]	valid_0's multi_logloss: 0.151017
[641]	valid_0's multi_logloss: 0.151016
[642]	valid_0's multi_logloss: 0.151013
[643]	valid_0's multi_logloss: 0.151008
[644]	valid_0's multi_logloss: 0.151003
[645]	valid_0's multi_logloss: 0.151002
[646]	valid_0's multi_logloss: 0.151001
[647]	valid_0's multi_logloss: 0.151001
[648]	valid_0's multi_logloss: 0.151
[649]	valid_0's multi_logloss: 0.150998
[650]	valid_0's multi_logloss: 0.150998
[651]	valid_0's multi_logloss: 0.150999
[652]	valid_0's multi_logloss: 0.151
[653]	valid_0's multi_logloss: 0.150997
[654]	valid_0's multi_logloss: 0.150996
[655]	valid_0's multi_logloss: 0.150996
[656]	valid_0's multi_logloss: 0.150995
[657]	valid_0's multi_logloss: 0.150996
[658]	valid_0's multi_logloss: 0.150996
[659]	valid_0's multi_logloss: 0.150992
[660]	valid_0's multi_logloss: 0.150989
[661]	valid_0's multi_logloss: 0.150988
[662]	valid_0's multi_logloss: 0.150985
[663]	valid_0's multi_logloss: 0.150982
[664]	

[159]	valid_0's multi_logloss: 0.196505
[160]	valid_0's multi_logloss: 0.195818
[161]	valid_0's multi_logloss: 0.195146
[162]	valid_0's multi_logloss: 0.194482
[163]	valid_0's multi_logloss: 0.193829
[164]	valid_0's multi_logloss: 0.193192
[165]	valid_0's multi_logloss: 0.192559
[166]	valid_0's multi_logloss: 0.191943
[167]	valid_0's multi_logloss: 0.191339
[168]	valid_0's multi_logloss: 0.190745
[169]	valid_0's multi_logloss: 0.190159
[170]	valid_0's multi_logloss: 0.189582
[171]	valid_0's multi_logloss: 0.189016
[172]	valid_0's multi_logloss: 0.188462
[173]	valid_0's multi_logloss: 0.187914
[174]	valid_0's multi_logloss: 0.187375
[175]	valid_0's multi_logloss: 0.186849
[176]	valid_0's multi_logloss: 0.18633
[177]	valid_0's multi_logloss: 0.18582
[178]	valid_0's multi_logloss: 0.185314
[179]	valid_0's multi_logloss: 0.184814
[180]	valid_0's multi_logloss: 0.184326
[181]	valid_0's multi_logloss: 0.183847
[182]	valid_0's multi_logloss: 0.183379
[183]	valid_0's multi_logloss: 0.182914
[1

[373]	valid_0's multi_logloss: 0.154557
[374]	valid_0's multi_logloss: 0.154526
[375]	valid_0's multi_logloss: 0.154495
[376]	valid_0's multi_logloss: 0.154466
[377]	valid_0's multi_logloss: 0.154436
[378]	valid_0's multi_logloss: 0.154405
[379]	valid_0's multi_logloss: 0.154376
[380]	valid_0's multi_logloss: 0.154345
[381]	valid_0's multi_logloss: 0.154318
[382]	valid_0's multi_logloss: 0.154293
[383]	valid_0's multi_logloss: 0.154266
[384]	valid_0's multi_logloss: 0.154241
[385]	valid_0's multi_logloss: 0.154214
[386]	valid_0's multi_logloss: 0.154189
[387]	valid_0's multi_logloss: 0.154164
[388]	valid_0's multi_logloss: 0.154136
[389]	valid_0's multi_logloss: 0.154111
[390]	valid_0's multi_logloss: 0.154084
[391]	valid_0's multi_logloss: 0.15406
[392]	valid_0's multi_logloss: 0.154036
[393]	valid_0's multi_logloss: 0.154006
[394]	valid_0's multi_logloss: 0.153979
[395]	valid_0's multi_logloss: 0.153953
[396]	valid_0's multi_logloss: 0.15393
[397]	valid_0's multi_logloss: 0.153908
[3

[583]	valid_0's multi_logloss: 0.152026
[584]	valid_0's multi_logloss: 0.152021
[585]	valid_0's multi_logloss: 0.152017
[586]	valid_0's multi_logloss: 0.152018
[587]	valid_0's multi_logloss: 0.152017
[588]	valid_0's multi_logloss: 0.152014
[589]	valid_0's multi_logloss: 0.15201
[590]	valid_0's multi_logloss: 0.15201
[591]	valid_0's multi_logloss: 0.152007
[592]	valid_0's multi_logloss: 0.152002
[593]	valid_0's multi_logloss: 0.151999
[594]	valid_0's multi_logloss: 0.151998
[595]	valid_0's multi_logloss: 0.151995
[596]	valid_0's multi_logloss: 0.151992
[597]	valid_0's multi_logloss: 0.15199
[598]	valid_0's multi_logloss: 0.151988
[599]	valid_0's multi_logloss: 0.151983
[600]	valid_0's multi_logloss: 0.151981
[601]	valid_0's multi_logloss: 0.151977
[602]	valid_0's multi_logloss: 0.151977
[603]	valid_0's multi_logloss: 0.151975
[604]	valid_0's multi_logloss: 0.151971
[605]	valid_0's multi_logloss: 0.151967
[606]	valid_0's multi_logloss: 0.151962
[607]	valid_0's multi_logloss: 0.151958
[60

[64]	valid_0's multi_logloss: 0.373356
[65]	valid_0's multi_logloss: 0.369326
[66]	valid_0's multi_logloss: 0.365385
[67]	valid_0's multi_logloss: 0.361517
[68]	valid_0's multi_logloss: 0.35773
[69]	valid_0's multi_logloss: 0.354013
[70]	valid_0's multi_logloss: 0.350384
[71]	valid_0's multi_logloss: 0.34682
[72]	valid_0's multi_logloss: 0.343326
[73]	valid_0's multi_logloss: 0.339907
[74]	valid_0's multi_logloss: 0.336556
[75]	valid_0's multi_logloss: 0.333278
[76]	valid_0's multi_logloss: 0.330059
[77]	valid_0's multi_logloss: 0.326912
[78]	valid_0's multi_logloss: 0.323812
[79]	valid_0's multi_logloss: 0.320781
[80]	valid_0's multi_logloss: 0.317813
[81]	valid_0's multi_logloss: 0.314897
[82]	valid_0's multi_logloss: 0.312045
[83]	valid_0's multi_logloss: 0.309244
[84]	valid_0's multi_logloss: 0.3065
[85]	valid_0's multi_logloss: 0.30381
[86]	valid_0's multi_logloss: 0.301173
[87]	valid_0's multi_logloss: 0.298583
[88]	valid_0's multi_logloss: 0.29605
[89]	valid_0's multi_logloss: 0

[281]	valid_0's multi_logloss: 0.161051
[282]	valid_0's multi_logloss: 0.160951
[283]	valid_0's multi_logloss: 0.160854
[284]	valid_0's multi_logloss: 0.160754
[285]	valid_0's multi_logloss: 0.160657
[286]	valid_0's multi_logloss: 0.160562
[287]	valid_0's multi_logloss: 0.160465
[288]	valid_0's multi_logloss: 0.160368
[289]	valid_0's multi_logloss: 0.160276
[290]	valid_0's multi_logloss: 0.160183
[291]	valid_0's multi_logloss: 0.160092
[292]	valid_0's multi_logloss: 0.160006
[293]	valid_0's multi_logloss: 0.159909
[294]	valid_0's multi_logloss: 0.159822
[295]	valid_0's multi_logloss: 0.159739
[296]	valid_0's multi_logloss: 0.159655
[297]	valid_0's multi_logloss: 0.159574
[298]	valid_0's multi_logloss: 0.159492
[299]	valid_0's multi_logloss: 0.159417
[300]	valid_0's multi_logloss: 0.159341
[301]	valid_0's multi_logloss: 0.159263
[302]	valid_0's multi_logloss: 0.159189
[303]	valid_0's multi_logloss: 0.159113
[304]	valid_0's multi_logloss: 0.159037
[305]	valid_0's multi_logloss: 0.158966


[497]	valid_0's multi_logloss: 0.153656
[498]	valid_0's multi_logloss: 0.15365
[499]	valid_0's multi_logloss: 0.153642
[500]	valid_0's multi_logloss: 0.153634
[501]	valid_0's multi_logloss: 0.153624
[502]	valid_0's multi_logloss: 0.153617
[503]	valid_0's multi_logloss: 0.153609
[504]	valid_0's multi_logloss: 0.1536
[505]	valid_0's multi_logloss: 0.15359
[506]	valid_0's multi_logloss: 0.153582
[507]	valid_0's multi_logloss: 0.153577
[508]	valid_0's multi_logloss: 0.153574
[509]	valid_0's multi_logloss: 0.15357
[510]	valid_0's multi_logloss: 0.153566
[511]	valid_0's multi_logloss: 0.153558
[512]	valid_0's multi_logloss: 0.153553
[513]	valid_0's multi_logloss: 0.153548
[514]	valid_0's multi_logloss: 0.153541
[515]	valid_0's multi_logloss: 0.153535
[516]	valid_0's multi_logloss: 0.153531
[517]	valid_0's multi_logloss: 0.153528
[518]	valid_0's multi_logloss: 0.153525
[519]	valid_0's multi_logloss: 0.153519
[520]	valid_0's multi_logloss: 0.153507
[521]	valid_0's multi_logloss: 0.153499
[522]

[14]	valid_0's multi_logloss: 0.750905
[15]	valid_0's multi_logloss: 0.737791
[16]	valid_0's multi_logloss: 0.72504
[17]	valid_0's multi_logloss: 0.712637
[18]	valid_0's multi_logloss: 0.700559
[19]	valid_0's multi_logloss: 0.688804
[20]	valid_0's multi_logloss: 0.677399
[21]	valid_0's multi_logloss: 0.666262
[22]	valid_0's multi_logloss: 0.655419
[23]	valid_0's multi_logloss: 0.644874
[24]	valid_0's multi_logloss: 0.634619
[25]	valid_0's multi_logloss: 0.62461
[26]	valid_0's multi_logloss: 0.614831
[27]	valid_0's multi_logloss: 0.605296
[28]	valid_0's multi_logloss: 0.596026
[29]	valid_0's multi_logloss: 0.586991
[30]	valid_0's multi_logloss: 0.578184
[31]	valid_0's multi_logloss: 0.569603
[32]	valid_0's multi_logloss: 0.561205
[33]	valid_0's multi_logloss: 0.553002
[34]	valid_0's multi_logloss: 0.545021
[35]	valid_0's multi_logloss: 0.537208
[36]	valid_0's multi_logloss: 0.529567
[37]	valid_0's multi_logloss: 0.522117
[38]	valid_0's multi_logloss: 0.514839
[39]	valid_0's multi_loglos

[231]	valid_0's multi_logloss: 0.166933
[232]	valid_0's multi_logloss: 0.166722
[233]	valid_0's multi_logloss: 0.166513
[234]	valid_0's multi_logloss: 0.166304
[235]	valid_0's multi_logloss: 0.166103
[236]	valid_0's multi_logloss: 0.165905
[237]	valid_0's multi_logloss: 0.165713
[238]	valid_0's multi_logloss: 0.165522
[239]	valid_0's multi_logloss: 0.165334
[240]	valid_0's multi_logloss: 0.165146
[241]	valid_0's multi_logloss: 0.164962
[242]	valid_0's multi_logloss: 0.164779
[243]	valid_0's multi_logloss: 0.164599
[244]	valid_0's multi_logloss: 0.164421
[245]	valid_0's multi_logloss: 0.164247
[246]	valid_0's multi_logloss: 0.164074
[247]	valid_0's multi_logloss: 0.163908
[248]	valid_0's multi_logloss: 0.163743
[249]	valid_0's multi_logloss: 0.163579
[250]	valid_0's multi_logloss: 0.163418
[251]	valid_0's multi_logloss: 0.163259
[252]	valid_0's multi_logloss: 0.163108
[253]	valid_0's multi_logloss: 0.162956
[254]	valid_0's multi_logloss: 0.162806
[255]	valid_0's multi_logloss: 0.162654


[438]	valid_0's multi_logloss: 0.152684
[439]	valid_0's multi_logloss: 0.152671
[440]	valid_0's multi_logloss: 0.152654
[441]	valid_0's multi_logloss: 0.152642
[442]	valid_0's multi_logloss: 0.152626
[443]	valid_0's multi_logloss: 0.15261
[444]	valid_0's multi_logloss: 0.152596
[445]	valid_0's multi_logloss: 0.152583
[446]	valid_0's multi_logloss: 0.152573
[447]	valid_0's multi_logloss: 0.152561
[448]	valid_0's multi_logloss: 0.152548
[449]	valid_0's multi_logloss: 0.152536
[450]	valid_0's multi_logloss: 0.152523
[451]	valid_0's multi_logloss: 0.15251
[452]	valid_0's multi_logloss: 0.152497
[453]	valid_0's multi_logloss: 0.152487
[454]	valid_0's multi_logloss: 0.152479
[455]	valid_0's multi_logloss: 0.152466
[456]	valid_0's multi_logloss: 0.152454
[457]	valid_0's multi_logloss: 0.152444
[458]	valid_0's multi_logloss: 0.152431
[459]	valid_0's multi_logloss: 0.152421
[460]	valid_0's multi_logloss: 0.152412
[461]	valid_0's multi_logloss: 0.1524
[462]	valid_0's multi_logloss: 0.152387
[463

[648]	valid_0's multi_logloss: 0.15159
[649]	valid_0's multi_logloss: 0.15159
[650]	valid_0's multi_logloss: 0.151587
[651]	valid_0's multi_logloss: 0.151585
[652]	valid_0's multi_logloss: 0.151581
[653]	valid_0's multi_logloss: 0.151581
[654]	valid_0's multi_logloss: 0.151576
[655]	valid_0's multi_logloss: 0.151574
[656]	valid_0's multi_logloss: 0.151574
[657]	valid_0's multi_logloss: 0.151576
[658]	valid_0's multi_logloss: 0.151571
[659]	valid_0's multi_logloss: 0.15157
[660]	valid_0's multi_logloss: 0.151571
[661]	valid_0's multi_logloss: 0.151567
[662]	valid_0's multi_logloss: 0.151567
[663]	valid_0's multi_logloss: 0.151565
[664]	valid_0's multi_logloss: 0.151561
[665]	valid_0's multi_logloss: 0.15156
[666]	valid_0's multi_logloss: 0.151559
[667]	valid_0's multi_logloss: 0.151559
[668]	valid_0's multi_logloss: 0.151558
[669]	valid_0's multi_logloss: 0.151555
[670]	valid_0's multi_logloss: 0.151552
[671]	valid_0's multi_logloss: 0.151552
[672]	valid_0's multi_logloss: 0.151551
[673

[161]	valid_0's multi_logloss: 0.193659
[162]	valid_0's multi_logloss: 0.193003
[163]	valid_0's multi_logloss: 0.192353
[164]	valid_0's multi_logloss: 0.191719
[165]	valid_0's multi_logloss: 0.191095
[166]	valid_0's multi_logloss: 0.190481
[167]	valid_0's multi_logloss: 0.189877
[168]	valid_0's multi_logloss: 0.189288
[169]	valid_0's multi_logloss: 0.188702
[170]	valid_0's multi_logloss: 0.188127
[171]	valid_0's multi_logloss: 0.187558
[172]	valid_0's multi_logloss: 0.187001
[173]	valid_0's multi_logloss: 0.186449
[174]	valid_0's multi_logloss: 0.185911
[175]	valid_0's multi_logloss: 0.185376
[176]	valid_0's multi_logloss: 0.184853
[177]	valid_0's multi_logloss: 0.18434
[178]	valid_0's multi_logloss: 0.183841
[179]	valid_0's multi_logloss: 0.183346
[180]	valid_0's multi_logloss: 0.182867
[181]	valid_0's multi_logloss: 0.182392
[182]	valid_0's multi_logloss: 0.181921
[183]	valid_0's multi_logloss: 0.181456
[184]	valid_0's multi_logloss: 0.180996
[185]	valid_0's multi_logloss: 0.180552
[

[367]	valid_0's multi_logloss: 0.153585
[368]	valid_0's multi_logloss: 0.15355
[369]	valid_0's multi_logloss: 0.153516
[370]	valid_0's multi_logloss: 0.153484
[371]	valid_0's multi_logloss: 0.15345
[372]	valid_0's multi_logloss: 0.153419
[373]	valid_0's multi_logloss: 0.153388
[374]	valid_0's multi_logloss: 0.153357
[375]	valid_0's multi_logloss: 0.153325
[376]	valid_0's multi_logloss: 0.153294
[377]	valid_0's multi_logloss: 0.153264
[378]	valid_0's multi_logloss: 0.153234
[379]	valid_0's multi_logloss: 0.153204
[380]	valid_0's multi_logloss: 0.153174
[381]	valid_0's multi_logloss: 0.153146
[382]	valid_0's multi_logloss: 0.153116
[383]	valid_0's multi_logloss: 0.153091
[384]	valid_0's multi_logloss: 0.153062
[385]	valid_0's multi_logloss: 0.153035
[386]	valid_0's multi_logloss: 0.153009
[387]	valid_0's multi_logloss: 0.152982
[388]	valid_0's multi_logloss: 0.152956
[389]	valid_0's multi_logloss: 0.152932
[390]	valid_0's multi_logloss: 0.152903
[391]	valid_0's multi_logloss: 0.152873
[3

[575]	valid_0's multi_logloss: 0.150892
[576]	valid_0's multi_logloss: 0.150888
[577]	valid_0's multi_logloss: 0.150881
[578]	valid_0's multi_logloss: 0.150874
[579]	valid_0's multi_logloss: 0.150872
[580]	valid_0's multi_logloss: 0.15087
[581]	valid_0's multi_logloss: 0.15087
[582]	valid_0's multi_logloss: 0.150868
[583]	valid_0's multi_logloss: 0.150862
[584]	valid_0's multi_logloss: 0.150861
[585]	valid_0's multi_logloss: 0.150858
[586]	valid_0's multi_logloss: 0.150854
[587]	valid_0's multi_logloss: 0.150851
[588]	valid_0's multi_logloss: 0.150849
[589]	valid_0's multi_logloss: 0.150845
[590]	valid_0's multi_logloss: 0.150843
[591]	valid_0's multi_logloss: 0.150839
[592]	valid_0's multi_logloss: 0.150835
[593]	valid_0's multi_logloss: 0.150831
[594]	valid_0's multi_logloss: 0.15083
[595]	valid_0's multi_logloss: 0.150829
[596]	valid_0's multi_logloss: 0.150827
[597]	valid_0's multi_logloss: 0.150827
[598]	valid_0's multi_logloss: 0.150822
[599]	valid_0's multi_logloss: 0.150821
[60

In [58]:
print(f'{accuracy_score(y, np.argmax(p_val, axis=1)) * 100:.4f}%')

93.6422%


### 14-5) 제출 파일 생성

In [59]:
sub = pd.read_csv(sample_file, index_col=0)
sub[target_col] = np.argmax(p_tst, axis=1)
sub.to_csv(sub_file)