## Data loading

In [72]:
import mlflow
import mlflow.lightgbm
from datetime import datetime
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import pandas as pd
import os
import random
import warnings
warnings.filterwarnings('ignore')
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
import numpy as np
from utils import custom_train_test_split, lgbm_predict, post_slack, title2filename

SEED = 13

## 1. Data loading
# Read the feature-engineered frame that holds both the train and test rows.
data_dir = '/opt/ml/input/data'  # data directory
after_fe_path = os.path.join(data_dir, 'after_fe_train_test_bigcategory_fe.pkl')
df = pd.read_pickle(after_fe_path)

# The `kind` column tags every row as 'train' or 'test'.
train_df = df.loc[df['kind'] == 'train']
train, valid = custom_train_test_split(train_df, ratio=0.7, seed=SEED)  # split the training rows
test = df.loc[df['kind'] == 'test']  # test rows
# Test rows with a known answer (answerCode != -1) are reused for training;
# per the original note this is everything up to the second-to-last entry.
train2 = test.loc[test['answerCode'] != -1]
train = pd.concat([train, train2])  # merge into the training set
train.shape, valid.shape, test.shape

# Separate the target column from the feature columns for both splits.
x_train, y_train = train.drop(columns='answerCode'), train[['answerCode']]
x_valid, y_valid = valid.drop(columns='answerCode'), valid[['answerCode']]
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape

((1845539, 40), (1974, 40), (260114, 40))

((1845539, 39), (1845539, 1), (1974, 39), (1974, 1))

In [2]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0,userID,assessmentItemID,testId,answerCode,Timestamp,KnowledgeTag,kind,uidIdx,assIdx,testIdx,...,Timestamp2,solvetime,solvesec,solvesec_3600,time_category,solvesec_cumsum,solvecumsum_category,big_category_acc,big_category_std,big_category_cumconut
0,0,A060001001,A060000001,1,2020-03-24 00:17:11,7224,train,0,5354,975,...,2020-03-24 00:17:11,0 days 00:00:00,0.0,0.0,0,0.0,0,0.711898,0.453371,0
1,0,A060001002,A060000001,1,2020-03-24 00:17:14,7225,train,0,5355,975,...,2020-03-24 00:17:14,0 days 00:00:03,3.0,3.0,1,3.0,1,0.711898,0.453371,1
2,0,A060001003,A060000001,1,2020-03-24 00:17:22,7225,train,0,5356,975,...,2020-03-24 00:17:22,0 days 00:00:08,8.0,8.0,3,11.0,4,0.711898,0.453371,2


## Hyper Parameter 설정

In [79]:
# LightGBM hyper-parameters.
# Only the objective and the seed are set explicitly; everything else is left
# at the library default. Options tried earlier are kept below, disabled.
params = dict(
    objective="binary",
    seed=42,
    # --- disabled options ---
    # max_depth=8,            # 8
    # min_data_in_leaf=1000,
    # feature_fraction=0.6,   # 0.8
    # bagging_fraction=0.75,
    # max_cat_group=64,
    # boosting="gbdt",        # dart
    # learning_rate=0.01,     # 0.01
    # bagging_freq=5,
    # max_bin=50,
    # num_leaves=80,          # 40
    # metric="auc",
)

##  big_category 정답률, std, cumcount 추가

In [4]:
# List the columns available in the training feature frame.
x_train.columns

Index(['userID', 'assessmentItemID', 'testId', 'Timestamp', 'KnowledgeTag',
       'kind', 'uidIdx', 'assIdx', 'testIdx', 'user_correct_answer',
       'user_total_answer', 'user_acc', 'month', 'day', 'hour', 'dayname',
       'big_category', 'problem_num', 'mid_category', 'test_mean', 'test_std',
       'test_sum', 'tag_mean', 'tag_std', 'tag_sum', 'Timestamp2', 'solvetime',
       'solvesec', 'solvesec_3600', 'time_category', 'solvesec_cumsum',
       'solvecumsum_category', 'big_category_acc', 'big_category_std',
       'big_category_cumconut'],
      dtype='object')

In [5]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_acc', 'big_category_std', 'big_category_cumconut',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM big_category 정답률, std, cumcount 추가] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    model1 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        verbose_eval=100,
        # early_stopping_rounds=200,  # disabled here: all 3200 rounds are trained
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model1.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model1_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model1_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe


([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(11/30 Wed)[LGBM big_category 정답률, std, cumcount 추가] 피처: 27개',
 '사용된 피처(27)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_acc, big_category_std, big_category_cumconut')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19316
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
[100]	valid_0's binary_logloss: 0.560243
[200]	valid_0's binary_logloss: 0.551904
[300]	valid_0's binary_logloss: 0.549396
[400]	valid_0's binary_logloss: 0.547937
[500]	valid_0's binary_logloss: 0.546311
[600]	valid_0's binary_logloss: 0.544856
[700]	valid_0's binary_logloss: 0.544792
[800]	valid_0's binary_logloss: 0.544924
[900]	valid_0's binary_logloss: 0.545558
[1000]	valid_0's binary_logloss: 0.545992
[1100]	valid_0's binary_logloss: 0.546207
[1200]	valid_0's binary_logloss: 0.546522
[1300]	valid_0's binary_logloss: 0.546992
[1400

NameError: name 'model4' is not defined

In [6]:
# Run model1 on the test frame and save the result under the run's file name.
# lgbm_predict is a project helper; judging by its log output it writes a
# prediction CSV — confirm the details in utils.
lgbm_predict(test, model1, FEATS, f'{file_name}.csv')

writing prediction : output/11_30_Wed_LGBM_big_category_정답률_std_cumcount_추가_피처_27개.csv


In [13]:
### Register the submission result in MLflow
# Fill in LB_AUC with the leaderboard score after submitting.
LB_AUC = 0.8111
run_id = model1_run_id
# Re-open the finished run as a context manager so it is always closed again,
# even if logging raises; a run left active would make the next start_run fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC", LB_AUC)

<ActiveRun: >

Active run_id: 6a42312b2c7044a8b69dc362a392f4c8


## big_category 정답률, std, cumcount 추가 - earlystop

In [75]:
# Summary statistics of the big_category aggregate features, one row per feature.
x_train[['big_category_acc', 'big_category_std', 'big_category_cumconut']].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
big_category_acc,1845539.0,0.654365,0.105236,0.45447,0.521167,0.679714,0.711898,0.801241
big_category_std,1845539.0,0.463369,0.031221,0.399685,0.453371,0.467279,0.498214,0.500802
big_category_cumconut,1845539.0,159.33801,159.836556,0.0,41.0,110.0,228.0,1268.0


In [73]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_acc', 'big_category_std', 'big_category_cumconut',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM big_category 정답률, std, cumcount 추가 earlystop] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model2re = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
        verbose_eval=100,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model2re.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model2re_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model2re_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model2re, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/02 Fri)[LGBM big_category 정답률, std, cumcount 추가 earlystop] 피처: 27개',
 '사용된 피처(27)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_acc, big_category_std, big_category_cumconut')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19316
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.560243
[200]	valid_0's binary_logloss: 0.551904
[300]	valid_0's binary_logloss: 0.549396
[400]	valid_0's binary_logloss: 0.547937
[500]	valid_0's binary_logloss: 0.546311
[600]	valid_0's binary_logloss: 0.544856
[700]	valid_0's binary_logloss: 0.544792
[800]	valid_0's binary_logloss: 0.544924
Early stopping, best iteration is:
[672]	valid_0's binary_logloss: 0.544468
VALID AUC : 0.7970238976941687 ACC : 0.7304964539007093

model2re_run_id='46

In [8]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_acc', 'big_category_std', 'big_category_cumconut',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM big_category 정답률, std, cumcount 추가 earlystop] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model2 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
        verbose_eval=100,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model2.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model2_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model2_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model2, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(11/30 Wed)[LGBM big_category 정답률, std, cumcount 추가 earlystop] 피처: 27개',
 '사용된 피처(27)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_acc, big_category_std, big_category_cumconut')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19316
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.560243
[200]	valid_0's binary_logloss: 0.551904
[300]	valid_0's binary_logloss: 0.549396
[400]	valid_0's binary_logloss: 0.547937
[500]	valid_0's binary_logloss: 0.546311
[600]	valid_0's binary_logloss: 0.544856
[700]	valid_0's binary_logloss: 0.544792
[800]	valid_0's binary_logloss: 0.544924
Early stopping, best iteration is:
[672]	valid_0's binary_logloss: 0.544468
VALID AUC : 0.7970238976941687 ACC : 0.7304964539007093

model2_run_id='88aa

In [12]:
### Register the submission result in MLflow
# Fill in LB_AUC with the leaderboard score after submitting.
LB_AUC = 0.8155
run_id = model2_run_id
# Re-open the finished run as a context manager so it is always closed again,
# even if logging raises; a run left active would make the next start_run fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC", LB_AUC)

<ActiveRun: >

Active run_id: 88aac4c9c9cb42c089856d46ef8ba746


## 기존 month 추가한 것까지만 earlystop

In [71]:
# Check the (rows, columns) size of the test frame.
test.shape

(260114, 40)

In [10]:
### Feature setup
# Columns fed to the model (no big_category aggregate statistics in this run).
# Order matters: categorical features are passed to LightGBM by position.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM month추가한것 까지만 earlystop] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model5 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
        verbose_eval=100,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model5.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model5_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model5_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model5, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM month추가한것 까지만 earlystop] 피처: 24개',
 '사용된 피처(24)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19041
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.561618
[200]	valid_0's binary_logloss: 0.554541
[300]	valid_0's binary_logloss: 0.552477
[400]	valid_0's binary_logloss: 0.550644
[500]	valid_0's binary_logloss: 0.549608
[600]	valid_0's binary_logloss: 0.549414
[700]	valid_0's binary_logloss: 0.549202
Early stopping, best iteration is:
[543]	valid_0's binary_logloss: 0.548983
VALID AUC : 0.7934421238005892 ACC : 0.7239108409321175

model5_run_id='d63b95d1bcd243e9ace084652f35c287'
writing pre

In [10]:
### Feature setup
# Columns fed to the model (no big_category aggregate statistics in this run).
# Order matters: categorical features are passed to LightGBM by position.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM month추가한것 까지만 earlystop] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model5 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
        verbose_eval=100,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model5.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model5_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model5_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model5, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM month추가한것 까지만 earlystop] 피처: 24개',
 '사용된 피처(24)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19041
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.561618
[200]	valid_0's binary_logloss: 0.554541
[300]	valid_0's binary_logloss: 0.552477
[400]	valid_0's binary_logloss: 0.550644
[500]	valid_0's binary_logloss: 0.549608
[600]	valid_0's binary_logloss: 0.549414
[700]	valid_0's binary_logloss: 0.549202
Early stopping, best iteration is:
[543]	valid_0's binary_logloss: 0.548983
VALID AUC : 0.7934421238005892 ACC : 0.7239108409321175

model5_run_id='d63b95d1bcd243e9ace084652f35c287'
writing pre

In [11]:
### Register the submission result in MLflow
# Fill in LB_AUC with the leaderboard score after submitting.
LB_AUC = 0.8112
run_id = model5_run_id
# Re-open the finished run as a context manager so it is always closed again,
# even if logging raises; a run left active would make the next start_run fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC", LB_AUC)

<ActiveRun: >

Active run_id: d63b95d1bcd243e9ace084652f35c287


In [18]:
# List every column of the full (train + test) feature frame.
df.columns

Index(['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp',
       'KnowledgeTag', 'kind', 'uidIdx', 'assIdx', 'testIdx',
       'user_correct_answer', 'user_total_answer', 'user_acc', 'month', 'day',
       'hour', 'dayname', 'big_category', 'problem_num', 'mid_category',
       'test_mean', 'test_std', 'test_sum', 'tag_mean', 'tag_std', 'tag_sum',
       'Timestamp2', 'solvetime', 'solvesec', 'solvesec_3600', 'time_category',
       'solvesec_cumsum', 'solvecumsum_category', 'big_category_acc',
       'big_category_std', 'big_category_cumconut', 'big_category_user_acc',
       'big_category_user_std', 'big_category_answer',
       'big_category_answer_log1p'],
      dtype='object')

## big category 유저별 정답률, std, cumcount

In [22]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
         'uidIdx','assIdx','testIdx','KnowledgeTag',
         'user_correct_answer','user_total_answer','user_acc',
         'month','day','dayname','hour',
         'test_mean','test_sum','test_std',
         'tag_std','tag_mean','tag_sum',
         'solvesec_3600','time_category',
         'solvesec_cumsum','solvecumsum_category',
         'big_category',
         # The comma after 'big_category_cumconut' is required: without it Python
         # concatenates the adjacent literals into one non-existent column name.
         'big_category_user_acc','big_category_user_std','big_category_cumconut',
         'mid_category',
         'problem_num',
        ]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = ['uidIdx','assIdx','testIdx','KnowledgeTag','big_category','mid_category',
             'problem_num','dayname','month','time_category','solvecumsum_category']
cat_feats_idx = [i for i,e in enumerate(FEATS) if e in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = datetime.now().strftime('%m/%d %a')
title=f"🌈({date})[LGBM big category 유저별 정답률, std, cumcount] 피처: {len(FEATS)}개"
using_feats=", ".join(FEATS)
desc=f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(x_train[FEATS], y_train)
lgb_x_valid = lgb.Dataset(x_valid[FEATS], y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model6 = lgb.train(
        params, 
        lgb_x_train,
        valid_sets=[lgb_x_valid],
        verbose_eval=100,
        num_boost_round=3200,
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model6.predict(x_valid[FEATS])
    acc = accuracy_score(y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(y_valid, preds)
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC",auc)
    mlflow.log_metric("VAL Set SEED",SEED)
    model6_run_id=run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model6_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model6, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM big category 유저별 정답률, std, cumcount] 피처: 27개',
 '사용된 피처(27)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_user_acc, big_category_user_std, big_category_cumconut')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19806
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.513417
[200]	valid_0's binary_logloss: 0.502819
[300]	valid_0's binary_logloss: 0.500721
[400]	valid_0's binary_logloss: 0.500024
[500]	valid_0's binary_logloss: 0.499942
[600]	valid_0's binary_logloss: 0.500928
Early stopping, best iteration is:
[425]	valid_0's binary_logloss: 0.499722
VALID AUC : 0.8335442074425188 ACC : 0.7563323201621074

model6_run_id='3af96264ab7544119d567b2ad3ec4921'
writing prediction : output/12_01_Thu_LGBM_big_categ

In [25]:
### Register the submission result in MLflow
# Fill in LB_AUC with the leaderboard score after submitting.
LB_AUC = 0.8024
run_id = model6_run_id
# Re-open the finished run as a context manager so it is always closed again,
# even if logging raises; a run left active would make the next start_run fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC", LB_AUC)

<ActiveRun: >

Active run_id: 3af96264ab7544119d567b2ad3ec4921


## big_category_answer_log1p 추가

In [23]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_user_acc', 'big_category_user_std', 'big_category_cumconut',
    'big_category_answer_log1p',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM big_category_answer_log1p 추가] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Train up to 3200 rounds, stopping once the validation score has not
    # improved for 200 rounds.
    model7 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        early_stopping_rounds=200,
        verbose_eval=100,
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model7.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model7_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model7_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model7, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM big_category_answer_log1p 추가] 피처: 28개',
 '사용된 피처(28)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_user_acc, big_category_user_std, big_category_cumconut, big_category_answer_log1p')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20061
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.512626
[200]	valid_0's binary_logloss: 0.503799
[300]	valid_0's binary_logloss: 0.500915
[400]	valid_0's binary_logloss: 0.500644
[500]	valid_0's binary_logloss: 0.500709
[600]	valid_0's binary_logloss: 0.500692
[700]	valid_0's binary_logloss: 0.501092
Early stopping, best iteration is:
[545]	valid_0's binary_logloss: 0.500358
VALID AUC : 0.8335503793676656 ACC : 0.756838905775076

model7_run_id='9673ba65009845779aed5d57850755e3'
writing pred

In [26]:
### Register the submission result in MLflow
# Fill in LB_AUC with the leaderboard score after submitting.
LB_AUC = 0.8062
run_id = model7_run_id
# Re-open the finished run as a context manager so it is always closed again,
# even if logging raises; a run left active would make the next start_run fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC", LB_AUC)

<ActiveRun: >

Active run_id: 9673ba65009845779aed5d57850755e3


### big_category_answer_log1p 추가 3200 epoch

In [28]:
### Feature setup
# Columns fed to the model. Order matters: categorical features are passed
# to LightGBM by their position in this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_user_acc', 'big_category_user_std', 'big_category_cumconut',
    'big_category_answer_log1p',
]

# Features treated as categorical, and their positions inside FEATS.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, feat in enumerate(FEATS) if feat in cat_feats]

### Training and prediction

# Run title / description recorded in MLflow.
date = f"{datetime.now():%m/%d %a}"
title = f"🌈({date})[LGBM big_category_answer_log1p 추가 3200epoch] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(data=x_train[FEATS], label=y_train)
lgb_x_valid = lgb.Dataset(data=x_valid[FEATS], label=y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    model71 = lgb.train(
        params=params,
        train_set=lgb_x_train,
        num_boost_round=3200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        verbose_eval=100,
        # early_stopping_rounds=200,  # disabled here: all 3200 rounds are trained
    )

    # Score the validation split and attach the results to the MLflow run.
    preds = model71.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, (preds >= 0.5).astype(int))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    model71_run_id = run.info.run_id
    post_slack("done")  # project helper; presumably posts a Slack notification
    print(f"{model71_run_id=}")
    file_name = title2filename(title)  # project helper; presumably makes the title file-name safe
    lgbm_predict(test, model71, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM big_category_answer_log1p 추가 3200epoch] 피처: 28개',
 '사용된 피처(28)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_user_acc, big_category_user_std, big_category_cumconut, big_category_answer_log1p')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20061
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
[100]	valid_0's binary_logloss: 0.512626
[200]	valid_0's binary_logloss: 0.503799
[300]	valid_0's binary_logloss: 0.500915
[400]	valid_0's binary_logloss: 0.500644
[500]	valid_0's binary_logloss: 0.500709
[600]	valid_0's binary_logloss: 0.500692
[700]	valid_0's binary_logloss: 0.501092
[800]	valid_0's binary_logloss: 0.501289
[900]	valid_0's binary_logloss: 0.501496
[1000]	valid_0's binary_logloss: 0.502544
[1100]	valid_0's binary_logloss: 0.503477
[1200]	valid_0's binary_logloss: 0.504137
[1300]	valid_0's binary_logloss: 0.503887
[1400

In [29]:
### Register the leaderboard score on the MLflow run
# Fill in after submitting: leaderboard AUC obtained by this model.
LB_AUC=0.7948
run_id = model71_run_id
# Re-open the training run by id. Using the context manager guarantees the run
# is ended even if logging raises, so a stale active run cannot make later
# mlflow.start_run(...) calls in this notebook fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC",LB_AUC)

<ActiveRun: >

Active run_id: 254fd0dba60b46cf84742c32cc677bb4


## big_category_answer log1p 안한것

In [24]:
### Feature configuration
# Columns fed to LightGBM, in model-input order. The categorical indices
# computed below are positions within this list.
FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
    'big_category_user_acc', 'big_category_user_std', 'big_category_cumconut',
    'big_category_answer',
]

# Columns that LightGBM should treat as categorical.
cat_feats = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag', 'big_category', 'mid_category',
    'problem_num', 'dayname', 'month', 'time_category', 'solvecumsum_category',
]
cat_feats_idx = [pos for pos, col in enumerate(FEATS) if col in cat_feats]

### Train and predict

# Run title / description recorded in MLflow (the title is also used for the
# submission file name further down).
date = datetime.now().strftime('%m/%d %a')
title = f"🌈({date})[LGBM big_category_answer log1p 안한것] 피처: {len(FEATS)}개"
using_feats = ", ".join(FEATS)
desc = f"사용된 피처({len(FEATS)})\n{using_feats}"
cat_feats_idx, title, desc

mlflow.lightgbm.autolog()
lgb_x_train = lgb.Dataset(x_train[FEATS], y_train)
lgb_x_valid = lgb.Dataset(x_valid[FEATS], y_valid)

with mlflow.start_run(run_name=title, description=desc) as run:
    # Up to 3200 boosting rounds, stopping once the validation metric has not
    # improved for 200 rounds.
    model8 = lgb.train(
        params,
        lgb_x_train,
        num_boost_round=3200,
        early_stopping_rounds=200,
        valid_sets=[lgb_x_valid],
        categorical_feature=cat_feats_idx,
        verbose_eval=100,
    )

    # Validation metrics: AUC on the raw scores, accuracy at a 0.5 threshold.
    preds = model8.predict(x_valid[FEATS])
    auc = roc_auc_score(y_valid, preds)
    acc = accuracy_score(y_valid, np.where(preds >= 0.5, 1, 0))
    print(f'VALID AUC : {auc} ACC : {acc}\n')
    mlflow.log_metric("VAL AUC", auc)
    mlflow.log_metric("VAL Set SEED", SEED)

    # Keep the run id so the leaderboard score can be attached to this run later.
    model8_run_id = run.info.run_id
    post_slack("done")
    print(f"{model8_run_id=}")

    # Write the test-set predictions to a CSV named after the run title.
    file_name = title2filename(title)
    lgbm_predict(test, model8, FEATS, f'{file_name}.csv')

([0, 1, 2, 3, 6, 7, 8, 9, 11, 21, 23],
 '🌈(12/01 Thu)[LGBM big_category_answer log1p 안한것] 피처: 28개',
 '사용된 피처(28)\nuidIdx, assIdx, testIdx, KnowledgeTag, user_correct_answer, user_total_answer, big_category, mid_category, problem_num, month, day, dayname, hour, user_acc, test_mean, test_sum, test_std, tag_std, tag_mean, tag_sum, solvesec_3600, time_category, solvesec_cumsum, solvecumsum_category, big_category_user_acc, big_category_user_std, big_category_cumconut, big_category_answer')

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20061
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
[100]	valid_0's binary_logloss: 0.512626
[200]	valid_0's binary_logloss: 0.503799
[300]	valid_0's binary_logloss: 0.500915
[400]	valid_0's binary_logloss: 0.500644
[500]	valid_0's binary_logloss: 0.500709
[600]	valid_0's binary_logloss: 0.500692
[700]	valid_0's binary_logloss: 0.501092
Early stopping, best iteration is:
[545]	valid_0's binary_logloss: 0.500358
VALID AUC : 0.8335503793676656 ACC : 0.756838905775076

model8_run_id='3c9e5adba003459694dbdd9ca5384119'
writing pred

In [None]:
### Register the leaderboard score on the MLflow run
# Fill in after submitting: leaderboard AUC obtained by this model.
LB_AUC=0.8112
run_id = model8_run_id
# Re-open the training run by id. Using the context manager guarantees the run
# is ended even if logging raises, so a stale active run cannot make later
# mlflow.start_run(...) calls in this notebook fail.
with mlflow.start_run(run_id=run_id) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_metric("LB AUC",LB_AUC)

## Validation Set Align

In [31]:
# Feature lists of the previously trained models, keyed by model number
# (FEATS<n> belongs to model<n>). Every list is a fresh list object.

# The 24 columns shared by every model.
_COMMON_FEATS = [
    'uidIdx', 'assIdx', 'testIdx', 'KnowledgeTag',
    'user_correct_answer', 'user_total_answer',
    'big_category', 'mid_category', 'problem_num',
    'month', 'day', 'dayname', 'hour',
    'user_acc',
    'test_mean', 'test_sum', 'test_std',
    'tag_std', 'tag_mean', 'tag_sum',
    'solvesec_3600', 'time_category',
    'solvesec_cumsum', 'solvecumsum_category',
]

# big_category statistics (accuracy / std / cumulative count columns).
_BIG_CATEGORY_FEATS = ['big_category_acc', 'big_category_std', 'big_category_cumconut']
# The per-user variants of the big_category statistics.
_BIG_CATEGORY_USER_FEATS = ['big_category_user_acc', 'big_category_user_std', 'big_category_cumconut']

# model1
FEATS1 = _COMMON_FEATS + _BIG_CATEGORY_FEATS

# model2 (same columns as model1)
FEATS2 = _COMMON_FEATS + _BIG_CATEGORY_FEATS

# model5 (common columns only)
FEATS5 = list(_COMMON_FEATS)

# model6
FEATS6 = _COMMON_FEATS + _BIG_CATEGORY_USER_FEATS

# model7
FEATS7 = _COMMON_FEATS + _BIG_CATEGORY_USER_FEATS + ['big_category_answer_log1p']

# model71 (same columns as model7)
FEATS71 = _COMMON_FEATS + _BIG_CATEGORY_USER_FEATS + ['big_category_answer_log1p']

In [53]:
def model2auc(model, x_valid, y_valid, FEATS):
    """Return the ROC AUC of ``model`` on a validation set.

    ``model.predict`` is called on the ``FEATS`` columns of ``x_valid`` and the
    resulting scores are compared against ``y_valid`` with ``roc_auc_score``.
    """
    return roc_auc_score(y_valid, model.predict(x_valid[FEATS]))

In [34]:
from collections import defaultdict

# Registry mapping "model<n>" -> [model object, feature list].
model_dict = defaultdict(list)

for n in (1, 2, 5, 6, 7, 71):
    key = f"model{n}"
    entry = model_dict[key]
    # eval looks up the notebook-level variables model<n> and FEATS<n> by name.
    entry.append(eval(key))
    entry.append(eval(f'FEATS{n}'))

In [40]:
# Display the names registered in model_dict.
model_dict.keys()

dict_keys(['model1', 'model2', 'model5', 'model6', 'model7', 'model71'])

## seed별 학습 및 auc 비교

In [None]:
# Models compared in the seed search below; the number after each name is the
# leaderboard AUC used for that model in the search loop.
model2 # 0.8155 - adds big_category accuracy, std, cumcount; early stopping
model6 # 0.8024 - adds per-user big_category accuracy, std, cumcount; early stopping
model71 # 0.7948 - adds big_category_answer_log1p; NOTE(review): originally labelled "early stop", but the model71 training cell uses num_boost_round=3200 with early_stopping_rounds commented out

In [69]:
def model_train2auc(run_title, x_valid, y_valid, FEATS, seed, experiment_id):
    """Train a LightGBM model inside an MLflow run and return it with its validation AUC.

    Training data and hyper-parameters are NOT arguments: the function reads
    the notebook-level ``x_train``, ``y_train``, ``params`` and ``cat_feats``
    at call time, so the caller must (re)build those before each call.

    Parameters
    ----------
    run_title
        Name given to the MLflow run.
    x_valid, y_valid
        Validation features and labels. They are used both as the LightGBM
        validation set (early stopping) and to compute the returned AUC.
        ``x_valid`` is indexed with ``FEATS``, so it must contain those columns.
    FEATS
        Names of the columns to train on.
    seed
        Logged to MLflow as the "VAL Set SEED" metric; it is not passed to
        LightGBM by this function.
    experiment_id
        MLflow experiment the run is created in.

    Returns
    -------
    tuple
        ``(model, auc)``: the trained booster and its ROC AUC on the
        validation set.
    """
    # categorical_feature is given as positions, so the positions must be
    # computed from the FEATS actually used here rather than taken from the
    # global cat_feats_idx (which belongs to whichever FEATS was defined last).
    categorical_idx = [i for i, col in enumerate(FEATS) if col in cat_feats]

    lgb_x_train = lgb.Dataset(x_train[FEATS], y_train)
    lgb_x_valid = lgb.Dataset(x_valid[FEATS], y_valid)
    with mlflow.start_run(run_name=run_title, experiment_id=experiment_id) as run:
        model = lgb.train(
            params, 
            lgb_x_train,
            valid_sets=[lgb_x_valid],
            verbose_eval=-1,
            num_boost_round=3200,
            categorical_feature=categorical_idx,
            early_stopping_rounds=200,
        )
        preds = model.predict(x_valid[FEATS])
        auc = roc_auc_score(y_valid, preds)
        mlflow.log_metric("VAL AUC",auc)
        mlflow.log_metric("VAL Set SEED",seed)
    return model, auc

In [None]:
from mlflow.tracking import MlflowClient
client = MlflowClient()
experiment_name = "Validation Set Align2"
# create_experiment raises when the name already exists, which made this cell
# fail on every re-run. Reuse the existing experiment when there is one.
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is not None:
    experiment_id = existing_experiment.experiment_id
else:
    experiment_id = client.create_experiment(experiment_name)

In [65]:
# Display the id of the MLflow experiment used by the seed search.
experiment_id

'312762763748397651'

In [70]:
# Seed search: look for a validation split (seed) on which the validation-AUC
# ranking of the candidate models matches their leaderboard ranking.
mlflow.lightgbm.autolog()

# Candidate model numbers and the leaderboard AUC of each.
candidates = [2, 6, 71]
lb_scores = [0.8155, 0.8024, 0.7948]

# Rank of every candidate on the leaderboard (0 = lowest AUC). This is the
# ranking the validation AUCs must reproduce. The previous hard-coded target
# [3,4,1,2,0] had five entries while only three models are compared, so the
# loop could never stop early.
lb_sorted = sorted(lb_scores)
target_rank = [lb_sorted.index(lb) for lb in lb_scores]

# Loop-invariant: test-set rows with a known answer (answerCode != -1) are
# always added to the training data.
test = df[df.kind=='test']
train2 = test[test.answerCode!=-1]

for s in range(100):
    print(f"----------------------------- SEED {s} -----------------------------")
    # Re-split the training data with this seed. model_train2auc reads the
    # notebook-level x_train / y_train, so these names must be rebound here.
    train, valid = custom_train_test_split(train_df, ratio=0.7, seed=s)
    train = pd.concat([train,train2])
    x_train = train.drop('answerCode',axis=1)
    y_train = train[['answerCode']]
    x_valid = valid.drop('answerCode',axis=1)
    y_valid = valid[['answerCode']]
    # Bare expression: shown in the cell output because the notebook sets
    # ast_node_interactivity = 'all'.
    x_train.shape, y_train.shape, x_valid.shape, y_valid.shape
    aucs=[]
    for n,lb in zip(candidates, lb_scores):
        model_name = f"model{n}"
        run_title = f"model{n} - LB AUC:{lb}"
        model,feats = model_dict[model_name]
        model, auc = model_train2auc(run_title, x_valid, y_valid, feats, s, experiment_id)
        print(f"{model_name} Val Auc: {auc:.3f}")
        model_dict[model_name][0] = model
        aucs.append(auc)
    # Rank of every candidate by validation AUC (0 = lowest).
    saucs = sorted(aucs)
    sort_index = [saucs.index(i) for i in aucs]
    print(sort_index)
    if sort_index==target_rank:
        print(s)
        post_slack("done")
        break
else:
    print("No seed in range(100) reproduced the leaderboard ranking")

----------------------------- SEED 0 -----------------------------


((1845739, 39), (1845739, 1), (1968, 39), (1968, 1))

[LightGBM] [Info] Number of positive: 1204303, number of negative: 641436
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19310
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652477 -> initscore=0.629947
[LightGBM] [Info] Start training from score 0.629947
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[760]	valid_0's binary_logloss: 0.525738
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1204303, number of negative: 641436
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19800
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652477

((1845952, 39), (1845952, 1), (2015, 39), (2015, 1))

[LightGBM] [Info] Number of positive: 1208247, number of negative: 637705
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19249
[LightGBM] [Info] Number of data points in the train set: 1845952, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654539 -> initscore=0.639050
[LightGBM] [Info] Start training from score 0.639050
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1182]	valid_0's binary_logloss: 0.519259
model2 Val Auc: 0.817
[LightGBM] [Info] Number of positive: 1208247, number of negative: 637705
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19739
[LightGBM] [Info] Number of data points in the train set: 1845952, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65453

((1845977, 39), (1845977, 1), (2011, 39), (2011, 1))

[LightGBM] [Info] Number of positive: 1209855, number of negative: 636122
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19272
[LightGBM] [Info] Number of data points in the train set: 1845977, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655401 -> initscore=0.642865
[LightGBM] [Info] Start training from score 0.642865
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[661]	valid_0's binary_logloss: 0.534324
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1209855, number of negative: 636122
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19762
[LightGBM] [Info] Number of data points in the train set: 1845977, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655401

((1845528, 39), (1845528, 1), (2009, 39), (2009, 1))

[LightGBM] [Info] Number of positive: 1206497, number of negative: 639031
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19266
[LightGBM] [Info] Number of data points in the train set: 1845528, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653741 -> initscore=0.635523
[LightGBM] [Info] Start training from score 0.635523
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[642]	valid_0's binary_logloss: 0.524985
model2 Val Auc: 0.815
[LightGBM] [Info] Number of positive: 1206497, number of negative: 639031
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19756
[LightGBM] [Info] Number of data points in the train set: 1845528, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653741

((1845319, 39), (1845319, 1), (2016, 39), (2016, 1))

[LightGBM] [Info] Number of positive: 1213592, number of negative: 631727
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19221
[LightGBM] [Info] Number of data points in the train set: 1845319, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657660 -> initscore=0.652882
[LightGBM] [Info] Start training from score 0.652882
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[581]	valid_0's binary_logloss: 0.532356
model2 Val Auc: 0.808
[LightGBM] [Info] Number of positive: 1213592, number of negative: 631727
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19711
[LightGBM] [Info] Number of data points in the train set: 1845319, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657660

((1845636, 39), (1845636, 1), (1977, 39), (1977, 1))

[LightGBM] [Info] Number of positive: 1203348, number of negative: 642288
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19324
[LightGBM] [Info] Number of data points in the train set: 1845636, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.651996 -> initscore=0.627826
[LightGBM] [Info] Start training from score 0.627826
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[679]	valid_0's binary_logloss: 0.540712
model2 Val Auc: 0.801
[LightGBM] [Info] Number of positive: 1203348, number of negative: 642288
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19814
[LightGBM] [Info] Number of data points in the train set: 1845636, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.651996

((1845842, 39), (1845842, 1), (1992, 39), (1992, 1))

[LightGBM] [Info] Number of positive: 1209744, number of negative: 636098
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19283
[LightGBM] [Info] Number of data points in the train set: 1845842, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655389 -> initscore=0.642811
[LightGBM] [Info] Start training from score 0.642811
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[564]	valid_0's binary_logloss: 0.528752
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1209744, number of negative: 636098
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19773
[LightGBM] [Info] Number of data points in the train set: 1845842, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655389

((1845872, 39), (1845872, 1), (1951, 39), (1951, 1))

[LightGBM] [Info] Number of positive: 1208582, number of negative: 637290
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19318
[LightGBM] [Info] Number of data points in the train set: 1845872, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654749 -> initscore=0.639978
[LightGBM] [Info] Start training from score 0.639978
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[816]	valid_0's binary_logloss: 0.532198
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1208582, number of negative: 637290
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19808
[LightGBM] [Info] Number of data points in the train set: 1845872, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654749

((1845720, 39), (1845720, 1), (2000, 39), (2000, 1))

[LightGBM] [Info] Number of positive: 1204294, number of negative: 641426
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19259
[LightGBM] [Info] Number of data points in the train set: 1845720, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652479 -> initscore=0.629955
[LightGBM] [Info] Start training from score 0.629955
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[568]	valid_0's binary_logloss: 0.53642
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1204294, number of negative: 641426
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19749
[LightGBM] [Info] Number of data points in the train set: 1845720, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652479 

((1845915, 39), (1845915, 1), (1940, 39), (1940, 1))

[LightGBM] [Info] Number of positive: 1210029, number of negative: 635886
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19343
[LightGBM] [Info] Number of data points in the train set: 1845915, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655517 -> initscore=0.643380
[LightGBM] [Info] Start training from score 0.643380
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1004]	valid_0's binary_logloss: 0.524605
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1210029, number of negative: 635886
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19833
[LightGBM] [Info] Number of data points in the train set: 1845915, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65551

((1845869, 39), (1845869, 1), (2036, 39), (2036, 1))

[LightGBM] [Info] Number of positive: 1208705, number of negative: 637164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19280
[LightGBM] [Info] Number of data points in the train set: 1845869, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654816 -> initscore=0.640278
[LightGBM] [Info] Start training from score 0.640278
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[336]	valid_0's binary_logloss: 0.534368
model2 Val Auc: 0.807
[LightGBM] [Info] Number of positive: 1208705, number of negative: 637164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19770
[LightGBM] [Info] Number of data points in the train set: 1845869, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654816

((1845919, 39), (1845919, 1), (2006, 39), (2006, 1))

[LightGBM] [Info] Number of positive: 1206759, number of negative: 639160
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19257
[LightGBM] [Info] Number of data points in the train set: 1845919, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653744 -> initscore=0.635539
[LightGBM] [Info] Start training from score 0.635539
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[852]	valid_0's binary_logloss: 0.520483
model2 Val Auc: 0.816
[LightGBM] [Info] Number of positive: 1206759, number of negative: 639160
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19747
[LightGBM] [Info] Number of data points in the train set: 1845919, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653744

((1845257, 39), (1845257, 1), (2031, 39), (2031, 1))

[LightGBM] [Info] Number of positive: 1208975, number of negative: 636282
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19241
[LightGBM] [Info] Number of data points in the train set: 1845257, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655180 -> initscore=0.641886
[LightGBM] [Info] Start training from score 0.641886
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[860]	valid_0's binary_logloss: 0.533095
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1208975, number of negative: 636282
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19731
[LightGBM] [Info] Number of data points in the train set: 1845257, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655180

((1845539, 39), (1845539, 1), (1974, 39), (1974, 1))

[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19316
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639767
[LightGBM] [Info] Start training from score 0.639767
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[672]	valid_0's binary_logloss: 0.544468
model2 Val Auc: 0.797
[LightGBM] [Info] Number of positive: 1208276, number of negative: 637263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19806
[LightGBM] [Info] Number of data points in the train set: 1845539, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701

((1845583, 39), (1845583, 1), (1991, 39), (1991, 1))

[LightGBM] [Info] Number of positive: 1206944, number of negative: 638639
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19270
[LightGBM] [Info] Number of data points in the train set: 1845583, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653964 -> initscore=0.636507
[LightGBM] [Info] Start training from score 0.636507
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[636]	valid_0's binary_logloss: 0.528562
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1206944, number of negative: 638639
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19760
[LightGBM] [Info] Number of data points in the train set: 1845583, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653964

((1845753, 39), (1845753, 1), (1975, 39), (1975, 1))

[LightGBM] [Info] Number of positive: 1205073, number of negative: 640680
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19287
[LightGBM] [Info] Number of data points in the train set: 1845753, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652890 -> initscore=0.631765
[LightGBM] [Info] Start training from score 0.631765
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[915]	valid_0's binary_logloss: 0.536845
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1205073, number of negative: 640680
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19777
[LightGBM] [Info] Number of data points in the train set: 1845753, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652890

((1845890, 39), (1845890, 1), (1966, 39), (1966, 1))

[LightGBM] [Info] Number of positive: 1204859, number of negative: 641031
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19311
[LightGBM] [Info] Number of data points in the train set: 1845890, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652725 -> initscore=0.631040
[LightGBM] [Info] Start training from score 0.631040
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[602]	valid_0's binary_logloss: 0.534157
model2 Val Auc: 0.807
[LightGBM] [Info] Number of positive: 1204859, number of negative: 641031
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19801
[LightGBM] [Info] Number of data points in the train set: 1845890, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652725

((1845409, 39), (1845409, 1), (1992, 39), (1992, 1))

[LightGBM] [Info] Number of positive: 1210497, number of negative: 634912
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19286
[LightGBM] [Info] Number of data points in the train set: 1845409, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655951 -> initscore=0.645300
[LightGBM] [Info] Start training from score 0.645300
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[838]	valid_0's binary_logloss: 0.526817
model2 Val Auc: 0.811
[LightGBM] [Info] Number of positive: 1210497, number of negative: 634912
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19776
[LightGBM] [Info] Number of data points in the train set: 1845409, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655951

((1845711, 39), (1845711, 1), (2035, 39), (2035, 1))

[LightGBM] [Info] Number of positive: 1213226, number of negative: 632485
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19230
[LightGBM] [Info] Number of data points in the train set: 1845711, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657322 -> initscore=0.651382
[LightGBM] [Info] Start training from score 0.651382
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1032]	valid_0's binary_logloss: 0.53469
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1213226, number of negative: 632485
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19720
[LightGBM] [Info] Number of data points in the train set: 1845711, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657322

((1845937, 39), (1845937, 1), (2032, 39), (2032, 1))

[LightGBM] [Info] Number of positive: 1211726, number of negative: 634211
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19249
[LightGBM] [Info] Number of data points in the train set: 1845937, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656429 -> initscore=0.647419
[LightGBM] [Info] Start training from score 0.647419
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1094]	valid_0's binary_logloss: 0.525749
model2 Val Auc: 0.811
[LightGBM] [Info] Number of positive: 1211726, number of negative: 634211
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19739
[LightGBM] [Info] Number of data points in the train set: 1845937, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65642

((1845954, 39), (1845954, 1), (1983, 39), (1983, 1))

[LightGBM] [Info] Number of positive: 1209141, number of negative: 636813
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19293
[LightGBM] [Info] Number of data points in the train set: 1845954, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655022 -> initscore=0.641189
[LightGBM] [Info] Start training from score 0.641189
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1061]	valid_0's binary_logloss: 0.528091
model2 Val Auc: 0.810
[LightGBM] [Info] Number of positive: 1209141, number of negative: 636813
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19783
[LightGBM] [Info] Number of data points in the train set: 1845954, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65502

((1845674, 39), (1845674, 1), (2027, 39), (2027, 1))

[LightGBM] [Info] Number of positive: 1206656, number of negative: 639018
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19271
[LightGBM] [Info] Number of data points in the train set: 1845674, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653775 -> initscore=0.635676
[LightGBM] [Info] Start training from score 0.635676
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[875]	valid_0's binary_logloss: 0.532638
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1206656, number of negative: 639018
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19761
[LightGBM] [Info] Number of data points in the train set: 1845674, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653775

((1845820, 39), (1845820, 1), (1981, 39), (1981, 1))

[LightGBM] [Info] Number of positive: 1208828, number of negative: 636992
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19281
[LightGBM] [Info] Number of data points in the train set: 1845820, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654900 -> initscore=0.640649
[LightGBM] [Info] Start training from score 0.640649
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[652]	valid_0's binary_logloss: 0.539191
model2 Val Auc: 0.801
[LightGBM] [Info] Number of positive: 1208828, number of negative: 636992
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19771
[LightGBM] [Info] Number of data points in the train set: 1845820, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654900

((1845978, 39), (1845978, 1), (1970, 39), (1970, 1))

[LightGBM] [Info] Number of positive: 1208587, number of negative: 637391
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19305
[LightGBM] [Info] Number of data points in the train set: 1845978, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654714 -> initscore=0.639824
[LightGBM] [Info] Start training from score 0.639824
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[798]	valid_0's binary_logloss: 0.520826
model2 Val Auc: 0.817
[LightGBM] [Info] Number of positive: 1208587, number of negative: 637391
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19795
[LightGBM] [Info] Number of data points in the train set: 1845978, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654714

((1845901, 39), (1845901, 1), (2044, 39), (2044, 1))

[LightGBM] [Info] Number of positive: 1208438, number of negative: 637463
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19213
[LightGBM] [Info] Number of data points in the train set: 1845901, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654660 -> initscore=0.639588
[LightGBM] [Info] Start training from score 0.639588
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1351]	valid_0's binary_logloss: 0.521406
model2 Val Auc: 0.814
[LightGBM] [Info] Number of positive: 1208438, number of negative: 637463
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19703
[LightGBM] [Info] Number of data points in the train set: 1845901, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65466

((1845312, 39), (1845312, 1), (1966, 39), (1966, 1))

[LightGBM] [Info] Number of positive: 1208261, number of negative: 637051
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19307
[LightGBM] [Info] Number of data points in the train set: 1845312, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654773 -> initscore=0.640088
[LightGBM] [Info] Start training from score 0.640088
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[600]	valid_0's binary_logloss: 0.529735
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1208261, number of negative: 637051
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19797
[LightGBM] [Info] Number of data points in the train set: 1845312, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654773

((1844865, 39), (1844865, 1), (2069, 39), (2069, 1))

[LightGBM] [Info] Number of positive: 1207442, number of negative: 637423
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19196
[LightGBM] [Info] Number of data points in the train set: 1844865, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654488 -> initscore=0.638826
[LightGBM] [Info] Start training from score 0.638826
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[938]	valid_0's binary_logloss: 0.524123
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1207442, number of negative: 637423
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19686
[LightGBM] [Info] Number of data points in the train set: 1844865, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654488 -> initscore=0.638826
[LightGBM] [Info] Start training from sco

((1845900, 39), (1845900, 1), (2019, 39), (2019, 1))

[LightGBM] [Info] Number of positive: 1213485, number of negative: 632415
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19247
[LightGBM] [Info] Number of data points in the train set: 1845900, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657395 -> initscore=0.651706
[LightGBM] [Info] Start training from score 0.651706
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[883]	valid_0's binary_logloss: 0.533255
model2 Val Auc: 0.807
[LightGBM] [Info] Number of positive: 1213485, number of negative: 632415
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19737
[LightGBM] [Info] Number of data points in the train set: 1845900, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657395

((1845851, 39), (1845851, 1), (2003, 39), (2003, 1))

[LightGBM] [Info] Number of positive: 1201478, number of negative: 644373
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19298
[LightGBM] [Info] Number of data points in the train set: 1845851, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650907 -> initscore=0.623030
[LightGBM] [Info] Start training from score 0.623030
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[726]	valid_0's binary_logloss: 0.53551
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1201478, number of negative: 644373
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19788
[LightGBM] [Info] Number of data points in the train set: 1845851, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650907 

((1845822, 39), (1845822, 1), (2007, 39), (2007, 1))

[LightGBM] [Info] Number of positive: 1207603, number of negative: 638219
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19294
[LightGBM] [Info] Number of data points in the train set: 1845822, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654236 -> initscore=0.637711
[LightGBM] [Info] Start training from score 0.637711
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[739]	valid_0's binary_logloss: 0.536371
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1207603, number of negative: 638219
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19784
[LightGBM] [Info] Number of data points in the train set: 1845822, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654236

((1845891, 39), (1845891, 1), (1996, 39), (1996, 1))

[LightGBM] [Info] Number of positive: 1205186, number of negative: 640705
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19270
[LightGBM] [Info] Number of data points in the train set: 1845891, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652902 -> initscore=0.631820
[LightGBM] [Info] Start training from score 0.631820
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[589]	valid_0's binary_logloss: 0.545092
model2 Val Auc: 0.795
[LightGBM] [Info] Number of positive: 1205186, number of negative: 640705
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19760
[LightGBM] [Info] Number of data points in the train set: 1845891, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652902

((1845542, 39), (1845542, 1), (1987, 39), (1987, 1))

[LightGBM] [Info] Number of positive: 1206721, number of negative: 638821
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19275
[LightGBM] [Info] Number of data points in the train set: 1845542, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653857 -> initscore=0.636038
[LightGBM] [Info] Start training from score 0.636038
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1251]	valid_0's binary_logloss: 0.519899
model2 Val Auc: 0.817
[LightGBM] [Info] Number of positive: 1206721, number of negative: 638821
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19765
[LightGBM] [Info] Number of data points in the train set: 1845542, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65385

((1845146, 39), (1845146, 1), (1920, 39), (1920, 1))

[LightGBM] [Info] Number of positive: 1211445, number of negative: 633701
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19348
[LightGBM] [Info] Number of data points in the train set: 1845146, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656558 -> initscore=0.647992
[LightGBM] [Info] Start training from score 0.647992
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[649]	valid_0's binary_logloss: 0.516279
model2 Val Auc: 0.821
[LightGBM] [Info] Number of positive: 1211445, number of negative: 633701
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19838
[LightGBM] [Info] Number of data points in the train set: 1845146, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656558

((1845805, 39), (1845805, 1), (2007, 39), (2007, 1))

[LightGBM] [Info] Number of positive: 1211416, number of negative: 634389
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19253
[LightGBM] [Info] Number of data points in the train set: 1845805, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656308 -> initscore=0.646883
[LightGBM] [Info] Start training from score 0.646883
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[695]	valid_0's binary_logloss: 0.52263
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1211416, number of negative: 634389
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19743
[LightGBM] [Info] Number of data points in the train set: 1845805, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656308 

((1845771, 39), (1845771, 1), (2020, 39), (2020, 1))

[LightGBM] [Info] Number of positive: 1207993, number of negative: 637778
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19226
[LightGBM] [Info] Number of data points in the train set: 1845771, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654465 -> initscore=0.638725
[LightGBM] [Info] Start training from score 0.638725
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[540]	valid_0's binary_logloss: 0.532763
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1207993, number of negative: 637778
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19716
[LightGBM] [Info] Number of data points in the train set: 1845771, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654465

((1845977, 39), (1845977, 1), (1947, 39), (1947, 1))

[LightGBM] [Info] Number of positive: 1209559, number of negative: 636418
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19344
[LightGBM] [Info] Number of data points in the train set: 1845977, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655241 -> initscore=0.642156
[LightGBM] [Info] Start training from score 0.642156
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[633]	valid_0's binary_logloss: 0.542738
model2 Val Auc: 0.798
[LightGBM] [Info] Number of positive: 1209559, number of negative: 636418
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19834
[LightGBM] [Info] Number of data points in the train set: 1845977, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655241

((1845739, 39), (1845739, 1), (2013, 39), (2013, 1))

[LightGBM] [Info] Number of positive: 1204410, number of negative: 641329
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19272
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652535 -> initscore=0.630203
[LightGBM] [Info] Start training from score 0.630203
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[992]	valid_0's binary_logloss: 0.535851
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1204410, number of negative: 641329
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19762
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652535

((1845868, 39), (1845868, 1), (1990, 39), (1990, 1))

[LightGBM] [Info] Number of positive: 1209502, number of negative: 636366
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19288
[LightGBM] [Info] Number of data points in the train set: 1845868, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655248 -> initscore=0.642190
[LightGBM] [Info] Start training from score 0.642190
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[847]	valid_0's binary_logloss: 0.53164
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1209502, number of negative: 636366
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19778
[LightGBM] [Info] Number of data points in the train set: 1845868, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655248 

((1845887, 39), (1845887, 1), (2043, 39), (2043, 1))

[LightGBM] [Info] Number of positive: 1212391, number of negative: 633496
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19236
[LightGBM] [Info] Number of data points in the train set: 1845887, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656807 -> initscore=0.649096
[LightGBM] [Info] Start training from score 0.649096
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[829]	valid_0's binary_logloss: 0.526048
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1212391, number of negative: 633496
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19726
[LightGBM] [Info] Number of data points in the train set: 1845887, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656807

((1845699, 39), (1845699, 1), (1977, 39), (1977, 1))

[LightGBM] [Info] Number of positive: 1205979, number of negative: 639720
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19296
[LightGBM] [Info] Number of data points in the train set: 1845699, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653400 -> initscore=0.634016
[LightGBM] [Info] Start training from score 0.634016
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1012]	valid_0's binary_logloss: 0.533223
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1205979, number of negative: 639720
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19786
[LightGBM] [Info] Number of data points in the train set: 1845699, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65340

((1845922, 39), (1845922, 1), (2042, 39), (2042, 1))

[LightGBM] [Info] Number of positive: 1210772, number of negative: 635150
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19243
[LightGBM] [Info] Number of data points in the train set: 1845922, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655917 -> initscore=0.645152
[LightGBM] [Info] Start training from score 0.645152
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[443]	valid_0's binary_logloss: 0.526731
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1210772, number of negative: 635150
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19733
[LightGBM] [Info] Number of data points in the train set: 1845922, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655917

((1845652, 39), (1845652, 1), (2037, 39), (2037, 1))

[LightGBM] [Info] Number of positive: 1212937, number of negative: 632715
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19229
[LightGBM] [Info] Number of data points in the train set: 1845652, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657186 -> initscore=0.650780
[LightGBM] [Info] Start training from score 0.650780
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1022]	valid_0's binary_logloss: 0.539056
model2 Val Auc: 0.800
[LightGBM] [Info] Number of positive: 1212937, number of negative: 632715
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19719
[LightGBM] [Info] Number of data points in the train set: 1845652, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65718

((1845527, 39), (1845527, 1), (2007, 39), (2007, 1))

[LightGBM] [Info] Number of positive: 1209948, number of negative: 635579
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19229
[LightGBM] [Info] Number of data points in the train set: 1845527, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655611 -> initscore=0.643796
[LightGBM] [Info] Start training from score 0.643796
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[558]	valid_0's binary_logloss: 0.52991
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1209948, number of negative: 635579
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19719
[LightGBM] [Info] Number of data points in the train set: 1845527, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655611 

((1845922, 39), (1845922, 1), (1993, 39), (1993, 1))

[LightGBM] [Info] Number of positive: 1205958, number of negative: 639964
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19277
[LightGBM] [Info] Number of data points in the train set: 1845922, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653309 -> initscore=0.633618
[LightGBM] [Info] Start training from score 0.633618
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[604]	valid_0's binary_logloss: 0.533884
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1205958, number of negative: 639964
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19767
[LightGBM] [Info] Number of data points in the train set: 1845922, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653309

((1845794, 39), (1845794, 1), (1961, 39), (1961, 1))

[LightGBM] [Info] Number of positive: 1208300, number of negative: 637494
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19306
[LightGBM] [Info] Number of data points in the train set: 1845794, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654623 -> initscore=0.639425
[LightGBM] [Info] Start training from score 0.639425
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[753]	valid_0's binary_logloss: 0.516833
model2 Val Auc: 0.823
[LightGBM] [Info] Number of positive: 1208300, number of negative: 637494
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19796
[LightGBM] [Info] Number of data points in the train set: 1845794, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654623

((1845500, 39), (1845500, 1), (2016, 39), (2016, 1))

[LightGBM] [Info] Number of positive: 1210144, number of negative: 635356
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19258
[LightGBM] [Info] Number of data points in the train set: 1845500, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655727 -> initscore=0.644309
[LightGBM] [Info] Start training from score 0.644309
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1268]	valid_0's binary_logloss: 0.52766
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1210144, number of negative: 635356
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19748
[LightGBM] [Info] Number of data points in the train set: 1845500, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655727

((1845974, 39), (1845974, 1), (1944, 39), (1944, 1))

[LightGBM] [Info] Number of positive: 1209901, number of negative: 636073
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19309
[LightGBM] [Info] Number of data points in the train set: 1845974, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655427 -> initscore=0.642980
[LightGBM] [Info] Start training from score 0.642980
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[904]	valid_0's binary_logloss: 0.546054
model2 Val Auc: 0.795
[LightGBM] [Info] Number of positive: 1209901, number of negative: 636073
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19799
[LightGBM] [Info] Number of data points in the train set: 1845974, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655427

((1845521, 39), (1845521, 1), (2060, 39), (2060, 1))

[LightGBM] [Info] Number of positive: 1210416, number of negative: 635105
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19224
[LightGBM] [Info] Number of data points in the train set: 1845521, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655867 -> initscore=0.644929
[LightGBM] [Info] Start training from score 0.644929
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[773]	valid_0's binary_logloss: 0.535615
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1210416, number of negative: 635105
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19714
[LightGBM] [Info] Number of data points in the train set: 1845521, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655867

((1845500, 39), (1845500, 1), (1995, 39), (1995, 1))

[LightGBM] [Info] Number of positive: 1208061, number of negative: 637439
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19272
[LightGBM] [Info] Number of data points in the train set: 1845500, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654598 -> initscore=0.639313
[LightGBM] [Info] Start training from score 0.639313
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[714]	valid_0's binary_logloss: 0.530679
model2 Val Auc: 0.808
[LightGBM] [Info] Number of positive: 1208061, number of negative: 637439
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19762
[LightGBM] [Info] Number of data points in the train set: 1845500, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654598

((1845797, 39), (1845797, 1), (2000, 39), (2000, 1))

[LightGBM] [Info] Number of positive: 1204139, number of negative: 641658
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19289
[LightGBM] [Info] Number of data points in the train set: 1845797, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652368 -> initscore=0.629465
[LightGBM] [Info] Start training from score 0.629465
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[960]	valid_0's binary_logloss: 0.525903
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1204139, number of negative: 641658
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19779
[LightGBM] [Info] Number of data points in the train set: 1845797, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652368

((1845470, 39), (1845470, 1), (2073, 39), (2073, 1))

[LightGBM] [Info] Number of positive: 1206350, number of negative: 639120
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19245
[LightGBM] [Info] Number of data points in the train set: 1845470, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653682 -> initscore=0.635262
[LightGBM] [Info] Start training from score 0.635262
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[927]	valid_0's binary_logloss: 0.526986
model2 Val Auc: 0.811
[LightGBM] [Info] Number of positive: 1206350, number of negative: 639120
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19735
[LightGBM] [Info] Number of data points in the train set: 1845470, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653682

((1845514, 39), (1845514, 1), (1955, 39), (1955, 1))

[LightGBM] [Info] Number of positive: 1208789, number of negative: 636725
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19316
[LightGBM] [Info] Number of data points in the train set: 1845514, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654988 -> initscore=0.641036
[LightGBM] [Info] Start training from score 0.641036
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[815]	valid_0's binary_logloss: 0.5138
model2 Val Auc: 0.823
[LightGBM] [Info] Number of positive: 1208789, number of negative: 636725
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19806
[LightGBM] [Info] Number of data points in the train set: 1845514, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654988 -

((1845303, 39), (1845303, 1), (1996, 39), (1996, 1))

[LightGBM] [Info] Number of positive: 1207262, number of negative: 638041
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19284
[LightGBM] [Info] Number of data points in the train set: 1845303, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654235 -> initscore=0.637708
[LightGBM] [Info] Start training from score 0.637708
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[999]	valid_0's binary_logloss: 0.512973
model2 Val Auc: 0.822
[LightGBM] [Info] Number of positive: 1207262, number of negative: 638041
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19774
[LightGBM] [Info] Number of data points in the train set: 1845303, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654235

((1845978, 39), (1845978, 1), (2058, 39), (2058, 1))

[LightGBM] [Info] Number of positive: 1208157, number of negative: 637821
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19228
[LightGBM] [Info] Number of data points in the train set: 1845978, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654481 -> initscore=0.638794
[LightGBM] [Info] Start training from score 0.638794
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[711]	valid_0's binary_logloss: 0.5319
model2 Val Auc: 0.808
[LightGBM] [Info] Number of positive: 1208157, number of negative: 637821
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19718
[LightGBM] [Info] Number of data points in the train set: 1845978, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654481 -

((1845753, 39), (1845753, 1), (2076, 39), (2076, 1))

[LightGBM] [Info] Number of positive: 1209669, number of negative: 636084
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19204
[LightGBM] [Info] Number of data points in the train set: 1845753, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655380 -> initscore=0.642771
[LightGBM] [Info] Start training from score 0.642771
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[888]	valid_0's binary_logloss: 0.539966
model2 Val Auc: 0.800
[LightGBM] [Info] Number of positive: 1209669, number of negative: 636084
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19694
[LightGBM] [Info] Number of data points in the train set: 1845753, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655380

((1845855, 39), (1845855, 1), (2033, 39), (2033, 1))

[LightGBM] [Info] Number of positive: 1213621, number of negative: 632234
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19243
[LightGBM] [Info] Number of data points in the train set: 1845855, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657484 -> initscore=0.652104
[LightGBM] [Info] Start training from score 0.652104
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[528]	valid_0's binary_logloss: 0.538484
model2 Val Auc: 0.801
[LightGBM] [Info] Number of positive: 1213621, number of negative: 632234
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19733
[LightGBM] [Info] Number of data points in the train set: 1845855, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657484

((1845418, 39), (1845418, 1), (2015, 39), (2015, 1))

[LightGBM] [Info] Number of positive: 1206995, number of negative: 638423
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19249
[LightGBM] [Info] Number of data points in the train set: 1845418, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654050 -> initscore=0.636888
[LightGBM] [Info] Start training from score 0.636888
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[437]	valid_0's binary_logloss: 0.528328
model2 Val Auc: 0.810
[LightGBM] [Info] Number of positive: 1206995, number of negative: 638423
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19739
[LightGBM] [Info] Number of data points in the train set: 1845418, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654050

((1845562, 39), (1845562, 1), (1972, 39), (1972, 1))

[LightGBM] [Info] Number of positive: 1208965, number of negative: 636597
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19294
[LightGBM] [Info] Number of data points in the train set: 1845562, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655066 -> initscore=0.641383
[LightGBM] [Info] Start training from score 0.641383
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[494]	valid_0's binary_logloss: 0.532757
model2 Val Auc: 0.807
[LightGBM] [Info] Number of positive: 1208965, number of negative: 636597
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19784
[LightGBM] [Info] Number of data points in the train set: 1845562, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655066

((1845929, 39), (1845929, 1), (2079, 39), (2079, 1))

[LightGBM] [Info] Number of positive: 1205385, number of negative: 640544
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19179
[LightGBM] [Info] Number of data points in the train set: 1845929, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652996 -> initscore=0.632236
[LightGBM] [Info] Start training from score 0.632236
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1332]	valid_0's binary_logloss: 0.518993
model2 Val Auc: 0.818
[LightGBM] [Info] Number of positive: 1205385, number of negative: 640544
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19669
[LightGBM] [Info] Number of data points in the train set: 1845929, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65299

((1845923, 39), (1845923, 1), (1994, 39), (1994, 1))

[LightGBM] [Info] Number of positive: 1208712, number of negative: 637211
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19248
[LightGBM] [Info] Number of data points in the train set: 1845923, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654801 -> initscore=0.640210
[LightGBM] [Info] Start training from score 0.640210
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1123]	valid_0's binary_logloss: 0.525709
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1208712, number of negative: 637211
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19738
[LightGBM] [Info] Number of data points in the train set: 1845923, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65480

((1844831, 39), (1844831, 1), (2036, 39), (2036, 1))

[LightGBM] [Info] Number of positive: 1209632, number of negative: 635199
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19247
[LightGBM] [Info] Number of data points in the train set: 1844831, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655687 -> initscore=0.644133
[LightGBM] [Info] Start training from score 0.644133
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[636]	valid_0's binary_logloss: 0.536773
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1209632, number of negative: 635199
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19737
[LightGBM] [Info] Number of data points in the train set: 1844831, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655687

((1845846, 39), (1845846, 1), (1945, 39), (1945, 1))

[LightGBM] [Info] Number of positive: 1207410, number of negative: 638436
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19329
[LightGBM] [Info] Number of data points in the train set: 1845846, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654123 -> initscore=0.637211
[LightGBM] [Info] Start training from score 0.637211
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[611]	valid_0's binary_logloss: 0.523162
model2 Val Auc: 0.817
[LightGBM] [Info] Number of positive: 1207410, number of negative: 638436
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19819
[LightGBM] [Info] Number of data points in the train set: 1845846, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654123

((1845429, 39), (1845429, 1), (1987, 39), (1987, 1))

[LightGBM] [Info] Number of positive: 1205781, number of negative: 639648
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19281
[LightGBM] [Info] Number of data points in the train set: 1845429, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653388 -> initscore=0.633965
[LightGBM] [Info] Start training from score 0.633965
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[697]	valid_0's binary_logloss: 0.539608
model2 Val Auc: 0.801
[LightGBM] [Info] Number of positive: 1205781, number of negative: 639648
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19771
[LightGBM] [Info] Number of data points in the train set: 1845429, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653388

((1845700, 39), (1845700, 1), (1990, 39), (1990, 1))

[LightGBM] [Info] Number of positive: 1209634, number of negative: 636066
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19279
[LightGBM] [Info] Number of data points in the train set: 1845700, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655380 -> initscore=0.642771
[LightGBM] [Info] Start training from score 0.642771
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1114]	valid_0's binary_logloss: 0.531611
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1209634, number of negative: 636066
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19769
[LightGBM] [Info] Number of data points in the train set: 1845700, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65538

((1845876, 39), (1845876, 1), (2050, 39), (2050, 1))

[LightGBM] [Info] Number of positive: 1203629, number of negative: 642247
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19235
[LightGBM] [Info] Number of data points in the train set: 1845876, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652064 -> initscore=0.628123
[LightGBM] [Info] Start training from score 0.628123
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1002]	valid_0's binary_logloss: 0.529765
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1203629, number of negative: 642247
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19725
[LightGBM] [Info] Number of data points in the train set: 1845876, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65206

((1845728, 39), (1845728, 1), (2011, 39), (2011, 1))

[LightGBM] [Info] Number of positive: 1207190, number of negative: 638538
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19286
[LightGBM] [Info] Number of data points in the train set: 1845728, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654045 -> initscore=0.636869
[LightGBM] [Info] Start training from score 0.636869
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[820]	valid_0's binary_logloss: 0.539985
model2 Val Auc: 0.799
[LightGBM] [Info] Number of positive: 1207190, number of negative: 638538
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19776
[LightGBM] [Info] Number of data points in the train set: 1845728, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654045

((1845940, 39), (1845940, 1), (2092, 39), (2092, 1))

[LightGBM] [Info] Number of positive: 1213758, number of negative: 632182
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19195
[LightGBM] [Info] Number of data points in the train set: 1845940, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657528 -> initscore=0.652299
[LightGBM] [Info] Start training from score 0.652299
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[743]	valid_0's binary_logloss: 0.529695
model2 Val Auc: 0.808
[LightGBM] [Info] Number of positive: 1213758, number of negative: 632182
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19685
[LightGBM] [Info] Number of data points in the train set: 1845940, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657528

((1845310, 39), (1845310, 1), (1993, 39), (1993, 1))

[LightGBM] [Info] Number of positive: 1205517, number of negative: 639793
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19286
[LightGBM] [Info] Number of data points in the train set: 1845310, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653287 -> initscore=0.633519
[LightGBM] [Info] Start training from score 0.633519
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[645]	valid_0's binary_logloss: 0.552769
model2 Val Auc: 0.790
[LightGBM] [Info] Number of positive: 1205517, number of negative: 639793
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19776
[LightGBM] [Info] Number of data points in the train set: 1845310, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653287

((1845740, 39), (1845740, 1), (1990, 39), (1990, 1))

[LightGBM] [Info] Number of positive: 1211638, number of negative: 634102
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19280
[LightGBM] [Info] Number of data points in the train set: 1845740, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656451 -> initscore=0.647519
[LightGBM] [Info] Start training from score 0.647519
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[978]	valid_0's binary_logloss: 0.534114
model2 Val Auc: 0.807
[LightGBM] [Info] Number of positive: 1211638, number of negative: 634102
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19770
[LightGBM] [Info] Number of data points in the train set: 1845740, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656451

((1845969, 39), (1845969, 1), (2048, 39), (2048, 1))

[LightGBM] [Info] Number of positive: 1211463, number of negative: 634506
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19227
[LightGBM] [Info] Number of data points in the train set: 1845969, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656275 -> initscore=0.646737
[LightGBM] [Info] Start training from score 0.646737
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[932]	valid_0's binary_logloss: 0.522412
model2 Val Auc: 0.815
[LightGBM] [Info] Number of positive: 1211463, number of negative: 634506
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19717
[LightGBM] [Info] Number of data points in the train set: 1845969, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656275

((1845893, 39), (1845893, 1), (1995, 39), (1995, 1))

[LightGBM] [Info] Number of positive: 1207411, number of negative: 638482
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19306
[LightGBM] [Info] Number of data points in the train set: 1845893, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654107 -> initscore=0.637140
[LightGBM] [Info] Start training from score 0.637140
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1378]	valid_0's binary_logloss: 0.52415
model2 Val Auc: 0.814
[LightGBM] [Info] Number of positive: 1207411, number of negative: 638482
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19796
[LightGBM] [Info] Number of data points in the train set: 1845893, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654107

((1845789, 39), (1845789, 1), (2031, 39), (2031, 1))

[LightGBM] [Info] Number of positive: 1207112, number of negative: 638677
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19218
[LightGBM] [Info] Number of data points in the train set: 1845789, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653982 -> initscore=0.636587
[LightGBM] [Info] Start training from score 0.636587
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[938]	valid_0's binary_logloss: 0.512201
model2 Val Auc: 0.825
[LightGBM] [Info] Number of positive: 1207112, number of negative: 638677
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19708
[LightGBM] [Info] Number of data points in the train set: 1845789, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653982

((1845679, 39), (1845679, 1), (2020, 39), (2020, 1))

[LightGBM] [Info] Number of positive: 1204724, number of negative: 640955
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19278
[LightGBM] [Info] Number of data points in the train set: 1845679, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652727 -> initscore=0.631047
[LightGBM] [Info] Start training from score 0.631047
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[642]	valid_0's binary_logloss: 0.530253
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1204724, number of negative: 640955
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19768
[LightGBM] [Info] Number of data points in the train set: 1845679, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652727

((1845678, 39), (1845678, 1), (2035, 39), (2035, 1))

[LightGBM] [Info] Number of positive: 1211928, number of negative: 633750
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19248
[LightGBM] [Info] Number of data points in the train set: 1845678, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656630 -> initscore=0.648313
[LightGBM] [Info] Start training from score 0.648313
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1003]	valid_0's binary_logloss: 0.53484
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1211928, number of negative: 633750
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19738
[LightGBM] [Info] Number of data points in the train set: 1845678, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656630

((1845704, 39), (1845704, 1), (2072, 39), (2072, 1))

[LightGBM] [Info] Number of positive: 1208635, number of negative: 637069
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19245
[LightGBM] [Info] Number of data points in the train set: 1845704, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654837 -> initscore=0.640369
[LightGBM] [Info] Start training from score 0.640369
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[728]	valid_0's binary_logloss: 0.530834
model2 Val Auc: 0.808
[LightGBM] [Info] Number of positive: 1208635, number of negative: 637069
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19735
[LightGBM] [Info] Number of data points in the train set: 1845704, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654837

((1845956, 39), (1845956, 1), (1989, 39), (1989, 1))

[LightGBM] [Info] Number of positive: 1209212, number of negative: 636744
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19303
[LightGBM] [Info] Number of data points in the train set: 1845956, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655060 -> initscore=0.641356
[LightGBM] [Info] Start training from score 0.641356
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[592]	valid_0's binary_logloss: 0.543325
model2 Val Auc: 0.798
[LightGBM] [Info] Number of positive: 1209212, number of negative: 636744
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19793
[LightGBM] [Info] Number of data points in the train set: 1845956, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655060

((1845524, 39), (1845524, 1), (2022, 39), (2022, 1))

[LightGBM] [Info] Number of positive: 1207433, number of negative: 638091
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19279
[LightGBM] [Info] Number of data points in the train set: 1845524, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654249 -> initscore=0.637771
[LightGBM] [Info] Start training from score 0.637771
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[928]	valid_0's binary_logloss: 0.529117
model2 Val Auc: 0.809
[LightGBM] [Info] Number of positive: 1207433, number of negative: 638091
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19769
[LightGBM] [Info] Number of data points in the train set: 1845524, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654249

((1845930, 39), (1845930, 1), (1988, 39), (1988, 1))

[LightGBM] [Info] Number of positive: 1211477, number of negative: 634453
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19295
[LightGBM] [Info] Number of data points in the train set: 1845930, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656296 -> initscore=0.646832
[LightGBM] [Info] Start training from score 0.646832
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[871]	valid_0's binary_logloss: 0.531952
model2 Val Auc: 0.806
[LightGBM] [Info] Number of positive: 1211477, number of negative: 634453
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19785
[LightGBM] [Info] Number of data points in the train set: 1845930, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656296

((1845521, 39), (1845521, 1), (2014, 39), (2014, 1))

[LightGBM] [Info] Number of positive: 1204443, number of negative: 641078
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19274
[LightGBM] [Info] Number of data points in the train set: 1845521, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652630 -> initscore=0.630621
[LightGBM] [Info] Start training from score 0.630621
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[770]	valid_0's binary_logloss: 0.51333
model2 Val Auc: 0.823
[LightGBM] [Info] Number of positive: 1204443, number of negative: 641078
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19764
[LightGBM] [Info] Number of data points in the train set: 1845521, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652630 

((1845719, 39), (1845719, 1), (2020, 39), (2020, 1))

[LightGBM] [Info] Number of positive: 1215266, number of negative: 630453
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19248
[LightGBM] [Info] Number of data points in the train set: 1845719, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.658424 -> initscore=0.656280
[LightGBM] [Info] Start training from score 0.656280
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[709]	valid_0's binary_logloss: 0.557438
model2 Val Auc: 0.788
[LightGBM] [Info] Number of positive: 1215266, number of negative: 630453
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19738
[LightGBM] [Info] Number of data points in the train set: 1845719, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.658424

((1845813, 39), (1845813, 1), (2013, 39), (2013, 1))

[LightGBM] [Info] Number of positive: 1205061, number of negative: 640752
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19298
[LightGBM] [Info] Number of data points in the train set: 1845813, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652862 -> initscore=0.631643
[LightGBM] [Info] Start training from score 0.631643
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[388]	valid_0's binary_logloss: 0.526302
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1205061, number of negative: 640752
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19788
[LightGBM] [Info] Number of data points in the train set: 1845813, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652862

((1845888, 39), (1845888, 1), (1998, 39), (1998, 1))

[LightGBM] [Info] Number of positive: 1205999, number of negative: 639889
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19252
[LightGBM] [Info] Number of data points in the train set: 1845888, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653344 -> initscore=0.633769
[LightGBM] [Info] Start training from score 0.633769
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[640]	valid_0's binary_logloss: 0.539683
model2 Val Auc: 0.799
[LightGBM] [Info] Number of positive: 1205999, number of negative: 639889
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19742
[LightGBM] [Info] Number of data points in the train set: 1845888, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653344

((1845517, 39), (1845517, 1), (2028, 39), (2028, 1))

[LightGBM] [Info] Number of positive: 1212483, number of negative: 633034
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19264
[LightGBM] [Info] Number of data points in the train set: 1845517, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656988 -> initscore=0.649901
[LightGBM] [Info] Start training from score 0.649901
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[723]	valid_0's binary_logloss: 0.519001
model2 Val Auc: 0.818
[LightGBM] [Info] Number of positive: 1212483, number of negative: 633034
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19754
[LightGBM] [Info] Number of data points in the train set: 1845517, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656988

((1845847, 39), (1845847, 1), (2078, 39), (2078, 1))

[LightGBM] [Info] Number of positive: 1205673, number of negative: 640174
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19188
[LightGBM] [Info] Number of data points in the train set: 1845847, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653181 -> initscore=0.633053
[LightGBM] [Info] Start training from score 0.633053
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[989]	valid_0's binary_logloss: 0.5347
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1205673, number of negative: 640174
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19678
[LightGBM] [Info] Number of data points in the train set: 1845847, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653181 -

((1844623, 39), (1844623, 1), (1985, 39), (1985, 1))

[LightGBM] [Info] Number of positive: 1204441, number of negative: 640182
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19279
[LightGBM] [Info] Number of data points in the train set: 1844623, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652947 -> initscore=0.632018
[LightGBM] [Info] Start training from score 0.632018
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[929]	valid_0's binary_logloss: 0.534791
model2 Val Auc: 0.805
[LightGBM] [Info] Number of positive: 1204441, number of negative: 640182
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19769
[LightGBM] [Info] Number of data points in the train set: 1844623, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.652947

((1845839, 39), (1845839, 1), (2040, 39), (2040, 1))

[LightGBM] [Info] Number of positive: 1207423, number of negative: 638416
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19248
[LightGBM] [Info] Number of data points in the train set: 1845839, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654132 -> initscore=0.637254
[LightGBM] [Info] Start training from score 0.637254
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[497]	valid_0's binary_logloss: 0.528029
model2 Val Auc: 0.811
[LightGBM] [Info] Number of positive: 1207423, number of negative: 638416
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19738
[LightGBM] [Info] Number of data points in the train set: 1845839, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654132

((1845739, 39), (1845739, 1), (2031, 39), (2031, 1))

[LightGBM] [Info] Number of positive: 1206345, number of negative: 639394
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19257
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653584 -> initscore=0.634830
[LightGBM] [Info] Start training from score 0.634830
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1012]	valid_0's binary_logloss: 0.526636
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1206345, number of negative: 639394
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19747
[LightGBM] [Info] Number of data points in the train set: 1845739, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.65358

((1845638, 39), (1845638, 1), (1980, 39), (1980, 1))

[LightGBM] [Info] Number of positive: 1208598, number of negative: 637040
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19280
[LightGBM] [Info] Number of data points in the train set: 1845638, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654840 -> initscore=0.640384
[LightGBM] [Info] Start training from score 0.640384
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[626]	valid_0's binary_logloss: 0.515243
model2 Val Auc: 0.821
[LightGBM] [Info] Number of positive: 1208598, number of negative: 637040
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19770
[LightGBM] [Info] Number of data points in the train set: 1845638, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654840

((1845590, 39), (1845590, 1), (2008, 39), (2008, 1))

[LightGBM] [Info] Number of positive: 1207960, number of negative: 637630
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19254
[LightGBM] [Info] Number of data points in the train set: 1845590, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654512 -> initscore=0.638930
[LightGBM] [Info] Start training from score 0.638930
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[289]	valid_0's binary_logloss: 0.552042
model2 Val Auc: 0.789
[LightGBM] [Info] Number of positive: 1207960, number of negative: 637630
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19744
[LightGBM] [Info] Number of data points in the train set: 1845590, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654512

((1845848, 39), (1845848, 1), (1966, 39), (1966, 1))

[LightGBM] [Info] Number of positive: 1212445, number of negative: 633403
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19312
[LightGBM] [Info] Number of data points in the train set: 1845848, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656850 -> initscore=0.649287
[LightGBM] [Info] Start training from score 0.649287
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[843]	valid_0's binary_logloss: 0.523505
model2 Val Auc: 0.815
[LightGBM] [Info] Number of positive: 1212445, number of negative: 633403
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19802
[LightGBM] [Info] Number of data points in the train set: 1845848, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656850

((1845900, 39), (1845900, 1), (1987, 39), (1987, 1))

[LightGBM] [Info] Number of positive: 1206692, number of negative: 639208
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19275
[LightGBM] [Info] Number of data points in the train set: 1845900, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653715 -> initscore=0.635408
[LightGBM] [Info] Start training from score 0.635408
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[603]	valid_0's binary_logloss: 0.532848
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1206692, number of negative: 639208
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19765
[LightGBM] [Info] Number of data points in the train set: 1845900, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653715

((1845587, 39), (1845587, 1), (2062, 39), (2062, 1))

[LightGBM] [Info] Number of positive: 1208311, number of negative: 637276
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19219
[LightGBM] [Info] Number of data points in the train set: 1845587, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654703 -> initscore=0.639776
[LightGBM] [Info] Start training from score 0.639776
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[768]	valid_0's binary_logloss: 0.53791
model2 Val Auc: 0.804
[LightGBM] [Info] Number of positive: 1208311, number of negative: 637276
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19709
[LightGBM] [Info] Number of data points in the train set: 1845587, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654703 

((1845809, 39), (1845809, 1), (2043, 39), (2043, 1))

[LightGBM] [Info] Number of positive: 1208465, number of negative: 637344
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19238
[LightGBM] [Info] Number of data points in the train set: 1845809, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654708 -> initscore=0.639797
[LightGBM] [Info] Start training from score 0.639797
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[823]	valid_0's binary_logloss: 0.525798
model2 Val Auc: 0.812
[LightGBM] [Info] Number of positive: 1208465, number of negative: 637344
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19728
[LightGBM] [Info] Number of data points in the train set: 1845809, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654708

((1845524, 39), (1845524, 1), (1943, 39), (1943, 1))

[LightGBM] [Info] Number of positive: 1207246, number of negative: 638278
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19314
[LightGBM] [Info] Number of data points in the train set: 1845524, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654148 -> initscore=0.637323
[LightGBM] [Info] Start training from score 0.637323
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[672]	valid_0's binary_logloss: 0.546346
model2 Val Auc: 0.793
[LightGBM] [Info] Number of positive: 1207246, number of negative: 638278
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19804
[LightGBM] [Info] Number of data points in the train set: 1845524, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654148

((1845408, 39), (1845408, 1), (2047, 39), (2047, 1))

[LightGBM] [Info] Number of positive: 1211366, number of negative: 634042
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19204
[LightGBM] [Info] Number of data points in the train set: 1845408, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656422 -> initscore=0.647389
[LightGBM] [Info] Start training from score 0.647389
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[925]	valid_0's binary_logloss: 0.535539
model2 Val Auc: 0.803
[LightGBM] [Info] Number of positive: 1211366, number of negative: 634042
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19694
[LightGBM] [Info] Number of data points in the train set: 1845408, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656422

((1845774, 39), (1845774, 1), (2031, 39), (2031, 1))

[LightGBM] [Info] Number of positive: 1210969, number of negative: 634805
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19232
[LightGBM] [Info] Number of data points in the train set: 1845774, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656077 -> initscore=0.645858
[LightGBM] [Info] Start training from score 0.645858
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[988]	valid_0's binary_logloss: 0.544921
model2 Val Auc: 0.796
[LightGBM] [Info] Number of positive: 1210969, number of negative: 634805
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19722
[LightGBM] [Info] Number of data points in the train set: 1845774, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656077

((1845491, 39), (1845491, 1), (2040, 39), (2040, 1))

[LightGBM] [Info] Number of positive: 1210990, number of negative: 634501
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19254
[LightGBM] [Info] Number of data points in the train set: 1845491, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656189 -> initscore=0.646355
[LightGBM] [Info] Start training from score 0.646355
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[808]	valid_0's binary_logloss: 0.524009
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1210990, number of negative: 634501
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19744
[LightGBM] [Info] Number of data points in the train set: 1845491, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.656189

((1845808, 39), (1845808, 1), (2110, 39), (2110, 1))

[LightGBM] [Info] Number of positive: 1208434, number of negative: 637374
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19196
[LightGBM] [Info] Number of data points in the train set: 1845808, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654691 -> initscore=0.639724
[LightGBM] [Info] Start training from score 0.639724
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[827]	valid_0's binary_logloss: 0.525786
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1208434, number of negative: 637374
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19686
[LightGBM] [Info] Number of data points in the train set: 1845808, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654691

((1845858, 39), (1845858, 1), (2014, 39), (2014, 1))

[LightGBM] [Info] Number of positive: 1207033, number of negative: 638825
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19287
[LightGBM] [Info] Number of data points in the train set: 1845858, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653914 -> initscore=0.636290
[LightGBM] [Info] Start training from score 0.636290
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[965]	valid_0's binary_logloss: 0.522364
model2 Val Auc: 0.813
[LightGBM] [Info] Number of positive: 1207033, number of negative: 638825
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19777
[LightGBM] [Info] Number of data points in the train set: 1845858, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653914

((1845722, 39), (1845722, 1), (1980, 39), (1980, 1))

[LightGBM] [Info] Number of positive: 1210271, number of negative: 635451
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19309
[LightGBM] [Info] Number of data points in the train set: 1845722, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655717 -> initscore=0.644265
[LightGBM] [Info] Start training from score 0.644265
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[812]	valid_0's binary_logloss: 0.543128
model2 Val Auc: 0.798
[LightGBM] [Info] Number of positive: 1210271, number of negative: 635451
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19799
[LightGBM] [Info] Number of data points in the train set: 1845722, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655717

### seed별 auc 확인

In [52]:
# Seed search: for every candidate split seed, rebuild the train/valid split,
# score the already-trained models in `model_dict` on the new validation split,
# and stop at the first seed whose AUC ranking of the models matches the target
# pattern checked at the bottom of the loop.

# Loop-invariant inputs: neither depends on the seed, so they are built once
# instead of re-filtering the full frame on every iteration.
test = df[df.kind=='test'] # test data
train2 = test[test.answerCode!=-1] # test rows with a known answer (answerCode != -1), used as extra training data

for s in range(100):
    train, valid = custom_train_test_split(train_df, ratio=0.7, seed=s) # split the training data with this seed
    train = pd.concat([train,train2]) # merge the labelled test rows into the training split
    x_train = train.drop('answerCode',axis=1)
    y_train = train[['answerCode']]
    x_valid = valid.drop('answerCode',axis=1)
    y_valid = valid[['answerCode']]
    # Bare expression kept on purpose: it is what emits the shape tuple seen in
    # the cell output for each seed (the notebook sets
    # InteractiveShell.ast_node_interactivity = 'all').
    x_train.shape, y_train.shape, x_valid.shape, y_valid.shape
    aucs=[]
    # Numeric suffixes of the `model_dict` keys ("model1", "model2", ...) to compare.
    for n in [1,2,6,7,71]:
        model_name = f"model{n}"
        model,feats = model_dict[model_name]  # each entry is a (model, feature list) pair
        auc = model2auc(model, x_valid, y_valid, feats)
        print(f"{model_name} {auc:.3f}",end=' ')
        aucs.append(auc)
    # Rank of each model's AUC among the five (0 = lowest AUC). Exactly tied
    # AUCs share the rank of the first occurrence in the sorted list.
    saucs = sorted(aucs)
    sort_index = [saucs.index(i) for i in aucs]
    print(sort_index)
    # Target pattern, in the order model1, model2, model6, model7, model71:
    # model2 best, then model1, model7, model6, and model71 worst.
    # NOTE(review): presumably this mirrors a ranking observed elsewhere
    # (e.g. on the leaderboard) -- confirm where the pattern comes from.
    if sort_index==[3,4,1,2,0]:
        print(s)
        post_slack("done")
        break

((1845739, 39), (1845739, 1), (1968, 39), (1968, 1))

model1 0.888 model2 0.856 model6 0.867 model7 0.870 model71 0.905 [3, 0, 1, 2, 4]


((1845952, 39), (1845952, 1), (2015, 39), (2015, 1))

model1 0.896 model2 0.864 model6 0.871 model7 0.876 model71 0.912 [3, 0, 1, 2, 4]


((1845977, 39), (1845977, 1), (2011, 39), (2011, 1))

model1 0.882 model2 0.849 model6 0.859 model7 0.864 model71 0.903 [3, 0, 1, 2, 4]


((1845528, 39), (1845528, 1), (2009, 39), (2009, 1))

model1 0.899 model2 0.865 model6 0.872 model7 0.877 model71 0.913 [3, 0, 1, 2, 4]


((1845319, 39), (1845319, 1), (2016, 39), (2016, 1))

model1 0.887 model2 0.856 model6 0.861 model7 0.868 model71 0.907 [3, 0, 1, 2, 4]


((1845636, 39), (1845636, 1), (1977, 39), (1977, 1))

model1 0.881 model2 0.845 model6 0.864 model7 0.868 model71 0.907 [3, 0, 1, 2, 4]


((1845842, 39), (1845842, 1), (1992, 39), (1992, 1))

model1 0.890 model2 0.857 model6 0.870 model7 0.874 model71 0.910 [3, 0, 1, 2, 4]


((1845872, 39), (1845872, 1), (1951, 39), (1951, 1))

model1 0.889 model2 0.856 model6 0.868 model7 0.873 model71 0.909 [3, 0, 1, 2, 4]


((1845720, 39), (1845720, 1), (2000, 39), (2000, 1))

model1 0.878 model2 0.849 model6 0.862 model7 0.866 model71 0.904 [3, 0, 1, 2, 4]


((1845915, 39), (1845915, 1), (1940, 39), (1940, 1))

model1 0.894 model2 0.857 model6 0.866 model7 0.870 model71 0.911 [3, 0, 1, 2, 4]


((1845869, 39), (1845869, 1), (2036, 39), (2036, 1))

model1 0.884 model2 0.855 model6 0.865 model7 0.870 model71 0.906 [3, 0, 1, 2, 4]


((1845919, 39), (1845919, 1), (2006, 39), (2006, 1))

model1 0.894 model2 0.860 model6 0.865 model7 0.869 model71 0.907 [3, 0, 1, 2, 4]


((1845257, 39), (1845257, 1), (2031, 39), (2031, 1))

model1 0.883 model2 0.849 model6 0.864 model7 0.868 model71 0.907 [3, 0, 1, 2, 4]


((1845539, 39), (1845539, 1), (1974, 39), (1974, 1))

model1 0.791 model2 0.797 model6 0.834 model7 0.834 model71 0.832 [0, 1, 3, 4, 2]


((1845583, 39), (1845583, 1), (1991, 39), (1991, 1))

model1 0.888 model2 0.855 model6 0.867 model7 0.872 model71 0.910 [3, 0, 1, 2, 4]


((1845753, 39), (1845753, 1), (1975, 39), (1975, 1))

model1 0.888 model2 0.851 model6 0.864 model7 0.870 model71 0.908 [3, 0, 1, 2, 4]


((1845890, 39), (1845890, 1), (1966, 39), (1966, 1))

model1 0.889 model2 0.852 model6 0.863 model7 0.868 model71 0.907 [3, 0, 1, 2, 4]


((1845409, 39), (1845409, 1), (1992, 39), (1992, 1))

model1 0.886 model2 0.857 model6 0.863 model7 0.867 model71 0.901 [3, 0, 1, 2, 4]


((1845711, 39), (1845711, 1), (2035, 39), (2035, 1))

model1 0.884 model2 0.852 model6 0.864 model7 0.869 model71 0.906 [3, 0, 1, 2, 4]


((1845937, 39), (1845937, 1), (2032, 39), (2032, 1))

model1 0.897 model2 0.862 model6 0.872 model7 0.877 model71 0.913 [3, 0, 1, 2, 4]


((1845954, 39), (1845954, 1), (1983, 39), (1983, 1))

model1 0.882 model2 0.844 model6 0.858 model7 0.863 model71 0.903 [3, 0, 1, 2, 4]


((1845674, 39), (1845674, 1), (2027, 39), (2027, 1))

model1 0.889 model2 0.855 model6 0.869 model7 0.873 model71 0.908 [3, 0, 1, 2, 4]


((1845820, 39), (1845820, 1), (1981, 39), (1981, 1))

model1 0.883 model2 0.846 model6 0.859 model7 0.864 model71 0.903 [3, 0, 1, 2, 4]


((1845978, 39), (1845978, 1), (1970, 39), (1970, 1))

model1 0.892 model2 0.863 model6 0.877 model7 0.881 model71 0.914 [3, 0, 1, 2, 4]


((1845901, 39), (1845901, 1), (2044, 39), (2044, 1))

model1 0.894 model2 0.860 model6 0.869 model7 0.874 model71 0.912 [3, 0, 1, 2, 4]


((1845312, 39), (1845312, 1), (1966, 39), (1966, 1))

model1 0.889 model2 0.856 model6 0.868 model7 0.875 model71 0.913 [3, 0, 1, 2, 4]


((1844865, 39), (1844865, 1), (2069, 39), (2069, 1))

model1 0.894 model2 0.858 model6 0.870 model7 0.875 model71 0.914 [3, 0, 1, 2, 4]


((1845900, 39), (1845900, 1), (2019, 39), (2019, 1))

model1 0.887 model2 0.855 model6 0.865 model7 0.869 model71 0.904 [3, 0, 1, 2, 4]


((1845851, 39), (1845851, 1), (2003, 39), (2003, 1))

model1 0.887 model2 0.851 model6 0.865 model7 0.870 model71 0.910 [3, 0, 1, 2, 4]


((1845822, 39), (1845822, 1), (2007, 39), (2007, 1))

model1 0.887 model2 0.848 model6 0.858 model7 0.863 model71 0.903 [3, 0, 1, 2, 4]


((1845891, 39), (1845891, 1), (1996, 39), (1996, 1))

model1 0.883 model2 0.847 model6 0.858 model7 0.863 model71 0.903 [3, 0, 1, 2, 4]


((1845542, 39), (1845542, 1), (1987, 39), (1987, 1))

model1 0.904 model2 0.867 model6 0.862 model7 0.869 model71 0.909 [3, 1, 0, 2, 4]


((1845146, 39), (1845146, 1), (1920, 39), (1920, 1))

model1 0.898 model2 0.866 model6 0.875 model7 0.879 model71 0.914 [3, 0, 1, 2, 4]


((1845805, 39), (1845805, 1), (2007, 39), (2007, 1))

model1 0.896 model2 0.864 model6 0.871 model7 0.875 model71 0.910 [3, 0, 1, 2, 4]


((1845771, 39), (1845771, 1), (2020, 39), (2020, 1))

model1 0.890 model2 0.857 model6 0.868 model7 0.873 model71 0.908 [3, 0, 1, 2, 4]


((1845977, 39), (1845977, 1), (1947, 39), (1947, 1))

model1 0.877 model2 0.846 model6 0.858 model7 0.863 model71 0.899 [3, 0, 1, 2, 4]


((1845739, 39), (1845739, 1), (2013, 39), (2013, 1))

model1 0.884 model2 0.849 model6 0.862 model7 0.867 model71 0.906 [3, 0, 1, 2, 4]


((1845868, 39), (1845868, 1), (1990, 39), (1990, 1))

model1 0.886 model2 0.850 model6 0.863 model7 0.870 model71 0.908 [3, 0, 1, 2, 4]


((1845887, 39), (1845887, 1), (2043, 39), (2043, 1))

model1 0.893 model2 0.855 model6 0.869 model7 0.874 model71 0.912 [3, 0, 1, 2, 4]


((1845699, 39), (1845699, 1), (1977, 39), (1977, 1))

model1 0.892 model2 0.856 model6 0.867 model7 0.873 model71 0.915 [3, 0, 1, 2, 4]


((1845922, 39), (1845922, 1), (2042, 39), (2042, 1))

model1 0.896 model2 0.863 model6 0.877 model7 0.881 model71 0.913 [3, 0, 1, 2, 4]


((1845652, 39), (1845652, 1), (2037, 39), (2037, 1))

model1 0.879 model2 0.844 model6 0.856 model7 0.862 model71 0.902 [3, 0, 1, 2, 4]


((1845527, 39), (1845527, 1), (2007, 39), (2007, 1))

model1 0.887 model2 0.854 model6 0.864 model7 0.868 model71 0.903 [3, 0, 1, 2, 4]


((1845922, 39), (1845922, 1), (1993, 39), (1993, 1))

model1 0.887 model2 0.850 model6 0.860 model7 0.866 model71 0.909 [3, 0, 1, 2, 4]


((1845794, 39), (1845794, 1), (1961, 39), (1961, 1))

model1 0.895 model2 0.864 model6 0.876 model7 0.880 model71 0.911 [3, 0, 1, 2, 4]


((1845500, 39), (1845500, 1), (2016, 39), (2016, 1))

model1 0.894 model2 0.857 model6 0.866 model7 0.872 model71 0.913 [3, 0, 1, 2, 4]


((1845974, 39), (1845974, 1), (1944, 39), (1944, 1))

model1 0.879 model2 0.840 model6 0.852 model7 0.858 model71 0.900 [3, 0, 1, 2, 4]


((1845521, 39), (1845521, 1), (2060, 39), (2060, 1))

model1 0.884 model2 0.849 model6 0.863 model7 0.868 model71 0.908 [3, 0, 1, 2, 4]


((1845500, 39), (1845500, 1), (1995, 39), (1995, 1))

model1 0.888 model2 0.855 model6 0.864 model7 0.869 model71 0.909 [3, 0, 1, 2, 4]


((1845797, 39), (1845797, 1), (2000, 39), (2000, 1))

model1 0.891 model2 0.857 model6 0.866 model7 0.871 model71 0.908 [3, 0, 1, 2, 4]


((1845470, 39), (1845470, 1), (2073, 39), (2073, 1))

model1 0.892 model2 0.856 model6 0.866 model7 0.870 model71 0.911 [3, 0, 1, 2, 4]


((1845514, 39), (1845514, 1), (1955, 39), (1955, 1))

model1 0.898 model2 0.863 model6 0.870 model7 0.875 model71 0.912 [3, 0, 1, 2, 4]


((1845303, 39), (1845303, 1), (1996, 39), (1996, 1))

model1 0.895 model2 0.864 model6 0.876 model7 0.881 model71 0.920 [3, 0, 1, 2, 4]


((1845978, 39), (1845978, 1), (2058, 39), (2058, 1))

model1 0.886 model2 0.852 model6 0.866 model7 0.871 model71 0.910 [3, 0, 1, 2, 4]


((1845753, 39), (1845753, 1), (2076, 39), (2076, 1))

model1 0.879 model2 0.843 model6 0.854 model7 0.859 model71 0.900 [3, 0, 1, 2, 4]


((1845855, 39), (1845855, 1), (2033, 39), (2033, 1))

model1 0.885 model2 0.849 model6 0.861 model7 0.867 model71 0.905 [3, 0, 1, 2, 4]


((1845418, 39), (1845418, 1), (2015, 39), (2015, 1))

model1 0.886 model2 0.856 model6 0.870 model7 0.874 model71 0.910 [3, 0, 1, 2, 4]


((1845562, 39), (1845562, 1), (1972, 39), (1972, 1))

model1 0.883 model2 0.849 model6 0.867 model7 0.872 model71 0.907 [3, 0, 1, 2, 4]


((1845929, 39), (1845929, 1), (2079, 39), (2079, 1))

model1 0.898 model2 0.862 model6 0.865 model7 0.870 model71 0.912 [3, 0, 1, 2, 4]


((1845923, 39), (1845923, 1), (1994, 39), (1994, 1))

model1 0.895 model2 0.861 model6 0.863 model7 0.869 model71 0.909 [3, 0, 1, 2, 4]


((1844831, 39), (1844831, 1), (2036, 39), (2036, 1))

model1 0.892 model2 0.855 model6 0.859 model7 0.864 model71 0.905 [3, 0, 1, 2, 4]


((1845846, 39), (1845846, 1), (1945, 39), (1945, 1))

model1 0.894 model2 0.860 model6 0.863 model7 0.867 model71 0.907 [3, 0, 1, 2, 4]


((1845429, 39), (1845429, 1), (1987, 39), (1987, 1))

model1 0.885 model2 0.846 model6 0.859 model7 0.864 model71 0.901 [3, 0, 1, 2, 4]


((1845700, 39), (1845700, 1), (1990, 39), (1990, 1))

model1 0.884 model2 0.851 model6 0.855 model7 0.862 model71 0.901 [3, 0, 1, 2, 4]


((1845876, 39), (1845876, 1), (2050, 39), (2050, 1))

model1 0.887 model2 0.855 model6 0.871 model7 0.875 model71 0.913 [3, 0, 1, 2, 4]


((1845728, 39), (1845728, 1), (2011, 39), (2011, 1))

model1 0.884 model2 0.850 model6 0.860 model7 0.865 model71 0.904 [3, 0, 1, 2, 4]


((1845940, 39), (1845940, 1), (2092, 39), (2092, 1))

model1 0.888 model2 0.855 model6 0.865 model7 0.870 model71 0.906 [3, 0, 1, 2, 4]


((1845310, 39), (1845310, 1), (1993, 39), (1993, 1))

model1 0.877 model2 0.839 model6 0.852 model7 0.858 model71 0.896 [3, 0, 1, 2, 4]


((1845740, 39), (1845740, 1), (1990, 39), (1990, 1))

model1 0.886 model2 0.848 model6 0.857 model7 0.863 model71 0.905 [3, 0, 1, 2, 4]


((1845969, 39), (1845969, 1), (2048, 39), (2048, 1))

model1 0.891 model2 0.859 model6 0.867 model7 0.872 model71 0.909 [3, 0, 1, 2, 4]


((1845893, 39), (1845893, 1), (1995, 39), (1995, 1))

model1 0.898 model2 0.863 model6 0.868 model7 0.875 model71 0.917 [3, 0, 1, 2, 4]


((1845789, 39), (1845789, 1), (2031, 39), (2031, 1))

model1 0.899 model2 0.863 model6 0.871 model7 0.876 model71 0.915 [3, 0, 1, 2, 4]


((1845679, 39), (1845679, 1), (2020, 39), (2020, 1))

model1 0.887 model2 0.852 model6 0.866 model7 0.871 model71 0.910 [3, 0, 1, 2, 4]


((1845678, 39), (1845678, 1), (2035, 39), (2035, 1))

model1 0.887 model2 0.846 model6 0.856 model7 0.861 model71 0.907 [3, 0, 1, 2, 4]


((1845704, 39), (1845704, 1), (2072, 39), (2072, 1))

model1 0.884 model2 0.853 model6 0.862 model7 0.867 model71 0.906 [3, 0, 1, 2, 4]


((1845956, 39), (1845956, 1), (1989, 39), (1989, 1))

model1 0.880 model2 0.844 model6 0.850 model7 0.854 model71 0.895 [3, 0, 1, 2, 4]


((1845524, 39), (1845524, 1), (2022, 39), (2022, 1))

model1 0.892 model2 0.860 model6 0.867 model7 0.872 model71 0.910 [3, 0, 1, 2, 4]


((1845930, 39), (1845930, 1), (1988, 39), (1988, 1))

model1 0.889 model2 0.856 model6 0.866 model7 0.870 model71 0.908 [3, 0, 1, 2, 4]


((1845521, 39), (1845521, 1), (2014, 39), (2014, 1))

model1 0.903 model2 0.870 model6 0.879 model7 0.883 model71 0.919 [3, 0, 1, 2, 4]


((1845719, 39), (1845719, 1), (2020, 39), (2020, 1))

model1 0.877 model2 0.839 model6 0.853 model7 0.858 model71 0.900 [3, 0, 1, 2, 4]


((1845813, 39), (1845813, 1), (2013, 39), (2013, 1))

model1 0.890 model2 0.856 model6 0.870 model7 0.875 model71 0.912 [3, 0, 1, 2, 4]


((1845888, 39), (1845888, 1), (1998, 39), (1998, 1))

model1 0.878 model2 0.845 model6 0.859 model7 0.864 model71 0.902 [3, 0, 1, 2, 4]


((1845517, 39), (1845517, 1), (2028, 39), (2028, 1))

model1 0.890 model2 0.861 model6 0.871 model7 0.877 model71 0.912 [3, 0, 1, 2, 4]


((1845847, 39), (1845847, 1), (2078, 39), (2078, 1))

model1 0.891 model2 0.856 model6 0.874 model7 0.879 model71 0.916 [3, 0, 1, 2, 4]


((1844623, 39), (1844623, 1), (1985, 39), (1985, 1))

model1 0.881 model2 0.848 model6 0.864 model7 0.868 model71 0.904 [3, 0, 1, 2, 4]


((1845839, 39), (1845839, 1), (2040, 39), (2040, 1))

model1 0.891 model2 0.858 model6 0.861 model7 0.865 model71 0.905 [3, 0, 1, 2, 4]


((1845739, 39), (1845739, 1), (2031, 39), (2031, 1))

model1 0.898 model2 0.860 model6 0.871 model7 0.877 model71 0.918 [3, 0, 1, 2, 4]


((1845638, 39), (1845638, 1), (1980, 39), (1980, 1))

model1 0.892 model2 0.862 model6 0.877 model7 0.881 model71 0.915 [3, 0, 1, 2, 4]


((1845590, 39), (1845590, 1), (2008, 39), (2008, 1))

model1 0.881 model2 0.844 model6 0.863 model7 0.869 model71 0.909 [3, 0, 1, 2, 4]


((1845848, 39), (1845848, 1), (1966, 39), (1966, 1))

model1 0.892 model2 0.861 model6 0.867 model7 0.873 model71 0.912 [3, 0, 1, 2, 4]


((1845900, 39), (1845900, 1), (1987, 39), (1987, 1))

model1 0.881 model2 0.848 model6 0.860 model7 0.864 model71 0.908 [3, 0, 1, 2, 4]


((1845587, 39), (1845587, 1), (2062, 39), (2062, 1))

model1 0.890 model2 0.852 model6 0.866 model7 0.871 model71 0.912 [3, 0, 1, 2, 4]


((1845809, 39), (1845809, 1), (2043, 39), (2043, 1))

model1 0.891 model2 0.856 model6 0.864 model7 0.869 model71 0.905 [3, 0, 1, 2, 4]


((1845524, 39), (1845524, 1), (1943, 39), (1943, 1))

model1 0.879 model2 0.843 model6 0.849 model7 0.854 model71 0.895 [3, 0, 1, 2, 4]


((1845408, 39), (1845408, 1), (2047, 39), (2047, 1))

model1 0.884 model2 0.848 model6 0.860 model7 0.865 model71 0.907 [3, 0, 1, 2, 4]


((1845774, 39), (1845774, 1), (2031, 39), (2031, 1))

model1 0.879 model2 0.844 model6 0.860 model7 0.865 model71 0.904 [3, 0, 1, 2, 4]


((1845491, 39), (1845491, 1), (2040, 39), (2040, 1))

model1 0.894 model2 0.861 model6 0.870 model7 0.875 model71 0.909 [3, 0, 1, 2, 4]


((1845808, 39), (1845808, 1), (2110, 39), (2110, 1))

model1 0.897 model2 0.860 model6 0.868 model7 0.874 model71 0.918 [3, 0, 1, 2, 4]


((1845858, 39), (1845858, 1), (2014, 39), (2014, 1))

model1 0.893 model2 0.863 model6 0.870 model7 0.876 model71 0.910 [3, 0, 1, 2, 4]


((1845722, 39), (1845722, 1), (1980, 39), (1980, 1))

model1 0.887 model2 0.848 model6 0.856 model7 0.860 model71 0.901 [3, 0, 1, 2, 4]


In [48]:
# Ascending copy of `aucs` (presumably the per-model validation AUCs printed by
# the loop above); an element's position in this list is its 0-based rank.
saucs = list(aucs)
saucs.sort()

In [50]:
# Look up each score's position in the ascending list (0 = lowest) and check
# whether that ordering equals the reference ranking [3, 4, 1, 2, 0].
list(map(saucs.index, aucs)) == [3, 4, 1, 2, 0]

False

In [None]:
# Scratch notes: one recorded score per model, listed from highest to lowest.
# NOTE(review): the 0.8xxx values are presumably leaderboard/submission AUCs,
# not the ~0.85-0.92 validation AUCs printed by the loop above; confirm.
model2 # 0.8155
model6 # 0.8024
model71 # 0.7948

In [None]:
# Scratch notes: recorded score per model, followed by that score's ascending
# rank (0 = lowest) among the five ranked models. Read in the order
# model1, model2, model6, model7, model71 the ranks are [3, 4, 1, 2, 0], the
# same list the rank-comparison cell checks against. model5 carries no rank.
# NOTE(review): the scores are presumably leaderboard/submission AUCs; confirm.
model1 # 0.8111 (rank 3)
model2 # 0.8155 (rank 4)
model5 # 0.8112 (not ranked)
model6 # 0.8024 (rank 1)
model7 # 0.8062 (rank 2)
model71 # 0.7948 (rank 0)

In [None]:
# Scratch notes: the same scores as the modelN notes, keyed by FEATSn.
# NOTE(review): FEATSn is presumably the feature-column list used to train
# modelN, and the scores presumably leaderboard/submission AUCs; confirm.
FEATS1 # 0.8111
FEATS2 # 0.8155
FEATS5 # 0.8112
FEATS6 # 0.8024
FEATS7 # 0.8062
FEATS71 # 0.7948