# CV baseline code and Tuning test code

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_theme(color_codes=True)
import os
from math import pi
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
import json
from collections import Counter
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings('ignore')

## Feature Engineering

In [14]:
def feature_engineering(df):
    # 문제별 풀이시간
    from tqdm import tqdm

    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df['diff_Timestamp'] = df['Timestamp'] - df.shift(1)['Timestamp']

    testId_df = df[~df.duplicated(['assessmentItemID'])].groupby('testId')
    testId2len = {}
    for testId, g_df in testId_df:
        testId2len[testId] = len(g_df)

    userID_df = df.groupby('userID')
    start_index_list = []
    second_index_list = []

    for userID, g_df in tqdm(userID_df):
        testId_df = g_df.groupby('testId')
        for testId, gg_df in testId_df:
            index_list = gg_df.index.tolist()
            start_index = 0
            if len(gg_df) <= testId2len[testId]:
                start_index_list += [index_list[start_index]]
                second_index_list += [index_list[start_index + 1]]
            else:
                div = len(gg_df) // testId2len[testId]
                for _ in range(div):
                    start_index_list += [index_list[start_index]]
                    second_index_list += [index_list[start_index + 1]]
                    start_index += testId2len[testId]

    df.loc[start_index_list, 'diff_Timestamp'] = df.loc[second_index_list, 'diff_Timestamp'].values
    df['elapsed'] = df['diff_Timestamp'].apply(lambda x: x.total_seconds() if not pd.isna(x) else np.nan)


    df['hour'] = df['Timestamp'].dt.hour
    df['dow'] = df['Timestamp'].dt.dayofweek # 요일을 숫자로

    diff = df.loc[:, ['userID','Timestamp']].groupby('userID').diff().fillna(pd.Timedelta(seconds=0))
    diff = diff.fillna(pd.Timedelta(seconds=0))
    diff = diff['Timestamp'].apply(lambda x: x.total_seconds())

    # 문제별 풀이시간
    df['elapsed'] = diff
    df['elapsed'] = df['elapsed'].apply(lambda x : x if x <650 and x >=0 else 0)

    df['testcode']=df['testId'].apply(lambda x : int(x[1:4])//10)
    df['problem_number'] = df['assessmentItemID'].apply(lambda x: int(x[7:])) 


    # feature 별 정답여부
    correct_t = df.groupby(['testId'])['answerCode'].agg(['mean', 'sum'])
    correct_t.columns = ["test_mean", 'test_sum']
    correct_k = df.groupby(['KnowledgeTag'])['answerCode'].agg(['mean', 'sum'])
    correct_k.columns = ["tag_mean", 'tag_sum']
    correct_a = df.groupby(['assessmentItemID'])['answerCode'].agg(['mean', 'sum'])
    correct_a.columns = ["ass_mean", 'ass_sum']
    correct_p = df.groupby(['problem_number'])['answerCode'].agg(['mean', 'sum'])
    correct_p.columns = ["prb_mean", 'prb_sum']
    correct_h = df.groupby(['hour'])['answerCode'].agg(['mean', 'sum'])
    correct_h.columns = ["hour_mean", 'hour_sum']
    correct_d = df.groupby(['dow'])['answerCode'].agg(['mean', 'sum'])
    correct_d.columns = ["dow_mean", 'dow_sum'] 

    df = pd.merge(df, correct_t, on=['testId'], how="left")
    df = pd.merge(df, correct_k, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, correct_a, on=['assessmentItemID'], how="left")
    df = pd.merge(df, correct_p, on=['problem_number'], how="left")
    df = pd.merge(df, correct_h, on=['hour'], how="left")
    df = pd.merge(df, correct_d, on=['dow'], how="left")


    # 정답과 오답 기준으로 나눠서 생각
    o_df = df[df['answerCode']==1]
    x_df = df[df['answerCode']==0]

    elp_k = df.groupby(['KnowledgeTag'])['elapsed'].agg('mean').reset_index()
    elp_k.columns = ['KnowledgeTag',"tag_elp"]
    elp_k_o = o_df.groupby(['KnowledgeTag'])['elapsed'].agg('mean').reset_index()
    elp_k_o.columns = ['KnowledgeTag', "tag_elp_o"]
    elp_k_x = x_df.groupby(['KnowledgeTag'])['elapsed'].agg('mean').reset_index()
    elp_k_x.columns = ['KnowledgeTag', "tag_elp_x"]

    df = pd.merge(df, elp_k, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, elp_k_o, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, elp_k_x, on=['KnowledgeTag'], how="left")

    ass_k = df.groupby(['assessmentItemID'])['elapsed'].agg('mean').reset_index()
    ass_k.columns = ['assessmentItemID',"ass_elp"]
    ass_k_o = o_df.groupby(['assessmentItemID'])['elapsed'].agg('mean').reset_index()
    ass_k_o.columns = ['assessmentItemID',"ass_elp_o"]
    ass_k_x = x_df.groupby(['assessmentItemID'])['elapsed'].agg('mean').reset_index()
    ass_k_x.columns = ['assessmentItemID',"ass_elp_x"]

    df = pd.merge(df, ass_k, on=['assessmentItemID'], how="left")
    df = pd.merge(df, ass_k_o, on=['assessmentItemID'], how="left")
    df = pd.merge(df, ass_k_x, on=['assessmentItemID'], how="left")

    prb_k = df.groupby(['problem_number'])['elapsed'].agg('mean').reset_index()
    prb_k.columns = ['problem_number',"prb_elp"]
    prb_k_o = o_df.groupby(['problem_number'])['elapsed'].agg('mean').reset_index()
    prb_k_o.columns = ['problem_number',"prb_elp_o"]
    prb_k_x = x_df.groupby(['problem_number'])['elapsed'].agg('mean').reset_index()
    prb_k_x.columns = ['problem_number',"prb_elp_x"]

    df = pd.merge(df, prb_k, on=['problem_number'], how="left")
    df = pd.merge(df, prb_k_o, on=['problem_number'], how="left")
    df = pd.merge(df, prb_k_x, on=['problem_number'], how="left")

    # 누적합 - 주어진 데이터 이전/이후 데이터들을 포함하는 메모리를 feature로 포함시킴: Sequence Model을 사용하지 않고 일반적인 지도 학습 모델에서 사용하기 위함
    df['user_correct_answer'] = df.groupby('userID')['answerCode'].transform(lambda x: x.cumsum().shift(1))
    df['user_total_answer'] = df.groupby('userID')['answerCode'].cumcount()
    df['user_acc'] = df['user_correct_answer']/df['user_total_answer']
    df['testcode_o'] = df.groupby(['userID','testcode'])['answerCode'].transform(lambda x: x.cumsum().shift(1))
    df['testcodeCount'] = df.groupby(['userID','testcode']).cumcount()
    df['testcodeAcc'] = df['testcode_o']/df['testcodeCount']
    df['tectcodeElp'] = df.groupby(['userID','testcode'])['elapsed'].transform(lambda x: x.cumsum().shift(1))
    df['testcodeMElp'] = df['tectcodeElp']/df['testcodeCount']



    f = lambda x : len(set(x))
    t_df = df.groupby(['testId']).agg({
    'problem_number':'max',
    'KnowledgeTag':f
    })
    t_df.reset_index(inplace=True)

    t_df.columns = ['testId','problem_count',"tag_count"]

    df = pd.merge(df,t_df,on='testId',how='left')

    gdf = df[['userID','testId','problem_number','testcode','Timestamp']].sort_values(by=['userID','testcode','Timestamp'])
    gdf['buserID'] = gdf['userID'] != gdf['userID'].shift(1)
    gdf['btestcode'] = gdf['testcode'] != gdf['testcode'].shift(1)
    gdf['first'] = gdf[['buserID','btestcode']].any(axis=1).apply(lambda x : 1- int(x))
    gdf['RepeatedTime'] = gdf['Timestamp'].diff().fillna(pd.Timedelta(seconds=0)) 
    gdf['RepeatedTime'] = gdf['RepeatedTime'].apply(lambda x: x.total_seconds()) * gdf['first']
    df['RepeatedTime'] = gdf['RepeatedTime'].apply(lambda x : math.log(x+1))

    df['prior_KnowledgeTag_frequency'] = df.groupby(['userID','KnowledgeTag']).cumcount()

    df['problem_position'] = df['problem_number'] / df["problem_count"]
    df['solve_order'] = df.groupby(['userID','testId']).cumcount()
    df['solve_order'] = df['solve_order'] - df['problem_count']*(df['solve_order'] > df['problem_count']).apply(int) + 1
    df['retest'] = (df['solve_order'] > df['problem_count']).apply(int)
    T = df['solve_order'] != df['problem_number']
    TT = T.shift(1)
    TT[0] = False
    df['solved_disorder'] = (TT.apply(lambda x : not x) & T).apply(int)

    df['testId'] = df['testId'].apply(lambda x : int(x[1:4]+x[-3]))
    df['hour'] = df['Timestamp'].dt.hour
    df['dow'] = df['Timestamp'].dt.dayofweek

    return df

## 데이터 전처리

In [24]:
# 현제 경로 설정
os.chdir('/opt/ml/level2_dkt-recsys-09/DKT')

In [16]:
DATA_PATH = '/opt/ml/input/data'

%time
dtype = {
    'userID': 'int16',
    'answerCode': 'int8',
    'KnowledgeTag': 'int16'
}   

df = pd.read_csv(os.path.join(DATA_PATH, 'train_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
df = df.sort_values(by=['userID', 'Timestamp', 'testId']).reset_index(drop=True)

df = feature_engineering(df)
df.to_csv(DATA_PATH + 'train_featured.csv', index=False)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 7.15 µs


100%|██████████| 6698/6698 [00:20<00:00, 326.75it/s] 


## 기본 KFold 모델 적용(userID index 기준)

In [47]:
# userID index 기준 K-fold
import lightgbm as lgb
import numpy as np
import random
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import KFold

train = df.copy()
predicts_list = list()
kf = KFold(n_splits=5, shuffle=True, random_state=22)

for fold, (train_idx, val_idx) in enumerate(
    kf.split(train["userID"].unique().tolist())
):
    print(
        f"-------------------------START FOLD {fold + 1} TRAINING---------------------------"
    )
    print(
        f"-------------------------START FOLD {fold + 1} MODEL LOADING----------------------"
    )

    # Split the data into training and testing sets for this fold
    
    FEATS = train.select_dtypes(include=["int", "int8", "int16", "int64", "float", "float16", "float64"]).columns
    FEATS = [col for col in FEATS if col not in ['answerCode']]

    train = df.copy()
    x_train = train[train['userID'].isin(train_idx)]
    x_valid = train[train['userID'].isin(val_idx)]
    X_train, Y_train = x_train.drop(['answerCode'], axis=1), x_train['answerCode']
    X_valid, Y_valid = x_valid.drop(['answerCode'], axis=1), x_valid['answerCode']
    # print(X_train.shape, X_valid.shape)

    # Create the LightGBM dataset
    lgb_train = lgb.Dataset(X_train[FEATS], Y_train)
    lgb_test = lgb.Dataset(X_valid[FEATS], Y_valid)

    model = lgb.train(
        {'objective': 'binary'}, 
        lgb_train,
        valid_sets=[lgb_train, lgb_test],
        verbose_eval=100,
        num_boost_round=500,
        early_stopping_rounds=100
    )

    print(
        f"-------------------------DONE FOLD {fold + 1} MODEL LOADING-----------------------"
    )
    predicts_list.append(model.predict(test_df[FEATS]))

    preds = model.predict(X_valid[FEATS])
    acc = accuracy_score(Y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(Y_valid, preds)

    print(f'VALID AUC : {auc} ACC : {acc}\n')
    print(
        f"---------------------------DONE FOLD {fold + 1} TRAINING--------------------------"
    )

-------------------------START FOLD 1 TRAINING---------------------------
-------------------------START FOLD 1 MODEL LOADING----------------------
(1806456, 48) (441630, 48)
[LightGBM] [Info] Number of positive: 1181418, number of negative: 625038
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6470
[LightGBM] [Info] Number of data points in the train set: 1806456, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.653998 -> initscore=0.636658
[LightGBM] [Info] Start training from score 0.636658
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.446834	valid_1's binary_logloss: 0.450146
[200]	training's binary_logloss: 0.442826	valid_1's binary_logloss: 0.448247
[300]	training's binary_logloss: 0.44008	valid_1's binary_logloss: 0.447588
[400]	training's binary_logloss: 0.437808	valid_1's binary_logloss: 0.447161

## 기본 KFold 모델 적용(train index 기준)

In [48]:
# train index 기준 K-fold

train = df.copy()
predicts_list = list()
kf = KFold(n_splits=5, shuffle=True, random_state=22)

y_train = train['answerCode']
train = train.drop(['answerCode'], axis=1)

for fold, (train_idx, val_idx) in enumerate(
    kf.split(train)
):
    print(
        f"-------------------------START FOLD {fold + 1} TRAINING---------------------------"
    )
    print(
        f"-------------------------START FOLD {fold + 1} MODEL LOADING----------------------"
    )

    # Split the data into training and testing sets for this fold
    
    FEATS = train.select_dtypes(include=["int", "int8", "int16", "int64", "float", "float16", "float64"]).columns
    FEATS = [col for col in FEATS if col not in ['answerCode']]

    X_train, Y_train = train.iloc[train_idx], y_train.iloc[train_idx]
    X_valid, Y_valid = train.iloc[val_idx], y_train.iloc[val_idx]

    # Create the LightGBM dataset
    lgb_train = lgb.Dataset(X_train[FEATS], Y_train)
    lgb_test = lgb.Dataset(X_valid[FEATS], Y_valid)

    model = lgb.train(
        {'objective': 'binary'}, 
        lgb_train,
        valid_sets=[lgb_train, lgb_test],
        verbose_eval=100,
        num_boost_round=500,
        early_stopping_rounds=100
    )

    print(
        f"-------------------------DONE FOLD {fold + 1} MODEL LOADING-----------------------"
    )
    predicts_list.append(model.predict(test_df[FEATS]))

    preds = model.predict(X_valid[FEATS])
    acc = accuracy_score(Y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(Y_valid, preds)

    print(f'VALID AUC : {auc} ACC : {acc}\n')
    print(
        f"---------------------------DONE FOLD {fold + 1} TRAINING--------------------------"
    )

-------------------------START FOLD 1 TRAINING---------------------------
-------------------------START FOLD 1 MODEL LOADING----------------------
[LightGBM] [Info] Number of positive: 1186404, number of negative: 626864
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6468
[LightGBM] [Info] Number of data points in the train set: 1813268, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654290 -> initscore=0.637953
[LightGBM] [Info] Start training from score 0.637953
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.447301	valid_1's binary_logloss: 0.449423
[200]	training's binary_logloss: 0.443451	valid_1's binary_logloss: 0.44703
[300]	training's binary_logloss: 0.440961	valid_1's binary_logloss: 0.446065
[400]	training's binary_logloss: 0.438722	valid_1's binary_logloss: 0.445244
[500]	training's binary_lo

## Custom K-Fold 모델 적용(userID index 기준)

In [93]:
# train과 test 데이터셋은 사용자 별로 묶어서 분리를 해주어야함
def custom_K_fold_5(df): 
    users = list(zip(df['userID'].value_counts().index, df['userID'].value_counts()))
    random.seed(42)
    random.shuffle(users)
    
    train_data_div_len = 0.2*len(df)
    sum_of_train_data = 0
    user_ids =[[] for _ in range(5)]

    for user_id, count in users:
        sum_of_train_data += count
        if sum_of_train_data < train_data_div_len:
            user_ids[0].append(user_id)
        elif sum_of_train_data < train_data_div_len*2:
            user_ids[1].append(user_id)
        elif sum_of_train_data < train_data_div_len*3:
            user_ids[2].append(user_id)
        elif sum_of_train_data < train_data_div_len*4:
            user_ids[3].append(user_id)
        else:
            user_ids[4].append(user_id)
            
    final_ids =[[] for _ in range(5)]
    for i in range(5):
        train_idx = [x for x in df['userID'].value_counts().index if x not in user_ids[i]]
        final_ids[i].append(train_idx)
        final_ids[i].append(user_ids[i])

    return final_ids

In [61]:
len(final_ids)

5

In [56]:
df['userID'].nunique()

6698

In [94]:
# 기존 코드에서 작동 확인
for fold, (train_idx, val_idx) in enumerate(
    custom_K_fold_5(df)
):
    print(len(train_idx), len(val_idx))

5378 1320
5376 1322
5363 1335
5328 1370
5347 1351


In [96]:
import lightgbm as lgb
import numpy as np
import random
from sklearn.metrics import accuracy_score, roc_auc_score

predicts_list = list()

for fold, (train_idx, val_idx) in enumerate(
    custom_K_fold_5(df)
):
    print(
        f"-------------------------START FOLD {fold + 1} TRAINING---------------------------"
    )
    print(
        f"-------------------------START FOLD {fold + 1} MODEL LOADING----------------------"
    )

    # Split the data into training and testing sets for this fold
    
    FEATS = train.select_dtypes(include=["int", "int8", "int16", "int64", "float", "float16", "float64"]).columns
    FEATS = [col for col in FEATS if col not in ['answerCode']]

    train = df.copy()
    x_train = train[train['userID'].isin(train_idx)]
    x_valid = train[train['userID'].isin(val_idx)]
    x_valid = x_valid[x_valid['userID'] != x_valid['userID'].shift(-1)]
    X_train, Y_train = x_train.drop(['answerCode'], axis=1), x_train['answerCode']
    X_valid, Y_valid = x_valid.drop(['answerCode'], axis=1), x_valid['answerCode']
    print(X_train.shape, X_valid.shape)

    # Create the LightGBM dataset
    lgb_train = lgb.Dataset(X_train[FEATS], Y_train)
    lgb_test = lgb.Dataset(X_valid[FEATS], Y_valid)

    model = lgb.train(
        {'objective': 'binary'}, 
        lgb_train,
        valid_sets=[lgb_train, lgb_test],
        verbose_eval=100,
        num_boost_round=500,
        early_stopping_rounds=100
    )

    print(
        f"-------------------------DONE FOLD {fold + 1} MODEL LOADING-----------------------"
    )
    predicts_list.append(model.predict(test_df[FEATS]))

    preds = model.predict(X_valid[FEATS])
    acc = accuracy_score(Y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(Y_valid, preds)

    print(f'VALID AUC : {auc} ACC : {acc}\n')
    print(
        f"---------------------------DONE FOLD {fold + 1} TRAINING--------------------------"
    )

-------------------------START FOLD 1 TRAINING---------------------------
-------------------------START FOLD 1 MODEL LOADING----------------------
(1813372, 48) (1320, 48)
[LightGBM] [Info] Number of positive: 1186899, number of negative: 626473
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6465
[LightGBM] [Info] Number of data points in the train set: 1813372, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654526 -> initscore=0.638994
[LightGBM] [Info] Start training from score 0.638994
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.447853	valid_1's binary_logloss: 0.487734
[200]	training's binary_logloss: 0.444086	valid_1's binary_logloss: 0.48404
[300]	training's binary_logloss: 0.44146	valid_1's binary_logloss: 0.482124
[400]	training's binary_logloss: 0.439106	valid_1's binary_logloss: 0.480017
[5

## test data 적용

In [28]:
# FEATURE ENGINEERING
test_df = pd.read_csv(os.path.join(DATA_PATH, 'test_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
test_df = feature_engineering(test_df)
test_df.to_csv(DATA_PATH + 'test_featured.csv', index=False)

# Inference
test_df = pd.read_csv(DATA_PATH+'test_featured.csv')

# LEAVE LAST INTERACTION ONLY
test_df = test_df[test_df['userID'] != test_df['userID'].shift(-1)]

# DROP ANSWERCODE
test_df = test_df.drop(['answerCode'], axis=1)

100%|██████████| 744/744 [00:02<00:00, 320.51it/s]


In [98]:
# CHECK PREDICT
min(predicts), max(predicts)

(0.005697023038779744, 0.9748787404980996)

In [97]:
# MAKE PREDICTION
predicts = np.mean(predicts_list, axis=0)

submission = pd.read_csv(DATA_PATH+'/sample_submission.csv')
submission['prediction'] = predicts

submission.to_csv(DATA_PATH+'/lgbm_kfold_submission.csv')

# Optuna + FE 과정

In [4]:
DATA_PATH = '/opt/ml/input/data'

In [32]:
df = pd.read_csv(DATA_PATH+'/train_data.csv')
df["Timestamp"] = df["Timestamp"].apply(convert_time)

In [27]:
import lightgbm as lgb
import numpy as np
import math
import random
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import KFold

In [10]:
# train과 test 데이터셋은 사용자 별로 묶어서 분리를 해주어야함
random.seed(42)
def custom_train_test_split(df, ratio=0.8, split=True):
    
    users = list(zip(df['userID'].value_counts().index, df['userID'].value_counts()))
    random.shuffle(users)
    
    max_train_data_len = ratio*len(df)
    sum_of_train_data = 0
    user_ids =[]

    for user_id, count in users:
        sum_of_train_data += count
        if max_train_data_len < sum_of_train_data:
            break
        user_ids.append(user_id)


    train = df[df['userID'].isin(user_ids)]
    test = df[df['userID'].isin(user_ids) == False]

    #test데이터셋은 각 유저의 마지막 interaction만 추출
    test = test[test['userID'] != test['userID'].shift(-1)]
    return train, test

In [28]:
def feature_engineering(df):
    df = df.sort_values(['userID', 'Timestamp'])

    # diff
    df['diff'] = df.sort_values(['userID','Timestamp']).groupby('userID')['Timestamp'].diff()

    diff_df = df['diff']
    diff_df.dropna(inplace=True)

    # nan은 -1
    # 600(10분) 이상이면 다 600
    df['diff'].fillna(-1, inplace=True)
    idx = df[df['diff'] >= 600].index
    df.loc[idx, 'diff'] = 600

    tmp = df[df['diff'] >= 0]
    correct_k = tmp.groupby(['KnowledgeTag'])['diff'].agg(['mean'])
    df = pd.merge(df, correct_k, on=['KnowledgeTag'], how="left")


    df.sort_values(by=['userID','Timestamp'], inplace=True)

    #유저들의 문제 풀이수, 정답 수, 정답률
    df['user_correct_answer'] = df.groupby('userID')['answerCode'].transform(lambda x: x.cumsum().shift(1))
    df['user_total_answer'] = df.groupby('userID')['answerCode'].cumcount()
    df['user_acc'] = df['user_correct_answer']/df['user_total_answer']

    #testId와 KnowledgeTag의 전체 정답률
    correct_t = df.groupby(['testId'])['answerCode'].agg(['mean', 'sum'])
    correct_t.columns = ["test_mean", 'test_sum']
    correct_k = df.groupby(['KnowledgeTag'])['answerCode'].agg(['mean', 'sum'])
    correct_k.columns = ["tag_mean", 'tag_sum']

    df = pd.merge(df, correct_t, on=['testId'], how="left")
    df = pd.merge(df, correct_k, on=['KnowledgeTag'], how="left")


    df['hour'] = pd.to_datetime(df['Timestamp']).dt.hour
    df['dow'] = pd.to_datetime(df['Timestamp']).dt.dayofweek # 요일을 숫자로

    df['testcode']=df['testId'].apply(lambda x : int(x[1:4])//10)
    df['problem_number'] = df['assessmentItemID'].apply(lambda x: int(x[7:])) 

    # feature 별 정답여부
    correct_t = df.groupby(['testId'])['answerCode'].agg(['mean', 'sum'])
    correct_t.columns = ["test_mean", 'test_sum']
    correct_k = df.groupby(['KnowledgeTag'])['answerCode'].agg(['mean', 'sum'])
    correct_k.columns = ["tag_mean", 'tag_sum']
    correct_a = df.groupby(['assessmentItemID'])['answerCode'].agg(['mean', 'sum'])
    correct_a.columns = ["ass_mean", 'ass_sum']
    correct_p = df.groupby(['problem_number'])['answerCode'].agg(['mean', 'sum'])
    correct_p.columns = ["prb_mean", 'prb_sum']
    correct_h = df.groupby(['hour'])['answerCode'].agg(['mean', 'sum'])
    correct_h.columns = ["hour_mean", 'hour_sum']
    correct_d = df.groupby(['dow'])['answerCode'].agg(['mean', 'sum'])
    correct_d.columns = ["dow_mean", 'dow_sum'] 

    df = pd.merge(df, correct_t, on=['testId'], how="left")
    df = pd.merge(df, correct_k, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, correct_a, on=['assessmentItemID'], how="left")
    df = pd.merge(df, correct_p, on=['problem_number'], how="left")
    df = pd.merge(df, correct_h, on=['hour'], how="left")
    df = pd.merge(df, correct_d, on=['dow'], how="left")


    f = lambda x : len(set(x))
    t_df = df.groupby(['testId']).agg({
    'problem_number':'max',
    'KnowledgeTag':f
    })
    t_df.reset_index(inplace=True)

    t_df.columns = ['testId','problem_count',"tag_count"]

    df = pd.merge(df,t_df,on='testId',how='left')

    gdf = df[['userID','testId','problem_number','testcode','Timestamp']].sort_values(by=['userID','testcode','Timestamp'])
    gdf['buserID'] = gdf['userID'] != gdf['userID'].shift(1)
    gdf['btestcode'] = gdf['testcode'] != gdf['testcode'].shift(1)
    gdf['first'] = gdf[['buserID','btestcode']].any(axis=1).apply(lambda x : 1- int(x))
    gdf['RepeatedTime'] = pd.to_datetime(gdf['Timestamp']).diff().fillna(pd.Timedelta(seconds=0)) 
    gdf['RepeatedTime'] = gdf['RepeatedTime'].apply(lambda x: x.total_seconds()) * gdf['first']
    df['RepeatedTime'] = gdf['RepeatedTime'].apply(lambda x : math.log(x+1))

    df['prior_KnowledgeTag_frequency'] = df.groupby(['userID','KnowledgeTag']).cumcount()

    df['problem_position'] = df['problem_number'] / df["problem_count"]
    df['solve_order'] = df.groupby(['userID','testId']).cumcount()
    df['solve_order'] = df['solve_order'] - df['problem_count']*(df['solve_order'] > df['problem_count']).apply(int) + 1
    df['retest'] = (df['solve_order'] > df['problem_count']).apply(int)
    T = df['solve_order'] != df['problem_number']
    TT = T.shift(1)
    TT[0] = False
    df['solved_disorder'] = (TT.apply(lambda x : not x) & T).apply(int)

    df['testId'] = df['testId'].apply(lambda x : int(x[1:4]+x[-3]))

    # 정답과 오답 기준으로 나눠서 생각
    o_df = df[df['answerCode']==1]
    x_df = df[df['answerCode']==0]

    diff_k = df.groupby(['KnowledgeTag'])['diff'].agg('mean').reset_index()
    diff_k.columns = ['KnowledgeTag',"tag_diff"]
    diff_k_o = o_df.groupby(['KnowledgeTag'])['diff'].agg('mean').reset_index()
    diff_k_o.columns = ['KnowledgeTag', "tag_diff_o"]
    diff_k_x = x_df.groupby(['KnowledgeTag'])['diff'].agg('mean').reset_index()
    diff_k_x.columns = ['KnowledgeTag', "tag_diff_x"]

    df = pd.merge(df, diff_k, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, diff_k_o, on=['KnowledgeTag'], how="left")
    df = pd.merge(df, diff_k_x, on=['KnowledgeTag'], how="left")

    ass_k = df.groupby(['assessmentItemID'])['diff'].agg('mean').reset_index()
    ass_k.columns = ['assessmentItemID',"ass_diff"]
    ass_k_o = o_df.groupby(['assessmentItemID'])['diff'].agg('mean').reset_index()
    ass_k_o.columns = ['assessmentItemID',"ass_diff_o"]
    ass_k_x = x_df.groupby(['assessmentItemID'])['diff'].agg('mean').reset_index()
    ass_k_x.columns = ['assessmentItemID',"ass_diff_x"]

    df = pd.merge(df, ass_k, on=['assessmentItemID'], how="left")
    df = pd.merge(df, ass_k_o, on=['assessmentItemID'], how="left")
    df = pd.merge(df, ass_k_x, on=['assessmentItemID'], how="left")

    prb_k = df.groupby(['problem_number'])['diff'].agg('mean').reset_index()
    prb_k.columns = ['problem_number',"prb_diff"]
    prb_k_o = o_df.groupby(['problem_number'])['diff'].agg('mean').reset_index()
    prb_k_o.columns = ['problem_number',"prb_diff_o"]
    prb_k_x = x_df.groupby(['problem_number'])['diff'].agg('mean').reset_index()
    prb_k_x.columns = ['problem_number',"prb_diff_x"]

    df = pd.merge(df, prb_k, on=['problem_number'], how="left")
    df = pd.merge(df, prb_k_o, on=['problem_number'], how="left")
    df = pd.merge(df, prb_k_x, on=['problem_number'], how="left")


    return df

In [29]:
def categorical_label_encoding(df, is_train=True):
    cate_cols = ["assessmentItemID", "testId", "KnowledgeTag"]

    if not os.path.exists('asset/'):
        os.makedirs('asset/')    

    for col in cate_cols:
        le = LabelEncoder()
        if is_train:
            # For UNKNOWN class
            a = df[col].unique().tolist() + ["unknown"]
            le.fit(a)
            le_path = os.path.join('asset/', col + "_classes.npy")            
            np.save(le_path, le.classes_)
        else:
            label_path = os.path.join('asset/', col + "_classes.npy")
            le.classes_ = np.load(label_path)
            df[col] = df[col].apply(lambda x: x if str(x) in le.classes_ else "unknown")

        # 모든 컬럼이 범주형이라고 가정
        df[col] = df[col].astype(str)
        test = le.transform(df[col])
        df[col] = test

    return df

In [30]:
def convert_time(s):
     timestamp = time.mktime(datetime.strptime(s, "%Y-%m-%d %H:%M:%S").timetuple())
     return int(timestamp)

In [33]:
df = feature_engineering(df)
df = categorical_label_encoding(df, is_train=True) # LGBM을 위한 FE

In [36]:
FEATS = ['userID', 'user_acc', 'user_correct_answer', 'diff', 'ass_diff_o', 'ass_diff_x', 'ass_mean', 'assessmentItemID', 'ass_sum']

In [15]:
FEATS = df.select_dtypes(include=["int", "int8", "int16", "int64", "float", "float16", "float64"]).columns
FEATS = [col for col in FEATS if col not in ['answerCode']]
'Timestamp' in FEATS

False

In [34]:
# 유저별 분리
train, test = custom_train_test_split(df)

# 사용할 Feature 설정
# FEATS = df.select_dtypes(include=["int", "int8", "int16", "int64", "float", "float16", "float64"]).columns
# FEATS = [col for col in FEATS if col not in ['answerCode']]

# X, y 값 분리
y_train = train['answerCode']
train = train.drop(['answerCode'], axis=1)

y_test = test['answerCode']
test = test.drop(['answerCode'], axis=1)

In [47]:
import optuna
from optuna.samplers import TPESampler

sampler = TPESampler(42)
def objective(trial):
    dtrain = lgb.Dataset(train[FEATS], y_train)
    dtest = lgb.Dataset(test[FEATS], y_test)

    param = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 10, 1000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.1),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'seed': 42
    }
    model = lgb.train(
        param, 
        dtrain,
        valid_sets=[dtrain, dtest],
        verbose_eval=100,
        num_boost_round=1000,
        early_stopping_rounds=100,
    )

    preds = model.predict(test[FEATS])
    acc = accuracy_score(y_test, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(y_test, preds)

    return auc

study = optuna.create_study(direction='maximize', sampler=TPESampler())
study.optimize(objective,  n_trials=100)

trial = study.best_trial
trial_params = trial.params
print('Best Trial: score {},\nparams {}'.format(trial.value, trial_params))

# 최적의 파라미터로 모델 재학습
final_lgb_model1 = lgb.LGBMClassifier(**trial_params)
final_lgb_model1.fit(train[FEATS], y_train)

[32m[I 2023-05-24 07:50:54,649][0m A new study created in memory with name: no-name-98670657-5673-41ea-9419-7e94715bece5[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.819778	valid_1's auc: 0.797107
[200]	training's auc: 0.821841	valid_1's auc: 0.799375
[300]	training's auc: 0.823583	valid_1's auc: 0.801126
[400]	training's auc: 0.825047	valid_1's auc: 0.802498
[500]	training's auc: 0.826309	valid_1's auc: 0.803791
[600]	training's auc: 0.827402	valid_1's auc: 0.804865
[700]	training's auc: 0.828289	valid_1's auc: 0.805635
[800]	training's auc: 0.829056	valid_1's auc: 0.806366
[900]	training's auc: 0.82974	valid_1's au

[32m[I 2023-05-24 07:53:18,452][0m Trial 0 finished with value: 0.8071322122460997 and parameters: {'num_leaves': 62, 'learning_rate': 0.005492274570909453, 'feature_fraction': 0.9013191926640104, 'bagging_fraction': 0.6867032081165119, 'bagging_freq': 5, 'lambda_l1': 0.0002321004478099803, 'lambda_l2': 2.5998310603506103}. Best is trial 0 with value: 0.8071322122460997.[0m


[1000]	training's auc: 0.830336	valid_1's auc: 0.807132
Did not meet early stopping. Best iteration is:
[1000]	training's auc: 0.830336	valid_1's auc: 0.807132
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.823475	valid_1's auc: 0.798471
[200]	training's auc: 0.827165	valid_1's auc: 0.801709
[300]	training's auc: 0.830206	valid_1's auc: 0.804632
[400]	training's auc: 0.832585	valid_1's auc: 0.806235
[500]	training's auc: 0.834662	valid_1's auc: 0.80768
[600]	training's auc: 0.836696	valid_1's auc: 0.

[32m[I 2023-05-24 07:56:02,568][0m Trial 1 finished with value: 0.8101150831616132 and parameters: {'num_leaves': 301, 'learning_rate': 0.008919553725370155, 'feature_fraction': 0.5822188695978939, 'bagging_fraction': 0.8825907090782631, 'bagging_freq': 9, 'lambda_l1': 1.1369606054802198e-06, 'lambda_l2': 0.005318865827399402}. Best is trial 1 with value: 0.8101150831616132.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.787014	valid_1's auc: 0.753015


[32m[I 2023-05-24 07:56:29,771][0m Trial 2 finished with value: 0.7569431667647923 and parameters: {'num_leaves': 404, 'learning_rate': 0.0013845921382871654, 'feature_fraction': 0.12306228070881381, 'bagging_fraction': 0.37862222403487855, 'bagging_freq': 9, 'lambda_l1': 1.199491308438662e-08, 'lambda_l2': 1.7666670550191215e-06}. Best is trial 1 with value: 0.8101150831616132.[0m


Early stopping, best iteration is:
[79]	training's auc: 0.790139	valid_1's auc: 0.756943
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.789729	valid_1's auc: 0.755835


[32m[I 2023-05-24 07:56:58,806][0m Trial 3 finished with value: 0.7587255298793054 and parameters: {'num_leaves': 697, 'learning_rate': 0.00984275396879612, 'feature_fraction': 0.11472126988488936, 'bagging_fraction': 0.8590865224756986, 'bagging_freq': 7, 'lambda_l1': 5.304526100312594e-07, 'lambda_l2': 0.012520016697451624}. Best is trial 1 with value: 0.8101150831616132.[0m


Early stopping, best iteration is:
[79]	training's auc: 0.791925	valid_1's auc: 0.758726
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.837274	valid_1's auc: 0.810258
[200]	training's auc: 0.846384	valid_1's auc: 0.81193
[300]	training's auc: 0.853859	valid_1's auc: 0.810957


[32m[I 2023-05-24 07:57:54,963][0m Trial 4 finished with value: 0.8122010229614366 and parameters: {'num_leaves': 614, 'learning_rate': 0.028574283104417796, 'feature_fraction': 0.8563348902225885, 'bagging_fraction': 0.5674193033202796, 'bagging_freq': 5, 'lambda_l1': 0.00041591967741612135, 'lambda_l2': 3.271801911135647e-07}. Best is trial 4 with value: 0.8122010229614366.[0m


Early stopping, best iteration is:
[213]	training's auc: 0.847543	valid_1's auc: 0.812201
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.84017	valid_1's auc: 0.80707
[200]	training's auc: 0.851579	valid_1's auc: 0.809317
[300]	training's auc: 0.860819	valid_1's auc: 0.810101
[400]	training's auc: 0.868234	valid_1's auc: 0.810623
Early stopping, best iteration is:
[375]	training's auc: 0.866308	valid_1's auc: 0.811026


[32m[I 2023-05-24 07:59:18,980][0m Trial 5 finished with value: 0.811025813217545 and parameters: {'num_leaves': 739, 'learning_rate': 0.04802725439378667, 'feature_fraction': 0.47315137495624715, 'bagging_fraction': 0.7991643468056636, 'bagging_freq': 7, 'lambda_l1': 6.899385766876918e-05, 'lambda_l2': 1.3614325829864167e-06}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.798249	valid_1's auc: 0.766257
[200]	training's auc: 0.803984	valid_1's auc: 0.773212
[300]	training's auc: 0.80931	valid_1's auc: 0.780661
[400]	training's auc: 0.812233	valid_1's auc: 0.784964
[500]	training's auc: 0.814243	valid_1's auc: 0.788402
[600]	training's auc: 0.815855	valid_1's auc: 0.790774
[700]	training's auc: 0.816848	valid_1's auc: 0.792758
[800]	training's auc: 0.817511	valid_1's auc: 0.793966
[900]	training's auc: 0.81805	valid_1's auc

[32m[I 2023-05-24 08:01:38,254][0m Trial 6 finished with value: 0.7959067743597293 and parameters: {'num_leaves': 82, 'learning_rate': 0.03767478682260475, 'feature_fraction': 0.10049293426844144, 'bagging_fraction': 0.6959437439954722, 'bagging_freq': 5, 'lambda_l1': 0.3669907219269512, 'lambda_l2': 0.06788781551513305}. Best is trial 4 with value: 0.8122010229614366.[0m


[1000]	training's auc: 0.818462	valid_1's auc: 0.795907
Did not meet early stopping. Best iteration is:
[1000]	training's auc: 0.818462	valid_1's auc: 0.795907
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.820266	valid_1's auc: 0.790176


[32m[I 2023-05-24 08:02:00,351][0m Trial 7 finished with value: 0.8017195779364145 and parameters: {'num_leaves': 807, 'learning_rate': 0.002462862260324613, 'feature_fraction': 0.4961006816401461, 'bagging_fraction': 0.6880251127243885, 'bagging_freq': 3, 'lambda_l1': 1.8224399244948661, 'lambda_l2': 1.2832406194803993e-08}. Best is trial 4 with value: 0.8122010229614366.[0m


Early stopping, best iteration is:
[3]	training's auc: 0.821079	valid_1's auc: 0.80172
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.822563	valid_1's auc: 0.799488
[200]	training's auc: 0.823875	valid_1's auc: 0.801194
[300]	training's auc: 0.825111	valid_1's auc: 0.802811
[400]	training's auc: 0.825981	valid_1's auc: 0.803538
[500]	training's auc: 0.826865	valid_1's auc: 0.804432
[600]	training's auc: 0.827734	valid_1's auc: 0.805106
[700]	training's auc: 0.828467	valid_1's auc: 0.805667
[800]	trai

[32m[I 2023-05-24 08:04:35,880][0m Trial 8 finished with value: 0.8077623638504562 and parameters: {'num_leaves': 205, 'learning_rate': 0.0029947061452672698, 'feature_fraction': 0.6428455346840557, 'bagging_fraction': 0.444055066459103, 'bagging_freq': 9, 'lambda_l1': 0.02695373097037978, 'lambda_l2': 9.797432088338537e-07}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.805094	valid_1's auc: 0.769813
[200]	training's auc: 0.807091	valid_1's auc: 0.772416
[300]	training's auc: 0.809627	valid_1's auc: 0.775275
[400]	training's auc: 0.811503	valid_1's auc: 0.777441
[500]	training's auc: 0.81384	valid_1's auc: 0.78019
[600]	training's auc: 0.815447	valid_1's auc: 0.781593
[700]	training's auc: 0.817194	valid_1's auc: 0.783906
[800]	training's auc: 0.818931	valid_1's auc: 0.785953
[900]	training's auc: 0.820461	valid_1's auc

[32m[I 2023-05-24 08:07:36,700][0m Trial 9 finished with value: 0.7890693994701207 and parameters: {'num_leaves': 754, 'learning_rate': 0.003213466733385622, 'feature_fraction': 0.276964408495576, 'bagging_fraction': 0.4850939166357411, 'bagging_freq': 3, 'lambda_l1': 0.0030712342357467498, 'lambda_l2': 1.4454697589703585e-05}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.834493	valid_1's auc: 0.808321
[200]	training's auc: 0.841749	valid_1's auc: 0.811941
[300]	training's auc: 0.847552	valid_1's auc: 0.809908
Early stopping, best iteration is:
[202]	training's auc: 0.84193	valid_1's auc: 0.812058


[32m[I 2023-05-24 08:08:38,315][0m Trial 10 finished with value: 0.8120584339122756 and parameters: {'num_leaves': 995, 'learning_rate': 0.028296522977221628, 'feature_fraction': 0.9672666573215222, 'bagging_fraction': 0.15782276926645417, 'bagging_freq': 1, 'lambda_l1': 7.646321076850574, 'lambda_l2': 1.0080667969359109e-08}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.831861	valid_1's auc: 0.806571
[200]	training's auc: 0.837874	valid_1's auc: 0.809554
[300]	training's auc: 0.842536	valid_1's auc: 0.808818
Early stopping, best iteration is:
[212]	training's auc: 0.838494	valid_1's auc: 0.80974


[32m[I 2023-05-24 08:09:38,232][0m Trial 11 finished with value: 0.8097402119517221 and parameters: {'num_leaves': 962, 'learning_rate': 0.02617057768739891, 'feature_fraction': 0.9916065868376711, 'bagging_fraction': 0.10132628761025908, 'bagging_freq': 1, 'lambda_l1': 8.15021075963148, 'lambda_l2': 1.0113292122135543e-08}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.849261	valid_1's auc: 0.808227


[32m[I 2023-05-24 08:10:02,589][0m Trial 12 finished with value: 0.8112879930821314 and parameters: {'num_leaves': 549, 'learning_rate': 0.0887151618473456, 'feature_fraction': 0.8430147995371227, 'bagging_fraction': 0.286471251451892, 'bagging_freq': 1, 'lambda_l1': 0.31660716905592595, 'lambda_l2': 8.821366307975741e-08}. Best is trial 4 with value: 0.8122010229614366.[0m


Early stopping, best iteration is:
[43]	training's auc: 0.838134	valid_1's auc: 0.811288
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.838525	valid_1's auc: 0.809131
[200]	training's auc: 0.847542	valid_1's auc: 0.809998
[300]	training's auc: 0.856003	valid_1's auc: 0.811357
[400]	training's auc: 0.863139	valid_1's auc: 0.811198
Early stopping, best iteration is:
[305]	training's auc: 0.85637	valid_1's auc: 0.811433


[32m[I 2023-05-24 08:11:23,660][0m Trial 13 finished with value: 0.811432881954666 and parameters: {'num_leaves': 945, 'learning_rate': 0.022119724673206193, 'feature_fraction': 0.8134129089055528, 'bagging_fraction': 0.5714971022970474, 'bagging_freq': 3, 'lambda_l1': 0.00975392699693534, 'lambda_l2': 8.1985373228445e-05}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.828451	valid_1's auc: 0.805359
[200]	training's auc: 0.832255	valid_1's auc: 0.808418
[300]	training's auc: 0.835316	valid_1's auc: 0.808944
[400]	training's auc: 0.837837	valid_1's auc: 0.809331
Early stopping, best iteration is:
[350]	training's auc: 0.836615	valid_1's auc: 0.80986


[32m[I 2023-05-24 08:12:39,769][0m Trial 14 finished with value: 0.8098598027671475 and parameters: {'num_leaves': 537, 'learning_rate': 0.014446754242637663, 'feature_fraction': 0.985549459895514, 'bagging_fraction': 0.21469900832159589, 'bagging_freq': 7, 'lambda_l1': 9.994927869614749, 'lambda_l2': 3.414970795355619e-07}. Best is trial 4 with value: 0.8122010229614366.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853343	valid_1's auc: 0.81233


[32m[I 2023-05-24 08:13:18,129][0m Trial 15 finished with value: 0.812626490285546 and parameters: {'num_leaves': 871, 'learning_rate': 0.06299407042788457, 'feature_fraction': 0.7307102617478988, 'bagging_fraction': 0.9824611836980233, 'bagging_freq': 2, 'lambda_l1': 0.09110421328063195, 'lambda_l2': 8.572194725132169e-08}. Best is trial 15 with value: 0.812626490285546.[0m


Early stopping, best iteration is:
[92]	training's auc: 0.851095	valid_1's auc: 0.812626
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853542	valid_1's auc: 0.811955


[32m[I 2023-05-24 08:13:48,825][0m Trial 16 finished with value: 0.8127920775684427 and parameters: {'num_leaves': 578, 'learning_rate': 0.09481595207644586, 'feature_fraction': 0.7733922926780457, 'bagging_fraction': 0.9935899018501876, 'bagging_freq': 4, 'lambda_l1': 0.001848548575421097, 'lambda_l2': 3.5716280933244687e-05}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[78]	training's auc: 0.849605	valid_1's auc: 0.812792
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.859936	valid_1's auc: 0.808609
[200]	training's auc: 0.877825	valid_1's auc: 0.808767


[32m[I 2023-05-24 08:14:35,564][0m Trial 17 finished with value: 0.8096712172505153 and parameters: {'num_leaves': 843, 'learning_rate': 0.09005917366749196, 'feature_fraction': 0.7290916585673084, 'bagging_fraction': 0.9817452826968475, 'bagging_freq': 3, 'lambda_l1': 0.06482984227190676, 'lambda_l2': 2.8616618990143937e-05}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[163]	training's auc: 0.871969	valid_1's auc: 0.809671
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.841305	valid_1's auc: 0.810382
[200]	training's auc: 0.851257	valid_1's auc: 0.80972


[32m[I 2023-05-24 08:15:13,941][0m Trial 18 finished with value: 0.810782031939947 and parameters: {'num_leaves': 396, 'learning_rate': 0.05810242316641848, 'feature_fraction': 0.7279003990559625, 'bagging_fraction': 0.9767170762426503, 'bagging_freq': 4, 'lambda_l1': 0.001655588330314507, 'lambda_l2': 0.0003405907694517933}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[128]	training's auc: 0.84506	valid_1's auc: 0.810782
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.846891	valid_1's auc: 0.811598


[32m[I 2023-05-24 08:15:48,375][0m Trial 19 finished with value: 0.8119457425669709 and parameters: {'num_leaves': 608, 'learning_rate': 0.060167379884101174, 'feature_fraction': 0.7599925892971291, 'bagging_fraction': 0.9931309467075099, 'bagging_freq': 2, 'lambda_l1': 0.08100323470894666, 'lambda_l2': 6.505274418643534e-06}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[87]	training's auc: 0.844372	valid_1's auc: 0.811946
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.845965	valid_1's auc: 0.810628
[200]	training's auc: 0.857408	valid_1's auc: 0.811168


[32m[I 2023-05-24 08:16:24,791][0m Trial 20 finished with value: 0.8114811782455108 and parameters: {'num_leaves': 440, 'learning_rate': 0.07851979941799277, 'feature_fraction': 0.6574029296210883, 'bagging_fraction': 0.8944250102885665, 'bagging_freq': 4, 'lambda_l1': 0.00967333681208007, 'lambda_l2': 0.0003556680014863676}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[126]	training's auc: 0.849696	valid_1's auc: 0.811481
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.840113	valid_1's auc: 0.810281
[200]	training's auc: 0.852093	valid_1's auc: 0.811028


[32m[I 2023-05-24 08:17:12,080][0m Trial 21 finished with value: 0.8114282823079187 and parameters: {'num_leaves': 635, 'learning_rate': 0.036309713895405146, 'feature_fraction': 0.8551789777046852, 'bagging_fraction': 0.7902351368513536, 'bagging_freq': 6, 'lambda_l1': 0.0009521368000122069, 'lambda_l2': 1.1100142913268836e-07}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[157]	training's auc: 0.847294	valid_1's auc: 0.811428
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.85054	valid_1's auc: 0.812252


[32m[I 2023-05-24 08:17:46,048][0m Trial 22 finished with value: 0.8125091992934943 and parameters: {'num_leaves': 867, 'learning_rate': 0.05616677205877089, 'feature_fraction': 0.8937045629803467, 'bagging_fraction': 0.5453124958109317, 'bagging_freq': 4, 'lambda_l1': 0.00010998149753008868, 'lambda_l2': 4.387329639748049e-06}. Best is trial 16 with value: 0.8127920775684427.[0m


Early stopping, best iteration is:
[78]	training's auc: 0.845893	valid_1's auc: 0.812509
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.851007	valid_1's auc: 0.811543
[200]	training's auc: 0.867133	valid_1's auc: 0.811132
Early stopping, best iteration is:
[101]	training's auc: 0.851177	valid_1's auc: 0.811649


[32m[I 2023-05-24 08:18:26,620][0m Trial 23 finished with value: 0.811649065351781 and parameters: {'num_leaves': 875, 'learning_rate': 0.056585664253706654, 'feature_fraction': 0.7663193777792657, 'bagging_fraction': 0.9989844328681502, 'bagging_freq': 2, 'lambda_l1': 5.257248847264833e-05, 'lambda_l2': 5.211100930306731e-06}. Best is trial 16 with value: 0.8127920775684427.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.86251	valid_1's auc: 0.811842


[32m[I 2023-05-24 08:18:56,132][0m Trial 24 finished with value: 0.8134820245805122 and parameters: {'num_leaves': 910, 'learning_rate': 0.08828627729795027, 'feature_fraction': 0.919042894244271, 'bagging_fraction': 0.9205359421651845, 'bagging_freq': 4, 'lambda_l1': 0.005273953273231081, 'lambda_l2': 4.381564320447384e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[51]	training's auc: 0.847647	valid_1's auc: 0.813482
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.85559	valid_1's auc: 0.810053


[32m[I 2023-05-24 08:19:28,338][0m Trial 25 finished with value: 0.8102806704445099 and parameters: {'num_leaves': 681, 'learning_rate': 0.08588414201446758, 'feature_fraction': 0.9113483967573486, 'bagging_fraction': 0.9093958619212986, 'bagging_freq': 2, 'lambda_l1': 0.014459077042122445, 'lambda_l2': 8.319543345629393e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[78]	training's auc: 0.851275	valid_1's auc: 0.810281
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.861343	valid_1's auc: 0.809453


[32m[I 2023-05-24 08:20:02,200][0m Trial 26 finished with value: 0.8107613335295849 and parameters: {'num_leaves': 903, 'learning_rate': 0.08861052143040953, 'feature_fraction': 0.7987170766506547, 'bagging_fraction': 0.9337332898726506, 'bagging_freq': 6, 'lambda_l1': 0.0022223843527280356, 'lambda_l2': 3.75569653600202e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[76]	training's auc: 0.855245	valid_1's auc: 0.810761
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.842735	valid_1's auc: 0.808639
[200]	training's auc: 0.856064	valid_1's auc: 0.810676


[32m[I 2023-05-24 08:20:58,656][0m Trial 27 finished with value: 0.8110028149838092 and parameters: {'num_leaves': 779, 'learning_rate': 0.039948954390870525, 'feature_fraction': 0.6740813413053579, 'bagging_fraction': 0.9302208523811653, 'bagging_freq': 4, 'lambda_l1': 0.18292658049196905, 'lambda_l2': 0.0006924437308328184}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[189]	training's auc: 0.854716	valid_1's auc: 0.811003
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.845484	valid_1's auc: 0.807215


[32m[I 2023-05-24 08:21:21,389][0m Trial 28 finished with value: 0.8084017147483074 and parameters: {'num_leaves': 303, 'learning_rate': 0.0971375041574014, 'feature_fraction': 0.9286114453230631, 'bagging_fraction': 0.8063005009779297, 'bagging_freq': 2, 'lambda_l1': 0.03455210566336739, 'lambda_l2': 9.593261245546102e-06}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[42]	training's auc: 0.835432	valid_1's auc: 0.808402
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.827927	valid_1's auc: 0.806468
[200]	training's auc: 0.830758	valid_1's auc: 0.808459
[300]	training's auc: 0.832359	valid_1's auc: 0.809262
[400]	training's auc: 0.833723	valid_1's auc: 0.809563
[500]	training's auc: 0.834809	valid_1's auc: 0.80963
[600]	training's auc: 0.835853	valid_1's auc: 0.809246


[32m[I 2023-05-24 08:22:35,052][0m Trial 29 finished with value: 0.8097517110685899 and parameters: {'num_leaves': 24, 'learning_rate': 0.06560146838111905, 'feature_fraction': 0.912068482356148, 'bagging_fraction': 0.8360104181009584, 'bagging_freq': 6, 'lambda_l1': 0.7481806752656154, 'lambda_l2': 4.39585468546477}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[510]	training's auc: 0.834918	valid_1's auc: 0.809752
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.846261	valid_1's auc: 0.810018
[200]	training's auc: 0.861775	valid_1's auc: 0.809273


[32m[I 2023-05-24 08:23:24,737][0m Trial 30 finished with value: 0.8110787091551368 and parameters: {'num_leaves': 904, 'learning_rate': 0.043150413469875526, 'feature_fraction': 0.8041194385490648, 'bagging_fraction': 0.7209429256042569, 'bagging_freq': 4, 'lambda_l1': 0.006497878132537864, 'lambda_l2': 0.0010770336850376193}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[148]	training's auc: 0.854383	valid_1's auc: 0.811079
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.854386	valid_1's auc: 0.810283


[32m[I 2023-05-24 08:24:00,445][0m Trial 31 finished with value: 0.8109269208124816 and parameters: {'num_leaves': 862, 'learning_rate': 0.06531256414732613, 'feature_fraction': 0.8984287896756085, 'bagging_fraction': 0.7564686075376785, 'bagging_freq': 4, 'lambda_l1': 0.00035816525211337426, 'lambda_l2': 3.243272174906246e-06}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[86]	training's auc: 0.850849	valid_1's auc: 0.810927
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853929	valid_1's auc: 0.812976


[32m[I 2023-05-24 08:24:36,701][0m Trial 32 finished with value: 0.8132865395937591 and parameters: {'num_leaves': 827, 'learning_rate': 0.06639973050396938, 'feature_fraction': 0.8961606571726262, 'bagging_fraction': 0.62772913846233, 'bagging_freq': 5, 'lambda_l1': 0.00016763643517009585, 'lambda_l2': 3.76769457665576e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[98]	training's auc: 0.853593	valid_1's auc: 0.813287
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.856394	valid_1's auc: 0.810699


[32m[I 2023-05-24 08:25:07,781][0m Trial 33 finished with value: 0.812150426847218 and parameters: {'num_leaves': 806, 'learning_rate': 0.07434094162988959, 'feature_fraction': 0.8458737996377396, 'bagging_fraction': 0.860632106699509, 'bagging_freq': 5, 'lambda_l1': 2.391200044262381e-05, 'lambda_l2': 0.00011419557301892207}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[63]	training's auc: 0.846584	valid_1's auc: 0.81215
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.845549	valid_1's auc: 0.80974
[200]	training's auc: 0.860101	valid_1's auc: 0.810131


[32m[I 2023-05-24 08:25:50,008][0m Trial 34 finished with value: 0.8112304974977921 and parameters: {'num_leaves': 712, 'learning_rate': 0.048738281579074026, 'feature_fraction': 0.9540498025132124, 'bagging_fraction': 0.6335711115680691, 'bagging_freq': 10, 'lambda_l1': 0.0009445057421524083, 'lambda_l2': 1.622251240376271e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[137]	training's auc: 0.851429	valid_1's auc: 0.81123
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.866206	valid_1's auc: 0.811189


[32m[I 2023-05-24 08:26:24,366][0m Trial 35 finished with value: 0.8124885008831322 and parameters: {'num_leaves': 942, 'learning_rate': 0.09738281371476014, 'feature_fraction': 0.8690119501883611, 'bagging_fraction': 0.862861961752002, 'bagging_freq': 5, 'lambda_l1': 0.004288735260395181, 'lambda_l2': 0.002148387570986201}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[79]	training's auc: 0.860102	valid_1's auc: 0.812489
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.846496	valid_1's auc: 0.811145


[32m[I 2023-05-24 08:26:54,165][0m Trial 36 finished with value: 0.8114811782455108 and parameters: {'num_leaves': 470, 'learning_rate': 0.06976894881261714, 'feature_fraction': 0.939877549707854, 'bagging_fraction': 0.9371550469289319, 'bagging_freq': 8, 'lambda_l1': 0.0002586304120008184, 'lambda_l2': 0.0001386940623638809}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[80]	training's auc: 0.843321	valid_1's auc: 0.811481
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.839155	valid_1's auc: 0.810478
[200]	training's auc: 0.850058	valid_1's auc: 0.812705
[300]	training's auc: 0.857922	valid_1's auc: 0.812725
[400]	training's auc: 0.864351	valid_1's auc: 0.812712
Early stopping, best iteration is:
[372]	training's auc: 0.862628	valid_1's auc: 0.813149


[32m[I 2023-05-24 08:28:17,750][0m Trial 37 finished with value: 0.8131485501913452 and parameters: {'num_leaves': 658, 'learning_rate': 0.034878426017843724, 'feature_fraction': 0.7097896929774541, 'bagging_fraction': 0.7536337879439974, 'bagging_freq': 5, 'lambda_l1': 0.023690822824041485, 'lambda_l2': 0.00792622797954652}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.840819	valid_1's auc: 0.809352
[200]	training's auc: 0.853567	valid_1's auc: 0.811359
[300]	training's auc: 0.862236	valid_1's auc: 0.811357


[32m[I 2023-05-24 08:29:12,124][0m Trial 38 finished with value: 0.8115271747129822 and parameters: {'num_leaves': 665, 'learning_rate': 0.04580424711634552, 'feature_fraction': 0.6098188631857525, 'bagging_fraction': 0.729946239570396, 'bagging_freq': 5, 'lambda_l1': 0.0007274201503300868, 'lambda_l2': 0.014407303702079557}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[204]	training's auc: 0.853926	valid_1's auc: 0.811527
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.8378	valid_1's auc: 0.810405
[200]	training's auc: 0.847882	valid_1's auc: 0.81253
[300]	training's auc: 0.855036	valid_1's auc: 0.812153


[32m[I 2023-05-24 08:30:10,114][0m Trial 39 finished with value: 0.8129254673241096 and parameters: {'num_leaves': 604, 'learning_rate': 0.03365366986761467, 'feature_fraction': 0.6922073241038355, 'bagging_fraction': 0.6451247913599678, 'bagging_freq': 6, 'lambda_l1': 1.5894381829060876e-05, 'lambda_l2': 0.2479096325075768}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[220]	training's auc: 0.849663	valid_1's auc: 0.812925
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.831883	valid_1's auc: 0.804297
[200]	training's auc: 0.839696	valid_1's auc: 0.808901
[300]	training's auc: 0.84578	valid_1's auc: 0.811815
[400]	training's auc: 0.851156	valid_1's auc: 0.811684
Early stopping, best iteration is:
[311]	training's auc: 0.846407	valid_1's auc: 0.811957


[32m[I 2023-05-24 08:31:28,942][0m Trial 40 finished with value: 0.8119572416838388 and parameters: {'num_leaves': 738, 'learning_rate': 0.018771200834084824, 'feature_fraction': 0.5367714090991335, 'bagging_fraction': 0.6279466435830345, 'bagging_freq': 7, 'lambda_l1': 6.813432778259579e-06, 'lambda_l2': 0.4392567065611612}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.837071	valid_1's auc: 0.808301
[200]	training's auc: 0.846575	valid_1's auc: 0.810518
[300]	training's auc: 0.853242	valid_1's auc: 0.810651


[32m[I 2023-05-24 08:32:31,040][0m Trial 41 finished with value: 0.8111477038563437 and parameters: {'num_leaves': 549, 'learning_rate': 0.03401532591543454, 'feature_fraction': 0.6968634279280952, 'bagging_fraction': 0.7647596917904285, 'bagging_freq': 6, 'lambda_l1': 0.0004273717989351749, 'lambda_l2': 1.0438024652270663}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[258]	training's auc: 0.850916	valid_1's auc: 0.811148
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.838121	valid_1's auc: 0.810725
[200]	training's auc: 0.847988	valid_1's auc: 0.812026


[32m[I 2023-05-24 08:33:16,875][0m Trial 42 finished with value: 0.8125758941713277 and parameters: {'num_leaves': 592, 'learning_rate': 0.03305750302999574, 'feature_fraction': 0.7899887723507408, 'bagging_fraction': 0.6410117790796617, 'bagging_freq': 5, 'lambda_l1': 0.00013960428952389776, 'lambda_l2': 0.19442968039621183}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[152]	training's auc: 0.843361	valid_1's auc: 0.812576
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.840409	valid_1's auc: 0.810209
[200]	training's auc: 0.851169	valid_1's auc: 0.812019


[32m[I 2023-05-24 08:34:02,697][0m Trial 43 finished with value: 0.8124931005298792 and parameters: {'num_leaves': 483, 'learning_rate': 0.04926676692669532, 'feature_fraction': 0.6257270098490151, 'bagging_fraction': 0.8288669570083136, 'bagging_freq': 8, 'lambda_l1': 0.0022565213182577266, 'lambda_l2': 0.00821499443284301}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[173]	training's auc: 0.848485	valid_1's auc: 0.812493
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.849992	valid_1's auc: 0.809257


[32m[I 2023-05-24 08:34:37,439][0m Trial 44 finished with value: 0.8097218133647335 and parameters: {'num_leaves': 650, 'learning_rate': 0.07438480100275105, 'feature_fraction': 0.687282691073355, 'bagging_fraction': 0.6792395956836863, 'bagging_freq': 5, 'lambda_l1': 0.020441493267666193, 'lambda_l2': 0.041342748682619654}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[94]	training's auc: 0.848906	valid_1's auc: 0.809722
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.835385	valid_1's auc: 0.807353
[200]	training's auc: 0.844279	valid_1's auc: 0.809906
[300]	training's auc: 0.850264	valid_1's auc: 0.810138


[32m[I 2023-05-24 08:35:35,483][0m Trial 45 finished with value: 0.8103611642625846 and parameters: {'num_leaves': 367, 'learning_rate': 0.04432422736495813, 'feature_fraction': 0.5738804035784644, 'bagging_fraction': 0.7767741859512245, 'bagging_freq': 6, 'lambda_l1': 5.112307534279379e-06, 'lambda_l2': 0.0033452651011565923}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[270]	training's auc: 0.848597	valid_1's auc: 0.810361
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.836254	valid_1's auc: 0.809156
[200]	training's auc: 0.844674	valid_1's auc: 0.811049
[300]	training's auc: 0.850983	valid_1's auc: 0.810913


[32m[I 2023-05-24 08:36:34,488][0m Trial 46 finished with value: 0.8112879930821313 and parameters: {'num_leaves': 586, 'learning_rate': 0.028448302908117267, 'feature_fraction': 0.8678714302953325, 'bagging_fraction': 0.8985412516276299, 'bagging_freq': 7, 'lambda_l1': 0.004931789141937299, 'lambda_l2': 7.297955402418746}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[207]	training's auc: 0.845206	valid_1's auc: 0.811288
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.831466	valid_1's auc: 0.805801
[200]	training's auc: 0.834033	valid_1's auc: 0.807491
[300]	training's auc: 0.836623	valid_1's auc: 0.808974
[400]	training's auc: 0.839101	valid_1's auc: 0.809782
[500]	training's auc: 0.84174	valid_1's auc: 0.810955
[600]	training's auc: 0.844319	valid_1's auc: 0.811649
[700]	training's auc: 0.847051	valid_1's auc: 0.812061
[800]	tr

[32m[I 2023-05-24 08:39:59,209][0m Trial 47 finished with value: 0.8126770863997644 and parameters: {'num_leaves': 797, 'learning_rate': 0.006835393104215111, 'feature_fraction': 0.8119876164588208, 'bagging_fraction': 0.8205078739962519, 'bagging_freq': 3, 'lambda_l1': 1.8543102817795454e-07, 'lambda_l2': 0.0012875409602434694}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.833013	valid_1's auc: 0.805939
[200]	training's auc: 0.837423	valid_1's auc: 0.808606
[300]	training's auc: 0.841915	valid_1's auc: 0.810814
[400]	training's auc: 0.846411	valid_1's auc: 0.811525
[500]	training's auc: 0.850635	valid_1's auc: 0.812058
[600]	training's auc: 0.854492	valid_1's auc: 0.812284
Early stopping, best iteration is:
[592]	training's auc: 0.854276	valid_1's auc: 0.812374


[32m[I 2023-05-24 08:42:11,688][0m Trial 48 finished with value: 0.812373509714454 and parameters: {'num_leaves': 710, 'learning_rate': 0.012745009245476707, 'feature_fraction': 0.7604788166882899, 'bagging_fraction': 0.8734402647780386, 'bagging_freq': 5, 'lambda_l1': 0.00014625217238643532, 'lambda_l2': 4.4352446638560736e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.834178	valid_1's auc: 0.808425
[200]	training's auc: 0.841695	valid_1's auc: 0.809867
[300]	training's auc: 0.848759	valid_1's auc: 0.810152
Early stopping, best iteration is:
[294]	training's auc: 0.84845	valid_1's auc: 0.810437


[32m[I 2023-05-24 08:43:21,071][0m Trial 49 finished with value: 0.8104370584339123 and parameters: {'num_leaves': 532, 'learning_rate': 0.02317174055334265, 'feature_fraction': 0.990284790168256, 'bagging_fraction': 0.7490210778137779, 'bagging_freq': 4, 'lambda_l1': 4.466118089780291e-05, 'lambda_l2': 0.00018573038037464863}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.830714	valid_1's auc: 0.80727
[200]	training's auc: 0.836165	valid_1's auc: 0.809586
[300]	training's auc: 0.839741	valid_1's auc: 0.809754
[400]	training's auc: 0.842272	valid_1's auc: 0.809657


[32m[I 2023-05-24 08:44:28,087][0m Trial 50 finished with value: 0.8100621872240212 and parameters: {'num_leaves': 173, 'learning_rate': 0.03421876766823819, 'feature_fraction': 0.7105981874821898, 'bagging_fraction': 0.6972695710008457, 'bagging_freq': 6, 'lambda_l1': 0.022626666094273596, 'lambda_l2': 0.005387070028398797}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[371]	training's auc: 0.841466	valid_1's auc: 0.810062
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.831922	valid_1's auc: 0.80638
[200]	training's auc: 0.834645	valid_1's auc: 0.807183
[300]	training's auc: 0.83736	valid_1's auc: 0.808471
[400]	training's auc: 0.840216	valid_1's auc: 0.809582
[500]	training's auc: 0.843127	valid_1's auc: 0.810573
[600]	training's auc: 0.846265	valid_1's auc: 0.810975
[700]	training's auc: 0.849168	valid_1's auc: 0.811203
[800]	tra

[32m[I 2023-05-24 08:47:50,042][0m Trial 51 finished with value: 0.8119595415072123 and parameters: {'num_leaves': 803, 'learning_rate': 0.007401109081392571, 'feature_fraction': 0.8393117314036176, 'bagging_fraction': 0.825610118034985, 'bagging_freq': 3, 'lambda_l1': 1.6023388426125533e-07, 'lambda_l2': 0.0010853624954608844}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.830542	valid_1's auc: 0.804294
[200]	training's auc: 0.83249	valid_1's auc: 0.805743
[300]	training's auc: 0.834482	valid_1's auc: 0.807098
[400]	training's auc: 0.836308	valid_1's auc: 0.808059
[500]	training's auc: 0.83819	valid_1's auc: 0.808859
[600]	training's auc: 0.840075	valid_1's auc: 0.809519
[700]	training's auc: 0.842058	valid_1's auc: 0.810101
[800]	training's auc: 0.843952	valid_1's auc: 0.810711
[900]	training's auc: 0.845874	valid_1's auc

[32m[I 2023-05-24 08:51:22,065][0m Trial 52 finished with value: 0.8113546879599647 and parameters: {'num_leaves': 762, 'learning_rate': 0.00516831276310031, 'feature_fraction': 0.8231774151395358, 'bagging_fraction': 0.9530272828999568, 'bagging_freq': 3, 'lambda_l1': 3.899326602389085e-08, 'lambda_l2': 0.00021538522128244656}. Best is trial 24 with value: 0.8134820245805122.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853109	valid_1's auc: 0.812564


[32m[I 2023-05-24 08:52:00,817][0m Trial 53 finished with value: 0.8128541727995291 and parameters: {'num_leaves': 999, 'learning_rate': 0.05677413524571376, 'feature_fraction': 0.74988608709019, 'bagging_fraction': 0.8115196365903293, 'bagging_freq': 4, 'lambda_l1': 1.2614597951021287e-06, 'lambda_l2': 0.0005837192546166238}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[89]	training's auc: 0.850638	valid_1's auc: 0.812854
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.859186	valid_1's auc: 0.809703


[32m[I 2023-05-24 08:52:34,130][0m Trial 54 finished with value: 0.8102553723874006 and parameters: {'num_leaves': 941, 'learning_rate': 0.07940756787634415, 'feature_fraction': 0.7437261226051711, 'bagging_fraction': 0.7929301988855878, 'bagging_freq': 4, 'lambda_l1': 1.7388438498200657e-06, 'lambda_l2': 0.0004375803200952037}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[67]	training's auc: 0.849986	valid_1's auc: 0.810255
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.850085	valid_1's auc: 0.810863
[200]	training's auc: 0.866646	valid_1's auc: 0.810354


[32m[I 2023-05-24 08:53:26,541][0m Trial 55 finished with value: 0.8112120989108037 and parameters: {'num_leaves': 963, 'learning_rate': 0.0521826910902473, 'feature_fraction': 0.6618890830519388, 'bagging_fraction': 0.8751184746969698, 'bagging_freq': 5, 'lambda_l1': 1.9599396474385122e-05, 'lambda_l2': 2.1552258296054456e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[165]	training's auc: 0.861497	valid_1's auc: 0.811212
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.854878	valid_1's auc: 0.811787


[32m[I 2023-05-24 08:54:03,894][0m Trial 56 finished with value: 0.8125643950544599 and parameters: {'num_leaves': 910, 'learning_rate': 0.06537435822241588, 'feature_fraction': 0.7706209280942548, 'bagging_fraction': 0.9612479172490325, 'bagging_freq': 4, 'lambda_l1': 0.004123956900921172, 'lambda_l2': 7.294904284729923e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[88]	training's auc: 0.851734	valid_1's auc: 0.812564
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.852134	valid_1's auc: 0.812139
[200]	training's auc: 0.869807	valid_1's auc: 0.811773


[32m[I 2023-05-24 08:54:46,945][0m Trial 57 finished with value: 0.8125551957609656 and parameters: {'num_leaves': 968, 'learning_rate': 0.05569331190865971, 'feature_fraction': 0.7284639835478369, 'bagging_fraction': 0.8919288759624484, 'bagging_freq': 5, 'lambda_l1': 0.0012518334838889553, 'lambda_l2': 4.926110366344352e-05}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[112]	training's auc: 0.854854	valid_1's auc: 0.812555
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.861114	valid_1's auc: 0.809954


[32m[I 2023-05-24 08:55:21,181][0m Trial 58 finished with value: 0.8131485501913454 and parameters: {'num_leaves': 993, 'learning_rate': 0.07945863573713279, 'feature_fraction': 0.8830352036822614, 'bagging_fraction': 0.5979807155130894, 'bagging_freq': 4, 'lambda_l1': 0.009106252137354703, 'lambda_l2': 0.0005132104918565297}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[74]	training's auc: 0.854204	valid_1's auc: 0.813149
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.847177	valid_1's auc: 0.810175
[200]	training's auc: 0.863143	valid_1's auc: 0.81003


[32m[I 2023-05-24 08:56:04,849][0m Trial 59 finished with value: 0.8108533264645275 and parameters: {'num_leaves': 993, 'learning_rate': 0.041073149495969005, 'feature_fraction': 0.9543662014436477, 'bagging_fraction': 0.5922917296032384, 'bagging_freq': 3, 'lambda_l1': 0.05939890936644508, 'lambda_l2': 0.002816845421525193}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[112]	training's auc: 0.849457	valid_1's auc: 0.810853
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.860318	valid_1's auc: 0.811173


[32m[I 2023-05-24 08:56:35,368][0m Trial 60 finished with value: 0.8131439505445983 and parameters: {'num_leaves': 998, 'learning_rate': 0.07847322828965776, 'feature_fraction': 0.8920864720310072, 'bagging_fraction': 0.534642139970882, 'bagging_freq': 6, 'lambda_l1': 0.04491611172073994, 'lambda_l2': 0.018558317333886135}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[42]	training's auc: 0.842723	valid_1's auc: 0.813144
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.860308	valid_1's auc: 0.809043


[32m[I 2023-05-24 08:57:08,160][0m Trial 61 finished with value: 0.8106647409478953 and parameters: {'num_leaves': 994, 'learning_rate': 0.07889695044964541, 'feature_fraction': 0.8675998088727328, 'bagging_fraction': 0.516194958164118, 'bagging_freq': 6, 'lambda_l1': 0.015325658202208773, 'lambda_l2': 0.0004919148378066757}. Best is trial 24 with value: 0.8134820245805122.[0m


Early stopping, best iteration is:
[68]	training's auc: 0.850991	valid_1's auc: 0.810665
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.852612	valid_1's auc: 0.812562
[200]	training's auc: 0.869991	valid_1's auc: 0.811189


[32m[I 2023-05-24 08:57:50,157][0m Trial 62 finished with value: 0.8139074919046217 and parameters: {'num_leaves': 914, 'learning_rate': 0.05856625324959715, 'feature_fraction': 0.895552049755324, 'bagging_fraction': 0.6076848986308129, 'bagging_freq': 7, 'lambda_l1': 0.008228463273101132, 'lambda_l2': 0.012077846321216792}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[117]	training's auc: 0.855893	valid_1's auc: 0.813907
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.861618	valid_1's auc: 0.808503


[32m[I 2023-05-24 08:58:16,307][0m Trial 63 finished with value: 0.8110534110980278 and parameters: {'num_leaves': 841, 'learning_rate': 0.09925755667946799, 'feature_fraction': 0.8965122142342117, 'bagging_fraction': 0.5866984959208124, 'bagging_freq': 8, 'lambda_l1': 0.006336471986325564, 'lambda_l2': 0.018348394183215037}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[42]	training's auc: 0.843896	valid_1's auc: 0.811053
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.85601	valid_1's auc: 0.807638
[200]	training's auc: 0.875186	valid_1's auc: 0.805821


[32m[I 2023-05-24 08:58:55,935][0m Trial 64 finished with value: 0.8094642331468943 and parameters: {'num_leaves': 916, 'learning_rate': 0.0694455162964047, 'feature_fraction': 0.9630441497962567, 'bagging_fraction': 0.5427877347050609, 'bagging_freq': 7, 'lambda_l1': 0.04316299231947613, 'lambda_l2': 0.06593188923857922}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[115]	training's auc: 0.859711	valid_1's auc: 0.809464
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.856752	valid_1's auc: 0.80917


[32m[I 2023-05-24 08:59:26,698][0m Trial 65 finished with value: 0.8114719789520166 and parameters: {'num_leaves': 837, 'learning_rate': 0.08187616006889804, 'feature_fraction': 0.8747322698641393, 'bagging_fraction': 0.47178864169185486, 'bagging_freq': 7, 'lambda_l1': 0.011035826582540032, 'lambda_l2': 0.031999270886099176}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[65]	training's auc: 0.848257	valid_1's auc: 0.811472
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.852666	valid_1's auc: 0.810909


[32m[I 2023-05-24 09:00:00,635][0m Trial 66 finished with value: 0.8113937849573153 and parameters: {'num_leaves': 889, 'learning_rate': 0.06074408464618363, 'feature_fraction': 0.9228146223667848, 'bagging_fraction': 0.6632331522376091, 'bagging_freq': 6, 'lambda_l1': 0.03202376672809282, 'lambda_l2': 0.00793508825881421}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[76]	training's auc: 0.847473	valid_1's auc: 0.811394
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.848989	valid_1's auc: 0.813353


[32m[I 2023-05-24 09:00:39,508][0m Trial 67 finished with value: 0.813826998086547 and parameters: {'num_leaves': 926, 'learning_rate': 0.05026408528445307, 'feature_fraction': 0.8375159288364995, 'bagging_fraction': 0.5967089866891111, 'bagging_freq': 6, 'lambda_l1': 0.124670401306131, 'lambda_l2': 0.13910136906347714}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[94]	training's auc: 0.848035	valid_1's auc: 0.813827
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.848982	valid_1's auc: 0.811546
[200]	training's auc: 0.864397	valid_1's auc: 0.811056


[32m[I 2023-05-24 09:01:29,320][0m Trial 68 finished with value: 0.8124333051221666 and parameters: {'num_leaves': 919, 'learning_rate': 0.049409451360680826, 'feature_fraction': 0.9302997363684573, 'bagging_fraction': 0.6047337497719444, 'bagging_freq': 7, 'lambda_l1': 0.13513605513382348, 'lambda_l2': 0.004026936154133971}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[161]	training's auc: 0.859167	valid_1's auc: 0.812433
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853894	valid_1's auc: 0.809501


[32m[I 2023-05-24 09:02:05,961][0m Trial 69 finished with value: 0.8099402965852223 and parameters: {'num_leaves': 831, 'learning_rate': 0.07224203226207612, 'feature_fraction': 0.9956961749687533, 'bagging_fraction': 0.5533502123920813, 'bagging_freq': 8, 'lambda_l1': 0.22516825077671887, 'lambda_l2': 0.10362048531116401}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[98]	training's auc: 0.853299	valid_1's auc: 0.80994
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.844793	valid_1's auc: 0.810566
[200]	training's auc: 0.858828	valid_1's auc: 0.81038


[32m[I 2023-05-24 09:02:55,158][0m Trial 70 finished with value: 0.8115777708272005 and parameters: {'num_leaves': 936, 'learning_rate': 0.03918466313107381, 'feature_fraction': 0.8319794951076709, 'bagging_fraction': 0.5104482719364949, 'bagging_freq': 5, 'lambda_l1': 0.09917229575401842, 'lambda_l2': 0.022303117058671156}. Best is trial 62 with value: 0.8139074919046217.[0m


Early stopping, best iteration is:
[151]	training's auc: 0.852424	valid_1's auc: 0.811578
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.863128	valid_1's auc: 0.813974


[32m[I 2023-05-24 09:03:29,799][0m Trial 71 finished with value: 0.8147607263762142 and parameters: {'num_leaves': 968, 'learning_rate': 0.08763502043577061, 'feature_fraction': 0.9019125656696226, 'bagging_fraction': 0.6170675368292509, 'bagging_freq': 6, 'lambda_l1': 0.05275656010203414, 'lambda_l2': 0.01144787273227786}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[81]	training's auc: 0.857887	valid_1's auc: 0.814761
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862239	valid_1's auc: 0.807509


[32m[I 2023-05-24 09:03:57,367][0m Trial 72 finished with value: 0.8094757322637621 and parameters: {'num_leaves': 964, 'learning_rate': 0.08821556513732041, 'feature_fraction': 0.8848317129423998, 'bagging_fraction': 0.6120695319839116, 'bagging_freq': 6, 'lambda_l1': 0.07564163738012054, 'lambda_l2': 0.01123635339507627}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[41]	training's auc: 0.843777	valid_1's auc: 0.809476
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.853148	valid_1's auc: 0.8096
[200]	training's auc: 0.87028	valid_1's auc: 0.807746


[32m[I 2023-05-24 09:04:38,822][0m Trial 73 finished with value: 0.8106371430674124 and parameters: {'num_leaves': 886, 'learning_rate': 0.06206631086241098, 'feature_fraction': 0.9119954475664043, 'bagging_fraction': 0.5807160080925496, 'bagging_freq': 7, 'lambda_l1': 0.039049924427517015, 'lambda_l2': 0.03153683309268804}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[116]	training's auc: 0.856694	valid_1's auc: 0.810637
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862513	valid_1's auc: 0.812969
[200]	training's auc: 0.883338	valid_1's auc: 0.81161


[32m[I 2023-05-24 09:05:20,460][0m Trial 74 finished with value: 0.8133371357079776 and parameters: {'num_leaves': 972, 'learning_rate': 0.08484967954393172, 'feature_fraction': 0.8517883415471328, 'bagging_fraction': 0.6528266751862122, 'bagging_freq': 5, 'lambda_l1': 0.009117593107068044, 'lambda_l2': 0.008191122598567007}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[120]	training's auc: 0.868001	valid_1's auc: 0.813337
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.86351	valid_1's auc: 0.810773


[32m[I 2023-05-24 09:05:47,279][0m Trial 75 finished with value: 0.8115731711804532 and parameters: {'num_leaves': 864, 'learning_rate': 0.09891780205011087, 'feature_fraction': 0.8486286742696364, 'bagging_fraction': 0.6611533065103118, 'bagging_freq': 5, 'lambda_l1': 0.010413352644369647, 'lambda_l2': 0.006377512384623025}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[42]	training's auc: 0.844651	valid_1's auc: 0.811573
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862563	valid_1's auc: 0.808432


[32m[I 2023-05-24 09:06:18,313][0m Trial 76 finished with value: 0.8099471960553429 and parameters: {'num_leaves': 934, 'learning_rate': 0.08517072127299215, 'feature_fraction': 0.9454497639025792, 'bagging_fraction': 0.6121370337477468, 'bagging_freq': 5, 'lambda_l1': 0.0024221666841334915, 'lambda_l2': 0.0025616015823152535}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[62]	training's auc: 0.850501	valid_1's auc: 0.809947
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.849254	valid_1's auc: 0.812291


[32m[I 2023-05-24 09:06:55,821][0m Trial 77 finished with value: 0.8124126067118045 and parameters: {'num_leaves': 891, 'learning_rate': 0.052216134045329696, 'feature_fraction': 0.7990018802273104, 'bagging_fraction': 0.7085982412641214, 'bagging_freq': 4, 'lambda_l1': 0.017849633963317416, 'lambda_l2': 0.011398472857481818}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[90]	training's auc: 0.8473	valid_1's auc: 0.812413
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.85709	valid_1's auc: 0.810598


[32m[I 2023-05-24 09:07:30,033][0m Trial 78 finished with value: 0.812272317486017 and parameters: {'num_leaves': 966, 'learning_rate': 0.06756359234000273, 'feature_fraction': 0.9733037339999927, 'bagging_fraction': 0.5667436215340285, 'bagging_freq': 5, 'lambda_l1': 0.007660810183818202, 'lambda_l2': 0.0013322091303120199}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[78]	training's auc: 0.851021	valid_1's auc: 0.812272
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.845893	valid_1's auc: 0.811235
[200]	training's auc: 0.861145	valid_1's auc: 0.810465


[32m[I 2023-05-24 09:08:17,389][0m Trial 79 finished with value: 0.811934243450103 and parameters: {'num_leaves': 824, 'learning_rate': 0.04529360499158205, 'feature_fraction': 0.8564593792884614, 'bagging_fraction': 0.6896876328497437, 'bagging_freq': 6, 'lambda_l1': 0.39402896197512965, 'lambda_l2': 0.000237509427382134}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[147]	training's auc: 0.854001	valid_1's auc: 0.811934
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.852587	valid_1's auc: 0.810603


[32m[I 2023-05-24 09:08:53,721][0m Trial 80 finished with value: 0.8109821165734471 and parameters: {'num_leaves': 857, 'learning_rate': 0.059806709814959, 'feature_fraction': 0.9331999849982173, 'bagging_fraction': 0.7327140783429528, 'bagging_freq': 4, 'lambda_l1': 0.002826291769493588, 'lambda_l2': 0.00452856337427034}. Best is trial 71 with value: 0.8147607263762142.[0m


Early stopping, best iteration is:
[87]	training's auc: 0.849528	valid_1's auc: 0.810982
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.858893	valid_1's auc: 0.812935


[32m[I 2023-05-24 09:09:26,609][0m Trial 81 finished with value: 0.8148113224904328 and parameters: {'num_leaves': 974, 'learning_rate': 0.07496665344106089, 'feature_fraction': 0.9011253799278782, 'bagging_fraction': 0.6220601458405756, 'bagging_freq': 6, 'lambda_l1': 0.05582306915107806, 'lambda_l2': 0.06944996697439584}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[67]	training's auc: 0.849665	valid_1's auc: 0.814811
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.863313	valid_1's auc: 0.808301


[32m[I 2023-05-24 09:09:55,276][0m Trial 82 finished with value: 0.8093170444509861 and parameters: {'num_leaves': 971, 'learning_rate': 0.08892390047836912, 'feature_fraction': 0.9044589222017267, 'bagging_fraction': 0.6544190914473066, 'bagging_freq': 6, 'lambda_l1': 0.09861421367075907, 'lambda_l2': 0.04971462828905216}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[45]	training's auc: 0.845488	valid_1's auc: 0.809317
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.856787	valid_1's auc: 0.812815
[200]	training's auc: 0.874929	valid_1's auc: 0.8102
Early stopping, best iteration is:
[101]	training's auc: 0.856918	valid_1's auc: 0.8129


[32m[I 2023-05-24 09:10:33,610][0m Trial 83 finished with value: 0.8129001692670003 and parameters: {'num_leaves': 924, 'learning_rate': 0.06982641598358423, 'feature_fraction': 0.8351484451069361, 'bagging_fraction': 0.6277859885099975, 'bagging_freq': 5, 'lambda_l1': 0.030726901198714646, 'lambda_l2': 0.10545409261196531}. Best is trial 81 with value: 0.8148113224904328.[0m


[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.859374	valid_1's auc: 0.811001


[32m[I 2023-05-24 09:11:06,191][0m Trial 84 finished with value: 0.8130289593759199 and parameters: {'num_leaves': 946, 'learning_rate': 0.0766382779073131, 'feature_fraction': 0.8809757197969864, 'bagging_fraction': 0.6703664008492839, 'bagging_freq': 6, 'lambda_l1': 0.01340770284734141, 'lambda_l2': 0.02770566623114721}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[65]	training's auc: 0.849658	valid_1's auc: 0.813029
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.847595	valid_1's auc: 0.811258
[200]	training's auc: 0.863227	valid_1's auc: 0.810849


[32m[I 2023-05-24 09:11:49,945][0m Trial 85 finished with value: 0.8116996614659994 and parameters: {'num_leaves': 778, 'learning_rate': 0.054230106248333756, 'feature_fraction': 0.822374538889744, 'bagging_fraction': 0.5672960060254657, 'bagging_freq': 5, 'lambda_l1': 0.006110892122571244, 'lambda_l2': 0.011194779491882752}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[141]	training's auc: 0.855	valid_1's auc: 0.8117
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.859616	valid_1's auc: 0.806907


[32m[I 2023-05-24 09:12:23,102][0m Trial 86 finished with value: 0.8093814395054459 and parameters: {'num_leaves': 902, 'learning_rate': 0.08829790010635857, 'feature_fraction': 0.7820997481328502, 'bagging_fraction': 0.610936066721611, 'bagging_freq': 7, 'lambda_l1': 0.02333540467433054, 'lambda_l2': 0.05431036954146245}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[75]	training's auc: 0.853314	valid_1's auc: 0.809381
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.855158	valid_1's auc: 0.812436
[200]	training's auc: 0.873222	valid_1's auc: 0.808462


[32m[I 2023-05-24 09:13:02,839][0m Trial 87 finished with value: 0.8127552803944657 and parameters: {'num_leaves': 951, 'learning_rate': 0.061866558478595915, 'feature_fraction': 0.912388248621547, 'bagging_fraction': 0.7003050596413571, 'bagging_freq': 7, 'lambda_l1': 0.0036253517739877235, 'lambda_l2': 1.241389598145953e-05}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[102]	training's auc: 0.855586	valid_1's auc: 0.812755
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.859975	valid_1's auc: 0.81072


[32m[I 2023-05-24 09:13:35,238][0m Trial 88 finished with value: 0.8112074992640566 and parameters: {'num_leaves': 978, 'learning_rate': 0.07438462634938021, 'feature_fraction': 0.9702195167216268, 'bagging_fraction': 0.6364878835518716, 'bagging_freq': 6, 'lambda_l1': 0.15127217274424076, 'lambda_l2': 0.0018770985291326442}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[63]	training's auc: 0.848606	valid_1's auc: 0.811207
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.847922	valid_1's auc: 0.811909
[200]	training's auc: 0.863877	valid_1's auc: 0.811946


[32m[I 2023-05-24 09:14:20,856][0m Trial 89 finished with value: 0.812568994701207 and parameters: {'num_leaves': 880, 'learning_rate': 0.04694358200017941, 'feature_fraction': 0.8570419687760843, 'bagging_fraction': 0.6525207120888163, 'bagging_freq': 5, 'lambda_l1': 0.0013758273324007404, 'lambda_l2': 0.007708713265252215}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[139]	training's auc: 0.85499	valid_1's auc: 0.812569
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.856686	valid_1's auc: 0.809991


[32m[I 2023-05-24 09:14:52,574][0m Trial 90 finished with value: 0.8105014534883721 and parameters: {'num_leaves': 926, 'learning_rate': 0.06728669602829045, 'feature_fraction': 0.9454875383494383, 'bagging_fraction': 0.6783551553168806, 'bagging_freq': 4, 'lambda_l1': 0.060299590219793024, 'lambda_l2': 0.01852158358961704}. Best is trial 81 with value: 0.8148113224904328.[0m


Early stopping, best iteration is:
[58]	training's auc: 0.844837	valid_1's auc: 0.810501
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862076	valid_1's auc: 0.815763


[32m[I 2023-05-24 09:15:26,431][0m Trial 91 finished with value: 0.8176815020606417 and parameters: {'num_leaves': 998, 'learning_rate': 0.0820671592743757, 'feature_fraction': 0.8930950799841506, 'bagging_fraction': 0.5944132149154198, 'bagging_freq': 6, 'lambda_l1': 0.04604986012546942, 'lambda_l2': 0.01507835805376649}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[72]	training's auc: 0.853173	valid_1's auc: 0.817682
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.861565	valid_1's auc: 0.811973


[32m[I 2023-05-24 09:15:57,316][0m Trial 92 finished with value: 0.8130151604356786 and parameters: {'num_leaves': 983, 'learning_rate': 0.08314812850765325, 'feature_fraction': 0.8916456044087423, 'bagging_fraction': 0.5981630738409466, 'bagging_freq': 6, 'lambda_l1': 0.018601467432051714, 'lambda_l2': 0.09439859545237006}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[63]	training's auc: 0.85067	valid_1's auc: 0.813015
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.863987	valid_1's auc: 0.808264


[32m[I 2023-05-24 09:16:27,257][0m Trial 93 finished with value: 0.812288416249632 and parameters: {'num_leaves': 953, 'learning_rate': 0.09298563936375362, 'feature_fraction': 0.9252448078342133, 'bagging_fraction': 0.6253291329748496, 'bagging_freq': 5, 'lambda_l1': 0.009235058341542597, 'lambda_l2': 0.0046191404571223225}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[56]	training's auc: 0.850214	valid_1's auc: 0.812288
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.85195	valid_1's auc: 0.812162


[32m[I 2023-05-24 09:17:02,167][0m Trial 94 finished with value: 0.8130450581395349 and parameters: {'num_leaves': 906, 'learning_rate': 0.05856320517907372, 'feature_fraction': 0.865911752076411, 'bagging_fraction': 0.5890105200156248, 'bagging_freq': 6, 'lambda_l1': 0.029056630180179416, 'lambda_l2': 0.035140465382642094}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[80]	training's auc: 0.847905	valid_1's auc: 0.813045
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862459	valid_1's auc: 0.807774


[32m[I 2023-05-24 09:17:29,248][0m Trial 95 finished with value: 0.8100437886370326 and parameters: {'num_leaves': 865, 'learning_rate': 0.09978337469466167, 'feature_fraction': 0.8037902432065079, 'bagging_fraction': 0.6797211177579287, 'bagging_freq': 4, 'lambda_l1': 0.04977507745520909, 'lambda_l2': 0.01449657272892797}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[44]	training's auc: 0.845488	valid_1's auc: 0.810044
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.857327	valid_1's auc: 0.812052


[32m[I 2023-05-24 09:18:07,720][0m Trial 96 finished with value: 0.8122746173093905 and parameters: {'num_leaves': 981, 'learning_rate': 0.07328149920470728, 'feature_fraction': 0.8222068499102183, 'bagging_fraction': 0.5687250580936088, 'bagging_freq': 6, 'lambda_l1': 0.004976982301809624, 'lambda_l2': 0.000127319189830671}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[98]	training's auc: 0.85702	valid_1's auc: 0.812275
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.850487	valid_1's auc: 0.812001


[32m[I 2023-05-24 09:18:44,702][0m Trial 97 finished with value: 0.8127115837503679 and parameters: {'num_leaves': 932, 'learning_rate': 0.05218416441276048, 'feature_fraction': 0.8849794878480689, 'bagging_fraction': 0.6416916097351367, 'bagging_freq': 7, 'lambda_l1': 0.0006658177672186149, 'lambda_l2': 0.0007545731882641202}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[87]	training's auc: 0.847928	valid_1's auc: 0.812712
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.855686	valid_1's auc: 0.809115


[32m[I 2023-05-24 09:19:14,181][0m Trial 98 finished with value: 0.8102829702678833 and parameters: {'num_leaves': 954, 'learning_rate': 0.06372475278270784, 'feature_fraction': 0.8481696480130183, 'bagging_fraction': 0.7156361997859011, 'bagging_freq': 5, 'lambda_l1': 0.01196500399380784, 'lambda_l2': 0.002103932775730722}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[44]	training's auc: 0.840986	valid_1's auc: 0.810283
[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.855744	valid_1's auc: 0.811191


[32m[I 2023-05-24 09:19:49,731][0m Trial 99 finished with value: 0.8116421658816603 and parameters: {'num_leaves': 741, 'learning_rate': 0.08030394723489734, 'feature_fraction': 0.9085538880916514, 'bagging_fraction': 0.6178771155101888, 'bagging_freq': 3, 'lambda_l1': 0.09110952918569978, 'lambda_l2': 0.18003702154466145}. Best is trial 91 with value: 0.8176815020606417.[0m


Early stopping, best iteration is:
[98]	training's auc: 0.855448	valid_1's auc: 0.811642
Best Trial: score 0.8176815020606417,
params {'num_leaves': 998, 'learning_rate': 0.0820671592743757, 'feature_fraction': 0.8930950799841506, 'bagging_fraction': 0.5944132149154198, 'bagging_freq': 6, 'lambda_l1': 0.04604986012546942, 'lambda_l2': 0.01507835805376649}


In [None]:
LGBMClassifier(bagging_fraction=0.7319382550725074, bagging_freq=2,
               feature_fraction=0.633004609254542,
               lambda_l1=8.898067404913072e-08, lambda_l2=0.04345454720459056,
               learning_rate=0.06524134688369941, num_leaves=626)

In [None]:
final_lgb_model

In [42]:
# FEATURE ENGINEERING
test_df = pd.read_csv(DATA_PATH+'/test_data.csv')
test_df["Timestamp"] = test_df["Timestamp"].apply(convert_time)
test_df = feature_engineering(test_df)
test_df = categorical_label_encoding(test_df, is_train=False) # LGBM을 위한 FE
# test_df.to_csv(DATA_PATH + 'test_featured.csv', index=False)

# Inference
# test_df = pd.read_csv(DATA_PATH+'test_featured.csv')

# LEAVE LAST INTERACTION ONLY
test_df = test_df[test_df['userID'] != test_df['userID'].shift(-1)]

# DROP ANSWERCODE
test_df = test_df.drop(['answerCode'], axis=1)

In [43]:
preds = final_lgb_model.predict(test_df[FEATS])

In [46]:
# # MAKE PREDICTION
# predicts = np.mean(predicts_list, axis=0)

submission = pd.read_csv(DATA_PATH+'/sample_submission.csv')
submission['prediction'] = preds

submission.to_csv(DATA_PATH+'/lgbm_kfold_tune_submission.csv',index=False)

## probability로 출력하게 코드 수정

In [None]:
# FEATURE ENGINEERING
test_df = pd.read_csv(DATA_PATH+'/test_data.csv')
test_df["Timestamp"] = test_df["Timestamp"].apply(convert_time)
test_df = feature_engineering(test_df)
test_df = categorical_label_encoding(test_df, is_train=False) # LGBM을 위한 FE
# test_df.to_csv(DATA_PATH + 'test_featured.csv', index=False)

# Inference
# test_df = pd.read_csv(DATA_PATH+'test_featured.csv')

# LEAVE LAST INTERACTION ONLY
test_df = test_df[test_df['userID'] != test_df['userID'].shift(-1)]

# DROP ANSWERCODE
test_df = test_df.drop(['answerCode'], axis=1)
# # MAKE PREDICTION
# predicts = np.mean(predicts_list, axis=0)
​
submission = pd.read_csv(DATA_PATH+'/sample_submission.csv')
submission['prediction'] = preds
​
submission.to_csv(DATA_PATH+'/lgbm_pro_tune_auc_submission.csv',index=False)


(bagging_fraction=0.5944132149154198, bagging_freq=6,
feature_fraction=0.8930950799841506,
lambda_l1=0.04604986012546942, lambda_l2=0.01507835805376649,
learning_rate=0.0820671592743757, num_leaves=998)

In [54]:
dtrain = lgb.Dataset(train[FEATS], y_train)
dtest = lgb.Dataset(test[FEATS], y_test)
param = {'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'bagging_fraction':0.5944132149154198, 'bagging_freq':6,
        'feature_fraction':0.8930950799841506,
        'lambda_l1':0.04604986012546942, 'lambda_l2':0.01507835805376649,
        'learning_rate':0.0820671592743757, 'num_leaves':998,
        'seed':42}

model = lgb.train(
    param, 
    dtrain,
    valid_sets=[dtrain, dtest],
    verbose_eval=100,
    num_boost_round=1000,
    early_stopping_rounds=100,
)

[LightGBM] [Info] Number of positive: 1188575, number of negative: 624388
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2292
[LightGBM] [Info] Number of data points in the train set: 1812963, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655598 -> initscore=0.643738
[LightGBM] [Info] Start training from score 0.643738
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.862076	valid_1's auc: 0.815763
Early stopping, best iteration is:
[72]	training's auc: 0.853173	valid_1's auc: 0.817682


In [55]:
preds = model.predict(test_df[FEATS])

In [49]:
# # MAKE PREDICTION
# predicts = np.mean(predicts_list, axis=0)

submission = pd.read_csv(DATA_PATH+'/sample_submission.csv')
submission['prediction'] = preds

submission.to_csv(DATA_PATH+'/lgbm_pro_tune_auc_submission.csv',index=False)