In [1]:
import warnings 
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import os
import random
import numpy as np
import yaml
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
import numpy as np
from datetime import datetime, timezone, timedelta
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
from wandb.lightgbm import wandb_callback, log_summary

In [3]:
#wandb_callback 수정 
from typing import TYPE_CHECKING, Callable
import wandb
from wandb.sdk.lib import telemetry as wb_telemetry

MINIMIZE_METRICS = [
    "l1",
    "l2",
    "rmse",
    "mape",
    "huber",
    "fair",
    "poisson",
    "gamma",
    "binary_logloss",
]

MAXIMIZE_METRICS = ["map", "auc", "average_precision"]
        
def wandb_callback(log_params=True, define_metric=True) -> Callable:
    """Automatically integrates LightGBM with wandb.

    Arguments:
        log_params: (boolean) if True (default) logs params passed to lightgbm.train as W&B config
        define_metric: (boolean) if True (default) capture model performance at the best step, instead of the last step, of training in your `wandb.summary`

    Passing `wandb_callback` to LightGBM will:
      - log params passed to lightgbm.train as W&B config (default).
      - log evaluation metrics collected by LightGBM, such as rmse, accuracy etc to Weights & Biases
      - Capture the best metric in `wandb.summary` when `define_metric=True` (default).

    Use `log_summary` as an extension of this callback.

    Example:
        ```python
        params = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
            .
        }
        gbm = lgb.train(params,
                        lgb_train,
                        num_boost_round=10,
                        valid_sets=lgb_eval,
                        valid_names=('validation'),
                        callbacks=[wandb_callback()])
        ```
    """
    def _define_metric(data: str, metric_name: str) -> None:
    
        """Capture model performance at the best step.
        instead of the last step, of training in your `wandb.summary`
        """
        if "loss" in str.lower(metric_name):
            wandb.define_metric(f"{data}_{metric_name}", summary="min")
        elif str.lower(metric_name) in MINIMIZE_METRICS:
            wandb.define_metric(f"{data}_{metric_name}", summary="min")
        elif str.lower(metric_name) in MAXIMIZE_METRICS:
            wandb.define_metric(f"{data}_{metric_name}", summary="max")
            
    log_params_list: "List[bool]" = [log_params]
    define_metric_list: "List[bool]" = [define_metric]

    def _init(env: "CallbackEnv") -> None:
        with wb_telemetry.context() as tel:
            tel.feature.lightgbm_wandb_callback = True

        wandb.config.update(env.params)
        log_params_list[0] = False

        if define_metric_list[0]:
            for i in range(len(env.evaluation_result_list)):
                data_type = env.evaluation_result_list[i][0]
                metric_name = env.evaluation_result_list[i][1]
                _define_metric(data_type, metric_name)

    def _callback(env: "CallbackEnv") -> None:
        if log_params_list[0]:
            _init(env)
        # eval_results: "Dict[str, Dict[str, List[Any]]]" = {}
        # recorder = lightgbm.record_evaluation(eval_results)
        # recorder(env)
        eval_results = {x[0]:{x[1:][0]:x[1:][1:]} for x in env.evaluation_result_list}

        for validation_key in eval_results.keys():
            for key in eval_results[validation_key].keys():
                 wandb.log(
                     {validation_key + "_" + key: eval_results[validation_key][key][0]},
                     commit=False,
                 )
        for item in eval_results:
            if len(item) == 4:
                wandb.log({f"{item[0]}_{item[1]}": item[2]}, commit=False)

        # Previous log statements use commit=False. This commits them.
        wandb.log({"iteration": env.iteration}, commit=True)

    return _callback

## Training

In [4]:
sweep_config_path = '/data/ephemeral/level2-dkt-recsys-06/code/boost/lgbmsweepconfigv2.yaml'

# 노트북의 이름 설정

os.environ['WANDB_NOTEBOOK_NAME'] = 'LGBM_Train.ipynb'
# YAML 파일 로드
with open(sweep_config_path, 'r') as file:
    sweep_config = yaml.safe_load(file)

# W&B 스위프트 설정
sweep_id = wandb.sweep(sweep=sweep_config, project="lightgbm-sweep")


Create sweep with ID: y7hdc9jx
Sweep URL: https://wandb.ai/boostcamp6-recsys6/lightgbm-sweep/sweeps/y7hdc9jx


In [5]:
X = pd.read_csv('/data/ephemeral/level2-dkt-recsys-06/data/FE_v4_2.csv')
test =  pd.read_csv('/data/ephemeral/level2-dkt-recsys-06/data/FE_Test_v4_2.csv')


test = test[test["userID"] != test["userID"].shift(-1)]
test = test.drop(["answerCode"], axis=1)

# X.shape
print(X.shape)
print(test.shape)

X.head()
test.head()


(2525956, 23)
(744, 22)


Unnamed: 0,userID,assessmentItemID,testId,KnowledgeTag,SolvingTime,CumulativeTime,Month,DayOfWeek,TimeOfDay,problems_cumulative,...,CumulativeUserProblemAnswerRate,CumulativeProblemCount,ProblemAnswerRate,TagAnswerRate,CumulativeUserTagAnswerRate,TestAnswerRate,categorize_solvingTime,categorize_ProblemAnswerRate,categorize_TagAnswerRate,categorize_TestAnswerRate
1035,3,50133008,50133,5289,45,361,10,Monday,Afternoon,1035,...,69,290,52,54,81,66,6,Difficult,Very Difficult,Medium
1706,4,70146008,70146,9080,24,196,12,Sunday,Dawn,670,...,69,28,53,56,66,74,4,Difficult,Difficult,Easy
3023,13,70111008,70111,9660,14,118,12,Sunday,Dawn,1316,...,69,34,31,44,33,41,2,Extremely Difficult,Extremely Difficult,Extremely Difficult
4283,17,90064006,90064,2611,76,456,10,Friday,Dawn,1259,...,81,624,37,51,100,62,7,Extremely Difficult,Very Difficult,Difficult
4670,26,60135007,60135,1422,45,320,10,Friday,Morning,386,...,75,178,35,60,66,67,6,Extremely Difficult,Difficult,Medium


In [6]:
label = X["answerCode"]
g=X["userID"]

In [7]:
feat=[ 'userID','assessmentItemID','testId','KnowledgeTag',
       'SolvingTime','CumulativeTime',
       'Month','DayOfWeek','TimeOfDay',
       #'problems_cumulative',
       #'problems_last7days',
       #'problems_last30days',
       'CumulativeUserProblemAnswerRate','CumulativeProblemCount',
       'ProblemAnswerRate','TagAnswerRate','CumulativeUserTagAnswerRate','TestAnswerRate',
       'categorize_solvingTime',
       'categorize_ProblemAnswerRate','categorize_TagAnswerRate','categorize_TestAnswerRate'
]

In [8]:
default_config = {
    "num_leaves": 10,  # 최소값 10
    "learning_rate": 0.0001,  # 최소값 0.0001
    "max_depth": -1,  # -1 (깊이 제한 없음)
    "min_data_in_leaf": 20,  # 최소값 20
    "feature_fraction": 0.6,  # 최소값 0.6
    "bagging_fraction": 0.6,  # 최소값 0.6
    "bagging_freq": 0,  # 최소값 0
    "lambda_l1": 0.0,  # 최소값 0.0
    "lambda_l2": 0.0,  # 최소값 0.0
    "cat_smooth": 10,  # 최소값 10
}




test_feat = test[feat]

# LabelEncoder 적용

label_encoders = {}
for column in [
    "DayOfWeek",
    "TimeOfDay",
    "categorize_ProblemAnswerRate",
    "categorize_TagAnswerRate",
    "categorize_TestAnswerRate",
]:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    # 테스트 데이터에 대해서는 transform만 적용
    test_feat[column] = le.transform(test_feat[column])


def train():
    auc = 0
    acc = 0
    test_preds = np.zeros(len(test_feat))
    
    # userID별 마지막 인덱스 찾기
    last_indices = X.groupby("userID").tail(1).index

    # 검증 데이터셋 생성
    X_valid = X.loc[last_indices]
    y_valid = X_valid["answerCode"]

    # 학습 데이터셋 생성
    X_train = X.drop(last_indices)
    y_train = X_train["answerCode"]

    lgb_train = lgb.Dataset(
        X_train[feat],
        y_train,
        categorical_feature=[
            "userID",
            "assessmentItemID",
            "testId,KnowledgeTag",
            "Month",
            "categorize_solvingTime",
            "categorize_ProblemAnswerRate",
            "categorize_TagAnswerRate",
            "categorize_TestAnswerRate"
        ],
    )
    lgb_valid = lgb.Dataset(
        X_valid[feat],
        y_valid,
        categorical_feature=[
            "userID",
            "assessmentItemID",
            "testId,KnowledgeTag",
            "Month",
            "categorize_solvingTime",
            "categorize_ProblemAnswerRate",
            "categorize_TagAnswerRate",
            "categorize_TestAnswerRate"
        ],
    )

    wandb.init(project=f"lightgbm-sweep", config=default_config)
    wandb.run.name = f"nofoldlgbm"
    current_params = {
        "objective": "binary",
        "metric": ["auc"],
        "device": "cpu",
        "num_leaves": wandb.config.num_leaves,
        "learning_rate": wandb.config.learning_rate,
        "max_depth": wandb.config.max_depth,
        "min_data_in_leaf": wandb.config.min_data_in_leaf,
        "feature_fraction": wandb.config.feature_fraction,
        "bagging_fraction": wandb.config.bagging_fraction,
        "bagging_freq": wandb.config.bagging_freq,
        "lambda_l1": wandb.config.lambda_l1,
        "lambda_l2": wandb.config.lambda_l2,
        "cat_smooth": wandb.config.cat_smooth,
    }
    model = lgb.train(
        current_params,
        lgb_train,
        valid_sets=[lgb_train, lgb_valid],
        num_boost_round=50000,
        callbacks=[
            wandb_callback(log_params=True, define_metric=True),
            lgb.early_stopping(100),
        ],
        categorical_feature=["KnowledgeTag"],
    )
    preds = model.predict(X_valid[feat])
    acc = accuracy_score(y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(y_valid, preds)
    test_preds += model.predict(test_feat)
    print(f"VALID AUC : {auc} ACC : {acc}\n")
    wandb.log({"auc": auc, "accuracy": acc})
    output_dir = "output/"
    write_path = os.path.join(
        output_dir,
        f"auc:{auc} acc:{acc}" + "sweep" + " lgbm.csv",
    )
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(write_path, "w", encoding="utf8") as w:
        print("writing prediction : {}".format(write_path))
        w.write("id,prediction\n")
        for id, p in enumerate(test_preds):
            w.write("{},{}\n".format(id, p))

In [9]:

wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: r53xi2fm with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.880587528028289
[34m[1mwandb[0m: 	bagging_freq: 29
[34m[1mwandb[0m: 	feature_fraction: 0.615865952668752
[34m[1mwandb[0m: 	lambda_l1: 4.456332884445666
[34m[1mwandb[0m: 	lambda_l2: 11.096611731679277
[34m[1mwandb[0m: 	learning_rate: 0.23963393665605
[34m[1mwandb[0m: 	max_depth: -1
[34m[1mwandb[0m: 	min_data_in_leaf: 51
[34m[1mwandb[0m: 	num_leaves: 39
[34m[1mwandb[0m: Currently logged in as: [33mwooksbaby[0m ([33mboostcamp6-recsys6[0m). Use [1m`wandb login --relogin`[0m to force relogin


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020392 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[687]	training's auc: 0.854856	valid_1's auc: 0.808986
VALID AUC : 0.8089863854318649 ACC : 0.7280300994356356

writing prediction : output/auc:0.8089863854318649 acc:0.7280300994356356sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█████████████████

0,1
accuracy,0.72803
auc,0.80899
iteration,786.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dwlb5829 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9001405883645326
[34m[1mwandb[0m: 	bagging_freq: 23
[34m[1mwandb[0m: 	feature_fraction: 0.8129907224559122
[34m[1mwandb[0m: 	lambda_l1: 2.489664829279511
[34m[1mwandb[0m: 	lambda_l2: 2.3634891424899616
[34m[1mwandb[0m: 	learning_rate: 0.3056182377169366
[34m[1mwandb[0m: 	max_depth: 16
[34m[1mwandb[0m: 	min_data_in_leaf: 73
[34m[1mwandb[0m: 	num_leaves: 24


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[287]	training's auc: 0.848389	valid_1's auc: 0.805148
VALID AUC : 0.8051483101027251 ACC : 0.7303144316044075

writing prediction : output/auc:0.8051483101027251 acc:0.7303144316044075sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▅▆▇▇▇▇▇████████████████████████████████

0,1
accuracy,0.73031
auc,0.80515
iteration,386.0


[34m[1mwandb[0m: Agent Starting Run: sgcbfdxq with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9418535713702882
[34m[1mwandb[0m: 	bagging_freq: 21
[34m[1mwandb[0m: 	feature_fraction: 0.8615456713621384
[34m[1mwandb[0m: 	lambda_l1: 6.736821599801911
[34m[1mwandb[0m: 	lambda_l2: 13.847732338890513
[34m[1mwandb[0m: 	learning_rate: 0.3090160383156925
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	min_data_in_leaf: 38
[34m[1mwandb[0m: 	num_leaves: 35


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024927 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[96]	training's auc: 0.847393	valid_1's auc: 0.80626
VALID AUC : 0.8062603133412707 ACC : 0.7284332168771835

writing prediction : output/auc:0.8062603133412707 acc:0.7284332168771835sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
valid_1_auc,▁▅▆▇▇▇▇▇▇███████████████████████████████

0,1
accuracy,0.72843
auc,0.80626
iteration,195.0


[34m[1mwandb[0m: Agent Starting Run: 7dn5leyp with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8926282723738113
[34m[1mwandb[0m: 	bagging_freq: 23
[34m[1mwandb[0m: 	feature_fraction: 0.87516182918595
[34m[1mwandb[0m: 	lambda_l1: 10.72187235078246
[34m[1mwandb[0m: 	lambda_l2: 3.7800212924799776
[34m[1mwandb[0m: 	learning_rate: 0.02039638119924408
[34m[1mwandb[0m: 	max_depth: 27
[34m[1mwandb[0m: 	min_data_in_leaf: 116
[34m[1mwandb[0m: 	num_leaves: 50


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.128400 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[710]	training's auc: 0.847227	valid_1's auc: 0.807065
VALID AUC : 0.8070649265194649 ACC : 0.7312550389680194

writing prediction : output/auc:0.8070649265194649 acc:0.7312550389680194sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▂▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇████████████████████

0,1
accuracy,0.73126
auc,0.80706
iteration,809.0


[34m[1mwandb[0m: Agent Starting Run: zs8j41jy with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.547575914268917
[34m[1mwandb[0m: 	bagging_freq: 22
[34m[1mwandb[0m: 	feature_fraction: 0.6642163207442668
[34m[1mwandb[0m: 	lambda_l1: 3.176691699030465
[34m[1mwandb[0m: 	lambda_l2: 7.836268248324185
[34m[1mwandb[0m: 	learning_rate: 0.20016734229385116
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	min_data_in_leaf: 80
[34m[1mwandb[0m: 	num_leaves: 59


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.150687 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[308]	training's auc: 0.849866	valid_1's auc: 0.80744
VALID AUC : 0.8074398224148427 ACC : 0.7299113141628595

writing prediction : output/auc:0.8074398224148427 acc:0.7299113141628595sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▆▇▇▇███████████████████████████████████

0,1
accuracy,0.72991
auc,0.80744
iteration,407.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gm1y74xs with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7165724178632369
[34m[1mwandb[0m: 	bagging_freq: 18
[34m[1mwandb[0m: 	feature_fraction: 0.7290837383867151
[34m[1mwandb[0m: 	lambda_l1: 3.825251068092436
[34m[1mwandb[0m: 	lambda_l2: 4.261209061442401
[34m[1mwandb[0m: 	learning_rate: 0.27803930944309274
[34m[1mwandb[0m: 	max_depth: 39
[34m[1mwandb[0m: 	min_data_in_leaf: 99
[34m[1mwandb[0m: 	num_leaves: 34


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[253]	training's auc: 0.848697	valid_1's auc: 0.806762
VALID AUC : 0.8067623066787195 ACC : 0.7325987637731792

writing prediction : output/auc:0.8067623066787195 acc:0.7325987637731792sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▆▇▇▇▇▇▇████████████████████████████████

0,1
accuracy,0.7326
auc,0.80676
iteration,352.0


[34m[1mwandb[0m: Agent Starting Run: j5fcg32p with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.6816823472188052
[34m[1mwandb[0m: 	bagging_freq: 26
[34m[1mwandb[0m: 	feature_fraction: 0.6676596484358364
[34m[1mwandb[0m: 	lambda_l1: 12.70131043379327
[34m[1mwandb[0m: 	lambda_l2: 13.014986228627489
[34m[1mwandb[0m: 	learning_rate: 0.19644823776771503
[34m[1mwandb[0m: 	max_depth: 22
[34m[1mwandb[0m: 	min_data_in_leaf: 57
[34m[1mwandb[0m: 	num_leaves: 16


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023117 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[421]	training's auc: 0.844304	valid_1's auc: 0.80633
VALID AUC : 0.8063300958720183 ACC : 0.7287019618382156

writing prediction : output/auc:0.8063300958720183 acc:0.7287019618382156sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▃▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████
valid_1_auc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████

0,1
accuracy,0.7287
auc,0.80633
iteration,520.0


[34m[1mwandb[0m: Agent Starting Run: n37j08gp with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9507767454900156
[34m[1mwandb[0m: 	bagging_freq: 25
[34m[1mwandb[0m: 	feature_fraction: 0.5021988759684439
[34m[1mwandb[0m: 	lambda_l1: 4.914025542857314
[34m[1mwandb[0m: 	lambda_l2: 13.409398654269973
[34m[1mwandb[0m: 	learning_rate: 0.6470444839003195
[34m[1mwandb[0m: 	max_depth: 17
[34m[1mwandb[0m: 	min_data_in_leaf: 108
[34m[1mwandb[0m: 	num_leaves: 17


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[510]	training's auc: 0.850685	valid_1's auc: 0.805267
VALID AUC : 0.8052672764886499 ACC : 0.7274926095135716

writing prediction : output/auc:0.8052672764886499 acc:0.7274926095135716sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████
valid_1_auc,▁▄▅▆▆▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇██████▇▇▇▇

0,1
accuracy,0.72749
auc,0.80527
iteration,609.0


[34m[1mwandb[0m: Agent Starting Run: je9vrddo with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.6447725045909287
[34m[1mwandb[0m: 	bagging_freq: 6
[34m[1mwandb[0m: 	feature_fraction: 0.7226302704297018
[34m[1mwandb[0m: 	lambda_l1: 7.498896336998261
[34m[1mwandb[0m: 	lambda_l2: 8.926576475617411
[34m[1mwandb[0m: 	learning_rate: 0.34247145345848434
[34m[1mwandb[0m: 	max_depth: 33
[34m[1mwandb[0m: 	min_data_in_leaf: 84
[34m[1mwandb[0m: 	num_leaves: 37


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025569 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[85]	training's auc: 0.845098	valid_1's auc: 0.80611
VALID AUC : 0.8061098707335534 ACC : 0.7300456866433754

writing prediction : output/auc:0.8061098707335534 acc:0.7300456866433754sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
valid_1_auc,▁▆▇▇████████████████████████████████████

0,1
accuracy,0.73005
auc,0.80611
iteration,184.0


[34m[1mwandb[0m: Agent Starting Run: 4yiku3zf with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9797524028875516
[34m[1mwandb[0m: 	bagging_freq: 21
[34m[1mwandb[0m: 	feature_fraction: 0.7229262822476921
[34m[1mwandb[0m: 	lambda_l1: 12.306302940329957
[34m[1mwandb[0m: 	lambda_l2: 4.299449866913359
[34m[1mwandb[0m: 	learning_rate: 0.281041464391539
[34m[1mwandb[0m: 	max_depth: 45
[34m[1mwandb[0m: 	min_data_in_leaf: 75
[34m[1mwandb[0m: 	num_leaves: 34


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[506]	training's auc: 0.85313	valid_1's auc: 0.807298
VALID AUC : 0.8072978722435448 ACC : 0.7277613544746036

writing prediction : output/auc:0.8072978722435448 acc:0.7277613544746036sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▄▆▇▇▇▇▇▇▇▇▇▇▇█████▇████████████████████

0,1
accuracy,0.72776
auc,0.8073
iteration,605.0


[34m[1mwandb[0m: Agent Starting Run: mmnsmheh with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.6567232393736742
[34m[1mwandb[0m: 	bagging_freq: 14
[34m[1mwandb[0m: 	feature_fraction: 0.6412789138481435
[34m[1mwandb[0m: 	lambda_l1: 2.5392951292665806
[34m[1mwandb[0m: 	lambda_l2: 12.238108665312753
[34m[1mwandb[0m: 	learning_rate: 0.4786631702715863
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	min_data_in_leaf: 84
[34m[1mwandb[0m: 	num_leaves: 24


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022470 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[128]	training's auc: 0.844279	valid_1's auc: 0.804108
VALID AUC : 0.804108113124457 ACC : 0.7265520021499597

writing prediction : output/auc:0.804108113124457 acc:0.7265520021499597sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
valid_1_auc,▁▅▇▇▇▇▇█████████████████████████████████

0,1
accuracy,0.72655
auc,0.80411
iteration,227.0


[34m[1mwandb[0m: Agent Starting Run: 6f3lhvq2 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9335540640367408
[34m[1mwandb[0m: 	bagging_freq: 10
[34m[1mwandb[0m: 	feature_fraction: 0.7602026615884261
[34m[1mwandb[0m: 	lambda_l1: 10.484778746788749
[34m[1mwandb[0m: 	lambda_l2: 10.309184666290449
[34m[1mwandb[0m: 	learning_rate: 0.4548557573727584
[34m[1mwandb[0m: 	max_depth: -1
[34m[1mwandb[0m: 	min_data_in_leaf: 89
[34m[1mwandb[0m: 	num_leaves: 54


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.174070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[57]	training's auc: 0.84799	valid_1's auc: 0.805874
VALID AUC : 0.8058739255532064 ACC : 0.7274926095135716

writing prediction : output/auc:0.8058739255532064 acc:0.7274926095135716sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▄▅▆▆▇▇▇▇▇▇█████████████████████████████

0,1
accuracy,0.72749
auc,0.80587
iteration,156.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eaolfd35 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7428447494382879
[34m[1mwandb[0m: 	bagging_freq: 7
[34m[1mwandb[0m: 	feature_fraction: 0.6297040862422343
[34m[1mwandb[0m: 	lambda_l1: 3.212990012970746
[34m[1mwandb[0m: 	lambda_l2: 1.1989157923123015
[34m[1mwandb[0m: 	learning_rate: 0.3784326165976891
[34m[1mwandb[0m: 	max_depth: 17
[34m[1mwandb[0m: 	min_data_in_leaf: 54
[34m[1mwandb[0m: 	num_leaves: 40


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[99]	training's auc: 0.846045	valid_1's auc: 0.805889
VALID AUC : 0.8058892119387611 ACC : 0.7282988443966676

writing prediction : output/auc:0.8058892119387611 acc:0.7282988443966676sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
valid_1_auc,▁▅▇▇▇▇██████████████████████████████████

0,1
accuracy,0.7283
auc,0.80589
iteration,198.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0oyjv2fy with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.5030605679333731
[34m[1mwandb[0m: 	bagging_freq: 19
[34m[1mwandb[0m: 	feature_fraction: 0.6009189179985628
[34m[1mwandb[0m: 	lambda_l1: 4.000587736268685
[34m[1mwandb[0m: 	lambda_l2: 6.027254920095588
[34m[1mwandb[0m: 	learning_rate: 0.2676837422212581
[34m[1mwandb[0m: 	max_depth: 17
[34m[1mwandb[0m: 	min_data_in_leaf: 60
[34m[1mwandb[0m: 	num_leaves: 32


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018877 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[296]	training's auc: 0.846769	valid_1's auc: 0.805158
VALID AUC : 0.8051581396461552 ACC : 0.7315237839290514

writing prediction : output/auc:0.8051581396461552 acc:0.7315237839290514sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
valid_1_auc,▁▆▇▇████████████████████████████████████

0,1
accuracy,0.73152
auc,0.80516
iteration,395.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xhx6bhbi with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.6713223503642771
[34m[1mwandb[0m: 	bagging_freq: 19
[34m[1mwandb[0m: 	feature_fraction: 0.8360060549054226
[34m[1mwandb[0m: 	lambda_l1: 5.063977784555731
[34m[1mwandb[0m: 	lambda_l2: 6.7045430145849485
[34m[1mwandb[0m: 	learning_rate: 0.4523916658979612
[34m[1mwandb[0m: 	max_depth: 49
[34m[1mwandb[0m: 	min_data_in_leaf: 105
[34m[1mwandb[0m: 	num_leaves: 15


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024626 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[265]	training's auc: 0.845487	valid_1's auc: 0.803425
VALID AUC : 0.8034253935124002 ACC : 0.7287019618382156

writing prediction : output/auc:0.8034253935124002 acc:0.7287019618382156sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
training_auc,▁▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
valid_1_auc,▁▆▆▇▇▇▇▇▇██████████████████████▇▇▇▇▇▇▇▇▇

0,1
accuracy,0.7287
auc,0.80343
iteration,364.0


[34m[1mwandb[0m: Agent Starting Run: iadw4m70 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7302477028310428
[34m[1mwandb[0m: 	bagging_freq: 26
[34m[1mwandb[0m: 	feature_fraction: 0.6274958008739928
[34m[1mwandb[0m: 	lambda_l1: 10.398299830609988
[34m[1mwandb[0m: 	lambda_l2: 9.77124081761937
[34m[1mwandb[0m: 	learning_rate: 0.23349664029318987
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	min_data_in_leaf: 119
[34m[1mwandb[0m: 	num_leaves: 25


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021491 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[477]	training's auc: 0.847731	valid_1's auc: 0.807312
VALID AUC : 0.8073115324178703 ACC : 0.7301800591238914

writing prediction : output/auc:0.8073115324178703 acc:0.7301800591238914sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
valid_1_auc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████

0,1
accuracy,0.73018
auc,0.80731
iteration,576.0


[34m[1mwandb[0m: Agent Starting Run: 3p2h84bc with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8438238608946947
[34m[1mwandb[0m: 	bagging_freq: 30
[34m[1mwandb[0m: 	feature_fraction: 0.9299413366542824
[34m[1mwandb[0m: 	lambda_l1: 11.264065998147746
[34m[1mwandb[0m: 	lambda_l2: 4.255382447038536
[34m[1mwandb[0m: 	learning_rate: 0.421092153334178
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	min_data_in_leaf: 31
[34m[1mwandb[0m: 	num_leaves: 12


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2321
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
