In [5]:
import warnings 
# Silence every warning category for the rest of the notebook session.
warnings.filterwarnings(action="ignore")

In [6]:
import pandas as pd
import os
import random
import numpy as np
import yaml
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
import numpy as np
from datetime import datetime, timezone, timedelta
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
from wandb.lightgbm import wandb_callback, log_summary

In [7]:
# wandb_callback, modified
from typing import TYPE_CHECKING, Callable
import wandb
from wandb.sdk.lib import telemetry as wb_telemetry

# Metric names whose W&B summary should track the minimum value seen.
MINIMIZE_METRICS = [
    "l1", "l2", "rmse", "mape",
    "huber", "fair", "poisson", "gamma",
    "binary_logloss",
]

# Metric names whose W&B summary should track the maximum value seen.
MAXIMIZE_METRICS = [
    "map",
    "auc",
    "average_precision",
]

# def _define_metric(data: str, metric_name: str) -> None:
    
#     """Capture model performance at the best step.

#     instead of the last step, of training in your `wandb.summary`
#     """
#     if "loss" in str.lower(metric_name):
#         wandb.define_metric(f"{data}_{metric_name}", summary="min")
#     elif str.lower(metric_name) in MINIMIZE_METRICS:
#         wandb.define_metric(f"{data}_{metric_name}", summary="min")
#     elif str.lower(metric_name) in MAXIMIZE_METRICS:
#         wandb.define_metric(f"{data}_{metric_name}", summary="max")
        
def wandb_callback(log_params=True, define_metric=True) -> Callable:
    """Automatically integrates LightGBM with wandb.

    Arguments:
        log_params: (boolean) if True (default) logs params passed to lightgbm.train as W&B config
        define_metric: (boolean) if True (default) capture model performance at the best step, instead of the last step, of training in your `wandb.summary`

    Passing `wandb_callback` to LightGBM will:
      - log params passed to lightgbm.train as W&B config (default).
      - log evaluation metrics collected by LightGBM, such as rmse, accuracy etc to Weights & Biases
      - Capture the best metric in `wandb.summary` when `define_metric=True` (default).

    Every entry of ``env.evaluation_result_list`` is logged as
    ``"<dataset>_<metric>"``, so several metrics on the same dataset are all
    recorded. ``log_params`` and ``define_metric`` are honoured independently
    of each other.

    Returns:
        A callable taking LightGBM's ``CallbackEnv``; pass it in the
        ``callbacks`` list of ``lightgbm.train``.

    Use `log_summary` as an extension of this callback.

    Example:
        ```python
        params = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
        }
        gbm = lgb.train(params,
                        lgb_train,
                        num_boost_round=10,
                        valid_sets=[lgb_eval],
                        valid_names=['validation'],
                        callbacks=[wandb_callback()])
        ```
    """

    def _define_metric(data: str, metric_name: str) -> None:
        """Ask W&B to keep the best (min or max) value of one dataset/metric pair.

        Metrics that are neither loss-like nor listed in MINIMIZE_METRICS /
        MAXIMIZE_METRICS are left with W&B's default summary behaviour.
        """
        lowered = metric_name.lower()
        metric_key = f"{data}_{metric_name}"
        if "loss" in lowered or lowered in MINIMIZE_METRICS:
            wandb.define_metric(metric_key, summary="min")
        elif lowered in MAXIMIZE_METRICS:
            wandb.define_metric(metric_key, summary="max")

    # One-shot flag: the setup below must run only on the first callback call.
    state = {"initialized": False}

    def _init(env: "CallbackEnv") -> None:
        """First-iteration setup: telemetry flag, config upload, summary definitions."""
        with wb_telemetry.context() as tel:
            tel.feature.lightgbm_wandb_callback = True

        if log_params:
            wandb.config.update(env.params)

        if define_metric:
            for entry in env.evaluation_result_list or []:
                # entry = (dataset_name, metric_name, value, ...)
                _define_metric(entry[0], entry[1])

        state["initialized"] = True

    def _callback(env: "CallbackEnv") -> None:
        """Log all evaluation results of the current boosting iteration."""
        if not state["initialized"]:
            _init(env)

        # Read the result tuples directly instead of folding them into a
        # {dataset: {metric: value}} dict, which kept only the last metric
        # of each dataset.
        metrics = {
            f"{entry[0]}_{entry[1]}": entry[2]
            for entry in env.evaluation_result_list or []
        }
        metrics["iteration"] = env.iteration

        # A single committed call records all metrics at the same W&B step.
        wandb.log(metrics, commit=True)

    return _callback

## Training

In [8]:
# Name of the notebook, exposed to W&B through the environment.
os.environ['WANDB_NOTEBOOK_NAME'] = 'LGBM_Train.ipynb'

# Path of the YAML file that holds the sweep definition.
sweep_config_path = '/data/ephemeral/level2-dkt-recsys-06/code/boost/lgbmsweepconfigv2.yaml'

# Load the YAML file.
with open(sweep_config_path) as file:
    sweep_config = yaml.safe_load(file)

# Create the W&B sweep; the returned id is what the agent is started with.
sweep_id = wandb.sweep(project="lightgbm-sweep", sweep=sweep_config)


Create sweep with ID: 5cceiqt8
Sweep URL: https://wandb.ai/boostcamp6-recsys6/lightgbm-sweep/sweeps/5cceiqt8


In [9]:
# Read the feature-engineered training and test tables.
X = pd.read_csv('/data/ephemeral/level2-dkt-recsys-06/data/FE_v4_2.csv')
test = pd.read_csv('/data/ephemeral/level2-dkt-recsys-06/data/FE_Test_v4_2.csv')

# (disabled) use only each user's most recent rows
#X = X.groupby('userID').tail(10)

#X = X[X['answerCode'] != -1]

# Keep a test row only when the following row has a different userID,
# then remove the target column from the test frame.
is_last_row_of_user_run = test["userID"].ne(test["userID"].shift(-1))
test = test[is_last_row_of_user_run]
test = test.drop(columns=["answerCode"])

# X.shape
print(X.shape)
print(test.shape)

X.head()
test.head()


(2525956, 23)
(744, 22)


Unnamed: 0,userID,assessmentItemID,testId,KnowledgeTag,SolvingTime,CumulativeTime,Month,DayOfWeek,TimeOfDay,problems_cumulative,...,CumulativeUserProblemAnswerRate,CumulativeProblemCount,ProblemAnswerRate,TagAnswerRate,CumulativeUserTagAnswerRate,TestAnswerRate,categorize_solvingTime,categorize_ProblemAnswerRate,categorize_TagAnswerRate,categorize_TestAnswerRate
1035,3,50133008,50133,5289,45,361,10,Monday,Afternoon,1035,...,69,290,52,54,81,66,6,Difficult,Very Difficult,Medium
1706,4,70146008,70146,9080,24,196,12,Sunday,Dawn,670,...,69,28,53,56,66,74,4,Difficult,Difficult,Easy
3023,13,70111008,70111,9660,14,118,12,Sunday,Dawn,1316,...,69,34,31,44,33,41,2,Extremely Difficult,Extremely Difficult,Extremely Difficult
4283,17,90064006,90064,2611,76,456,10,Friday,Dawn,1259,...,81,624,37,51,100,62,7,Extremely Difficult,Very Difficult,Difficult
4670,26,60135007,60135,1422,45,320,10,Friday,Morning,386,...,75,178,35,60,66,67,6,Extremely Difficult,Difficult,Medium


In [10]:
# Convenience handles on the user-id column and the target column of X.
g = X["userID"]
label = X["answerCode"]

In [11]:
# Columns fed to the model, in this exact order.
feat = [
    # identifiers
    'userID',
    'assessmentItemID',
    'testId',
    'KnowledgeTag',
    # timing
    'SolvingTime',
    'CumulativeTime',
    'Month',
    'DayOfWeek',
    'TimeOfDay',
    # activity counts
    'problems_cumulative',
    'problems_last7days',
    'problems_last30days',
    'CumulativeUserProblemAnswerRate',
    'CumulativeProblemCount',
    # answer rates
    'ProblemAnswerRate',
    'TagAnswerRate',
    'CumulativeUserTagAnswerRate',
    'TestAnswerRate',
    # bucketed versions
    'categorize_solvingTime',
    'categorize_ProblemAnswerRate',
    'categorize_TagAnswerRate',
    'categorize_TestAnswerRate',
]

In [12]:
# Fallback hyperparameters handed to wandb.init; the trailing notes record
# the minimum value noted for each parameter.
default_config = dict(
    num_leaves=10,           # minimum value: 10
    learning_rate=0.0001,    # minimum value: 0.0001
    max_depth=-1,            # minimum value: -1 (no depth limit)
    min_data_in_leaf=20,     # minimum value: 20
    feature_fraction=0.6,    # minimum value: 0.6
    bagging_fraction=0.6,    # minimum value: 0.6
    bagging_freq=0,          # minimum value: 0
    lambda_l1=0.0,           # minimum value: 0.0
    lambda_l2=0.0,           # minimum value: 0.0
    cat_smooth=10,           # minimum value: 10
)

# Apply LabelEncoder to the string-valued columns.
# Each encoder is fitted on the training frame only and then reused on the
# test frame. The fitted encoders are stored in `label_encoders`, keyed by
# column name, so the integer codes can be mapped back to the original
# labels later (previously the dict was created but never filled).
label_encoders = {}
for column in [
    "DayOfWeek",
    "TimeOfDay",
    "categorize_ProblemAnswerRate",
    "categorize_TagAnswerRate",
    "categorize_TestAnswerRate",
]:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    # Test data only gets transform(); LabelEncoder raises ValueError if the
    # test column holds a label that never occurs in X.
    test[column] = le.transform(test[column])
    label_encoders[column] = le

# n_fold = 10

# sfcv = StratifiedGroupKFold(n_splits=n_fold)


def train():
    """Run one W&B sweep trial: fit LightGBM, score the hold-out rows, write test predictions.

    Reads the module-level ``X``, ``test``, ``feat`` and ``default_config``.

    Steps:
      1. Hold out the last row of every ``userID`` in ``X`` for validation and
         train on the remaining rows.
      2. Start a W&B run and build the LightGBM parameters from ``wandb.config``.
      3. Train with early stopping (100 rounds without improvement).
      4. Log validation AUC / accuracy to W&B and print them.
      5. Write the test-set predictions to a CSV file under ``output/``.

    Returns:
        None. Results are logged to W&B and written to disk.
    """
    # Find the last row of each userID.
    last_indices = X.groupby("userID").tail(1).index

    # Validation set: those last rows.
    X_valid = X.loc[last_indices]
    y_valid = X_valid["answerCode"]

    # Training set: everything else.
    X_train = X.drop(last_indices)
    y_train = X_train["answerCode"]

    # Single source of truth for the categorical columns. The original code
    # passed ["KnowledgeTag", "Month"] to the Datasets but ["KnowledgeTag"] to
    # lgb.train, and the lgb.train argument replaces the Dataset setting, so
    # this list keeps the behaviour that was effectively in use.
    # NOTE(review): add "Month" here if it was meant to be categorical too.
    categorical_features = ["KnowledgeTag"]

    lgb_train = lgb.Dataset(
        X_train[feat], y_train, categorical_feature=categorical_features
    )
    lgb_valid = lgb.Dataset(
        X_valid[feat], y_valid, categorical_feature=categorical_features
    )

    wandb.init(project="lightgbm-sweep", config=default_config)
    wandb.run.name = "nofoldlgbm"

    # Fixed settings plus every tunable listed in default_config, read back
    # from wandb.config.
    current_params = {
        "objective": "binary",
        "metric": ["auc"],
        "device": "cpu",
    }
    for param_name in default_config:
        current_params[param_name] = wandb.config[param_name]

    model = lgb.train(
        current_params,
        lgb_train,
        valid_sets=[lgb_train, lgb_valid],
        num_boost_round=50000,
        callbacks=[
            wandb_callback(log_params=True, define_metric=True),
            lgb.early_stopping(100),
        ],
    )

    preds = model.predict(X_valid[feat])
    acc = accuracy_score(y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(y_valid, preds)

    # Predict on the same feature columns, in the same order, as used for
    # training instead of relying on the layout of the whole test frame.
    test_preds = model.predict(test[feat])

    print(f"VALID AUC : {auc} ACC : {acc}\n")
    wandb.log({"auc": auc, "accuracy": acc})

    output_dir = "output/"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)
    write_path = os.path.join(
        output_dir,
        f"auc:{auc} acc:{acc}"
        + "sweep"
        + " lgbm.csv",
    )
    with open(write_path, "w", encoding="utf8") as w:
        print("writing prediction : {}".format(write_path))
        w.write("id,prediction\n")
        for row_id, p in enumerate(test_preds):
            w.write("{},{}\n".format(row_id, p))

In [13]:

# Start a sweep agent that calls train() for each run of the sweep.
wandb.agent(sweep_id, function=train)

[34m[1mwandb[0m: Agent Starting Run: omje2ut4 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7902723725867474
[34m[1mwandb[0m: 	bagging_freq: 10
[34m[1mwandb[0m: 	feature_fraction: 0.8957465629555941
[34m[1mwandb[0m: 	lambda_l1: 13.224559421254526
[34m[1mwandb[0m: 	lambda_l2: 10.443495971149565
[34m[1mwandb[0m: 	learning_rate: 0.395003039981994
[34m[1mwandb[0m: 	max_depth: 16
[34m[1mwandb[0m: 	min_data_in_leaf: 75
[34m[1mwandb[0m: 	num_leaves: 46
[34m[1mwandb[0m: Currently logged in as: [33mwooksbaby[0m ([33mboostcamp6-recsys6[0m). Use [1m`wandb login --relogin`[0m to force relogin


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.171627 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[64]	training's auc: 0.849384	valid_1's auc: 0.807005
VALID AUC : 0.8070053710504478 ACC : 0.7311206664875034

writing prediction : output/auc:0.8070053710504478 acc:0.7311206664875034sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
valid_1_auc,▁▄▅▆▇▇▇▇▇████████████████▇▇█████████████

0,1
accuracy,0.73112
auc,0.80701
iteration,163.0


[34m[1mwandb[0m: Agent Starting Run: ebk19c5v with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.725119381216223
[34m[1mwandb[0m: 	bagging_freq: 4
[34m[1mwandb[0m: 	feature_fraction: 0.5544882636206482
[34m[1mwandb[0m: 	lambda_l1: 3.5935288233852276
[34m[1mwandb[0m: 	lambda_l2: 8.526000330683184
[34m[1mwandb[0m: 	learning_rate: 0.5145760465279323
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	min_data_in_leaf: 35
[34m[1mwandb[0m: 	num_leaves: 42


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[268]	training's auc: 0.832608	valid_1's auc: 0.798181
VALID AUC : 0.7981812598164434 ACC : 0.7198333781241602

writing prediction : output/auc:0.7981812598164434 acc:0.7198333781241602sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▆▆▇▇▇██████████████████████████████████
valid_1_auc,▁▆▇▇▇▇██████████████████████████████████

0,1
accuracy,0.71983
auc,0.79818
iteration,367.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nhva2lhh with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8916620775532871
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.5873668306795825
[34m[1mwandb[0m: 	lambda_l1: 1.719788043325069
[34m[1mwandb[0m: 	lambda_l2: 10.618809843104186
[34m[1mwandb[0m: 	learning_rate: 0.5551558840746437
[34m[1mwandb[0m: 	max_depth: 21
[34m[1mwandb[0m: 	min_data_in_leaf: 63
[34m[1mwandb[0m: 	num_leaves: 42


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[44]	training's auc: 0.846343	valid_1's auc: 0.804996
VALID AUC : 0.8049960244556149 ACC : 0.7264176296694437

writing prediction : output/auc:0.8049960244556149 acc:0.7264176296694437sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
valid_1_auc,▁▅▆▇▇▇▇▇████████████████████████████████

0,1
accuracy,0.72642
auc,0.805
iteration,143.0


[34m[1mwandb[0m: Agent Starting Run: hqptqyji with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.5603180981245857
[34m[1mwandb[0m: 	bagging_freq: 2
[34m[1mwandb[0m: 	feature_fraction: 0.6373368478730268
[34m[1mwandb[0m: 	lambda_l1: 8.04949794774425
[34m[1mwandb[0m: 	lambda_l2: 12.30196977824512
[34m[1mwandb[0m: 	learning_rate: 0.0923450802066615
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	min_data_in_leaf: 47
[34m[1mwandb[0m: 	num_leaves: 45


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.196017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[529]	training's auc: 0.832738	valid_1's auc: 0.798586
VALID AUC : 0.7985855359280293 ACC : 0.7192958882020962

writing prediction : output/auc:0.7985855359280293 acc:0.7192958882020962sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▅▆▆▆▇▇▇▇▇▇▇▇██████████████████████████
valid_1_auc,▁▃▅▆▇▇▇▇▇███████████████████████████████

0,1
accuracy,0.7193
auc,0.79859
iteration,628.0


[34m[1mwandb[0m: Agent Starting Run: ggj1rs20 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.76739607466569
[34m[1mwandb[0m: 	bagging_freq: 11
[34m[1mwandb[0m: 	feature_fraction: 0.9311969255425486
[34m[1mwandb[0m: 	lambda_l1: 13.470354191532268
[34m[1mwandb[0m: 	lambda_l2: 12.277453903853807
[34m[1mwandb[0m: 	learning_rate: 0.5415457872725182
[34m[1mwandb[0m: 	max_depth: 22
[34m[1mwandb[0m: 	min_data_in_leaf: 66
[34m[1mwandb[0m: 	num_leaves: 33


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029461 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[112]	training's auc: 0.851117	valid_1's auc: 0.804677
VALID AUC : 0.8046768533983586 ACC : 0.7273582370330556

writing prediction : output/auc:0.8046768533983586 acc:0.7273582370330556sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
valid_1_auc,▁▅▆▇▇▇▇▇▇███████████████████████████████

0,1
accuracy,0.72736
auc,0.80468
iteration,211.0


[34m[1mwandb[0m: Agent Starting Run: h6caro27 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9352473730455876
[34m[1mwandb[0m: 	bagging_freq: 11
[34m[1mwandb[0m: 	feature_fraction: 0.9037469789135006
[34m[1mwandb[0m: 	lambda_l1: 9.50972656754268
[34m[1mwandb[0m: 	lambda_l2: 6.427288669686579
[34m[1mwandb[0m: 	learning_rate: 0.2004996898665869
[34m[1mwandb[0m: 	max_depth: 23
[34m[1mwandb[0m: 	min_data_in_leaf: 98
[34m[1mwandb[0m: 	num_leaves: 35


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[432]	training's auc: 0.85486	valid_1's auc: 0.808876
VALID AUC : 0.8088760198964414 ACC : 0.7316581564095673

writing prediction : output/auc:0.8088760198964414 acc:0.7316581564095673sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▂▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
valid_1_auc,▁▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████

0,1
accuracy,0.73166
auc,0.80888
iteration,531.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8qk0ndj2 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.945850230190728
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.935552904446614
[34m[1mwandb[0m: 	lambda_l1: 11.36095111323372
[34m[1mwandb[0m: 	lambda_l2: 2.858037155342323
[34m[1mwandb[0m: 	learning_rate: 0.1218730330984361
[34m[1mwandb[0m: 	max_depth: 19
[34m[1mwandb[0m: 	min_data_in_leaf: 86
[34m[1mwandb[0m: 	num_leaves: 42


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.027893 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[492]	training's auc: 0.855885	valid_1's auc: 0.808772
VALID AUC : 0.8087718701017163 ACC : 0.7323300188121473

writing prediction : output/auc:0.8087718701017163 acc:0.7323300188121473sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
valid_1_auc,▁▃▅▆▆▇▇▇▇▇██████████████████████████████

0,1
accuracy,0.73233
auc,0.80877
iteration,591.0


[34m[1mwandb[0m: Agent Starting Run: t4chw7ye with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9912254606760392
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.8853156018663173
[34m[1mwandb[0m: 	lambda_l1: 7.744060441440609
[34m[1mwandb[0m: 	lambda_l2: 12.709022163546193
[34m[1mwandb[0m: 	learning_rate: 0.42926095640108375
[34m[1mwandb[0m: 	max_depth: 23
[34m[1mwandb[0m: 	min_data_in_leaf: 117
[34m[1mwandb[0m: 	num_leaves: 48


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[289]	training's auc: 0.862119	valid_1's auc: 0.807213
VALID AUC : 0.8072125864990788 ACC : 0.7289707067992475

writing prediction : output/auc:0.8072125864990788 acc:0.7289707067992475sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▅▆▇▇▇▇▇▇▇▇██▇▇▇▇▇██████████████████████

0,1
accuracy,0.72897
auc,0.80721
iteration,388.0


[34m[1mwandb[0m: Agent Starting Run: 50cnz3xd with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9593541699643432
[34m[1mwandb[0m: 	bagging_freq: 8
[34m[1mwandb[0m: 	feature_fraction: 0.9679747460025796
[34m[1mwandb[0m: 	lambda_l1: 7.487493257355021
[34m[1mwandb[0m: 	lambda_l2: 1.3470905833703646
[34m[1mwandb[0m: 	learning_rate: 0.25459192156505
[34m[1mwandb[0m: 	max_depth: 21
[34m[1mwandb[0m: 	min_data_in_leaf: 104
[34m[1mwandb[0m: 	num_leaves: 41


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	training's auc: 0.850367	valid_1's auc: 0.807587
VALID AUC : 0.8075866873578557 ACC : 0.7284332168771835

writing prediction : output/auc:0.8075866873578557 acc:0.7284332168771835sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
valid_1_auc,▁▃▅▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.72843
auc,0.80759
iteration,189.0


[34m[1mwandb[0m: Agent Starting Run: fc3irlbl with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9777728727065124
[34m[1mwandb[0m: 	bagging_freq: 9
[34m[1mwandb[0m: 	feature_fraction: 0.9583957577353108
[34m[1mwandb[0m: 	lambda_l1: 14.4974287099927
[34m[1mwandb[0m: 	lambda_l2: 2.6640384338607106
[34m[1mwandb[0m: 	learning_rate: 0.13148642459188517
[34m[1mwandb[0m: 	max_depth: 16
[34m[1mwandb[0m: 	min_data_in_leaf: 115
[34m[1mwandb[0m: 	num_leaves: 21


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.190281 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[432]	training's auc: 0.850167	valid_1's auc: 0.807931
VALID AUC : 0.8079305045497416 ACC : 0.7297769416823434

writing prediction : output/auc:0.8079305045497416 acc:0.7297769416823434sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▃▅▆▆▇▇▇▇▇▇▇▇▇██████████████████████████

0,1
accuracy,0.72978
auc,0.80793
iteration,531.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: minzmkwn with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9408519009155925
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.9910670512634396
[34m[1mwandb[0m: 	lambda_l1: 14.587125706161096
[34m[1mwandb[0m: 	lambda_l2: 10.9608377404122
[34m[1mwandb[0m: 	learning_rate: 0.07311692844886875
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	min_data_in_leaf: 84
[34m[1mwandb[0m: 	num_leaves: 40


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[631]	training's auc: 0.852953	valid_1's auc: 0.80906
VALID AUC : 0.8090600347315353 ACC : 0.7311206664875034

writing prediction : output/auc:0.8090600347315353 acc:0.7311206664875034sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
training_auc,▁▃▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████
valid_1_auc,▁▃▅▅▆▇▇▇▇▇▇▇▇▇██████████████████████████

0,1
accuracy,0.73112
auc,0.80906
iteration,730.0


[34m[1mwandb[0m: Agent Starting Run: d7df8lxt with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9923194067659632
[34m[1mwandb[0m: 	bagging_freq: 9
[34m[1mwandb[0m: 	feature_fraction: 0.9952784531141372
[34m[1mwandb[0m: 	lambda_l1: 13.685004116362164
[34m[1mwandb[0m: 	lambda_l2: 9.877445007189364
[34m[1mwandb[0m: 	learning_rate: 0.3025350297843006
[34m[1mwandb[0m: 	max_depth: 18
[34m[1mwandb[0m: 	min_data_in_leaf: 109
[34m[1mwandb[0m: 	num_leaves: 37


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[124]	training's auc: 0.852252	valid_1's auc: 0.806896
VALID AUC : 0.8068958728276799 ACC : 0.7278957269551196

writing prediction : output/auc:0.8068958728276799 acc:0.7278957269551196sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▄▆▆▇▇▇▇▇███████████████████████████████

0,1
accuracy,0.7279
auc,0.8069
iteration,223.0


[34m[1mwandb[0m: Agent Starting Run: p5onkecm with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8703227706103089
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.9597711763596616
[34m[1mwandb[0m: 	lambda_l1: 11.89132673219263
[34m[1mwandb[0m: 	lambda_l2: 2.534272770492945
[34m[1mwandb[0m: 	learning_rate: 0.025182498508142553
[34m[1mwandb[0m: 	max_depth: 16
[34m[1mwandb[0m: 	min_data_in_leaf: 109
[34m[1mwandb[0m: 	num_leaves: 41


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.211330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[956]	training's auc: 0.850388	valid_1's auc: 0.808168
VALID AUC : 0.8081676422849902 ACC : 0.7300456866433754

writing prediction : output/auc:0.8081676422849902 acc:0.7300456866433754sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▂▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
valid_1_auc,▁▂▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇███████████████████████

0,1
accuracy,0.73005
auc,0.80817
iteration,1055.0


[34m[1mwandb[0m: Agent Starting Run: bx03rw10 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9287710779054326
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.6387582998848482
[34m[1mwandb[0m: 	lambda_l1: 14.807905856867976
[34m[1mwandb[0m: 	lambda_l2: 12.243826115031714
[34m[1mwandb[0m: 	learning_rate: 0.02707414348435638
[34m[1mwandb[0m: 	max_depth: 16
[34m[1mwandb[0m: 	min_data_in_leaf: 119
[34m[1mwandb[0m: 	num_leaves: 48


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023632 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1474]	training's auc: 0.851501	valid_1's auc: 0.809102
VALID AUC : 0.809101738015058 ACC : 0.7304488040849234

writing prediction : output/auc:0.809101738015058 acc:0.7304488040849234sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
valid_1_auc,▁▃▄▅▆▆▇▇▇▇▇▇▇▇▇▇████████████████████████

0,1
accuracy,0.73045
auc,0.8091
iteration,1573.0


[34m[1mwandb[0m: Agent Starting Run: rcpkssdx with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9716198518797972
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.7932347192778666
[34m[1mwandb[0m: 	lambda_l1: 14.80819941161078
[34m[1mwandb[0m: 	lambda_l2: 7.41220489709798
[34m[1mwandb[0m: 	learning_rate: 0.11022774061074504
[34m[1mwandb[0m: 	max_depth: 25
[34m[1mwandb[0m: 	min_data_in_leaf: 66
[34m[1mwandb[0m: 	num_leaves: 38


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.026571 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[585]	training's auc: 0.854007	valid_1's auc: 0.809784
VALID AUC : 0.8097843130750056 ACC : 0.7328675087342112

writing prediction : output/auc:0.8097843130750056 acc:0.7328675087342112sweep lgbm.csv


0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
valid_1_auc,▁▄▅▆▆▆▆▇▇▇▇▇▇▇▇▇████████████████████████

0,1
accuracy,0.73287
auc,0.80978
iteration,684.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: btdo0st7 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9464388213955988
[34m[1mwandb[0m: 	bagging_freq: 8
[34m[1mwandb[0m: 	feature_fraction: 0.581506709697018
[34m[1mwandb[0m: 	lambda_l1: 14.239234328781851
[34m[1mwandb[0m: 	lambda_l2: 2.0956894938030595
[34m[1mwandb[0m: 	learning_rate: 0.03285293226167607
[34m[1mwandb[0m: 	max_depth: 23
[34m[1mwandb[0m: 	min_data_in_leaf: 114
[34m[1mwandb[0m: 	num_leaves: 45


[LightGBM] [Info] Number of positive: 1649967, number of negative: 868547
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025049 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3596
[LightGBM] [Info] Number of data points in the train set: 2518514, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.655135 -> initscore=0.641689
[LightGBM] [Info] Start training from score 0.641689




Training until validation scores don't improve for 100 rounds
