In [17]:
import warnings 
# Blanket filter: no category or module is given, so every warning raised after
# this point in the session is suppressed.
warnings.filterwarnings("ignore")

In [18]:
import pandas as pd
import os
import random
import numpy as np
import yaml
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
import numpy as np
import torch
import pytz
from datetime import datetime, timezone, timedelta
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
from wandb.lightgbm import wandb_callback, log_summary

In [19]:
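# Patched copy of wandb's LightGBM callback; the wandb_callback defined below replaces the one imported from wandb.lightgbm.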
from typing import TYPE_CHECKING, Callable
import wandb
from wandb.sdk.lib import telemetry as wb_telemetry

# Metric names for which wandb_callback asks W&B to keep the minimum in the run summary.
MINIMIZE_METRICS = [
    "l1", "l2", "rmse", "mape",
    "huber", "fair", "poisson", "gamma",
    "binary_logloss",
]

# Metric names for which wandb_callback asks W&B to keep the maximum in the run summary.
MAXIMIZE_METRICS = [
    "map",
    "auc",
    "average_precision",
]

def set_seeds(seed: int = 42):
    """Seed every random number generator this notebook relies on.

    Exports ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy's global
    generator and PyTorch (CPU and CUDA) with the same value, then asks cuDNN
    for deterministic kernels so repeated runs give the same results.

    Args:
        seed: Value applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Same seeders, same order as a hand-written sequence of calls.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
        
def wandb_callback(log_params=True, define_metric=True) -> Callable:
    """Build a LightGBM callback that streams evaluation metrics to Weights & Biases.

    The returned callable is meant for the ``callbacks`` list of ``lightgbm.train``.
    On its first invocation it:
      - flags the integration in W&B telemetry,
      - copies ``env.params`` into ``wandb.config`` when ``log_params`` is true,
      - registers a min/max summary for each reported metric when
        ``define_metric`` is true, so ``wandb.summary`` holds the best value
        instead of the last one.
    On every invocation it logs one ``"<dataset>_<metric>"`` entry per item of
    ``env.evaluation_result_list`` and commits them together with the current
    ``iteration``.

    Arguments:
        log_params: (boolean) if True (default) log the params passed to
            lightgbm.train as W&B config.
        define_metric: (boolean) if True (default) capture model performance at
            the best step, instead of the last step, in ``wandb.summary``.
            This is applied independently of ``log_params``.

    Returns:
        A callable taking the LightGBM ``CallbackEnv``.

    Use `log_summary` as an extension of this callback.

    Example:
        ```python
        params = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
        }
        gbm = lgb.train(params,
                        lgb_train,
                        num_boost_round=10,
                        valid_sets=[lgb_eval],
                        valid_names=['validation'],
                        callbacks=[wandb_callback()])
        ```
    """
    # One-shot guard for the first-iteration setup. It is separate from
    # `log_params` so that disabling param logging does not also disable
    # the summary definitions.
    initialized = False

    def _define_metric(data: str, metric_name: str) -> None:
        """Register how W&B should summarise ``<data>_<metric_name>`` (min or max)."""
        lowered = metric_name.lower()
        if "loss" in lowered or lowered in MINIMIZE_METRICS:
            wandb.define_metric(f"{data}_{metric_name}", summary="min")
        elif lowered in MAXIMIZE_METRICS:
            wandb.define_metric(f"{data}_{metric_name}", summary="max")
        # Metrics in neither list keep W&B's default summary behaviour.

    def _init(env: "CallbackEnv") -> None:
        """Run the one-time setup using the first callback environment."""
        nonlocal initialized

        with wb_telemetry.context() as tel:
            tel.feature.lightgbm_wandb_callback = True

        if log_params:
            wandb.config.update(env.params)

        if define_metric:
            for item in env.evaluation_result_list:
                # item[0] is the dataset name, item[1] the metric name.
                _define_metric(item[0], item[1])

        initialized = True

    def _callback(env: "CallbackEnv") -> None:
        if not initialized:
            _init(env)

        # Log every (dataset, metric) pair straight from the result list.
        # Going through a dict keyed by dataset name would keep only the last
        # metric of each dataset.
        for item in env.evaluation_result_list:
            wandb.log({f"{item[0]}_{item[1]}": item[2]}, commit=False)

        # Previous log statements use commit=False. This commits them.
        wandb.log({"iteration": env.iteration}, commit=True)

    return _callback

## Training

In [20]:
# Location of the YAML file that defines the W&B sweep.
sweep_config_path = '/data/ephemeral/home/level2-dkt-recsys-06/code/boost/lgbmsweepconfigv2.yaml'

# Set the notebook name that W&B reads from the environment.

os.environ['WANDB_NOTEBOOK_NAME'] = 'LGBM_Train.ipynb'
# Load the sweep definition from the YAML file.
with open(sweep_config_path, 'r') as file:
    sweep_config = yaml.safe_load(file)

# Register the sweep with W&B; the returned id is what the agent cell passes to wandb.agent.
sweep_id = wandb.sweep(sweep=sweep_config, project="lightgbm-sweep")

# Fix the random seeds (set_seeds defaults to seed=42).
set_seeds()


Create sweep with ID: fdskahx2
Sweep URL: https://wandb.ai/boostcamp6-recsys6/lightgbm-sweep/sweeps/fdskahx2


In [21]:
# Load the feature-engineered train and test tables, then order both by
# user, timestamp and item and renumber the index.
X = pd.read_csv('/data/ephemeral/home/level2-dkt-recsys-06/data/FE_v8.csv')
test =  pd.read_csv('/data/ephemeral/home/level2-dkt-recsys-06/data/FE_v8_test.csv')
X = X.sort_values(by=["userID", "Timestamp", "assessmentItemID"]).reset_index(drop=True)
test = test.sort_values(by=["userID", "Timestamp", "assessmentItemID"]).reset_index(drop=True)

# test = test[test["userID"] != test["userID"].shift(-1)]
# test = test.drop(["answerCode"], axis=1)

# Count the distinct user IDs in the training frame.
unique_user_count = X['userID'].nunique()

# Print the number of distinct user IDs.
print(f'userIDnum {unique_user_count}')

# Take the last row of every user in the test frame.
last_rows = test.groupby('userID').tail(1)
print(last_rows)
# Check that every user's last row has answerCode == -1.
are_last_answers_minus_one = (last_rows['answerCode'] == -1).all()

print(are_last_answers_minus_one)

# Shape of the test frame.
print(test.shape)

userIDnum 7442
        userID  assessmentItemID            Timestamp  testId  answerCode  \
1035         3          50133008  2020-10-26 13:13:57   50133          -1   
1706         4          70146008  2020-12-27 02:47:54   70146          -1   
3023        13          70111008  2020-12-27 04:35:09   70111          -1   
4283        17          90064006  2020-10-30 05:48:37   90064          -1   
4670        26          60135007  2020-10-23 11:44:18   60135          -1   
...        ...               ...                  ...     ...         ...   
260052    7395          40122005  2020-09-08 02:05:20   40122          -1   
260067    7404          30111005  2020-10-13 09:49:18   30111          -1   
260082    7416          50193004  2020-10-04 02:44:41   50193          -1   
260097    7417          50193004  2020-09-06 13:09:15   50193          -1   
260113    7439          40130005  2020-10-14 23:10:03   40130          -1   

        KnowledgeTag  Itemseq  SolvingTime  CumulativeTime  

In [22]:
# Keep only the rows to predict (answerCode == -1) in `test`, and remove such
# rows from the training frame.
# .copy() turns each result into an independent frame: a later cell assigns
# columns into both frames, and writing into a filtered selection is what
# pandas flags with SettingWithCopyWarning.
test = test[test["answerCode"] == -1].copy()
X = X[X['answerCode']!=-1].copy()

In [23]:
# Numeric feature columns.
Feature = ['Itemseq', 'SolvingTime', 'CumulativeTime', 'UserAvgSolvingTime',
       'Difference_SolvingTime_UserAvgSolvingTime', 'CumulativeItemCount',
       'Item_last7days', 'Item_last30days', 'CumulativeUserItemAcc',
       'PastItemCount', 'UserItemElapsed', 'ItemAcc',
       'AverageItemSolvingTime_Correct', 'AverageItemSolvingTime_Incorrect',
       'AverageItemSolvingTime', 'Difference_SolvingTime_AvgItemSolvingTime',
       'UserTagAvgSolvingTime', 'TagAcc', 'CumulativeUserTagAverageAcc',
       'CumulativeUserTagExponentialAverage', 'UserTagCount', 'UserTagElapsed',
       'PastTagSolvingTime', 
       'TestAcc'
]

# Columns that are cast to the pandas 'category' dtype below.
Categorical_Feature = ['userID', 'assessmentItemID', 'testId', 'KnowledgeTag', 
       'Month','DayOfWeek', 'TimeOfDay', 'WeekOfYear',
       'UserRecentTagAnswer', 'PreviousItemAnswer',
       'categorize_solvingTime', 'categorize_ItemAcc',
       'categorize_TagAcc', 'categorize_TestAcc',
       'categorize_CumulativeUserItemAcc',
       'categorize_CumulativeUserTagAverageAcc',
       'categorize_CumulativeUserTagExponentialAverage'
]
# Full feature list = numeric columns followed by the categorical ones.
# NOTE(review): `Feature` is not referenced again in the visible cells — confirm it is still needed.
Feature = Feature + Categorical_Feature

# Cast to 'category' dtype: even integer-valued columns can then be treated as categorical.
for feature in Categorical_Feature:
       test[feature] = test[feature].astype('category')
       X[feature] = X[feature].astype('category')

In [24]:
# print(X.shape)

# # Select the columns to one-hot encode
# columns_to_encode = [
#     "UserRecentTagAnswer",
#     "PreviousItemAnswer",
#     # Add any other columns that should be one-hot encoded here
# ]
# for column in columns_to_encode:
#     if column in X.columns:
#         X = pd.get_dummies(X, columns=[column])
#     if column in test.columns:
#         test = pd.get_dummies(test, columns=[column])

#         # Apply the same to the other fields
# print(X.shape)

# Moved to label (presumably label/category encoding is used instead of one-hot)


In [25]:
# All column names of the training frame.
feat = X.columns.tolist()

# Columns left out of the model inputs: the timestamp, the label (answerCode)
# and features dropped by hand.
# NOTE(review): "user_tag_total_answer", 'categorize_CumulativeUserTagAverageAnswerRate',
# "categorize_TestAnswerRate" and "categorize_TagAnswerRate" are not in the
# X.columns listing printed by this cell, so they exclude nothing (43 -> 36
# columns means only 7 names matched). Presumably the categorize_*Acc columns
# were meant — TODO confirm.
exclude_columns = [
    "Timestamp",
    "answerCode",
    "DayOfWeek",
    'WeekOfYear',
    'UserAvgSolvingTime',
    'PastItemCount',
    "user_tag_total_answer",
    "categorize_CumulativeUserTagExponentialAverage",
    'categorize_CumulativeUserTagAverageAnswerRate',
    "categorize_TestAnswerRate",
    "categorize_TagAnswerRate"
]

# Column order of X is preserved; only the excluded names are removed.
filtered_feat = [column for column in feat if column not in exclude_columns]

# Shapes before and after filtering, then the full column listing.
print(X[feat].shape)
print(X[filtered_feat].shape)
print(X.columns.tolist())

(2525956, 43)
(2525956, 36)
['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp', 'KnowledgeTag', 'Itemseq', 'SolvingTime', 'CumulativeTime', 'Month', 'DayOfWeek', 'TimeOfDay', 'WeekOfYear', 'UserAvgSolvingTime', 'Difference_SolvingTime_UserAvgSolvingTime', 'CumulativeItemCount', 'Item_last7days', 'Item_last30days', 'CumulativeUserItemAcc', 'PastItemCount', 'UserItemElapsed', 'ItemAcc', 'AverageItemSolvingTime_Correct', 'AverageItemSolvingTime_Incorrect', 'AverageItemSolvingTime', 'Difference_SolvingTime_AvgItemSolvingTime', 'UserTagAvgSolvingTime', 'TagAcc', 'CumulativeUserTagAverageAcc', 'CumulativeUserTagExponentialAverage', 'UserTagCount', 'UserTagElapsed', 'PastTagSolvingTime', 'UserRecentTagAnswer', 'PreviousItemAnswer', 'TestAcc', 'categorize_solvingTime', 'categorize_ItemAcc', 'categorize_TagAcc', 'categorize_TestAcc', 'categorize_CumulativeUserItemAcc', 'categorize_CumulativeUserTagAverageAcc', 'categorize_CumulativeUserTagExponentialAverage']


In [26]:
# Default hyperparameters handed to wandb.init(config=...) inside train().
# The inline notes are the author's stated minimum for each parameter.
default_config = {
    "num_leaves": 10,  # minimum value: 10
    "learning_rate": 0.0001,  # minimum value: 0.0001
    "max_depth": -1,  # -1 (no depth limit)
    "min_data_in_leaf": 20,  # minimum value: 20
    "feature_fraction": 0.6,  # minimum value: 0.6
    "bagging_fraction": 0.6,  # minimum value: 0.6
    "bagging_freq": 0,  # minimum value: 0
    "lambda_l1": 0.0,  # minimum value: 0.0
    "lambda_l2": 0.0,  # minimum value: 0.0
    "cat_smooth": 10,  # minimum value: 10
}



In [27]:

def train():
    """Run one W&B sweep trial: split, fit LightGBM, evaluate, write predictions.

    Reads the notebook-level globals ``X`` (training frame), ``test`` (rows to
    predict), ``filtered_feat`` (model input columns) and ``default_config``.
    Takes no arguments so it can be handed directly to ``wandb.agent``.

    Steps:
      1. Start a W&B run and read the hyperparameters from ``wandb.config``.
      2. Hold out a random ``ratio`` fraction of every user's rows for validation.
      3. Train a binary LightGBM model (500 rounds max, early stopping after 30
         rounds without improvement), streaming metrics through ``wandb_callback``.
      4. Log validation AUC / accuracy, finish the run, and write the test
         predictions to ``output/`` as ``id,prediction`` rows.
      5. Print the feature importances, most important first.
    """
    wandb.init(project="lightgbm-sweep", config=default_config)

    # `ratio` has no entry in default_config; it must come from the sweep.
    ratio = wandb.config.ratio

    # Per-user random sample: these rows become the validation set.
    sampled_indices = X.groupby('userID').sample(frac=ratio).index

    # Training set = everything that was not sampled.
    X_train = X.drop(sampled_indices)
    y_train = X_train["answerCode"]

    # Validation set = the sampled rows.
    X_valid = X.loc[sampled_indices]
    y_valid = X_valid["answerCode"]

    # Declared on the Dataset: passing `categorical_feature` to lgb.train is
    # deprecated in favour of setting it on the training Dataset.
    categorical_feature = [
        "userID",
        "assessmentItemID",
        "testId",
        "KnowledgeTag",
        "Month",
    ]
    lgb_train = lgb.Dataset(
        X_train[filtered_feat],
        y_train,
        categorical_feature=categorical_feature,
    )
    lgb_valid = lgb.Dataset(X_valid[filtered_feat], y_valid)

    # W&B run name, stamped with the current time in Korea.
    korea = pytz.timezone("Asia/Seoul")
    current_time = datetime.now(korea).strftime("%m-%d %H:%M")
    wandb.run.name = f"Wonhee {current_time}"

    current_params = {
        "objective": "binary",
        "metric": ["auc"],
        "device": "cpu",
        "num_leaves": wandb.config.num_leaves,
        "learning_rate": wandb.config.learning_rate,
        "max_depth": wandb.config.max_depth,
        "min_data_in_leaf": wandb.config.min_data_in_leaf,
        "feature_fraction": wandb.config.feature_fraction,
        "bagging_fraction": wandb.config.bagging_fraction,
        "bagging_freq": wandb.config.bagging_freq,
        "lambda_l1": wandb.config.lambda_l1,
        "lambda_l2": wandb.config.lambda_l2,
        "cat_smooth": wandb.config.cat_smooth,
    }
    model = lgb.train(
        current_params,
        lgb_train,
        valid_sets=[lgb_train, lgb_valid],
        num_boost_round=500,
        callbacks=[
            wandb_callback(log_params=True, define_metric=True),
            lgb.early_stopping(30),
        ],
    )

    # Validation metrics; accuracy uses a 0.5 decision threshold.
    preds = model.predict(X_valid[filtered_feat])
    acc = accuracy_score(y_valid, np.where(preds >= 0.5, 1, 0))
    auc = roc_auc_score(y_valid, preds)
    test_preds = model.predict(test[filtered_feat])
    print(f"VALID AUC : {auc} ACC : {acc}\n")
    wandb.log({"auc": auc, "accuracy": acc})
    wandb.finish()

    # Write the submission file; the file name carries the validation scores.
    output_dir = "output/"
    write_path = os.path.join(output_dir, f"auc:{auc} acc:{acc}sweep lgbm.csv")
    os.makedirs(output_dir, exist_ok=True)
    with open(write_path, "w", encoding="utf8") as w:
        print("writing prediction : {}".format(write_path))
        w.write("id,prediction\n")
        for row_id, p in enumerate(test_preds):
            w.write("{},{}\n".format(row_id, p))

    # Feature importances, most important first.
    importance_df = pd.DataFrame(
        {
            'Feature': model.feature_name(),
            'Importance': model.feature_importance(),
        }
    ).sort_values(by='Importance', ascending=False)

    print(importance_df)

In [28]:

# Start a sweep agent that runs train() for the sweep registered as sweep_id.
# No `count` is passed here, so no upper bound on the number of runs is set in this call.
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: qpozxr25 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9059571298782872
[34m[1mwandb[0m: 	bagging_freq: 9
[34m[1mwandb[0m: 	feature_fraction: 0.8680158599290084
[34m[1mwandb[0m: 	lambda_l1: 5.623746883937578
[34m[1mwandb[0m: 	lambda_l2: 14.298044383066577
[34m[1mwandb[0m: 	learning_rate: 0.005370532667332486
[34m[1mwandb[0m: 	max_depth: 29
[34m[1mwandb[0m: 	min_data_in_leaf: 88
[34m[1mwandb[0m: 	num_leaves: 20
[34m[1mwandb[0m: 	ratio: 0.28187956866552055
[34m[1mwandb[0m: Currently logged in as: [33mkjswon12[0m ([33mboostcamp6-recsys6[0m). Use [1m`wandb login --relogin`[0m to force relogin


[LightGBM] [Info] Number of positive: 1187494, number of negative: 626443
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22379
[LightGBM] [Info] Number of data points in the train set: 1813937, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654650 -> initscore=0.639543
[LightGBM] [Info] Start training from score 0.639543




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.845471	valid_1's auc: 0.844789
VALID AUC : 0.8447887995970295 ACC : 0.7942091432953334



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████
valid_1_auc,▁▁▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████

0,1
accuracy,0.79421
auc,0.84479
iteration,499.0


writing prediction : output/auc:0.8447887995970295 acc:0.7942091432953334sweep lgbm.csv
                                      Feature  Importance
15                                    ItemAcc        2007
17           AverageItemSolvingTime_Incorrect         986
13                      CumulativeUserItemAcc         942
0                                      userID         882
14                            UserItemElapsed         754
5                                 SolvingTime         520
28                         PreviousItemAnswer         477
23        CumulativeUserTagExponentialAverage         452
19  Difference_SolvingTime_AvgItemSolvingTime         449
22                CumulativeUserTagAverageAcc         342
20                      UserTagAvgSolvingTime         327
1                            assessmentItemID         281
31                         categorize_ItemAcc         200
3                                KnowledgeTag         161
21                                     Tag

[34m[1mwandb[0m: Agent Starting Run: ctwc82hi with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7015105816570656
[34m[1mwandb[0m: 	bagging_freq: 9
[34m[1mwandb[0m: 	feature_fraction: 0.565222656580354
[34m[1mwandb[0m: 	lambda_l1: 1.5540822265220622
[34m[1mwandb[0m: 	lambda_l2: 14.87995453780294
[34m[1mwandb[0m: 	learning_rate: 0.36841279682121886
[34m[1mwandb[0m: 	max_depth: 43
[34m[1mwandb[0m: 	min_data_in_leaf: 65
[34m[1mwandb[0m: 	num_leaves: 48
[34m[1mwandb[0m: 	ratio: 0.16859610783586432


[LightGBM] [Info] Number of positive: 1374237, number of negative: 725838
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.306808 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22373
[LightGBM] [Info] Number of data points in the train set: 2100075, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654375 -> initscore=0.638327
[LightGBM] [Info] Start training from score 0.638327




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[82]	training's auc: 0.877846	valid_1's auc: 0.856416
VALID AUC : 0.8564164636976394 ACC : 0.8025856988219714



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
valid_1_auc,▁▄▆▆▇▇▇█████████████████████████████████

0,1
accuracy,0.80259
auc,0.85642
iteration,111.0


writing prediction : output/auc:0.8564164636976394 acc:0.8025856988219714sweep lgbm.csv
                                      Feature  Importance
1                            assessmentItemID        1236
0                                      userID        1036
3                                KnowledgeTag         528
2                                      testId         513
14                            UserItemElapsed          88
15                                    ItemAcc          67
13                      CumulativeUserItemAcc          33
17           AverageItemSolvingTime_Incorrect          32
20                      UserTagAvgSolvingTime          32
19  Difference_SolvingTime_AvgItemSolvingTime          31
23        CumulativeUserTagExponentialAverage          27
5                                 SolvingTime          26
6                              CumulativeTime          21
22                CumulativeUserTagAverageAcc          21
21                                     Tag

[34m[1mwandb[0m: Agent Starting Run: xyp51aje with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7675224556765278
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.915064718152445
[34m[1mwandb[0m: 	lambda_l1: 9.875336391460907
[34m[1mwandb[0m: 	lambda_l2: 14.267794836673822
[34m[1mwandb[0m: 	learning_rate: 0.186105761522994
[34m[1mwandb[0m: 	max_depth: 14
[34m[1mwandb[0m: 	min_data_in_leaf: 8
[34m[1mwandb[0m: 	num_leaves: 30
[34m[1mwandb[0m: 	ratio: 0.11187242666646798


[LightGBM] [Info] Number of positive: 1469087, number of negative: 774291
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22347
[LightGBM] [Info] Number of data points in the train set: 2243378, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654855 -> initscore=0.640449
[LightGBM] [Info] Start training from score 0.640449




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[335]	training's auc: 0.886857	valid_1's auc: 0.858161
VALID AUC : 0.8581608510606503 ACC : 0.802684568508518



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
training_auc,▁▂▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████
valid_1_auc,▁▄▅▆▇▇▇▇▇▇▇█████████████████████████████

0,1
accuracy,0.80268
auc,0.85816
iteration,364.0


writing prediction : output/auc:0.8581608510606503 acc:0.802684568508518sweep lgbm.csv
                                      Feature  Importance
0                                      userID        3612
1                            assessmentItemID        2852
3                                KnowledgeTag        1274
2                                      testId        1086
14                            UserItemElapsed         138
15                                    ItemAcc         136
13                      CumulativeUserItemAcc          90
20                      UserTagAvgSolvingTime          53
19  Difference_SolvingTime_AvgItemSolvingTime          50
17           AverageItemSolvingTime_Incorrect          50
5                                 SolvingTime          45
6                              CumulativeTime          40
23        CumulativeUserTagExponentialAverage          37
22                CumulativeUserTagAverageAcc          26
28                         PreviousItemAnsw

[34m[1mwandb[0m: Agent Starting Run: 7shso5dw with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.7269892921295703
[34m[1mwandb[0m: 	bagging_freq: 11
[34m[1mwandb[0m: 	feature_fraction: 0.45948907756264473
[34m[1mwandb[0m: 	lambda_l1: 7.452198088993471
[34m[1mwandb[0m: 	lambda_l2: 0.04744816450959144
[34m[1mwandb[0m: 	learning_rate: 0.3682511300613048
[34m[1mwandb[0m: 	max_depth: 39
[34m[1mwandb[0m: 	min_data_in_leaf: 91
[34m[1mwandb[0m: 	num_leaves: 59
[34m[1mwandb[0m: 	ratio: 0.2063739252151361


[LightGBM] [Info] Number of positive: 1312199, number of negative: 692460
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.253652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22372
[LightGBM] [Info] Number of data points in the train set: 2004659, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654575 -> initscore=0.639209
[LightGBM] [Info] Start training from score 0.639209




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[61]	training's auc: 0.876633	valid_1's auc: 0.855724
VALID AUC : 0.8557239895796818 ACC : 0.8015142999096484



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
training_auc,▁▃▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
valid_1_auc,▁▄▅▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.80151
auc,0.85572
iteration,90.0


writing prediction : output/auc:0.8557239895796818 acc:0.8015142999096484sweep lgbm.csv
                                      Feature  Importance
1                            assessmentItemID         901
0                                      userID         900
3                                KnowledgeTag         542
2                                      testId         371
14                            UserItemElapsed         111
15                                    ItemAcc          77
13                      CumulativeUserItemAcc          54
19  Difference_SolvingTime_AvgItemSolvingTime          49
20                      UserTagAvgSolvingTime          49
17           AverageItemSolvingTime_Incorrect          45
31                         categorize_ItemAcc          40
5                                 SolvingTime          38
6                              CumulativeTime          36
21                                     TagAcc          33
23        CumulativeUserTagExponentialAver

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6t63vqgz with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9228820382046142
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.8080140242138856
[34m[1mwandb[0m: 	lambda_l1: 0.49115647955440433
[34m[1mwandb[0m: 	lambda_l2: 8.039856021095117
[34m[1mwandb[0m: 	learning_rate: 0.04015437034888322
[34m[1mwandb[0m: 	max_depth: 42
[34m[1mwandb[0m: 	min_data_in_leaf: 73
[34m[1mwandb[0m: 	num_leaves: 46
[34m[1mwandb[0m: 	ratio: 0.11740561219711856


[LightGBM] [Info] Number of positive: 1459437, number of negative: 769960
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.282715 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22374
[LightGBM] [Info] Number of data points in the train set: 2229397, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654633 -> initscore=0.639467
[LightGBM] [Info] Start training from score 0.639467




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.881439	valid_1's auc: 0.859767
VALID AUC : 0.8597666406185446 ACC : 0.8053237298480235



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████

0,1
accuracy,0.80532
auc,0.85977
iteration,499.0


writing prediction : output/auc:0.8597666406185446 acc:0.8053237298480235sweep lgbm.csv
                                      Feature  Importance
0                                      userID        8386
1                            assessmentItemID        6274
2                                      testId        2403
3                                KnowledgeTag        1733
15                                    ItemAcc         715
14                            UserItemElapsed         605
13                      CumulativeUserItemAcc         343
17           AverageItemSolvingTime_Incorrect         290
5                                 SolvingTime         188
19  Difference_SolvingTime_AvgItemSolvingTime         187
20                      UserTagAvgSolvingTime         172
23        CumulativeUserTagExponentialAverage         133
29                                    TestAcc         115
22                CumulativeUserTagAverageAcc         110
6                              CumulativeT

[34m[1mwandb[0m: Agent Starting Run: 67anjxje with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8481034854876464
[34m[1mwandb[0m: 	bagging_freq: 7
[34m[1mwandb[0m: 	feature_fraction: 0.5786504287877465
[34m[1mwandb[0m: 	lambda_l1: 4.102294379470218
[34m[1mwandb[0m: 	lambda_l2: 1.329074688190912
[34m[1mwandb[0m: 	learning_rate: 0.2249712907009698
[34m[1mwandb[0m: 	max_depth: 42
[34m[1mwandb[0m: 	min_data_in_leaf: 56
[34m[1mwandb[0m: 	num_leaves: 31
[34m[1mwandb[0m: 	ratio: 0.12554729947049087


[LightGBM] [Info] Number of positive: 1445777, number of negative: 762979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22367
[LightGBM] [Info] Number of data points in the train set: 2208756, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654566 -> initscore=0.639172
[LightGBM] [Info] Start training from score 0.639172




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[227]	training's auc: 0.882434	valid_1's auc: 0.857114
VALID AUC : 0.8571136661755969 ACC : 0.8023675914249685



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
valid_1_auc,▁▃▅▆▆▇▇▇▇▇▇█████████████████████████████

0,1
accuracy,0.80237
auc,0.85711
iteration,256.0


writing prediction : output/auc:0.8571136661755969 acc:0.8023675914249685sweep lgbm.csv
                                      Feature  Importance
0                                      userID        1963
1                            assessmentItemID        1957
3                                KnowledgeTag        1016
2                                      testId        1014
14                            UserItemElapsed         133
15                                    ItemAcc         104
13                      CumulativeUserItemAcc          63
20                      UserTagAvgSolvingTime          54
17           AverageItemSolvingTime_Incorrect          50
5                                 SolvingTime          44
6                              CumulativeTime          44
19  Difference_SolvingTime_AvgItemSolvingTime          36
31                         categorize_ItemAcc          33
22                CumulativeUserTagAverageAcc          32
23        CumulativeUserTagExponentialAver

[34m[1mwandb[0m: Agent Starting Run: 229c34k1 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9476398235900002
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.8661530092462761
[34m[1mwandb[0m: 	lambda_l1: 0.43968519302151177
[34m[1mwandb[0m: 	lambda_l2: 10.9209757462016
[34m[1mwandb[0m: 	learning_rate: 0.07716411477196085
[34m[1mwandb[0m: 	max_depth: 45
[34m[1mwandb[0m: 	min_data_in_leaf: 91
[34m[1mwandb[0m: 	num_leaves: 50
[34m[1mwandb[0m: 	ratio: 0.06217314745664852


[LightGBM] [Info] Number of positive: 1550940, number of negative: 817984
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.059901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22362
[LightGBM] [Info] Number of data points in the train set: 2368924, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654702 -> initscore=0.639774
[LightGBM] [Info] Start training from score 0.639774




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[429]	training's auc: 0.892944	valid_1's auc: 0.860474
VALID AUC : 0.8604740139175818 ACC : 0.805275357888838



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▂▄▅▆▆▇▇▇▇▇▇▇▇██████████████████████████

0,1
accuracy,0.80528
auc,0.86047
iteration,458.0


writing prediction : output/auc:0.8604740139175818 acc:0.805275357888838sweep lgbm.csv
                                      Feature  Importance
0                                      userID        8052
1                            assessmentItemID        6591
2                                      testId        2264
3                                KnowledgeTag        1819
15                                    ItemAcc         400
14                            UserItemElapsed         373
13                      CumulativeUserItemAcc         239
17           AverageItemSolvingTime_Incorrect         165
20                      UserTagAvgSolvingTime         129
5                                 SolvingTime         123
19  Difference_SolvingTime_AvgItemSolvingTime         104
6                              CumulativeTime          98
23        CumulativeUserTagExponentialAverage          97
28                         PreviousItemAnswer          70
29                                    TestA

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: glvs623p with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9910040674867368
[34m[1mwandb[0m: 	bagging_freq: 15
[34m[1mwandb[0m: 	feature_fraction: 0.6956893303485538
[34m[1mwandb[0m: 	lambda_l1: 1.7637891500578071
[34m[1mwandb[0m: 	lambda_l2: 11.08522969863944
[34m[1mwandb[0m: 	learning_rate: 0.0660938160009302
[34m[1mwandb[0m: 	max_depth: 35
[34m[1mwandb[0m: 	min_data_in_leaf: 70
[34m[1mwandb[0m: 	num_leaves: 58
[34m[1mwandb[0m: 	ratio: 0.06511480343549532


[LightGBM] [Info] Number of positive: 1546197, number of negative: 815366
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048517 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22363
[LightGBM] [Info] Number of data points in the train set: 2361563, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654735 -> initscore=0.639917
[LightGBM] [Info] Start training from score 0.639917




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[494]	training's auc: 0.895359	valid_1's auc: 0.861699
VALID AUC : 0.8616990949035715 ACC : 0.8051559372966002



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▃▄▅▅▆▆▇▇▇▇▇▇▇▇█████████████████████████

0,1
accuracy,0.80516
auc,0.8617
iteration,499.0


writing prediction : output/auc:0.8616990949035715 acc:0.8051559372966002sweep lgbm.csv
                                      Feature  Importance
0                                      userID        9581
1                            assessmentItemID        8870
2                                      testId        3610
3                                KnowledgeTag        2784
14                            UserItemElapsed         544
15                                    ItemAcc         434
13                      CumulativeUserItemAcc         315
17           AverageItemSolvingTime_Incorrect         210
5                                 SolvingTime         186
20                      UserTagAvgSolvingTime         185
6                              CumulativeTime         147
19  Difference_SolvingTime_AvgItemSolvingTime         145
23        CumulativeUserTagExponentialAverage         144
31                         categorize_ItemAcc         122
22                CumulativeUserTagAverage

[34m[1mwandb[0m: Agent Starting Run: i12dzqga with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9808782939544656
[34m[1mwandb[0m: 	bagging_freq: 13
[34m[1mwandb[0m: 	feature_fraction: 0.7261281367354071
[34m[1mwandb[0m: 	lambda_l1: 1.4269061754405532
[34m[1mwandb[0m: 	lambda_l2: 3.560596335654002
[34m[1mwandb[0m: 	learning_rate: 0.2708030436313351
[34m[1mwandb[0m: 	max_depth: 45
[34m[1mwandb[0m: 	min_data_in_leaf: 90
[34m[1mwandb[0m: 	num_leaves: 60
[34m[1mwandb[0m: 	ratio: 0.054941101836389825


[LightGBM] [Info] Number of positive: 1562895, number of negative: 824270
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052131 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22363
[LightGBM] [Info] Number of data points in the train set: 2387165, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654708 -> initscore=0.639797
[LightGBM] [Info] Start training from score 0.639797




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[139]	training's auc: 0.897494	valid_1's auc: 0.861576
VALID AUC : 0.8615759767416482 ACC : 0.8037913121167799



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▄▅▆▆▇▇▇████████████████████████████████

0,1
accuracy,0.80379
auc,0.86158
iteration,168.0


writing prediction : output/auc:0.8615759767416482 acc:0.8037913121167799sweep lgbm.csv
                                      Feature  Importance
0                                      userID        2844
1                            assessmentItemID        2474
2                                      testId        1153
3                                KnowledgeTag         917
14                            UserItemElapsed         129
15                                    ItemAcc         124
13                      CumulativeUserItemAcc          66
20                      UserTagAvgSolvingTime          56
17           AverageItemSolvingTime_Incorrect          54
5                                 SolvingTime          44
6                              CumulativeTime          37
23        CumulativeUserTagExponentialAverage          35
19  Difference_SolvingTime_AvgItemSolvingTime          33
22                CumulativeUserTagAverageAcc          31
34           categorize_CumulativeUserItem

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0dywv6bu with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9990787002665984
[34m[1mwandb[0m: 	bagging_freq: 17
[34m[1mwandb[0m: 	feature_fraction: 0.4466535335648072
[34m[1mwandb[0m: 	lambda_l1: 1.0963674867145878
[34m[1mwandb[0m: 	lambda_l2: 11.716532071078122
[34m[1mwandb[0m: 	learning_rate: 0.39169649422611097
[34m[1mwandb[0m: 	max_depth: 38
[34m[1mwandb[0m: 	min_data_in_leaf: 34
[34m[1mwandb[0m: 	num_leaves: 57
[34m[1mwandb[0m: 	ratio: 0.07414341866948841


[LightGBM] [Info] Number of positive: 1530932, number of negative: 807809
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.026046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22370
[LightGBM] [Info] Number of data points in the train set: 2338741, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654597 -> initscore=0.639306
[LightGBM] [Info] Start training from score 0.639306




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[89]	training's auc: 0.887619	valid_1's auc: 0.860248
VALID AUC : 0.860248136323138 ACC : 0.805458964292391



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
valid_1_auc,▁▄▆▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.80546
auc,0.86025
iteration,118.0


writing prediction : output/auc:0.860248136323138 acc:0.805458964292391sweep lgbm.csv
                                      Feature  Importance
0                                      userID        1364
1                            assessmentItemID        1330
2                                      testId         709
3                                KnowledgeTag         709
14                            UserItemElapsed         121
15                                    ItemAcc          83
20                      UserTagAvgSolvingTime          60
17           AverageItemSolvingTime_Incorrect          52
19  Difference_SolvingTime_AvgItemSolvingTime          51
13                      CumulativeUserItemAcc          50
5                                 SolvingTime          49
31                         categorize_ItemAcc          43
6                              CumulativeTime          38
23        CumulativeUserTagExponentialAverage          35
21                                     TagAc

[34m[1mwandb[0m: Agent Starting Run: wfwxtm8b with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9909698736772904
[34m[1mwandb[0m: 	bagging_freq: 19
[34m[1mwandb[0m: 	feature_fraction: 0.6988097471701846
[34m[1mwandb[0m: 	lambda_l1: 4.448096446687741
[34m[1mwandb[0m: 	lambda_l2: 8.442501755061707
[34m[1mwandb[0m: 	learning_rate: 0.01703922672276498
[34m[1mwandb[0m: 	max_depth: 46
[34m[1mwandb[0m: 	min_data_in_leaf: 14
[34m[1mwandb[0m: 	num_leaves: 57
[34m[1mwandb[0m: 	ratio: 0.053478803683061224


[LightGBM] [Info] Number of positive: 1565304, number of negative: 825563
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054795 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22336
[LightGBM] [Info] Number of data points in the train set: 2390867, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654701 -> initscore=0.639770
[LightGBM] [Info] Start training from score 0.639770




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.870714	valid_1's auc: 0.860483
VALID AUC : 0.8604834264733741 ACC : 0.8049508102066045



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
valid_1_auc,▁▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████

0,1
accuracy,0.80495
auc,0.86048
iteration,499.0


writing prediction : output/auc:0.8604834264733741 acc:0.8049508102066045sweep lgbm.csv
                                      Feature  Importance
0                                      userID        9466
1                            assessmentItemID        5729
2                                      testId        2474
3                                KnowledgeTag        1707
15                                    ItemAcc        1533
14                            UserItemElapsed        1353
13                      CumulativeUserItemAcc         799
17           AverageItemSolvingTime_Incorrect         738
5                                 SolvingTime         504
19  Difference_SolvingTime_AvgItemSolvingTime         403
31                         categorize_ItemAcc         380
20                      UserTagAvgSolvingTime         364
23        CumulativeUserTagExponentialAverage         345
29                                    TestAcc         324
22                CumulativeUserTagAverage

[34m[1mwandb[0m: Agent Starting Run: a37puq31 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.925027151140548
[34m[1mwandb[0m: 	bagging_freq: 12
[34m[1mwandb[0m: 	feature_fraction: 0.3104689429071369
[34m[1mwandb[0m: 	lambda_l1: 1.1605257115179923
[34m[1mwandb[0m: 	lambda_l2: 8.10048533079828
[34m[1mwandb[0m: 	learning_rate: 0.01661953168740506
[34m[1mwandb[0m: 	max_depth: 35
[34m[1mwandb[0m: 	min_data_in_leaf: 16
[34m[1mwandb[0m: 	num_leaves: 56
[34m[1mwandb[0m: 	ratio: 0.0635610878291495


[LightGBM] [Info] Number of positive: 1548687, number of negative: 816721
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.322962 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22362
[LightGBM] [Info] Number of data points in the train set: 2365408, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654723 -> initscore=0.639865
[LightGBM] [Info] Start training from score 0.639865




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.866038	valid_1's auc: 0.856555
VALID AUC : 0.8565552723747469 ACC : 0.8011498118942622



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▂▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████

0,1
accuracy,0.80115
auc,0.85656
iteration,499.0


writing prediction : output/auc:0.8565552723747469 acc:0.8011498118942622sweep lgbm.csv
                                      Feature  Importance
1                            assessmentItemID        5277
0                                      userID        4955
2                                      testId        3028
3                                KnowledgeTag        2929
14                            UserItemElapsed        1274
15                                    ItemAcc        1092
31                         categorize_ItemAcc         974
13                      CumulativeUserItemAcc         723
17           AverageItemSolvingTime_Incorrect         634
5                                 SolvingTime         598
21                                     TagAcc         486
20                      UserTagAvgSolvingTime         478
29                                    TestAcc         438
19  Difference_SolvingTime_AvgItemSolvingTime         408
6                              CumulativeT

[34m[1mwandb[0m: Agent Starting Run: qw4goncb with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9103855453050764
[34m[1mwandb[0m: 	bagging_freq: 17
[34m[1mwandb[0m: 	feature_fraction: 0.8970495583580029
[34m[1mwandb[0m: 	lambda_l1: 1.295434175104001
[34m[1mwandb[0m: 	lambda_l2: 0.41348683265101827
[34m[1mwandb[0m: 	learning_rate: 0.27184348401396524
[34m[1mwandb[0m: 	max_depth: 46
[34m[1mwandb[0m: 	min_data_in_leaf: 30
[34m[1mwandb[0m: 	num_leaves: 47
[34m[1mwandb[0m: 	ratio: 0.06682667332130611


[LightGBM] [Info] Number of positive: 1543130, number of negative: 814080
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.299848 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22357
[LightGBM] [Info] Number of data points in the train set: 2357210, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654643 -> initscore=0.639509
[LightGBM] [Info] Start training from score 0.639509




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[138]	training's auc: 0.892323	valid_1's auc: 0.860699
VALID AUC : 0.8606993746757163 ACC : 0.8051568629774928



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▄▅▆▇▇▇▇▇▇██████████████████████████████

0,1
accuracy,0.80516
auc,0.8607
iteration,167.0


writing prediction : output/auc:0.8606993746757163 acc:0.8051568629774928sweep lgbm.csv
                                      Feature  Importance
0                                      userID        2493
1                            assessmentItemID        1984
2                                      testId         692
3                                KnowledgeTag         553
14                            UserItemElapsed         106
15                                    ItemAcc         106
13                      CumulativeUserItemAcc          66
17           AverageItemSolvingTime_Incorrect          44
20                      UserTagAvgSolvingTime          37
23        CumulativeUserTagExponentialAverage          32
5                                 SolvingTime          30
6                              CumulativeTime          27
19  Difference_SolvingTime_AvgItemSolvingTime          26
22                CumulativeUserTagAverageAcc          18
28                         PreviousItemAns

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1ybyrz33 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8479608235157476
[34m[1mwandb[0m: 	bagging_freq: 20
[34m[1mwandb[0m: 	feature_fraction: 0.8858975841531473
[34m[1mwandb[0m: 	lambda_l1: 1.46298535065012
[34m[1mwandb[0m: 	lambda_l2: 1.1133218565781031
[34m[1mwandb[0m: 	learning_rate: 0.23811578847952972
[34m[1mwandb[0m: 	max_depth: 34
[34m[1mwandb[0m: 	min_data_in_leaf: 59
[34m[1mwandb[0m: 	num_leaves: 50
[34m[1mwandb[0m: 	ratio: 0.051616795325350226


[LightGBM] [Info] Number of positive: 1568165, number of negative: 827439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.087899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22347
[LightGBM] [Info] Number of data points in the train set: 2395604, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654601 -> initscore=0.639326
[LightGBM] [Info] Start training from score 0.639326




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[222]	training's auc: 0.899915	valid_1's auc: 0.860782
VALID AUC : 0.8607815054327651 ACC : 0.8058180925494047



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▄▆▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.80582
auc,0.86078
iteration,251.0


writing prediction : output/auc:0.8607815054327651 acc:0.8058180925494047sweep lgbm.csv
                                      Feature  Importance
0                                      userID        4083
1                            assessmentItemID        3489
3                                KnowledgeTag        1264
2                                      testId        1254
14                            UserItemElapsed         133
15                                    ItemAcc         127
13                      CumulativeUserItemAcc          79
17           AverageItemSolvingTime_Incorrect          54
20                      UserTagAvgSolvingTime          52
5                                 SolvingTime          40
19  Difference_SolvingTime_AvgItemSolvingTime          37
23        CumulativeUserTagExponentialAverage          36
6                              CumulativeTime          33
22                CumulativeUserTagAverageAcc          23
28                         PreviousItemAns

[34m[1mwandb[0m: Agent Starting Run: wsqsfo8g with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.99081469766256
[34m[1mwandb[0m: 	bagging_freq: 17
[34m[1mwandb[0m: 	feature_fraction: 0.8876633167040902
[34m[1mwandb[0m: 	lambda_l1: 2.5925022551098587
[34m[1mwandb[0m: 	lambda_l2: 1.4427437237214058
[34m[1mwandb[0m: 	learning_rate: 0.20487825794996875
[34m[1mwandb[0m: 	max_depth: 35
[34m[1mwandb[0m: 	min_data_in_leaf: 44
[34m[1mwandb[0m: 	num_leaves: 60
[34m[1mwandb[0m: 	ratio: 0.11815585743067208


[LightGBM] [Info] Number of positive: 1457976, number of negative: 769572
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054915 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22372
[LightGBM] [Info] Number of data points in the train set: 2227548, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654521 -> initscore=0.638970
[LightGBM] [Info] Start training from score 0.638970




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[170]	training's auc: 0.900829	valid_1's auc: 0.86011
VALID AUC : 0.8601099656284955 ACC : 0.8047371384145198



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████
valid_1_auc,▁▃▅▆▆▇▇▇▇███████████████████████████████

0,1
accuracy,0.80474
auc,0.86011
iteration,199.0


writing prediction : output/auc:0.8601099656284955 acc:0.8047371384145198sweep lgbm.csv
                                      Feature  Importance
0                                      userID        3988
1                            assessmentItemID        3325
2                                      testId         934
3                                KnowledgeTag         831
14                            UserItemElapsed         160
15                                    ItemAcc         160
13                      CumulativeUserItemAcc         105
17           AverageItemSolvingTime_Incorrect          60
5                                 SolvingTime          59
20                      UserTagAvgSolvingTime          54
19  Difference_SolvingTime_AvgItemSolvingTime          50
23        CumulativeUserTagExponentialAverage          41
6                              CumulativeTime          39
28                         PreviousItemAnswer          27
29                                    Test

[34m[1mwandb[0m: Agent Starting Run: ny4mig3o with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8207068278752112
[34m[1mwandb[0m: 	bagging_freq: 19
[34m[1mwandb[0m: 	feature_fraction: 0.916905396502421
[34m[1mwandb[0m: 	lambda_l1: 1.1903786440581177
[34m[1mwandb[0m: 	lambda_l2: 9.136507981728457
[34m[1mwandb[0m: 	learning_rate: 0.32619733391192457
[34m[1mwandb[0m: 	max_depth: 50
[34m[1mwandb[0m: 	min_data_in_leaf: 43
[34m[1mwandb[0m: 	num_leaves: 52
[34m[1mwandb[0m: 	ratio: 0.05265301587877898


[LightGBM] [Info] Number of positive: 1566534, number of negative: 826439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069965 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22371
[LightGBM] [Info] Number of data points in the train set: 2392973, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654639 -> initscore=0.639495
[LightGBM] [Info] Start training from score 0.639495




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[96]	training's auc: 0.888485	valid_1's auc: 0.857292
VALID AUC : 0.8572923065584267 ACC : 0.8018017340562328



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████
valid_1_auc,▁▄▅▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.8018
auc,0.85729
iteration,125.0


writing prediction : output/auc:0.8572923065584267 acc:0.8018017340562328sweep lgbm.csv
                                      Feature  Importance
0                                      userID        1854
1                            assessmentItemID        1697
2                                      testId         439
3                                KnowledgeTag         418
15                                    ItemAcc          93
14                            UserItemElapsed          85
13                      CumulativeUserItemAcc          54
17           AverageItemSolvingTime_Incorrect          39
5                                 SolvingTime          30
20                      UserTagAvgSolvingTime          23
23        CumulativeUserTagExponentialAverage          23
19  Difference_SolvingTime_AvgItemSolvingTime          22
6                              CumulativeTime          15
22                CumulativeUserTagAverageAcc          12
31                         categorize_Item

[34m[1mwandb[0m: Agent Starting Run: ckwq8o36 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9476711244812354
[34m[1mwandb[0m: 	bagging_freq: 17
[34m[1mwandb[0m: 	feature_fraction: 0.6467375587253605
[34m[1mwandb[0m: 	lambda_l1: 1.0689336051404996
[34m[1mwandb[0m: 	lambda_l2: 2.7287555907673635
[34m[1mwandb[0m: 	learning_rate: 0.2926059893701045
[34m[1mwandb[0m: 	max_depth: 27
[34m[1mwandb[0m: 	min_data_in_leaf: 54
[34m[1mwandb[0m: 	num_leaves: 47
[34m[1mwandb[0m: 	ratio: 0.06101810501976772


[LightGBM] [Info] Number of positive: 1552549, number of negative: 819309
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.284306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22367
[LightGBM] [Info] Number of data points in the train set: 2371858, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654571 -> initscore=0.639192
[LightGBM] [Info] Start training from score 0.639192




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[119]	training's auc: 0.888865	valid_1's auc: 0.85908
VALID AUC : 0.8590796245453359 ACC : 0.8049487988163376



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████
valid_1_auc,▁▃▅▆▆▇▇▇▇▇██████████████████████████████

0,1
accuracy,0.80495
auc,0.85908
iteration,148.0


writing prediction : output/auc:0.8590796245453359 acc:0.8049487988163376sweep lgbm.csv
                                      Feature  Importance
0                                      userID        1786
1                            assessmentItemID        1627
2                                      testId         717
3                                KnowledgeTag         704
14                            UserItemElapsed         103
15                                    ItemAcc         100
13                      CumulativeUserItemAcc          53
17           AverageItemSolvingTime_Incorrect          47
20                      UserTagAvgSolvingTime          39
5                                 SolvingTime          33
6                              CumulativeTime          30
19  Difference_SolvingTime_AvgItemSolvingTime          28
31                         categorize_ItemAcc          24
22                CumulativeUserTagAverageAcc          23
23        CumulativeUserTagExponentialAver

[34m[1mwandb[0m: Agent Starting Run: xlc1rug7 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9696846544464476
[34m[1mwandb[0m: 	bagging_freq: 20
[34m[1mwandb[0m: 	feature_fraction: 0.7798791436397821
[34m[1mwandb[0m: 	lambda_l1: 3.903094969769966
[34m[1mwandb[0m: 	lambda_l2: 0.999188772806856
[34m[1mwandb[0m: 	learning_rate: 0.21302502026700937
[34m[1mwandb[0m: 	max_depth: 48
[34m[1mwandb[0m: 	min_data_in_leaf: 118
[34m[1mwandb[0m: 	num_leaves: 38
[34m[1mwandb[0m: 	ratio: 0.07823272419889231


[LightGBM] [Info] Number of positive: 1524384, number of negative: 803999
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038865 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22366
[LightGBM] [Info] Number of data points in the train set: 2328383, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654696 -> initscore=0.639748
[LightGBM] [Info] Start training from score 0.639748




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[179]	training's auc: 0.888597	valid_1's auc: 0.861328
VALID AUC : 0.8613279774189803 ACC : 0.8047304034458149



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▃▅▅▆▇▇▇▇▇▇▇████████████████████████████

0,1
accuracy,0.80473
auc,0.86133
iteration,208.0


writing prediction : output/auc:0.8613279774189803 acc:0.8047304034458149sweep lgbm.csv
                                      Feature  Importance
0                                      userID        2435
1                            assessmentItemID        1978
2                                      testId         760
3                                KnowledgeTag         660
14                            UserItemElapsed         127
15                                    ItemAcc         126
13                      CumulativeUserItemAcc          66
17           AverageItemSolvingTime_Incorrect          56
5                                 SolvingTime          44
20                      UserTagAvgSolvingTime          42
23        CumulativeUserTagExponentialAverage          40
19  Difference_SolvingTime_AvgItemSolvingTime          33
6                              CumulativeTime          28
28                         PreviousItemAnswer          24
31                         categorize_Item

[34m[1mwandb[0m: Agent Starting Run: s7rz9jdo with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9386026546378394
[34m[1mwandb[0m: 	bagging_freq: 14
[34m[1mwandb[0m: 	feature_fraction: 0.9747956021713036
[34m[1mwandb[0m: 	lambda_l1: 0.35890116668879646
[34m[1mwandb[0m: 	lambda_l2: 1.9752328579285132
[34m[1mwandb[0m: 	learning_rate: 0.219882185869755
[34m[1mwandb[0m: 	max_depth: 20
[34m[1mwandb[0m: 	min_data_in_leaf: 117
[34m[1mwandb[0m: 	num_leaves: 56
[34m[1mwandb[0m: 	ratio: 0.09199742933169962


[LightGBM] [Info] Number of positive: 1501472, number of negative: 792124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.376581 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22363
[LightGBM] [Info] Number of data points in the train set: 2293596, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654637 -> initscore=0.639483
[LightGBM] [Info] Start training from score 0.639483




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[108]	training's auc: 0.888372	valid_1's auc: 0.860793
VALID AUC : 0.8607931395818131 ACC : 0.8064985367533138



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████
valid_1_auc,▁▃▄▅▅▆▇▇▇▇▇▇▇▇██████████████████████████

0,1
accuracy,0.8065
auc,0.86079
iteration,137.0


writing prediction : output/auc:0.8607931395818131 acc:0.8064985367533138sweep lgbm.csv
                                      Feature  Importance
0                                      userID        2296
1                            assessmentItemID        1659
3                                KnowledgeTag         640
2                                      testId         579
15                                    ItemAcc         159
14                            UserItemElapsed         132
13                      CumulativeUserItemAcc          77
17           AverageItemSolvingTime_Incorrect          62
5                                 SolvingTime          44
20                      UserTagAvgSolvingTime          40
19  Difference_SolvingTime_AvgItemSolvingTime          33
6                              CumulativeTime          28
23        CumulativeUserTagExponentialAverage          28
29                                    TestAcc          22
28                         PreviousItemAns

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7439d1e6 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.8990428172314074
[34m[1mwandb[0m: 	bagging_freq: 19
[34m[1mwandb[0m: 	feature_fraction: 0.8815684034044944
[34m[1mwandb[0m: 	lambda_l1: 9.269142087192792
[34m[1mwandb[0m: 	lambda_l2: 0.21591968107705695
[34m[1mwandb[0m: 	learning_rate: 0.10162567524383058
[34m[1mwandb[0m: 	max_depth: 27
[34m[1mwandb[0m: 	min_data_in_leaf: 111
[34m[1mwandb[0m: 	num_leaves: 53
[34m[1mwandb[0m: 	ratio: 0.05178328037917766


[LightGBM] [Info] Number of positive: 1567945, number of negative: 827208
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042948 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22381
[LightGBM] [Info] Number of data points in the train set: 2395153, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654633 -> initscore=0.639465
[LightGBM] [Info] Start training from score 0.639465




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[359]	training's auc: 0.895382	valid_1's auc: 0.862006
VALID AUC : 0.862005565526212 ACC : 0.8059065923564445



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▃▄▅▆▆▇▇▇▇▇▇▇███████████████████████████

0,1
accuracy,0.80591
auc,0.86201
iteration,388.0


writing prediction : output/auc:0.862005565526212 acc:0.8059065923564445sweep lgbm.csv
                                      Feature  Importance
0                                      userID        7309
1                            assessmentItemID        5878
2                                      testId        1972
3                                KnowledgeTag        1686
15                                    ItemAcc         323
14                            UserItemElapsed         318
13                      CumulativeUserItemAcc         202
17           AverageItemSolvingTime_Incorrect         129
5                                 SolvingTime         106
20                      UserTagAvgSolvingTime          96
19  Difference_SolvingTime_AvgItemSolvingTime          81
23        CumulativeUserTagExponentialAverage          75
6                              CumulativeTime          68
28                         PreviousItemAnswer          54
29                                    TestA

[34m[1mwandb[0m: Agent Starting Run: ljr4jhuw with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9935470968594132
[34m[1mwandb[0m: 	bagging_freq: 20
[34m[1mwandb[0m: 	feature_fraction: 0.8756683713489131
[34m[1mwandb[0m: 	lambda_l1: 4.856867580374198
[34m[1mwandb[0m: 	lambda_l2: 7.717593069518097
[34m[1mwandb[0m: 	learning_rate: 0.04943206984889444
[34m[1mwandb[0m: 	max_depth: 46
[34m[1mwandb[0m: 	min_data_in_leaf: 118
[34m[1mwandb[0m: 	num_leaves: 58
[34m[1mwandb[0m: 	ratio: 0.06095707379497671
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


[LightGBM] [Info] Number of positive: 1552637, number of negative: 819395
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.562650 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22360
[LightGBM] [Info] Number of data points in the train set: 2372032, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654560 -> initscore=0.639144
[LightGBM] [Info] Start training from score 0.639144




Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[491]	training's auc: 0.891018	valid_1's auc: 0.861959
VALID AUC : 0.8619594610121543 ACC : 0.8068917127933266



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████
valid_1_auc,▁▂▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇███████████████████████

0,1
accuracy,0.80689
auc,0.86196
iteration,499.0


writing prediction : output/auc:0.8619594610121543 acc:0.8068917127933266sweep lgbm.csv
                                      Feature  Importance
0                                      userID       11527
1                            assessmentItemID        8587
2                                      testId        2545
3                                KnowledgeTag        1675
15                                    ItemAcc         732
14                            UserItemElapsed         628
13                      CumulativeUserItemAcc         356
17           AverageItemSolvingTime_Incorrect         275
5                                 SolvingTime         198
20                      UserTagAvgSolvingTime         173
19  Difference_SolvingTime_AvgItemSolvingTime         156
23        CumulativeUserTagExponentialAverage         128
6                              CumulativeTime         124
29                                    TestAcc         113
28                         PreviousItemAns

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qeengcp8 with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9692568927987442
[34m[1mwandb[0m: 	bagging_freq: 20
[34m[1mwandb[0m: 	feature_fraction: 0.7535236179545897
[34m[1mwandb[0m: 	lambda_l1: 6.87430486978105
[34m[1mwandb[0m: 	lambda_l2: 5.337201202094897
[34m[1mwandb[0m: 	learning_rate: 0.37255863924998006
[34m[1mwandb[0m: 	max_depth: 30
[34m[1mwandb[0m: 	min_data_in_leaf: 107
[34m[1mwandb[0m: 	num_leaves: 55
[34m[1mwandb[0m: 	ratio: 0.063140596900105


[LightGBM] [Info] Number of positive: 1548933, number of negative: 817555
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.295390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22374
[LightGBM] [Info] Number of data points in the train set: 2366488, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654528 -> initscore=0.639003
[LightGBM] [Info] Start training from score 0.639003




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[80]	training's auc: 0.888959	valid_1's auc: 0.859465
VALID AUC : 0.8594654588662768 ACC : 0.8037725437078286



0,1
accuracy,▁
auc,▁
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_auc,▁▂▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████
valid_1_auc,▁▃▅▆▇▇▇▇████████████████████████████████

0,1
accuracy,0.80377
auc,0.85947
iteration,109.0


writing prediction : output/auc:0.8594654588662768 acc:0.8037725437078286sweep lgbm.csv
                                      Feature  Importance
0                                      userID        1504
1                            assessmentItemID        1495
2                                      testId         497
3                                KnowledgeTag         344
15                                    ItemAcc          88
14                            UserItemElapsed          75
17           AverageItemSolvingTime_Incorrect          37
13                      CumulativeUserItemAcc          33
20                      UserTagAvgSolvingTime          29
5                                 SolvingTime          28
23        CumulativeUserTagExponentialAverage          24
19  Difference_SolvingTime_AvgItemSolvingTime          21
6                              CumulativeTime          20
34           categorize_CumulativeUserItemAcc          14
28                         PreviousItemAns

[34m[1mwandb[0m: Agent Starting Run: 9e81xp8e with config:
[34m[1mwandb[0m: 	bagging_fraction: 0.9957896690405126
[34m[1mwandb[0m: 	bagging_freq: 18
[34m[1mwandb[0m: 	feature_fraction: 0.8718380319929926
[34m[1mwandb[0m: 	lambda_l1: 7.065866418019543
[34m[1mwandb[0m: 	lambda_l2: 2.473333069037198
[34m[1mwandb[0m: 	learning_rate: 0.12368323258148985
[34m[1mwandb[0m: 	max_depth: 33
[34m[1mwandb[0m: 	min_data_in_leaf: 119
[34m[1mwandb[0m: 	num_leaves: 54
[34m[1mwandb[0m: 	ratio: 0.06401774477020646


[LightGBM] [Info] Number of positive: 1547783, number of negative: 816498
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22352
[LightGBM] [Info] Number of data points in the train set: 2364281, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.654653 -> initscore=0.639554
[LightGBM] [Info] Start training from score 0.639554




Training until validation scores don't improve for 30 rounds
