In [19]:
import os
import pickle
import json
import gc

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

import seaborn as sns

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import ExtraTreeClassifier

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
from sklearn.model_selection import KFold, StratifiedKFold
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

## 读入数据

在此次竞赛的数据集中，每个样本都对应一个葡萄牙大学的学生。原始数据集中共有4424名学生，Kaggle基于原始数据集生成了超过100,000个合成样本。对于每个学生，我们获得了人口统计数据、宏观经济数据以及课程前两个学期的表现。竞赛的目标是预测学生在三年或四年学习后的状态：是否毕业、仍在就读，或退学。

In [5]:
train_df = pd.read_csv('../input/train.csv', index_col=0)
test_df = pd.read_csv('../input/test.csv', index_col=0)

train_df.shape[0], test_df.shape[0]

(76518, 51012)

In [6]:
ori_all_feats = [col for col in train_df.columns if col != 'Target']

In [7]:
ori_cat_feats = [
    'Marital status',
    'Application mode',
    'Application order',
    'Course',
    'Daytime/evening attendance',
    'Previous qualification',
    'Nacionality',
    'Mother\'s qualification',
    'Father\'s qualification',
    'Mother\'s occupation',
    'Father\'s occupation',
    'Displaced',
    'Educational special needs',
    'Debtor',
    'Tuition fees up to date',
    'Gender',
    'Scholarship holder',
    'International',
]

ori_num_feats = [
    'Previous qualification (grade)',
    'Admission grade',
    'Age at enrollment',
    'Curricular units 1st sem (credited)',
    'Curricular units 1st sem (enrolled)',
    'Curricular units 1st sem (evaluations)',
    'Curricular units 1st sem (approved)',
    'Curricular units 1st sem (grade)',
    'Curricular units 1st sem (without evaluations)',
    'Curricular units 2nd sem (credited)',
    'Curricular units 2nd sem (enrolled)',
    'Curricular units 2nd sem (evaluations)',
    'Curricular units 2nd sem (approved)',
    'Curricular units 2nd sem (grade)',
    'Curricular units 2nd sem (without evaluations)',
    'Unemployment rate',
    'Inflation rate',
    'GDP',
]

In [8]:
set(ori_all_feats) == ( set(ori_cat_feats) | set(ori_num_feats) )

True

In [9]:
len(ori_all_feats), len(ori_cat_feats), len(ori_num_feats)

(36, 18, 18)

## FE(简单)

- Tree-based 模型不需要对特征进行诸如StandardScaler MinMaxScaler之类的缩放
- 类别特征需要做label-encode / one-hot-encode

**清除列名里的特殊字符，避免后续软件包里出错**

In [10]:
special_char = ["'", '/', ' ']
def normalize_feature_name(name):
    for c in special_char:
        name = name.replace(c, '_')
    name = name.replace('(', '').replace(')', '')
    return name

In [11]:
ori_all_feats = [normalize_feature_name(col) for col in ori_all_feats]
ori_cat_feats = [normalize_feature_name(col) for col in ori_cat_feats]
ori_num_feats = [normalize_feature_name(col) for col in ori_num_feats]

In [12]:
com_df = pd.concat([train_df, test_df], axis=0, ignore_index=False)

In [13]:
com_df.columns = com_df.columns.map(normalize_feature_name)

In [14]:
for col in ori_cat_feats:
    com_df[col] = LabelEncoder().fit_transform(com_df[col])

label2code = {
    'Graduate': 0,
    'Enrolled': 1,
    'Dropout': 2,
}

code2label = {v: v for k, v in label2code.items()}
com_df['Target'] = com_df['Target'].map(label2code)

In [15]:
train_df = com_df.loc[train_df.index].copy()
train_df['Target'] = train_df['Target'].astype(int)
test_df = com_df.loc[test_df.index]

In [16]:
train_df.shape[0], test_df.shape[0]

(76518, 51012)

In [20]:
del com_df
gc.collect()

407

## CV(交叉验证)

CV是一切试验的基础，有了CV才知道哪些优化是正向的，哪些是没用的

In [22]:
def lgb_cv(params, train_df, test_df, feat_cols, cat_feat_cols, target_col, stratified=False, nfold=5, num_boost_round=10000):
    if stratified:
        folds = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=42)
    else:
        folds = KFold(n_splits=nfold, shuffle=True, random_state=42)
        
    target = train_df[target_col]

    oof = np.zeros((train_df.shape[0], 3), dtype=np.float64)
    pred = np.zeros((test_df.shape[0], 3), dtype=np.float64)

    for i, (trn_idx, val_idx) in enumerate(folds.split(train_df.index, train_df[target_col].astype(int))):
        print(f'fold={i}', '- ' * 20)
        trn_data = lgb.Dataset(train_df.loc[trn_idx, feat_cols], label=target.loc[trn_idx], categorical_feature=cat_feat_cols)
        val_data = lgb.Dataset(train_df.loc[val_idx, feat_cols], label=target.loc[val_idx], categorical_feature=cat_feat_cols)
    
        model = lgb.train(params, trn_data, num_boost_round, valid_sets=val_data, callbacks=[lgb.log_evaluation(200)])
    
        oof[val_idx] = model.predict(train_df.loc[val_idx, feat_cols], num_iteration=model.best_iteration)
        pred += model.predict(test_df[feat_cols], num_iteration=model.best_iteration) / nfold

    cv = accuracy_score(target, oof.argmax(axis=-1))
    return cv, oof, pred

In [54]:
params = {
    'num_threads': 16,
    'learning_rate': 0.01,
    'objective': 'multiclass',
    'num_class': 3,
    'num_leaves': 31,
    'min_data_in_leaf': 20,
    'bagging_freq': 1,
    'bagging_fraction': 0.8,
    'feature_fraction': 0.8,
    'metric': 'multi_error',
    'early_stopping_rounds': 400,
}

In [25]:
cv, oof, pred = lgb_cv(params, train_df, test_df, ori_all_feats, ori_cat_feats, 'Target')

fold=0 - - - - - - - - - - - - - - - - - - - - 
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1372
[LightGBM] [Info] Number of data points in the train set: 61214, number of used features: 36
[LightGBM] [Info] Start training from score -0.746287
[LightGBM] [Info] Start training from score -1.635907
[LightGBM] [Info] Start training from score -1.105333
Training until validation scores don't improve for 400 rounds
[200]	valid_0's multi_error: 0.172569
[400]	valid_0's multi_error: 0.167734
[600]	valid_0's multi_error: 0.166231
[800]	valid_0's multi_error: 0.166035
[1000]	valid_0's multi_error: 0.16499
[1200]	valid_0's multi_error: 0.164663
[1400]	valid_0's multi_error: 0.164597
Early stopping, best iteration is:
[1110]	valid_0's multi_error: 0.164532
fold=1 - - - - - - - - - - - - - 

In [26]:
print(cv)

0.8340259808149716


In [110]:
params = {
    'num_threads': 16,
    'learning_rate': 0.05,
    'objective': 'multiclass',
    'num_class': 3,
    'metric': 'multi_error',
    'early_stopping_rounds': 400,
}

In [111]:
cv, oof, pred = lgb_cv(params, train_df, test_df, ori_all_feats, ori_cat_feats, 'Target')

fold=0 - - - - - - - - - - - - - - - - - - - - 
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004497 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1372
[LightGBM] [Info] Number of data points in the train set: 61214, number of used features: 36
[LightGBM] [Info] Start training from score -0.746287
[LightGBM] [Info] Start training from score -1.635907
[LightGBM] [Info] Start training from score -1.105333
Training until validation scores don't improve for 400 rounds
[200]	valid_0's multi_error: 0.165708
[400]	valid_0's multi_error: 0.164597
[600]	valid_0's multi_error: 0.164401
[800]	valid_0's multi_error: 0.16512
Early stopping, best iteration is:
[444]	valid_0's multi_error: 0.163879
fold=1 - - - - - - - - - - - - - - - - - - - - 
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008189 seconds.

In [112]:
print(cv)

0.8332418515904755


## AutoGluon

In [27]:
from autogluon.core.metrics import make_scorer
from autogluon.tabular import TabularDataset, TabularPredictor

### hack

AutoGluon在计算 LightGBM 模型 metrics 时有bug，需要手动修复

In [1]:
import autogluon.tabular
print(autogluon.tabular.__file__)

/Users/jiazhuang/opt/miniconda3/envs/py3.10/lib/python3.10/site-packages/autogluon/tabular/__init__.py


将文件`/<miniconda按照目录>/envs/py38/lib/python3.8/site-packages/autogluon/tabular/models/lgb/lgb_utils.py`的41行替换为：
```python
    if problem_type in [REGRESSION, QUANTILE]:
        # TODO: Might not work for custom quantile metrics
        def function_template(y_hat, data):
            y_true = data.get_label()
            return metric.name, metric(y_true, y_hat), is_higher_better  

    elif needs_pred_proba:
```
重启 notebook kernel 生效！

### 单模型

In [29]:
# autogluon根据数据集的dtype，自动处理 数值/类别 特征
for col in ori_cat_feats:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

#### LightGBM

In [31]:
hyperparameters = {
    'GBM':{}  # 只使用 LightGBM 模型，默参
}

In [32]:
predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=3,
).fit(
    train_df, 
    hyperparameters=hyperparameters,
    presets='medium_quality',
    num_bag_sets=1,
    num_bag_folds=5,
    num_stack_levels=0
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240626_153736"
Presets specified: ['medium_quality']
User Specified kwargs:
{'auto_stack': False,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_stack_levels': 0}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': None,
 'auto_stack': False,
 'calibrate': 'auto',
 'ds_args': {'clean_up_fits': True,
             'detection_time_frac': 0.25,
             'holdout_data': None,
             'holdout_frac': 0.1111111111111111,
             'memory_safe_fits': True,
             'n_folds': 2,
             'n_repeats': 1,
             'validation_procedure': 'holdout'},
 'excluded_model_types': None,
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': None,
 'included_model_types': None,
 'keep_only_best': False,
 'name_suffix': None,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_s

In [33]:
predictor.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM_BAG_L1,0.832719,accuracy,0.777771,244.744118,0.777771,244.744118,1,True,1
1,WeightedEnsemble_L2,0.832719,accuracy,0.786112,244.761742,0.008341,0.017624,2,True,2


#### LightGBM 手动设置超参

In [56]:
params = {
    'num_threads': 12,
    'learning_rate': 0.01,
    'objective': 'multiclass',
    'num_class': 3,
    'num_leaves': 31,
    'min_data_in_leaf': 20,
    'bagging_freq': 1,
    'bagging_fraction': 0.8,
    'feature_fraction': 0.8,
    'metric': 'multi_error',
    'early_stopping_rounds': 400,
}

In [57]:
hyperparameters = {
    'GBM': params  # 只使用 LightGBM 模型，默参
}

In [58]:
predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=3,
).fit(
    train_df, 
    hyperparameters=hyperparameters,
    presets='medium_quality',
    num_bag_sets=1,
    num_bag_folds=5,
    num_stack_levels=0
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240627_134523"
Presets specified: ['medium_quality']
User Specified kwargs:
{'auto_stack': False,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_stack_levels': 0}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': None,
 'auto_stack': False,
 'calibrate': 'auto',
 'ds_args': {'clean_up_fits': True,
             'detection_time_frac': 0.25,
             'holdout_data': None,
             'holdout_frac': 0.1111111111111111,
             'memory_safe_fits': True,
             'n_folds': 2,
             'n_repeats': 1,
             'validation_procedure': 'holdout'},
 'excluded_model_types': None,
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': None,
 'included_model_types': None,
 'keep_only_best': False,
 'name_suffix': None,
 'num_bag_folds': 5,
 'num_bag_sets': 1,
 'num_s

In [59]:
predictor.leaderboard(extra_info=True)

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM_BAG_L1,0.833033,accuracy,3.08669,30.892312,3.08669,30.892312,1,True,1
1,WeightedEnsemble_L2,0.833033,accuracy,3.094825,30.90641,0.008135,0.014097,2,True,2


In [96]:
lb_df = predictor.leaderboard(extra_info=True)
lb_df

Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F5/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F5/model.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F1/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F1/model.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F4/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F4/model.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F2/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F2/model.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F3/info.pkl
Loading: AutogluonModels/ag-20240627_134523/models/LightGBM_BAG_L1/S1F3/model.pkl
Loading: AutogluonModels/ag-2024

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order,...,hyperparameters,hyperparameters_fit,ag_args_fit,features,compile_time,child_hyperparameters,child_hyperparameters_fit,child_ag_args_fit,ancestors,descendants
0,LightGBM_BAG_L1,0.833033,accuracy,3.08669,30.892312,3.08669,30.892312,1,True,1,...,"{'use_orig_features': True, 'max_base_models':...",{},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Curricular_units_1st_sem_evaluations, Scholar...",,"{'learning_rate': 0.01, 'num_threads': 12, 'ob...",{'num_boost_round': 1044},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...",[],[WeightedEnsemble_L2]
1,WeightedEnsemble_L2,0.833033,accuracy,3.094825,30.90641,0.008135,0.014097,2,True,2,...,"{'use_orig_features': False, 'max_base_models'...",{},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[LightGBM_BAG_L1_1, LightGBM_BAG_L1_2, LightGB...",,"{'ensemble_size': 25, 'subsample_size': 1000000}",{'ensemble_size': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...",[LightGBM_BAG_L1],[]


In [102]:
lb_df.iloc[0].child_hyperparameters

{'learning_rate': 0.01,
 'num_threads': 12,
 'objective': 'multiclass',
 'num_class': 3,
 'num_leaves': 31,
 'min_data_in_leaf': 20,
 'bagging_freq': 1,
 'bagging_fraction': 0.8,
 'feature_fraction': 0.8,
 'metric': 'multi_error',
 'early_stopping_rounds': 400}

In [103]:
lb_df.iloc[0].child_hyperparameters_fit

{'num_boost_round': 1044}

#### LightGBM 调参

In [113]:
from autogluon.common import space

In [134]:
params = {
    # 'num_threads': 12,
    'learning_rate': space.Real(1e-3, 5e-1, default=5e-2, log=True),
    'objective': 'multiclass',
    'num_class': 3,
    'num_leaves': space.Int(lower=2, upper=100, default=31),
    'min_data_in_leaf': space.Int(lower=10, upper=100, default=20),
    'bagging_freq': 1,
    'bagging_fraction': space.Real(0.0, 1.0, default=0.8),
    'feature_fraction': space.Real(0.0, 1.0, default=0.8),
    'metric': 'multi_error',
    'early_stopping_rounds': 400,
}

In [135]:
hyperparameters = {
    'GBM': params
}

In [136]:
hyperparameter_tune_kwargs = {
    'num_trials': 100,
    'searcher': 'auto',
    'scheduler' : 'local',
}

In [137]:
predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=3,
).fit(
    train_df, 
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
    num_stack_levels=0,
    holdout_frac=0.3,
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240628_021311"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
User Specified kwargs:
{'holdout_frac': 0.3,
 'hyperparameter_tune_kwargs': {'num_trials': 100,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'num_stack_levels': 0}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_en

  0%|          | 0/100 [00:00<?, ?it/s]

Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.05, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 31, 'min_data_in_leaf': 20, 'bagging_freq': 1, 'bagging_fraction': 0.8, 'feature_fraction': 0.8, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T1/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.042348718392737926, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 11, 'min_data_in_leaf': 77, 'bagging_freq': 1, 'bagging_fraction': 0.5488135039273248, 'feature_fraction': 0.7151893663724195, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag

[1000]	valid_set's multi_error: 0.166972


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T3/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.1556033839305467, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 90, 'min_data_in_leaf': 56, 'bagging_freq': 1, 'bagging_fraction': 0.2726562945801132, 'feature_fraction': 0.47766511732134986, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T4/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.008139957109567855, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 82, 'min_data_in_leaf': 30, 'bagging_freq': 1, 'bagging_fraction': 0.3927847961008297, 'fea

[1000]	valid_set's multi_error: 0.167233


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T5/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.22291366459641138, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 90, 'min_data_in_leaf': 92, 'bagging_freq': 1, 'bagging_fraction': 0.832619845547938, 'feature_fraction': 0.7781567509498505, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T6/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.06796346121798984, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 34, 'min_data_in_leaf': 49, 'bagging_freq': 1, 'bagging_fraction': 0.8009107519796442, 'featu

[1000]	valid_set's multi_error: 0.177862
[2000]	valid_set's multi_error: 0.174116


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T9/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.044877431780743424, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 81, 'min_data_in_leaf': 27, 'bagging_freq': 1, 'bagging_fraction': 0.018789800436355142, 'feature_fraction': 0.6176354970758771, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T10/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.04514814213154827, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 67, 'min_data_in_leaf': 11, 'bagging_freq': 1, 'bagging_fraction': 0.9025984755294046, '

[1000]	valid_set's multi_error: 0.167538
[2000]	valid_set's multi_error: 0.165578


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T16/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0010880013579881585, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 38, 'min_data_in_leaf': 60, 'bagging_freq': 1, 'bagging_fraction': 0.9495710534507421, 'feature_fraction': 0.6625268669500443, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T17/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.018402445310508105, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 23, 'min_data_in_leaf': 87, 'bagging_freq': 1, 'bagging_fraction': 0.09609840789396307,

[1000]	valid_set's multi_error: 0.214541
[2000]	valid_set's multi_error: 0.193021
[3000]	valid_set's multi_error: 0.185137
[4000]	valid_set's multi_error: 0.181783
[5000]	valid_set's multi_error: 0.1793
[6000]	valid_set's multi_error: 0.177296
[7000]	valid_set's multi_error: 0.176076
[8000]	valid_set's multi_error: 0.174377
[9000]	valid_set's multi_error: 0.17368
[10000]	valid_set's multi_error: 0.172852


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T19/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.23789100148510528, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 84, 'min_data_in_leaf': 77, 'bagging_freq': 1, 'bagging_fraction': 0.4808935308361628, 'feature_fraction': 0.6886611828057704, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T20/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.023642421622859816, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 43, 'min_data_in_leaf': 24, 'bagging_freq': 1, 'bagging_fraction': 0.5651888666048753, 'f

[1000]	valid_set's multi_error: 0.186008
[2000]	valid_set's multi_error: 0.180563
[3000]	valid_set's multi_error: 0.175945
[4000]	valid_set's multi_error: 0.173506


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T22/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.006040854339990919, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 48, 'min_data_in_leaf': 29, 'bagging_freq': 1, 'bagging_fraction': 0.13179786240439217, 'feature_fraction': 0.7163272041185655, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.169629


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T23/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.17269679360003765, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 26, 'min_data_in_leaf': 40, 'bagging_freq': 1, 'bagging_fraction': 0.5865129348100832, 'feature_fraction': 0.020107546187493552, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.171023


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T24/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0964420899238991, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 42, 'min_data_in_leaf': 23, 'bagging_freq': 1, 'bagging_fraction': 0.6778165367962301, 'feature_fraction': 0.27000797319216485, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T25/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.2620479283097813, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 69, 'min_data_in_leaf': 62, 'bagging_freq': 1, 'bagging_fraction': 0.7561066938650409, 'fea

[1000]	valid_set's multi_error: 0.170239
[2000]	valid_set's multi_error: 0.167407
[3000]	valid_set's multi_error: 0.167407


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T31/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0017754741633347207, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 97, 'min_data_in_leaf': 11, 'bagging_freq': 1, 'bagging_fraction': 0.878452190276042, 'feature_fraction': 0.10286335869343821, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T32/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.007351262416410615, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 5, 'min_data_in_leaf': 92, 'bagging_freq': 1, 'bagging_fraction': 0.033625093498832026,

[1000]	valid_set's multi_error: 0.176947
[2000]	valid_set's multi_error: 0.174334


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T33/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.2554314574235273, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 71, 'min_data_in_leaf': 71, 'bagging_freq': 1, 'bagging_fraction': 0.36756187004789653, 'feature_fraction': 0.4358649252656268, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T34/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.30314889811793405, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 56, 'min_data_in_leaf': 20, 'bagging_freq': 1, 'bagging_fraction': 0.7038885835403663, 'fe

[1000]	valid_set's multi_error: 0.197508
[2000]	valid_set's multi_error: 0.184222
[3000]	valid_set's multi_error: 0.179387
[4000]	valid_set's multi_error: 0.176991
[5000]	valid_set's multi_error: 0.175815
[6000]	valid_set's multi_error: 0.173637
[7000]	valid_set's multi_error: 0.172547


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T40/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0014017049293645642, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 2, 'min_data_in_leaf': 46, 'bagging_freq': 1, 'bagging_fraction': 0.17162967726144052, 'feature_fraction': 0.5210366062041293, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T41/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.008552514657227738, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 64, 'min_data_in_leaf': 90, 'bagging_freq': 1, 'bagging_fraction': 0.7936977033574206, 

[1000]	valid_set's multi_error: 0.171589
[2000]	valid_set's multi_error: 0.167538


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T42/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0027829447752274524, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 38, 'min_data_in_leaf': 50, 'bagging_freq': 1, 'bagging_fraction': 0.7044144019235328, 'feature_fraction': 0.03183892953130785, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T43/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.06805267512143187, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 31, 'min_data_in_leaf': 40, 'bagging_freq': 1, 'bagging_fraction': 0.9923963988886326, 

[1000]	valid_set's multi_error: 0.166667


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T45/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.01660461170757197, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 41, 'min_data_in_leaf': 95, 'bagging_freq': 1, 'bagging_fraction': 0.21874937373677183, 'feature_fraction': 0.569573534574738, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T46/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.02073286553547695, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 20, 'min_data_in_leaf': 81, 'bagging_freq': 1, 'bagging_fraction': 0.08529556585870057, 'f

[1000]	valid_set's multi_error: 0.180998
[2000]	valid_set's multi_error: 0.173767


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T47/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.029119768707609014, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 23, 'min_data_in_leaf': 56, 'bagging_freq': 1, 'bagging_fraction': 0.9764043865930425, 'feature_fraction': 0.6176579160958802, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T48/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.06720829203139939, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 83, 'min_data_in_leaf': 38, 'bagging_freq': 1, 'bagging_fraction': 0.7438345453097878, 'f

[1000]	valid_set's multi_error: 0.169063


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T54/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.018760453797426507, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 9, 'min_data_in_leaf': 93, 'bagging_freq': 1, 'bagging_fraction': 0.3136923925080297, 'feature_fraction': 0.9574508556589537, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.168671


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T55/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.017831019095854835, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 81, 'min_data_in_leaf': 89, 'bagging_freq': 1, 'bagging_fraction': 0.42468546875150626, 'feature_fraction': 0.37416998033422555, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T56/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.002075940748858884, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 76, 'min_data_in_leaf': 36, 'bagging_freq': 1, 'bagging_fraction': 0.5867843464581688,

[1000]	valid_set's multi_error: 0.167277


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T59/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.001679401941590302, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 82, 'min_data_in_leaf': 69, 'bagging_freq': 1, 'bagging_fraction': 0.7486636198505473, 'feature_fraction': 0.9037197397459334, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T60/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.00594805664341383, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 46, 'min_data_in_leaf': 83, 'bagging_freq': 1, 'bagging_fraction': 0.04561463715088643, '

[1000]	valid_set's multi_error: 0.181173
[2000]	valid_set's multi_error: 0.175597
[3000]	valid_set's multi_error: 0.171807
[4000]	valid_set's multi_error: 0.170761


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T61/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0010591041777891583, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 9, 'min_data_in_leaf': 23, 'bagging_freq': 1, 'bagging_fraction': 0.06303828966937675, 'feature_fraction': 0.5556492427701452, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T62/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0014880356802293828, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 8, 'min_data_in_leaf': 18, 'bagging_freq': 1, 'bagging_fraction': 0.2817301057539491, 

[1000]	valid_set's multi_error: 0.16915
[2000]	valid_set's multi_error: 0.167277


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T64/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0853646571183463, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 89, 'min_data_in_leaf': 59, 'bagging_freq': 1, 'bagging_fraction': 0.8788697417774708, 'feature_fraction': 0.4922682415051587, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T65/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.1955569211746371, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 25, 'min_data_in_leaf': 67, 'bagging_freq': 1, 'bagging_fraction': 0.874287966624947, 'featu

[1000]	valid_set's multi_error: 0.189449
[2000]	valid_set's multi_error: 0.180911
[3000]	valid_set's multi_error: 0.17808


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T67/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.053200674728298494, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 85, 'min_data_in_leaf': 10, 'bagging_freq': 1, 'bagging_fraction': 0.47837030703998806, 'feature_fraction': 0.4973913654986627, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T68/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0032538222127483194, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 44, 'min_data_in_leaf': 13, 'bagging_freq': 1, 'bagging_fraction': 0.13690027168559893

[1000]	valid_set's multi_error: 0.171763
[2000]	valid_set's multi_error: 0.168583


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T69/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.034378171013130794, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 12, 'min_data_in_leaf': 57, 'bagging_freq': 1, 'bagging_fraction': 0.9806996740240193, 'feature_fraction': 0.40612049298131103, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T70/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.009325299267022786, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 22, 'min_data_in_leaf': 41, 'bagging_freq': 1, 'bagging_fraction': 0.7888727777194641, 

[1000]	valid_set's multi_error: 0.168017


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T71/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.316097394561388, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 78, 'min_data_in_leaf': 93, 'bagging_freq': 1, 'bagging_fraction': 0.30183087592751656, 'feature_fraction': 0.7752197774666111, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T72/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.02753010535434901, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 14, 'min_data_in_leaf': 13, 'bagging_freq': 1, 'bagging_fraction': 0.9528706723868426, 'fea

[1000]	valid_set's multi_error: 0.180737
[2000]	valid_set's multi_error: 0.17368
[3000]	valid_set's multi_error: 0.172112


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T73/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.06734637252882937, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 60, 'min_data_in_leaf': 34, 'bagging_freq': 1, 'bagging_fraction': 0.882859998043791, 'feature_fraction': 0.25062272019388543, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T74/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.17340897959446988, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 62, 'min_data_in_leaf': 50, 'bagging_freq': 1, 'bagging_fraction': 0.4321480499248166, 'fe

[1000]	valid_set's multi_error: 0.171894
[2000]	valid_set's multi_error: 0.168409


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T83/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0024933663413912073, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 64, 'min_data_in_leaf': 46, 'bagging_freq': 1, 'bagging_fraction': 0.5392235974137527, 'feature_fraction': 0.32568476641681543, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.173941
[2000]	valid_set's multi_error: 0.169847


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T84/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.017107644685186413, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 100, 'min_data_in_leaf': 97, 'bagging_freq': 1, 'bagging_fraction': 0.903983954928237, 'feature_fraction': 0.5438059500773263, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T85/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.011938709209716283, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 78, 'min_data_in_leaf': 32, 'bagging_freq': 1, 'bagging_fraction': 0.45860396176858587, 

[1000]	valid_set's multi_error: 0.168235


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T86/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.007664918362139111, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 63, 'min_data_in_leaf': 90, 'bagging_freq': 1, 'bagging_fraction': 0.6900250201912274, 'feature_fraction': 0.6996220542505167, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.167712


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T87/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0027119986338300403, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 15, 'min_data_in_leaf': 85, 'bagging_freq': 1, 'bagging_fraction': 0.6360610554471413, 'feature_fraction': 0.24002027337970955, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.179169
[2000]	valid_set's multi_error: 0.172809
[3000]	valid_set's multi_error: 0.170021


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T88/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.039359405970602106, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 43, 'min_data_in_leaf': 25, 'bagging_freq': 1, 'bagging_fraction': 0.9591666030352225, 'feature_fraction': 0.45813882726004285, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T89/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.03580429414221614, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 43, 'min_data_in_leaf': 75, 'bagging_freq': 1, 'bagging_fraction': 0.45722345335385706, 

[1000]	valid_set's multi_error: 0.169324


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T91/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.004990919179076706, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 98, 'min_data_in_leaf': 22, 'bagging_freq': 1, 'bagging_fraction': 0.0627129520233457, 'feature_fraction': 0.42403225188984195, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.171023
[2000]	valid_set's multi_error: 0.167799


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T92/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.009101853950900013, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 92, 'min_data_in_leaf': 20, 'bagging_freq': 1, 'bagging_fraction': 0.03330462654669619, 'feature_fraction': 0.9589827218634736, 'metric': 'multi_error', 'early_stopping_rounds': 400}


[1000]	valid_set's multi_error: 0.171458


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T93/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.028014889211792477, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 36, 'min_data_in_leaf': 98, 'bagging_freq': 1, 'bagging_fraction': 0.5318491770716929, 'feature_fraction': 0.8453576818202398, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T94/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.0017716571983676167, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 90, 'min_data_in_leaf': 93, 'bagging_freq': 1, 'bagging_fraction': 0.609177581558388, '

[1000]	valid_set's multi_error: 0.188752
[2000]	valid_set's multi_error: 0.180476


Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T99/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_train.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/dataset_val.pkl
	Fitting 10000 rounds... Hyperparameters: {'learning_rate': 0.1459256771866733, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 80, 'min_data_in_leaf': 64, 'bagging_freq': 1, 'bagging_fraction': 0.3681024019815341, 'feature_fraction': 0.3127532957735243, 'metric': 'multi_error', 'early_stopping_rounds': 400}
Saving AutogluonModels/ag-20240628_021311/models/LightGBM/T100/model.pkl
Time for LightGBM model HPO: 1451.0054309368134
Best hyperparameter configuration for LightGBM model: 
{'learning_rate': 0.029500844112350745, 'objective': 'multiclass', 'num_class': 3, 'num_leaves': 56, 'min_data_in_leaf': 83, 'bagging_freq': 1, 'bagging_fraction': 0.5130748631715455, 'feature_fraction': 0.6498319735234533, 'metric': 'multi_error', 'early_stopping_rounds': 400}


In [138]:
lb_df = predictor.leaderboard(extra_info=True)
lb_df

Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T1/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T1/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T2/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T2/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T3/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T3/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T4/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T4/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T5/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T5/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T6/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T6/model.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGBM/T7/info.pkl
Loading: AutogluonModels/ag-20240628_021311/models/LightGB

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order,...,hyperparameters,hyperparameters_fit,ag_args_fit,features,compile_time,child_hyperparameters,child_hyperparameters_fit,child_ag_args_fit,ancestors,descendants
0,LightGBM/T97,0.834509,accuracy,0.127338,9.959136,0.127338,9.959136,1,True,97,...,"{'learning_rate': 0.029500844112350745, 'objec...",{'num_boost_round': 321},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[WeightedEnsemble_L2]
1,WeightedEnsemble_L2,0.834509,accuracy,0.130377,10.366061,0.003039,0.406924,2,True,101,...,"{'use_orig_features': False, 'max_base_models'...",{},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[LightGBM/T97_2, LightGBM/T97_1, LightGBM/T97_0]",,"{'ensemble_size': 25, 'subsample_size': 1000000}",{'ensemble_size': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...",[LightGBM/T97],[]
2,LightGBM/T16,0.834422,accuracy,0.523794,35.213979,0.523794,35.213979,1,True,16,...,"{'learning_rate': 0.0071798932488217025, 'obje...",{'num_boost_round': 1657},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
3,LightGBM/T85,0.834292,accuracy,0.204002,21.573351,0.204002,21.573351,1,True,85,...,"{'learning_rate': 0.017107644685186413, 'objec...",{'num_boost_round': 465},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
4,LightGBM/T45,0.834292,accuracy,0.542179,44.468923,0.542179,44.468923,1,True,45,...,"{'learning_rate': 0.011879186536249427, 'objec...",{'num_boost_round': 1423},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,LightGBM/T38,0.474168,accuracy,0.016324,1.315640,0.016324,1.315640,1,True,38,...,"{'learning_rate': 0.0015370204507549887, 'obje...",{'num_boost_round': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
97,LightGBM/T57,0.474168,accuracy,0.016430,2.068895,0.016430,2.068895,1,True,57,...,"{'learning_rate': 0.002075940748858884, 'objec...",{'num_boost_round': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
98,LightGBM/T14,0.474168,accuracy,0.016719,2.116223,0.016719,2.116223,1,True,14,...,"{'learning_rate': 0.0018854537484233854, 'obje...",{'num_boost_round': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]
99,LightGBM/T17,0.474168,accuracy,0.017531,1.585774,0.017531,1.585774,1,True,17,...,"{'learning_rate': 0.0010880013579881585, 'obje...",{'num_boost_round': 1},"{'max_memory_usage_ratio': 1.0, 'max_time_limi...","[Daytime_evening_attendance, Previous_qualific...",,,,,[],[]


In [141]:
lb_df.iloc[0].hyperparameters

{'learning_rate': 0.029500844112350745,
 'objective': 'multiclass',
 'num_class': 3,
 'num_leaves': 56,
 'min_data_in_leaf': 83,
 'bagging_freq': 1,
 'bagging_fraction': 0.5130748631715455,
 'feature_fraction': 0.6498319735234533,
 'metric': 'multi_error',
 'early_stopping_rounds': 400}

In [142]:
lb_df.iloc[0].hyperparameters_fit

{'num_boost_round': 321}

#### 搜索到的参数在5-fold交叉验证上的指标

In [143]:
hyperparameters = {
    'GBM': lb_df.iloc[0].hyperparameters
}

In [144]:
hyperparameters

{'GBM': {'learning_rate': 0.029500844112350745,
  'objective': 'multiclass',
  'num_class': 3,
  'num_leaves': 56,
  'min_data_in_leaf': 83,
  'bagging_freq': 1,
  'bagging_fraction': 0.5130748631715455,
  'feature_fraction': 0.6498319735234533,
  'metric': 'multi_error',
  'early_stopping_rounds': 400}}

In [145]:
predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=2,
).fit(
    train_df, 
    hyperparameters=hyperparameters,
    num_bag_sets=1,
    num_bag_folds=5,
    num_stack_levels=0
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240628_061312"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240628_061312"
AutoGluon Version:  1.1.0
Python Version:     3.8.0
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sun Jan 19 18:21:42 CST 2020
CPU Count:          64
Memory Avail:       92.97 GB / 251.62 GB (36.9%)

In [146]:
predictor.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM_BAG_L1,0.833294,accuracy,0.999749,9.922045,0.999749,9.922045,1,True,1
1,WeightedEnsemble_L2,0.833294,accuracy,1.00786,9.934711,0.008111,0.012666,2,True,2


### 神经网络模型

In [154]:
tp = TabularPredictor(verbosity=3, label='Target')
tp.fit?

No path specified. Models will be saved in: "AutogluonModels/ag-20240628_084914"


[0;31mSignature:[0m
[0mtp[0m[0;34m.[0m[0mfit[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtrain_data[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtuning_data[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtime_limit[0m[0;34m:[0m [0;34m'float'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpresets[0m[0;34m:[0m [0;34m'List[str] | str'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhyperparameters[0m[0;34m:[0m [0;34m'dict | str'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfeature_metadata[0m[0;34m=[0m[0;34m'infer'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minfer_limit[0m[0;34m:[0m [0;34m'float'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minfer_limit_batch_size[0m[0;34m:[0m [0;34m'int'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfit_weighted_ensemble[0m[0;34m:[0m [0;34m'bool'[0

In [155]:
hyperparameters = {
    'GBM': {},
    'NN_TORCH': {},
    'FASTAI': {},
}

In [156]:
predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=2,
).fit(
    train_df, 
    hyperparameters=hyperparameters,
    num_bag_sets=1,
    num_bag_folds=5,
    num_stack_levels=0
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240628_085151"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240628_085151"
AutoGluon Version:  1.1.0
Python Version:     3.8.0
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sun Jan 19 18:21:42 CST 2020
CPU Count:          64
Memory Avail:       92.69 GB / 251.62 GB (36.8%)

In [157]:
predictor.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM_BAG_L1,0.832719,accuracy,0.725317,7.870559,0.725317,7.870559,1,True,2
1,WeightedEnsemble_L2,0.832719,accuracy,0.733454,8.5956,0.008137,0.72504,2,True,4
2,NeuralNetFastAI_BAG_L1,0.82693,accuracy,1.618734,122.150313,1.618734,122.150313,1,True,1
3,NeuralNetTorch_BAG_L1,0.825871,accuracy,0.625731,125.676896,0.625731,125.676896,1,True,3


## OpenFE

In [2]:
import openfe
print(openfe.__file__)

/Users/jiazhuang/opt/miniconda3/envs/py3.10/lib/python3.10/site-packages/openfe/__init__.py


修改 /<miniconda安装目录>/envs/py38/lib/python3.8/site-packages/openfe/FeatureGenerator.py文件

第6行修改为：

num_operators = ["abs", "log", "sqrt", "square", "sigmoid", "round", "residual", "<p0.2", "<p0.4", "<p0.6", "<p0.8",]
在第63行后增加：

            elif self.name == '<p0.2':
                new_data = (d < d.quantile(0.2).max()).astype(int)
            elif self.name == '<p0.4':
                new_data = (d < d.quantile(0.4).max()).astype(int)
            elif self.name == '<p0.6':
                new_data = (d < d.quantile(0.6).max()).astype(int)
            elif self.name == '<p0.8':
                new_data = (d < d.quantile(0.8).max()).astype(int)
重启 notebook kernel 生效！

In [47]:
from openfe import OpenFE, transform, get_candidate_features, tree_to_formula

In [48]:
candidate_features = get_candidate_features(numerical_features=ori_num_feats, categorical_features=ori_cat_feats)

In [49]:
len(candidate_features)

3708

In [50]:
# Restrict Search Space of Candidate Features
candidate_features = [
    f
    for f in candidate_features
    if f.name
    in {
        # "abs" -> dataset specific, not useful in most cases
        # "log" -> can be done by scalers, no need for GBDTs
        # "sqrt", -> see above (s.a.)
        # "square" , -> s.a.
        # "sigmoid" , -> s.a.
        "freq",
        "round",
        "residual",
        # "max", -> IMO, trivial to model for first-order features 
        # "min", -> s.a.
        "+",
        "-",
        "/",
        "*",
        "GroupByThenMin",  # -> &nbsp;The essential benefit of GroupBy is captured with any of these, so I filtered this to reduce the search space.
        "GroupByThenMax",  # -> s.a.
        # "GroupByThenMean", -> s.a.
        "GroupByThenMedian",
        "GroupByThenStd",
        "GroupByThenRank",
        "GroupByThenFreq",
        "GroupByThenNUnique",
        "Combine",
        # New Generators 
        #   - Hacked into OpenFE by adding `new_data = int(d < d.quantile(X).max())` to the generator options. 
        "<p0.2",  # X = 0.2
        "<p0.4",   
        "<p0.6",
        "<p0.8",
    }
]

In [51]:
len(candidate_features)

2835

In [52]:
train_x, train_y = train_df[ori_all_feats], train_df[['Target']]
test_x = test_df[ori_all_feats]

In [None]:
ofe = OpenFE()

%%capture --no-stderr

features = ofe.fit(
    data=train_x,
    label=train_y,
    task='classification',
    candidate_features_list=candidate_features,
    metric='multi_logloss',
    n_data_blocks=2,
    min_candidate_features=30000,
    n_jobs=32,
    verbose=True
)

In [55]:
len(features)

1841

In [56]:
train_x, test_x = transform(train_x, test_x, features, n_jobs=32)

In [57]:
train_x.head()

Unnamed: 0_level_0,Marital_status,Application_mode,Application_order,Course,Daytime_evening_attendance,Previous_qualification,Previous_qualification_grade,Nacionality,Mother_s_qualification,Father_s_qualification,...,autoFE_f_1831,autoFE_f_1832,autoFE_f_1833,autoFE_f_1834,autoFE_f_1835,autoFE_f_1836,autoFE_f_1837,autoFE_f_1838,autoFE_f_1839,autoFE_f_1840
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,1,14,1,0,126.0,0,0,16,...,0.0,-4.06,0.0,0.476095,1.061701,190.0,0.0,5.676337,112006.0,58584.0
1,0,12,1,14,1,0,125.0,0,16,16,...,0.0,-4.06,0.0,0.476095,0.872394,190.0,0.0,5.676337,112006.0,27730.0
2,0,12,2,15,1,0,137.0,0,2,16,...,0.0,-4.06,0.0,0.497029,1.250124,190.0,0.0,5.08794,112006.0,27730.0
3,0,0,3,17,1,0,131.0,0,16,2,...,0.0,-4.06,0.0,0.498465,0.872394,190.0,0.0,4.72453,112006.0,58584.0
4,0,0,2,17,1,0,132.0,0,16,33,...,0.0,-4.06,0.0,0.497029,0.872394,190.0,0.0,5.08794,112006.0,58584.0


In [58]:
def get_feature_description(col):
    if not col.startswith('autoFE_f_'): return ''
    idx = int(col.split('_')[-1])
    return tree_to_formula(features[idx])

In [59]:
get_feature_description('autoFE_f_0')

'(Curricular_units_2nd_sem_approved*Curricular_units_2nd_sem_grade)'

In [60]:
get_feature_description('autoFE_f_1831')

'GroupByThenMin(Curricular_units_2nd_sem_enrolled,Nacionality)'

In [61]:
fe_train_df = pd.concat([train_x, train_y], axis=1)
fe_test_df = test_x

In [65]:
fe_feats = [c for c in fe_train_df.columns if c != 'Target']

In [66]:
cv, oof, pred = lgb_cv(params, fe_train_df, fe_test_df, fe_feats, [], 'Target')

fold=0 - - - - - - - - - - - - - - - - - - - - 
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.531524 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 150597
[LightGBM] [Info] Number of data points in the train set: 61214, number of used features: 1870
[LightGBM] [Info] Start training from score -0.746287
[LightGBM] [Info] Start training from score -1.635907
[LightGBM] [Info] Start training from score -1.105333
Training until validation scores don't improve for 400 rounds
[200]	valid_0's multi_error: 0.17185
[400]	valid_0's multi_error: 0.167603
[600]	valid_0's multi_error: 0.166492
[800]	valid_0's multi_error: 0.166427
[1000]	valid_0's multi_error: 0.165904
Early stopping, best iteration is:
[694]	valid_0's multi_error: 0.165578
fold=1 - - - - - - - - - - - - - - - - - - - - 
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.622575 seconds.
You can set `force_c

In [67]:
print(cv)

0.8333594709741499


#### 后向特征选择(通过autogluon实现)

AutoGluon的特征选择/修剪(feature_prune)功能在实验阶段，没有直接的API，这里的使用方法有些hack

In [None]:
# 这里设置4h，迭代100轮，裁剪掉80%的特征，尽量少一点特征方便后续迭代优化
hyperparameters = {'GBM':{}}
feature_prune_kwargs = {'feature_prune_time_limit': 3600 * 4, 'n_train_subsample': None, 'n_fi_subsample': 20000, 'prune_ratio': 0.8, 'stopping_round': 100, 'min_improvement': 1e-6, 'force_prune': True}

predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
).fit(
    fe_train_df,
    hyperparameters=hyperparameters,
    feature_prune_kwargs=feature_prune_kwargs,
    presets='medium_quality',
    time_limit=3600 * 5
)

In [73]:
with open('../automl/AutogluonModels/ag-20240619_064449/models/LightGBM_Prune/model.pkl', 'rb') as f:
    prune_model = pickle.load(f)

In [74]:
selected_feats = prune_model.features

In [75]:
len(selected_feats)

428

In [77]:
# 通常倾向于保留原始特征
mix_selected_feats = ori_all_feats + [c for c in selected_feats if c not in ori_all_feats]
len(mix_selected_feats)

463

## AutoGluon 模型融合

In [159]:
def read_pickle(path):
    with open(path, 'rb') as f:
        data = pickle.load(f)
    return data

In [161]:
fe_train_selected = read_pickle('fe_train_selected.pkl')
fe_test_selected = read_pickle('fe_test_selected.pkl')

In [162]:
null_imp_selected_cat_feats = read_pickle('null_imp_selected_cat_feats.pkl')

In [163]:
for col in null_imp_selected_cat_feats:
    fe_train_selected[col] = fe_train_selected[col].astype(int).astype('category')
    fe_test_selected[col] = fe_test_selected[col].astype(int).astype('category')

In [164]:
fe_train_selected.shape

(76518, 56)

In [None]:
fe_predictor = TabularPredictor(
    label='Target',
    problem_type='multiclass',
    eval_metric='accuracy',
    verbosity=3,
).fit(
    fe_train_selected, 
    presets='best_quality', 
    num_bag_sets=1,
    num_bag_folds=5,
    num_stack_levels=3,
    time_limit=3600 * 24
)

In [165]:
fe_predictor = TabularPredictor.load('AutogluonModels/ag-20240626_021250')

In [166]:
fe_predictor.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,0.835490,accuracy,242.015740,15328.497464,0.008564,5.314710,3,True,220
1,XGBoost_r31_BAG_L3,0.835163,accuracy,781.187671,31338.957801,3.507597,34.828593,3,True,295
2,WeightedEnsemble_L5,0.835163,accuracy,781.195905,31344.348478,0.008233,5.390677,5,True,438
3,WeightedEnsemble_L4,0.835163,accuracy,781.196100,31344.334512,0.008429,5.376711,4,True,329
4,XGBoost_r95_BAG_L3,0.835111,accuracy,781.269534,31332.827060,3.589459,28.697853,3,True,324
...,...,...,...,...,...,...,...,...,...,...
433,ExtraTrees_r42_BAG_L1,0.823989,accuracy,4.926667,3.956279,4.926667,3.956279,1,True,22
434,NeuralNetTorch_r158_BAG_L1,0.823872,accuracy,1.140451,264.488195,1.140451,264.488195,1,True,51
435,RandomForest_r16_BAG_L1,0.823074,accuracy,4.902473,10.774498,4.902473,10.774498,1,True,94
436,KNeighborsUnif_BAG_L1,0.785136,accuracy,1.979865,0.103050,1.979865,0.103050,1,True,1
