In [1]:
import time

import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.feature_selection import (
    RFE,
    SelectFromModel,
    SelectKBest,
    SequentialFeatureSelector,
    f_classif,
    mutual_info_classif,
)
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from utils import (
    experiment,
    get_data,
    get_param_combinations,
    get_params_json,
    save_results,
)

from xgboost import XGBClassifier

In [2]:
# Load the dataset once through the project helper `get_data`.
# NOTE: `run_experiment` reads `X` and `y` as module-level globals rather than
# taking them as arguments, so this cell must run before any experiment cell.
# Presumably X holds the features and y the labels -- confirm against utils.get_data.
X, y = get_data()

In [3]:
def run_experiment(feature_selectors, classifiers, ks, results, train_test_seeds=None):
    """Run `experiment` for every selector / classifier / k / hyper-parameter combination.

    Iteration order is: feature selector -> classifier -> k -> selector
    parameter combination -> classifier parameter combination.  After each
    single run the result is printed, appended to ``results`` and the whole
    ``results`` list is passed to ``save_results`` so partial progress
    survives an interrupted run.

    Besides its arguments, the function reads three module-level globals that
    must be defined before it is called: ``X``, ``y`` (forwarded to
    ``experiment``) and ``filename`` (forwarded to ``save_results``).

    Parameters
    ----------
    feature_selectors : iterable of 4-tuples
        ``(selector_cls, param_grid, k_param_name, requires_estimator)``.
    classifiers : iterable of 2-tuples
        ``(classifier_cls, param_grid)``.
    ks : iterable
        Values of ``k`` forwarded to ``experiment``; it is iterated once per
        (selector, classifier) pair, so it must be re-iterable.
    results : list
        Mutated in place: every result returned by ``experiment`` is appended.
    train_test_seeds : list, optional
        Seeds forwarded to ``experiment``.  Defaults to ``[42]``.

    Returns
    -------
    None
    """
    # Avoid a shared mutable default; a fresh list is created on every call.
    if train_test_seeds is None:
        train_test_seeds = [42]

    for fs_cls, fs_grid, k_param_name, requires_estimator in feature_selectors:
        for clf_cls, clf_grid in classifiers:
            for k in ks:
                # Materialise the combinations: the classifier combinations are
                # walked once per selector combination, which would silently
                # yield nothing on the second pass if the helper returned a
                # one-shot iterator/generator.
                fs_param_combinations = list(get_param_combinations(fs_grid))
                clf_param_combinations = list(get_param_combinations(clf_grid))

                for fs_params in fs_param_combinations:
                    for clf_params in clf_param_combinations:
                        result = experiment(
                            X,
                            y,
                            fs_cls,
                            fs_params,
                            clf_cls,
                            clf_params,
                            k,
                            k_param_name,
                            requires_estimator,
                            train_test_seeds,
                        )

                        print(result)
                        # The last element of the result is formatted as the elapsed time.
                        print(f"Elapsed time: {result[-1]:.2f}s\n")
                        results.append(result)
                        # Checkpoint after every run (`filename` is a module-level global).
                        save_results(results, filename)

In [4]:
# Sweep: XGBoost with the tree booster ("gbtree").
# `results` collects whatever `run_experiment` appends for each evaluated run.
results = list()

# Read as a module-level global by `run_experiment` when it calls `save_results`.
filename = "tomkowe_tree"

# Each entry is unpacked by `run_experiment` as
# (selector class, selector param grid, name of the k parameter, requires_estimator flag).
feature_selectors = [
    (SelectFromModel, dict(threshold=[-np.inf]), "max_features", True),
]

# Each entry is (classifier class, classifier param grid).
classifiers = [
    (
        XGBClassifier,
        dict(
            booster=["gbtree"],
            device=["gpu"],
            learning_rate=[0.001, 0.01, 0.1, 0.3, 0.5],
            n_estimators=[10, 75, 150],
            min_child_weight=[0.5, 1, 3, 5],
            max_depth=[2, 6, 10],
            tree_method=["auto", "hist", "approx"],
        ),
    ),
]

# k = 1..9 and three train/test seeds (42, 43, 44).
ks = np.arange(1, 10)
train_test_seeds = [42, 43, 44]

run_experiment(
    feature_selectors,
    classifiers,
    ks,
    results,
    train_test_seeds=train_test_seeds,
)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




42 0.546
43 0.547
44 0.558
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gbtree', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 10, 'min_child_weight': 0.5, 'max_depth': 2, 'tree_method': 'auto'}", 1, 0.5503333333333333, 0.005436502143433369, 0.6116666666666667, 0.4591035842895508)
Elapsed time: 0.46s

42 0.546
43 0.547
44 0.558
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gbtree', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 10, 'min_child_weight': 0.5, 'max_depth': 2, 'tree_method': 'hist'}", 1, 0.5503333333333333, 0.005436502143433369, 0.6116666666666667, 0.2832772731781006)
Elapsed time: 0.28s

42 0.546
43 0.547
44 0.558
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gbtree', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 10, 'min_child_weight': 0.5, 'max_depth': 2, 'tree_method': 'approx'}", 1, 0.5503333333333333, 0.005436502143433369, 0.6116666666666

KeyboardInterrupt: 

In [4]:
# Sweep: XGBoost with the linear booster ("gblinear").
# `results` collects whatever `run_experiment` appends for each evaluated run.
results = list()

# Read as a module-level global by `run_experiment` when it calls `save_results`.
filename = "tomkowe_linear"

# Each entry is unpacked by `run_experiment` as
# (selector class, selector param grid, name of the k parameter, requires_estimator flag).
feature_selectors = [
    (SelectFromModel, dict(threshold=[-np.inf]), "max_features", True),
]

# Each entry is (classifier class, classifier param grid).
classifiers = [
    (
        XGBClassifier,
        dict(
            booster=["gblinear"],
            device=["gpu"],
            learning_rate=[0.001, 0.01, 0.1, 0.2, 0.3, 0.5],
            n_estimators=[10, 100, 200, 300],
            updater=["coord_descent", "shotgun"],
        ),
    ),
]

# k = 1..9 and five train/test seeds (42..46).
ks = np.arange(1, 10)
train_test_seeds = [42, 43, 44, 45, 46]

run_experiment(
    feature_selectors,
    classifiers,
    ks,
    results,
    train_test_seeds=train_test_seeds,
)

42 0.501
43 0.501
44 0.502
45 0.501
46 0.501
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gblinear', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 10, 'updater': 'coord_descent'}", 1, 0.5012, 0.00040000000000000034, 0.505, 1.261199426651001)
Elapsed time: 1.26s

42 0.501
43 0.501
44 0.502
45 0.501
46 0.501
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gblinear', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 10, 'updater': 'shotgun'}", 1, 0.5012, 0.00040000000000000034, 0.505, 0.17819948196411134)
Elapsed time: 0.18s

42 0.505
43 0.509
44 0.496
45 0.495
46 0.503
('SelectFromModel', "{'threshold': -Infinity}", 'XGBClassifier', "{'booster': 'gblinear', 'device': 'gpu', 'learning_rate': 0.001, 'n_estimators': 100, 'updater': 'coord_descent'}", 1, 0.5016, 0.005351635264103866, 0.506, 11.837090826034546)
Elapsed time: 11.84s

42 0.505
43 0.509
44 0.496
45 0.495
46 0.503
('SelectFromModel', "{'threshold':

KeyboardInterrupt: 