forked from scikit-learn-contrib/imbalanced-learn
/
test_weight_boosting.py
93 lines (71 loc) · 3.56 KB
/
test_weight_boosting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import pytest
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_array_equal
from imblearn.ensemble import RUSBoostClassifier
@pytest.fixture
def imbalanced_dataset():
    """Return (X, y) for a 3-class problem with heavily skewed classes.

    The class weights 1% / 5% / 94% make the minority classes rare enough
    that a plain classifier would mostly ignore them, which is what the
    RUSBoost tests below need to exercise.
    """
    return make_classification(
        n_samples=10000,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_repeated=0,
        n_classes=3,
        n_clusters_per_class=1,
        weights=[0.01, 0.05, 0.94],
        class_sep=0.8,
        random_state=0,
    )
@pytest.mark.parametrize(
    "boosting_params, err_msg",
    [({"n_estimators": 'whatever'}, "n_estimators must be an integer"),
     ({"n_estimators": -100}, "n_estimators must be greater than zero")]
)
def test_balanced_random_forest_error(imbalanced_dataset, boosting_params,
                                      err_msg):
    """Invalid ``n_estimators`` values must raise an informative ValueError."""
    rusboost = RUSBoostClassifier(**boosting_params)
    # Bug fix: ``pytest.raises(..., message=...)`` never checked the exception
    # text (it only customized the failure message) and the kwarg was removed
    # in pytest 4.0.  ``match=`` is the correct way to assert on the message.
    with pytest.raises(ValueError, match=err_msg):
        rusboost.fit(*imbalanced_dataset)
@pytest.mark.parametrize('algorithm', ['SAMME', 'SAMME.R'])
def test_rusboost(imbalanced_dataset, algorithm):
    """Fit RUSBoost and check ensemble consistency and prediction shapes."""
    X, y = imbalanced_dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
    classes = np.unique(y)

    n_estimators = 500
    rusboost = RUSBoostClassifier(n_estimators=n_estimators,
                                  algorithm=algorithm,
                                  random_state=0)
    rusboost.fit(X_train, y_train)

    assert_array_equal(classes, rusboost.classes_)

    # The fitted model must expose samplers, estimators and pipelines of
    # matching, non-trivial length.
    n_members = len(rusboost.estimators_)
    assert n_members > 1
    assert n_members == len(rusboost.samplers_)
    assert len(rusboost.pipelines_) == len(rusboost.samplers_)

    # No two samplers in the ensemble should share a random state.
    sampler_states = {sampler.random_state for sampler in rusboost.samplers_}
    assert len(sampler_states) == len(rusboost.samplers_)
    # Likewise for the base estimators.
    estimator_states = {est.random_state for est in rusboost.estimators_}
    assert len(estimator_states) == n_members

    # One importance value per input feature.
    assert len(rusboost.feature_importances_) == imbalanced_dataset[0].shape[1]

    # Prediction outputs must agree with the number of classes.
    proba = rusboost.predict_proba(X_test)
    assert proba.shape[1] == len(classes)
    assert rusboost.decision_function(X_test).shape[1] == len(classes)

    score = rusboost.score(X_test, y_test)
    assert score > 0.7, "Failed with algorithm {} and score {}".format(
        algorithm, score)

    y_pred = rusboost.predict(X_test)
    assert y_pred.shape == y_test.shape
@pytest.mark.parametrize('algorithm', ['SAMME', 'SAMME.R'])
def test_rusboost_sample_weight(imbalanced_dataset, algorithm):
    """Uniform sample weights are a no-op; random weights change predictions."""
    X, y = imbalanced_dataset
    rusboost = RUSBoostClassifier(algorithm=algorithm,
                                  random_state=0)

    # Fitting with all-ones weights must be equivalent to fitting without
    # any sample_weight argument at all.
    uniform_weight = np.ones_like(y)
    pred_weighted = rusboost.fit(X, y, uniform_weight).predict(X)
    pred_unweighted = rusboost.fit(X, y).predict(X)
    assert_array_equal(pred_weighted, pred_unweighted)

    # A non-uniform weighting should alter at least one prediction, so the
    # element-wise equality check is expected to fail.
    rng = np.random.RandomState(42)
    random_weight = rng.rand(y.shape[0])
    pred_weighted = rusboost.fit(X, y, random_weight).predict(X)
    with pytest.raises(AssertionError):
        assert_array_equal(pred_unweighted, pred_weighted)