Skip to content

Commit

Permalink
Reorganize tests
Browse files Browse the repository at this point in the history
  • Loading branch information
petrovicboban committed May 24, 2023
1 parent 5f48df4 commit dc2031a
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 109 deletions.
4 changes: 3 additions & 1 deletion Python/random_forestry/forestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,9 @@ def __eq__(self, other):
def _more_tags(self):
return {
"_xfail_checks": {
# "check_estimators_pickle": "To be fixed later",
"check_estimators_pickle": "To be fixed later",
"check_n_features_in": "To be fixed later",
"check_estimators_nan_inf": "To be fixed later",
"check_dtype_object": "To be fixed later",
},
}
17 changes: 17 additions & 0 deletions Python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Type

import pytest
from helpers import get_data

from random_forestry import RandomForest


@pytest.fixture
def forest(request: Type[pytest.FixtureRequest]):
X, y = get_data()

forest_parameters = request.node.get_closest_marker("forest_parameters")
if hasattr(forest_parameters, "kwargs"):
return RandomForest(**forest_parameters.kwargs).fit(X, y)
else:
return RandomForest().fit(X, y)
14 changes: 2 additions & 12 deletions Python/tests/test_aggregation_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,8 @@
import pytest
from helpers import get_data

from random_forestry import RandomForest


@pytest.fixture
def forest():
forest = RandomForest(seed=432432)

X, y = get_data()

forest.fit(X, y)
return forest


@pytest.mark.forest_parameters(seed=432432)
def test_predict_settings(forest):
X, _ = get_data()

Expand All @@ -25,6 +14,7 @@ def test_predict_settings(forest):
assert np.array_equal(predictions_1, predictions_2)


@pytest.mark.forest_parameters(seed=432432)
def test_linearity(forest):
X, _ = get_data()

Expand Down
21 changes: 16 additions & 5 deletions Python/tests/test_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,26 @@


def test_properties():
assert RandomForest(oob_honest=True).fit(X, y, splitratio=0.3).splitratio_ == 1
with pytest.warns(
UserWarning, match="oob_honest is set to true, so we will run OOBhonesty rather than standard honesty."
):
assert RandomForest(oob_honest=True).fit(X, y, splitratio=0.3).splitratio_ == 1

assert RandomForest(oob_honest=True).fit(X, y, replace=False).replace_
with pytest.warns(UserWarning, match="replace must be set to TRUE to use OOBhonesty, setting this to True now"):
assert RandomForest(oob_honest=True).fit(X, y, replace=False).replace_

# assert RandomForest().fit(X, y, splitratio=0, double_tree=True).double_tree_ is False
# assert RandomForest().fit(X, y, splitratio=0.3, double_tree=True).double_tree_
assert RandomForest().fit(X, y, splitratio=1, double_tree=True).double_tree_ is False

assert RandomForest().fit(X, y, interaction_depth=23, max_depth=4).interaction_depth_ == 4
with pytest.warns(
UserWarning, match="Trees cannot be doubled if splitratio is 1. We have set double_tree to False."
):
assert RandomForest().fit(X, y, splitratio=1, double_tree=True).double_tree_ is False

with pytest.warns(
UserWarning,
match="interaction_depth cannot be greater than max_depth. We have set interaction_depth to max_depth.",
):
assert RandomForest().fit(X, y, interaction_depth=23, max_depth=4).interaction_depth_ == 4

# with pytest.raises(ValidationError):
# RandomForest(ntree=False)
Expand Down
34 changes: 9 additions & 25 deletions Python/tests/test_different_aggregations.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,27 @@
import pytest
from helpers import get_data

from random_forestry import RandomForest


def test_predict_error():
X, y = get_data()

forest = RandomForest()
forest.fit(X, y)

def test_predict_error(forest):
with pytest.raises(ValueError):
forest.predict(aggregation="average")


def test_predict_average():
X, y = get_data()

forest = RandomForest()
forest.fit(X, y)
def test_predict_average(forest):
X, _ = get_data()
prediction = forest.predict(X, aggregation="average")
assert len(prediction) == len(X)


def test_predict_oob():
X, y = get_data()

forest = RandomForest(oob_honest=True)
forest.fit(X, y)

@pytest.mark.forest_parameters(oob_honest=True)
def test_predict_oob(forest):
X, _ = get_data()
prediction = forest.predict(X, aggregation="oob")
assert len(prediction) == len(X)


def test_predict_double_oob():
X, y = get_data()

forest = RandomForest(oob_honest=True)
forest.fit(X, y)

@pytest.mark.forest_parameters(oob_honest=True)
def test_predict_double_oob(forest):
X, _ = get_data()
prediction = forest.predict(X, aggregation="doubleOOB")
assert len(prediction) == len(X)
5 changes: 4 additions & 1 deletion Python/tests/test_forestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def test_newdata_shuffled_warning(forest: RandomForest):
def test_equal_predictions(forest: RandomForest):
X, _ = get_data()
predictions_1 = forest.predict(X)
predictions_2 = forest.predict(X.iloc[:, ::-1])
with pytest.warns(
UserWarning, match="newdata columns have been reordered so that they match the training feature matrix"
):
predictions_2 = forest.predict(X.iloc[:, ::-1])

assert np.array_equal(predictions_1, predictions_2)

Expand Down
56 changes: 56 additions & 0 deletions Python/tests/test_forests_equality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# import platform

import time

import pytest
from helpers import get_data

from random_forestry import RandomForest

X, y = get_data()


class TestAfterInit:
@pytest.mark.skip
def test_when_default_seed(self):
forest_1 = RandomForest()
time.sleep(1)
forest_2 = RandomForest()
assert forest_1 != forest_2

def test_when_equal_seed(self):
forest_1 = RandomForest(seed=123)
forest_2 = RandomForest(seed=123)
assert forest_1 == forest_2

def test_when_different_params(self):
forest_1 = RandomForest(seed=56, ntree=34)
forest_2 = RandomForest(seed=56, nthread=6)
assert forest_1 != forest_2


class TestAfterFit:
def test_it_is_different(self):
forest_1 = RandomForest(seed=123)
forest_2 = RandomForest(seed=123).fit(X, y)
assert forest_1 != forest_2

def test_no_randomness_added(self):
forest_1 = RandomForest(seed=123).fit(X, y)
forest_2 = RandomForest(seed=123).fit(X, y)
assert forest_1 == forest_2

def test_idempotency(self):
forest_1 = RandomForest(seed=123).fit(X, y)
forest_2 = RandomForest(seed=123).fit(X, y).fit(X, y)
assert forest_1 == forest_2

@pytest.mark.skip
def test_different_params(self):
forest_1 = RandomForest(seed=123).fit(X, y, double_bootstrap=True)
forest_2 = RandomForest(seed=123).fit(X, y, double_bootstrap=False)
assert forest_1 != forest_2

forest_1 = RandomForest(seed=123).fit(X, y, max_obs=4)
forest_2 = RandomForest(seed=123).fit(X, y, max_obs=5)
assert forest_1 != forest_2
56 changes: 22 additions & 34 deletions Python/tests/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,25 @@
from random_forestry import RandomForest


def test_groups():
X, y = get_data()

forest = RandomForest()
forest.fit(X, y)
pred_avg = forest.predict(X, aggregation="average")
pred_oob = forest.predict(X, aggregation="oob")

forest = RandomForest()
groups = Series([i for i in range(len(X) // 10) for _ in range(10)])
forest.fit(X, y, groups=groups)
pred_avg_groups = forest.predict(X, aggregation="average")
pred_oob_groups = forest.predict(X, aggregation="oob")

assert np.array_equal(pred_avg, pred_avg_groups)
assert not np.array_equal(pred_oob, pred_oob_groups)


def test_groups_honest():
X, y = get_data()

forest = RandomForest(oob_honest=True)
forest.fit(X, y)
pred_avg = forest.predict(X, aggregation="average")
pred_oob = forest.predict(X, aggregation="oob")

forest = RandomForest(oob_honest=True)
groups = Series([i for i in range(len(X) // 10) for _ in range(10)])
forest.fit(X, y, groups=groups)
pred_avg_groups = forest.predict(X, aggregation="average")
pred_oob_groups = forest.predict(X, aggregation="oob")

assert not np.array_equal(pred_avg, pred_avg_groups)
assert not np.array_equal(pred_oob, pred_oob_groups)
class TestGroups:
def _predict(self, **kwargs):
X, y = get_data()

forest = RandomForest(**kwargs).fit(X, y)
self.pred_avg = forest.predict(X, aggregation="average")
self.pred_oob = forest.predict(X, aggregation="oob")

groups = Series([i for i in range(len(X) // 10) for _ in range(10)])
forest = RandomForest(**kwargs).fit(X, y, groups=groups)
self.pred_avg_groups = forest.predict(X, aggregation="average")
self.pred_oob_groups = forest.predict(X, aggregation="oob")

def test_groups_honest_default(self):
self._predict()
assert np.array_equal(self.pred_avg, self.pred_avg_groups)
assert not np.array_equal(self.pred_oob, self.pred_oob_groups)

def test_groups_honest_true(self):
self._predict(oob_honest=True)
assert not np.array_equal(self.pred_avg, self.pred_avg_groups)
assert not np.array_equal(self.pred_oob, self.pred_oob_groups)
30 changes: 30 additions & 0 deletions Python/tests/test_validators_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Any

from pytest import mark

from random_forestry.validators import (
negative_float,
negative_integer,
positive_float,
positive_integer,
)


@mark.parametrize("test_input,expected", [(-1, False), (3, True), (1.4, False), (0, False)])
def test_positive_integer(test_input: Any, expected: bool):
assert positive_integer(test_input) == expected


@mark.parametrize("test_input,expected", [(-1, False), (3, False), (1.4, True), (-2.7, False), (0, False)])
def test_positive_float(test_input: Any, expected: bool):
assert positive_float(test_input) == expected


@mark.parametrize("test_input,expected", [(-1, True), (3, False), (-1.4, False), (0, False)])
def test_negative_integer(test_input: Any, expected: bool):
assert negative_integer(test_input) == expected


@mark.parametrize("test_input,expected", [(-1, False), (3, False), (1.4, False), (0, False), (-3.4, True)])
def test_negative_float(test_input: Any, expected: bool):
assert negative_float(test_input) == expected
43 changes: 12 additions & 31 deletions Python/tests/test_weight_matrix.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,24 @@
import pytest
from helpers import get_data
from numpy.testing import assert_array_equal

from random_forestry import RandomForest

def _test_predictions(forest, aggregation):
X, _ = get_data()

def test_average():
X, y = get_data()

forest = RandomForest()
forest.fit(X, y)

pred = forest.predict(X, aggregation="average")
pred_weight_matrix = forest.predict(X, aggregation="average", return_weight_matrix=True)

assert_array_equal(pred, pred_weight_matrix["predictions"])
assert pred_weight_matrix["weightMatrix"].shape == (len(X.index), len(X.index))


def test_oob():
X, y = get_data()

forest = RandomForest(oob_honest=True)
forest.fit(X, y)

pred = forest.predict(X, aggregation="oob")
pred_weight_matrix = forest.predict(X, aggregation="oob", return_weight_matrix=True)
pred = forest.predict(X, aggregation=aggregation)
pred_weight_matrix = forest.predict(X, aggregation=aggregation, return_weight_matrix=True)

assert_array_equal(pred, pred_weight_matrix["predictions"])
assert pred_weight_matrix["weightMatrix"].shape == (len(X.index), len(X.index))


def test_double_oob():
X, y = get_data()
@pytest.mark.forest_parameters(oob_honest=True)
@pytest.mark.parametrize("aggregation", ["average", "oob", "doubleOOB"])
def test_predictions_oob_honest_true(forest, aggregation):
_test_predictions(forest, aggregation)

forest = RandomForest(oob_honest=True)
forest.fit(X, y)

pred = forest.predict(X, aggregation="doubleOOB")
pred_weight_matrix = forest.predict(X, aggregation="doubleOOB", return_weight_matrix=True)

assert_array_equal(pred, pred_weight_matrix["predictions"])
assert pred_weight_matrix["weightMatrix"].shape == (len(X.index), len(X.index))
@pytest.mark.parametrize("aggregation", ["average", "oob", pytest.param("doubleOOB", marks=pytest.mark.xfail)])
def test_predictions_oob_honest_default(forest, aggregation):
_test_predictions(forest, aggregation)

0 comments on commit dc2031a

Please sign in to comment.