Skip to content

Commit

Permalink
Changed the large-data split to use a fixed percentage across the board and renamed the variable
Browse files Browse the repository at this point in the history
  • Loading branch information
christopherbunn committed Sep 25, 2020
1 parent 10c2225 commit 3c26ec3
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions evalml/automl/automl_search.py
Expand Up @@ -69,7 +69,7 @@ class AutoMLSearch:
_MAX_NAME_LEN = 40
_MAX_TRAINING_ROWS = int(1e5)
_LARGE_DATA_ROW_THRESHOLD = int(1e5)
_LARGE_DATA_PERCENT_TEST = 0.75
_LARGE_DATA_PERCENT_VALIDATION = 0.75

# Necessary for "Plotting" documentation, since Sphinx does not work well with instance attributes.
plot = PipelineSearchPlots
Expand Down Expand Up @@ -391,8 +391,7 @@ def search(self, X, y, data_checks="auto", feature_types=None, show_iteration_pl
default_data_split = StratifiedKFold(n_splits=3, random_state=self.random_state)

if X.shape[0] > self._LARGE_DATA_ROW_THRESHOLD:
test_size = min(self._LARGE_DATA_PERCENT_TEST, float(self._MAX_TRAINING_ROWS / X.shape[0]))
default_data_split = TrainingValidationSplit(test_size=test_size)
default_data_split = TrainingValidationSplit(test_size=self._LARGE_DATA_PERCENT_VALIDATION)

self.data_split = self.data_split or default_data_split

Expand Down

0 comments on commit 3c26ec3

Please sign in to comment.