diff --git a/docs/source/automl/pipeline_search.ipynb b/docs/source/automl/pipeline_search.ipynb index a350764faf..f3a4e90a62 100644 --- a/docs/source/automl/pipeline_search.ipynb +++ b/docs/source/automl/pipeline_search.ipynb @@ -54,7 +54,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -74,7 +74,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -108,7 +108,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -139,9 +139,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from evalml.objectives import FraudCost\n", "\n", @@ -171,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -188,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -197,7 +208,7 @@ "[evalml.pipelines.classification.random_forest.RFClassificationPipeline]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -215,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -226,7 +237,7 @@ " ]" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -237,16 +248,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "[,\n", + " ]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -261,14 +273,45 @@ "source": [ "## Limiting Search Time\n", "\n", - "You can limit the search time by specifying a maximum number of pipelines or a maximum amount of time. 
EvalML won't build new pipelines after the maximum time has passed or the maximum number of pipelines have been built.\n", + "You can limit the search time by specifying a maximum number of pipelines and/or a maximum amount of time. EvalML won't build new pipelines after the maximum time has passed or the maximum number of pipelines have been built. If a limit is not set, then a maximum of 5 pipelines will be built. \n", + "\n", + "The maximum search time can be specified as an integer in seconds or as a string in seconds, minutes, or hours. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evalml.AutoClassifier(objective=\"f1\",\n", + " max_time=60)\n", "\n", + "evalml.AutoClassifier(objective=\"f1\",\n", + " max_time=\"1 minute\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "To start, EvalML samples 10 sets of hyperparameters chosen randomly for each possible pipeline. Therefore, we recommend setting `max_pipelines` at least 10 times the number of possible pipelines." 
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -277,16 +320,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -308,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -336,7 +379,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.5" }, "mimetype": "text/x-python", "name": "python", diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 2bf060b56f..5a5be32a7c 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -12,6 +12,7 @@ Changelog * Changes * Refactoring pipelines :pr:`108` * Documentation Changes + * Updated documentation to show max_time enhancements :pr:`189` * Testing Changes **v0.5.0 Oct. 
29, 2019** diff --git a/docs/source/index.ipynb b/docs/source/index.ipynb index c5d2992646..b59f225e1d 100644 --- a/docs/source/index.ipynb +++ b/docs/source/index.ipynb @@ -406,7 +406,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.5" } }, "nbformat": 4, diff --git a/evalml/__init__.py b/evalml/__init__.py index 60be450bf3..21cacca42e 100644 --- a/evalml/__init__.py +++ b/evalml/__init__.py @@ -6,6 +6,10 @@ # must import sklearn first import sklearn +with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + import skopt + import evalml.demos import evalml.model_types import evalml.objectives @@ -20,11 +24,6 @@ from evalml.models import AutoClassifier, AutoRegressor from evalml.pipelines import list_model_types, load_pipeline, save_pipeline -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - import skopt - - warnings.filterwarnings("ignore", category=DeprecationWarning) diff --git a/evalml/models/auto_classifier.py b/evalml/models/auto_classifier.py index cc754e2cd1..e81c767063 100644 --- a/evalml/models/auto_classifier.py +++ b/evalml/models/auto_classifier.py @@ -34,10 +34,13 @@ def __init__(self, multiclass (bool): If True, expecting multiclass data. By default: False. - max_pipelines (int): maximum number of pipelines to search + max_pipelines (int): Maximum number of pipelines to search. If max_pipelines and + max_time are not set, then max_pipelines will default to 5. - max_time (int): maximum time in seconds to search for pipelines. - won't start new pipeline search after this duration has elapsed + max_time (int, str): Maximum time to search for pipelines. + This will not start a new pipeline search after the duration + has elapsed. If it is an integer, then the time will be in seconds. + For strings, time can be specified as seconds, minutes, or hours. model_types (list): The model types to search. 
By default searches over all model_types. Run evalml.list_model_types("classification") to see options. diff --git a/evalml/models/auto_regressor.py b/evalml/models/auto_regressor.py index 6a5f7fff00..8768341747 100644 --- a/evalml/models/auto_regressor.py +++ b/evalml/models/auto_regressor.py @@ -29,10 +29,13 @@ def __init__(self, Arguments: objective (Object): the objective to optimize - max_pipelines (int): maximum number of pipelines to search + max_pipelines (int): Maximum number of pipelines to search. If max_pipelines and + max_time are not set, then max_pipelines will default to 5. - max_time (int): maximum time in seconds to search for pipelines. - won't start new pipeline search after this duration has elapsed + max_time (int, str): Maximum time to search for pipelines. + This will not start a new pipeline search after the duration + has elapsed. If it is an integer, then the time will be in seconds. + For strings, time can be specified as seconds, minutes, or hours. model_types (list): The model types to search. By default searches over all model_types. Run evalml.list_model_types("regression") to see options.