From 1933122dbf35c800d8e47dc1a0c30705c5c359b3 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Wed, 16 Oct 2019 12:03:33 -0400 Subject: [PATCH 01/25] Create static plot of iterations during fit() --- evalml/models/auto_base.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 80ba6e9e12..a45fbf2e2f 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -13,6 +13,8 @@ from evalml.problem_types import ProblemTypes from evalml.tuners import SKOptTuner from evalml.utils import Logger, convert_to_seconds +from IPython.display import display +import matplotlib.pyplot as plt class AutoBase: @@ -71,7 +73,7 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time, self.additional_objectives = additional_objectives self._MAX_NAME_LEN = 40 - def fit(self, X, y, feature_types=None, raise_errors=False): + def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=False): """Find best classifier Arguments: @@ -127,8 +129,15 @@ def fit(self, X, y, feature_types=None, raise_errors=False): self.logger.log("\n\nMax time elapsed. Stopping search early.") break self._do_iteration(X, y, pbar, raise_errors) - pbar.close() + if plot_iterations: + plot_data = self.rankings[['id', 'score']] + plot_data = plot_data.sort_values('id') + title = 'Pipeline Search: Iteration vs. {}'.format(self.objective.name) + ax = plot_data.plot(x='id', y='score', xticks=plot_data['id'], legend=False, style='-o', title=title) + ax.set_xlabel('iteration') + ax.set_ylabel(self.objective.name) + plt.show() self.logger.log("\n✔ Optimization finished") From f4ce55134355d4ee594ebf995f2d8eff7f456ea8 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Wed, 16 Oct 2019 13:06:19 -0400 Subject: [PATCH 02/25] Changed to interactive iter vs score plot --- evalml/models/auto_base.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index a45fbf2e2f..aebb038706 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -3,6 +3,8 @@ from collections import OrderedDict from sys import stdout +import matplotlib +import matplotlib.pyplot as plt import numpy as np import pandas as pd from tqdm import tqdm @@ -13,8 +15,6 @@ from evalml.problem_types import ProblemTypes from evalml.tuners import SKOptTuner from evalml.utils import Logger, convert_to_seconds -from IPython.display import display -import matplotlib.pyplot as plt class AutoBase: @@ -86,10 +86,23 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals raise_errors (boolean): If true, raise errors and exit search if a pipeline errors during fitting + plot_iterations (boolean, False): Show interactive plot of iteration vs. score + during fitting. Can only be ran in Jupyter Notebook. + Returns: self """ + def update_plot(fig, ax): + ax.clear() + plot_data = self.rankings[['id', 'score']] + plot_data = plot_data.sort_values('id') + title = 'Pipeline Search: Iteration vs. 
{}'.format(self.objective.name) + plot_data.plot(x='id', y='score', xticks=plot_data['id'], legend=False, style='-o', ax=ax, title=title) + ax.set_xlabel('iteration') + ax.set_ylabel(self.objective.name) + fig.canvas.draw() + # make everything pandas objects if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X) @@ -120,7 +133,12 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals if len(leaked) > 0: leaked = [str(k) for k in leaked.keys()] self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked)) - + if plot_iterations: + matplotlib.use('nbagg') + fig = plt.figure() + ax = fig.add_subplot(111) + fig.canvas.draw() + plt.show() pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') start = time.time() for n in pbar: @@ -129,15 +147,9 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals self.logger.log("\n\nMax time elapsed. Stopping search early.") break self._do_iteration(X, y, pbar, raise_errors) + if plot_iterations: + update_plot(fig, ax) pbar.close() - if plot_iterations: - plot_data = self.rankings[['id', 'score']] - plot_data = plot_data.sort_values('id') - title = 'Pipeline Search: Iteration vs. {}'.format(self.objective.name) - ax = plot_data.plot(x='id', y='score', xticks=plot_data['id'], legend=False, style='-o', title=title) - ax.set_xlabel('iteration') - ax.set_ylabel(self.objective.name) - plt.show() self.logger.log("\n✔ Optimization finished") From e0afe11d08f37a789736db21a54e208afd4de4a8 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Fri, 18 Oct 2019 15:41:38 -0400 Subject: [PATCH 03/25] Changed to show best score in iteration --- evalml/models/auto_base.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index aebb038706..8f3ef9685d 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -93,12 +93,12 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals self """ - def update_plot(fig, ax): + def update_plot(fig, ax, iter_scores): ax.clear() - plot_data = self.rankings[['id', 'score']] - plot_data = plot_data.sort_values('id') title = 'Pipeline Search: Iteration vs. 
{}'.format(self.objective.name) - plot_data.plot(x='id', y='score', xticks=plot_data['id'], legend=False, style='-o', ax=ax, title=title) + iter_numbers = list(range(len(iter_scores))) + plt.plot(iter_numbers, iter_scores, '-o') + plt.title(title) ax.set_xlabel('iteration') ax.set_ylabel(self.objective.name) fig.canvas.draw() @@ -139,6 +139,7 @@ def update_plot(fig, ax): ax = fig.add_subplot(111) fig.canvas.draw() plt.show() + iter_scores = list() pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') start = time.time() for n in pbar: @@ -148,7 +149,9 @@ def update_plot(fig, ax): break self._do_iteration(X, y, pbar, raise_errors) if plot_iterations: - update_plot(fig, ax) + new_score = self.rankings['score'].max() + iter_scores.append(new_score) + update_plot(fig, ax, iter_scores) pbar.close() self.logger.log("\n✔ Optimization finished") From eeec53d4d59fbfffb7893eb47ecdcbb617f29acf Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 21 Oct 2019 11:34:06 -0400 Subject: [PATCH 04/25] Support for metrics where lower is better --- evalml/models/auto_base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 8f3ef9685d..b51bfb0040 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -149,7 +149,10 @@ def update_plot(fig, ax, iter_scores): break self._do_iteration(X, y, pbar, raise_errors) if plot_iterations: - new_score = self.rankings['score'].max() + if self.objective.greater_is_better: + new_score = self.rankings['score'].max() + else: + new_score = self.rankings['score'].min() iter_scores.append(new_score) update_plot(fig, ax, iter_scores) pbar.close() From 19e640d1184d97dc07646a52ee11aa2c62bfd8e9 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 21 Oct 2019 16:53:32 -0400 Subject: [PATCH 05/25] Separated plotting and calculating into two separate functions --- evalml/models/auto_base.py | 53 +++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 2ebd5c37ee..29a5d135de 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -36,6 +36,8 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time, self.logger = Logger(self.verbose) self.possible_pipelines = get_pipelines(problem_type=self.problem_type, model_types=model_types) self.objective = get_objective(objective) + self.best_score_by_iter_fig = None + self.best_score_by_iter_ax = None if self.problem_type not in self.objective.problem_types: raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value)) @@ -93,15 +95,6 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals self """ - def update_plot(fig, ax, iter_scores): - ax.clear() - title = 'Pipeline Search: Iteration vs. 
{}'.format(self.objective.name) - iter_numbers = list(range(len(iter_scores))) - plt.plot(iter_numbers, iter_scores, '-o') - plt.title(title) - ax.set_xlabel('iteration') - ax.set_ylabel(self.objective.name) - fig.canvas.draw() # make everything pandas objects if not isinstance(X, pd.DataFrame): @@ -133,19 +126,12 @@ def update_plot(fig, ax, iter_scores): if len(leaked) > 0: leaked = [str(k) for k in leaked.keys()] self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked)) - if plot_iterations: - matplotlib.use('nbagg') - fig = plt.figure() - ax = fig.add_subplot(111) - fig.canvas.draw() - plt.show() - iter_scores = list() if self.null_threshold is not None: highly_null_columns = guardrails.detect_highly_null(X, percent_threshold=self.null_threshold) if len(highly_null_columns) > 0: self.logger.log("WARNING: {} columns are at least {}% null.".format(', '.join(highly_null_columns), self.null_threshold * 100)) - + self.best_score_by_iteration = list() pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') start = time.time() for n in pbar: @@ -154,13 +140,13 @@ def update_plot(fig, ax, iter_scores): self.logger.log("\n\nMax time elapsed. Stopping search early.") break self._do_iteration(X, y, pbar, raise_errors) + if self.objective.greater_is_better: + new_score = self.rankings['score'].max() + else: + new_score = self.rankings['score'].min() + self.best_score_by_iteration.append(new_score) if plot_iterations: - if self.objective.greater_is_better: - new_score = self.rankings['score'].max() - else: - new_score = self.rankings['score'].min() - iter_scores.append(new_score) - update_plot(fig, ax, iter_scores) + self.plot_best_score_by_iteration(interactive_plot=True) pbar.close() self.logger.log("\n✔ Optimization finished") @@ -356,6 +342,27 @@ def describe_pipeline(self, pipeline_id, return_dict=False): if return_dict: return pipeline_results + def plot_best_score_by_iteration(self, interactive_plot=False): + if interactive_plot: + matplotlib.use('nbagg') + no_plot_made = self.best_score_by_iter_fig is None and self.best_score_by_iter_ax is None + if no_plot_made or interactive_plot is False: + fig = plt.figure() + ax = fig.add_subplot(111) + self.best_score_by_iter_fig = fig + self.best_score_by_iter_ax = ax + plt.show() + self.best_score_by_iter_fig.canvas.draw() + if interactive_plot: + self.best_score_by_iter_ax.clear() + title = 'Pipeline Search: Iteration vs. 
{}'.format(self.objective.name) + iter_numbers = list(range(len(self.best_score_by_iteration))) + plt.plot(iter_numbers, self.best_score_by_iteration, '-o') + plt.title(title) + self.best_score_by_iter_ax.set_xlabel('iteration') + self.best_score_by_iter_ax.set_ylabel(self.objective.name) + self.best_score_by_iter_fig.canvas.draw() + @property def rankings(self): """Returns the rankings of the models searched""" From c231be740aaf943092739d7c4d379cd674499109 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 22 Oct 2019 11:29:50 -0400 Subject: [PATCH 06/25] Fixed blank figures bug --- evalml/models/auto_base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 29a5d135de..6317e6601b 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -351,8 +351,9 @@ def plot_best_score_by_iteration(self, interactive_plot=False): ax = fig.add_subplot(111) self.best_score_by_iter_fig = fig self.best_score_by_iter_ax = ax - plt.show() - self.best_score_by_iter_fig.canvas.draw() + if interactive_plot: + plt.show() + self.best_score_by_iter_fig.canvas.draw() if interactive_plot: self.best_score_by_iter_ax.clear() title = 'Pipeline Search: Iteration vs. {}'.format(self.objective.name) @@ -361,7 +362,10 @@ def plot_best_score_by_iteration(self, interactive_plot=False): plt.title(title) self.best_score_by_iter_ax.set_xlabel('iteration') self.best_score_by_iter_ax.set_ylabel(self.objective.name) - self.best_score_by_iter_fig.canvas.draw() + if interactive_plot is False: + return self.best_score_by_iter_fig + else: + self.best_score_by_iter_fig.canvas.draw() @property def rankings(self): From c06e56ea42be1df3b732c2418851936f6bfc8b81 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 22 Oct 2019 11:39:48 -0400 Subject: [PATCH 07/25] Created test for plotting iterations --- evalml/tests/automl_tests/test_autoclassifier.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py index 12f4123bc2..37006afcb6 100644 --- a/evalml/tests/automl_tests/test_autoclassifier.py +++ b/evalml/tests/automl_tests/test_autoclassifier.py @@ -1,3 +1,4 @@ +import matplotlib.pyplot as plt import numpy as np import pandas as pd import pytest @@ -272,4 +273,15 @@ def test_max_time_units(): with pytest.raises(TypeError, match="max_time must be a float, int, or string. 
Received a ."): AutoClassifier(objective='F1', max_time=(30, 'minutes')) + + +def test_plot_iterations(X_y): + X, y = X_y + + clf = AutoClassifier(multiclass=False, max_pipelines=1) + + clf.fit(X, y) + + figure = clf.plot_best_score_by_iteration() + assert isinstance(figure, type(plt.figure())) # def test_serialization(trained_model) From 2b5d308a2894e53402ec934c3ad2b0888b178a50 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 22 Oct 2019 15:45:27 -0400 Subject: [PATCH 08/25] Increased pipeline number in test --- evalml/tests/automl_tests/test_autoclassifier.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py index 37006afcb6..896c4bd3c0 100644 --- a/evalml/tests/automl_tests/test_autoclassifier.py +++ b/evalml/tests/automl_tests/test_autoclassifier.py @@ -278,10 +278,11 @@ def test_max_time_units(): def test_plot_iterations(X_y): X, y = X_y - clf = AutoClassifier(multiclass=False, max_pipelines=1) + clf = AutoClassifier(multiclass=False, max_pipelines=3) clf.fit(X, y) figure = clf.plot_best_score_by_iteration() assert isinstance(figure, type(plt.figure())) + clf.plot_best_score_by_iteration(interactive_plot=True) # def test_serialization(trained_model) From a85048ec98e0cb3e1b4064092296597e98cebfa9 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 28 Oct 2019 10:09:20 -0400 Subject: [PATCH 09/25] Updated changelog --- docs/source/changelog.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index e425a9b64e..db2d6ea6a0 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -11,6 +11,7 @@ Changelog * Added support for other units in max_time :pr:`125` * Detect highly null columns :pr:`121` * Added additional regression objectives :pr:`100` + * Show an interactive iteration vs. score plot when using fit() :pr:`134` * Fixes * Reordered `describe_pipeline` :pr:`94` * Added type check for model_type :pr:`109` @@ -59,9 +60,9 @@ Changelog **v0.2.0 Aug. 13, 2019** * Enhancements * Created fraud detection objective :pr:`4` - + **v0.1.0 July. 
31, 2019**
-    * *First Release*
+    * *First Release*
     * Enhancements
         * Added lead scoring objective :pr:`1`
         * Added basic classifier :pr:`1`

From c64e3a86d90a8e3b15c6dd28c878fc6d5ec3251f Mon Sep 17 00:00:00 2001
From: christopherbunn
Date: Tue, 12 Nov 2019 11:30:34 -0500
Subject: [PATCH 10/25] Moved plotting functionality to plotly

---
 evalml/models/auto_base.py                    | 91 ++++++++++---------
 .../tests/automl_tests/test_autoclassifier.py | 27 +-----
 2 files changed, 52 insertions(+), 66 deletions(-)

diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py
index 3d8ae63d1b..bb3b64713f 100644
--- a/evalml/models/auto_base.py
+++ b/evalml/models/auto_base.py
@@ -3,11 +3,11 @@
 from collections import OrderedDict
 from sys import stdout

-import matplotlib
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
+import plotly.graph_objects as go
+from IPython.display import display

 from evalml import guardrails
 from evalml.objectives import get_objective, get_objectives
@@ -97,7 +97,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals

             self
         """
-
+        self.best_score_by_iter_fig = None
+        self.best_score_by_iter_ax = None
         # make everything pandas objects
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
@@ -132,6 +133,7 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals
         if len(leaked) > 0:
             leaked = [str(k) for k in leaked.keys()]
             self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked))
+
         if self.id_cols_threshold is not None:
             id_cols = guardrails.detect_id_columns(X, self.id_cols_threshold)
             if len(id_cols) > 0:
@@ -141,24 +143,34 @@ def fit(self, X, y, feature_types=None, raise_errors=False, plot_iterations=Fals
             highly_null_columns = guardrails.detect_highly_null(X, percent_threshold=self.null_threshold)
             if len(highly_null_columns) > 0:
                 self.logger.log("WARNING: {} columns are at least {}% null.".format(', '.join(highly_null_columns), self.null_threshold * 100))
-        self.best_score_by_iteration = list()
-        pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}')
-        start = time.time()
-        for n in pbar:
-            elapsed = time.time() - start
-            if self.max_time and elapsed > self.max_time:
-                self.logger.log("\n\nMax time elapsed.
Stopping search early.") - break - self._do_iteration(X, y, pbar, raise_errors) - if self.objective.greater_is_better: - new_score = self.rankings['score'].max() - else: - new_score = self.rankings['score'].min() - self.best_score_by_iteration.append(new_score) - if plot_iterations: - self.plot_best_score_by_iteration(interactive_plot=True) - pbar.close() + # Setup plot iterations if needed + if plot_iterations: + self.plot_best_score_by_iteration(interactive_plot=True) + + if self.max_pipelines is None: + start = time.time() + pbar = tqdm(total=self.max_time, disable=not self.verbose, file=stdout, bar_format='{desc} | Elapsed:{elapsed}') + pbar._instances.clear() + while time.time() - start <= self.max_time: + self._do_iteration(X, y, pbar, raise_errors) + if plot_iterations: + self.plot_best_score_by_iteration(interactive_plot=True) + pbar.close() + else: + pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') + pbar._instances.clear() + start = time.time() + for n in pbar: + elapsed = time.time() - start + if self.max_time and elapsed > self.max_time: + pbar.close() + self.logger.log("\n\nMax time elapsed. Stopping search early.") + break + self._do_iteration(X, y, pbar, raise_errors) + if plot_iterations: + self.plot_best_score_by_iteration(interactive_plot=True) + pbar.close() self.logger.log("\n✔ Optimization finished") def check_multiclass(self, y): @@ -352,29 +364,26 @@ def describe_pipeline(self, pipeline_id, return_dict=False): return pipeline_results def plot_best_score_by_iteration(self, interactive_plot=False): - if interactive_plot: - matplotlib.use('nbagg') - no_plot_made = self.best_score_by_iter_fig is None and self.best_score_by_iter_ax is None + if self.best_score_by_iter_fig is None: + self.best_score_by_iteration = list() + + no_plot_made = self.best_score_by_iter_fig is None if no_plot_made or interactive_plot is False: - fig = plt.figure() - ax = fig.add_subplot(111) - self.best_score_by_iter_fig = fig - self.best_score_by_iter_ax = ax - if interactive_plot: - plt.show() - self.best_score_by_iter_fig.canvas.draw() - if interactive_plot: - self.best_score_by_iter_ax.clear() - title = 'Pipeline Search: Iteration vs. {}'.format(self.objective.name) - iter_numbers = list(range(len(self.best_score_by_iteration))) - plt.plot(iter_numbers, self.best_score_by_iteration, '-o') - plt.title(title) - self.best_score_by_iter_ax.set_xlabel('iteration') - self.best_score_by_iter_ax.set_ylabel(self.objective.name) - if interactive_plot is False: - return self.best_score_by_iter_fig + iter_numbers = list(range(len(self.best_score_by_iteration))) + title = 'Pipeline Search: Iteration vs. 
{}'.format(self.objective.name) + data = go.Scatter(x=iter_numbers, y=self.best_score_by_iteration, mode='lines+markers') + layout = dict(title=title) + self.best_score_by_iter_fig = go.FigureWidget(data, layout) + display(self.best_score_by_iter_fig) else: - self.best_score_by_iter_fig.canvas.draw() + if self.objective.greater_is_better: + new_score = self.rankings['score'].max() + else: + new_score = self.rankings['score'].min() + self.best_score_by_iteration.append(new_score) + trace = self.best_score_by_iter_fig.data[0] + trace.x = list(range(len(self.best_score_by_iteration))) + trace.y = self.best_score_by_iteration @property def rankings(self): diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py index 78197ebe77..5cd8a8cf80 100644 --- a/evalml/tests/automl_tests/test_autoclassifier.py +++ b/evalml/tests/automl_tests/test_autoclassifier.py @@ -1,4 +1,3 @@ -import matplotlib.pyplot as plt import numpy as np import pandas as pd import pytest @@ -280,27 +279,5 @@ def test_plot_iterations(X_y): clf = AutoClassifier(multiclass=False, max_pipelines=3) - clf.fit(X, y) - - figure = clf.plot_best_score_by_iteration() - assert isinstance(figure, type(plt.figure())) - clf.plot_best_score_by_iteration(interactive_plot=True) - - -def test_guardrail_warnings(X_y, capsys): - X, y = X_y - X = pd.DataFrame(X) - y = pd.Series(y) - - # create outliers - X.iloc[2, :] = -1000 - X.iloc[5, :] = 1000 - - clf = AutoClassifier(check_outliers=True) - clf.fit(X, y) - clf.describe_pipeline(0) - out, err = capsys.readouterr() - out_stripped = " ".join(out.split()) - assert err == '' - outlier_warning = "may contain outlier data." - assert outlier_warning in out_stripped + clf.fit(X, y, plot_iterations=True) + clf.plot_best_score_by_iteration() From 195732b22f2cf6b47d46d7b7dd5c50177db7db6f Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 12 Nov 2019 15:12:26 -0500 Subject: [PATCH 11/25] Fixed linting and test errors --- evalml/models/auto_base.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index f625bdf82b..5f91f31d8e 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd -from tqdm import tqdm import plotly.graph_objects as go from IPython.display import display +from tqdm import tqdm from evalml import guardrails from evalml.objectives import get_objective, get_objectives diff --git a/requirements.txt b/requirements.txt index 1dcc7b45d2..7cf149de87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ s3fs==0.2.2 joblib>=0.10.3 category_encoders>=2.0.0 cloudpickle>=0.2.2 +plotly>=4.3.0 \ No newline at end of file From 12c2ca3cc87703e8801cd7fa79e79c3b3831a52e Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Wed, 13 Nov 2019 10:36:03 -0500 Subject: [PATCH 12/25] Created plot with only max_time --- evalml/tests/automl_tests/test_autoclassifier.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py index 5cd8a8cf80..e6432d2910 100644 --- a/evalml/tests/automl_tests/test_autoclassifier.py +++ b/evalml/tests/automl_tests/test_autoclassifier.py @@ -278,6 +278,9 @@ def test_plot_iterations(X_y): X, y = X_y clf = AutoClassifier(multiclass=False, max_pipelines=3) - clf.fit(X, y, plot_iterations=True) clf.plot_best_score_by_iteration() + + clf2 
= AutoClassifier(multiclass=False, max_time=2)
+    clf2.fit(X, y, plot_iterations=True)
+    clf2.plot_best_score_by_iteration()
\ No newline at end of file

From 4426656c61b49024f651cae8213d9c326b9f13cd Mon Sep 17 00:00:00 2001
From: christopherbunn
Date: Fri, 6 Dec 2019 15:08:43 -0500
Subject: [PATCH 13/25] Moved iteration plotting to pipeline search plots

---
 evalml/models/auto_base.py             | 50 +++++++++-----------
 evalml/models/pipeline_search_plots.py | 23 ++++++++++++
 2 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py
index 0149d5f799..071a71e235 100644
--- a/evalml/models/auto_base.py
+++ b/evalml/models/auto_base.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pandas as pd
-import plotly.graph_objects as go
-from IPython.display import display
 from tqdm import tqdm

 from .pipeline_search_plots import PipelineSearchPlots
@@ -79,7 +77,7 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,

         self.plot = PipelineSearchPlots(self)

-    def fit(self, X, y, feature_types=None, raise_errors=False):
+    def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=False):
         """Find best classifier

         Arguments:
@@ -92,15 +90,21 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa

             raise_errors (boolean): If true, raise errors and exit search if a pipeline errors during fitting

-            plot_iterations (boolean, False): Show interactive plot of iteration vs. score
-                during fitting. Can only be ran in Jupyter Notebook.
+            no_iteration_plot (boolean, False): Disables the iteration vs. score plot in Jupyter notebook.
+                Disabled by default in non-Jupyter environments.

         Returns:

             self
         """
-        self.best_score_by_iter_fig = None
-        self.best_score_by_iter_ax = None
+        # don't show iteration plot outside of a jupyter notebook
+        if no_iteration_plot is False:
+            try:
+                get_ipython
+                no_iteration_plot = False
+            except NameError:
+                no_iteration_plot = True
+
         # make everything pandas objects
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
@@ -136,14 +140,15 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa
             leaked = [str(k) for k in leaked.keys()]
             self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked))

+        if no_iteration_plot is False:
+            self.plot.best_score_by_iteration()
+
         if self.max_pipelines is None:
             start = time.time()
             pbar = tqdm(total=self.max_time, disable=not self.verbose, file=stdout, bar_format='{desc} | Elapsed:{elapsed}')
             pbar._instances.clear()
             while time.time() - start <= self.max_time:
                 self._do_iteration(X, y, pbar, raise_errors)
-                if plot_iterations:
-                    self.plot_best_score_by_iteration(interactive_plot=True)
             pbar.close()
         else:
             pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}')
@@ -156,8 +161,6 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa
                 pbar.close()
                 self.logger.log("\n\nMax time elapsed. Stopping search early.")
                 break
             self._do_iteration(X, y, pbar, raise_errors)
-            if plot_iterations:
-                self.plot_best_score_by_iteration(interactive_plot=True)
             pbar.close()
         self.logger.log("\n✔ Optimization finished")
@@ -229,6 +232,9 @@ def _do_iteration(self, X, y, pbar, raise_errors):
                                  training_time=training_time,
                                  cv_data=cv_data)

+        # Update the score for the score vs.
iteration plots + self.plot.add_iteration_score() + desc = "✔" + desc[1:] pbar.set_description_str(desc=desc, refresh=True) if self.verbose: # To force new line between progress bar iterations @@ -344,28 +350,6 @@ def describe_pipeline(self, pipeline_id, return_dict=False): if return_dict: return pipeline_results - def plot_best_score_by_iteration(self, interactive_plot=False): - if self.best_score_by_iter_fig is None: - self.best_score_by_iteration = list() - - no_plot_made = self.best_score_by_iter_fig is None - if no_plot_made or interactive_plot is False: - iter_numbers = list(range(len(self.best_score_by_iteration))) - title = 'Pipeline Search: Iteration vs. {}'.format(self.objective.name) - data = go.Scatter(x=iter_numbers, y=self.best_score_by_iteration, mode='lines+markers') - layout = dict(title=title) - self.best_score_by_iter_fig = go.FigureWidget(data, layout) - display(self.best_score_by_iter_fig) - else: - if self.objective.greater_is_better: - new_score = self.rankings['score'].max() - else: - new_score = self.rankings['score'].min() - self.best_score_by_iteration.append(new_score) - trace = self.best_score_by_iter_fig.data[0] - trace.x = list(range(len(self.best_score_by_iteration))) - trace.y = self.best_score_by_iteration - @property def rankings(self): """Returns the rankings of the models searched""" diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index 36c4dae3a1..0328545a1f 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -1,6 +1,7 @@ import numpy as np import plotly.graph_objects as go import sklearn.metrics +from IPython.display import display from scipy import interp from evalml.problem_types import ProblemTypes @@ -17,6 +18,8 @@ def __init__(self, data): data (AutoClassifier or AutoRegressor): Automated pipeline search object """ self.data = data + self.best_score_by_iter_fig = None + self.iteration_scores = list() def get_roc_data(self, pipeline_id): """Gets data that can be used to create a ROC plot. @@ -154,3 +157,23 @@ def generate_confusion_matrix(self, pipeline_id, fold_num=None): ''), # necessary to remove unwanted trace info layout=layout) return figure + + def add_iteration_score(self): + if self.data.objective.greater_is_better: + new_score = self.data.rankings['score'].max() + else: + new_score = self.data.rankings['score'].min() + self.iteration_scores.append(new_score) + + if self.best_score_by_iter_fig is not None: + trace = self.best_score_by_iter_fig.data[0] + trace.x = list(range(len(self.iteration_scores))) + trace.y = self.iteration_scores + + def best_score_by_iteration(self): + iter_numbers = list(range(len(self.iteration_scores))) + title = 'Pipeline Search: Iteration vs. 
{}'.format(self.data.objective.name) + data = go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers') + layout = dict(title=title, xaxis_title='Iteration', yaxis_title='Score') + self.best_score_by_iter_fig = go.FigureWidget(data, layout) + display(self.best_score_by_iter_fig) From 61379c7f65963925814133e85f62f8fc75c2fdf3 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Fri, 6 Dec 2019 15:36:32 -0500 Subject: [PATCH 14/25] Fixed lint and test issues --- evalml/tests/automl_tests/test_autoclassifier.py | 12 ------------ .../automl_tests/test_pipeline_search_plots.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py index b9df8f59c6..87a0533229 100644 --- a/evalml/tests/automl_tests/test_autoclassifier.py +++ b/evalml/tests/automl_tests/test_autoclassifier.py @@ -269,15 +269,3 @@ def test_max_time_units(): with pytest.raises(TypeError, match="max_time must be a float, int, or string. Received a ."): AutoClassifier(objective='F1', max_time=(30, 'minutes')) - - -def test_plot_iterations(X_y): - X, y = X_y - - clf = AutoClassifier(multiclass=False, max_pipelines=3) - clf.fit(X, y, plot_iterations=True) - clf.plot_best_score_by_iteration() - - clf2 = AutoClassifier(multiclass=False, max_time=2) - clf2.fit(X, y, plot_iterations=True) - clf2.plot_best_score_by_iteration() \ No newline at end of file diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py index da6c4e7bcb..54ae2e0c4a 100644 --- a/evalml/tests/automl_tests/test_pipeline_search_plots.py +++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py @@ -6,6 +6,7 @@ from sklearn.model_selection import StratifiedKFold from evalml.models.auto_base import AutoBase +from evalml.models import AutoClassifier from evalml.models.pipeline_search_plots import PipelineSearchPlots from evalml.pipelines import LogisticRegressionPipeline from evalml.problem_types import ProblemTypes @@ -150,7 +151,6 @@ def fit(self): def test_confusion_matrix_regression_throws_error(): - # Make mock class and generate mock results class MockAutoRegressor(AutoBase): def __init__(self): @@ -164,3 +164,15 @@ def __init__(self): search_plots.get_confusion_matrix_data(0) with pytest.raises(RuntimeError, match="Confusion matrix plots can only be generated for classification problems."): search_plots.generate_confusion_matrix(0) + + +def test_plot_iterations(X_y): + X, y = X_y + + clf = AutoClassifier(multiclass=False, max_pipelines=3) + clf.fit(X, y, no_iteration_plot=True) + clf.plot.best_score_by_iteration() + + clf2 = AutoClassifier(multiclass=False, max_time=2) + clf2.fit(X, y, no_iteration_plot=False) + clf2.plot.best_score_by_iteration() From 33021166bf6a5afe384bb8088b63913ea0e4a498 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Fri, 6 Dec 2019 16:22:39 -0500 Subject: [PATCH 15/25] Fixed other lint error --- evalml/tests/automl_tests/test_pipeline_search_plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py index 54ae2e0c4a..e901d8011b 100644 --- a/evalml/tests/automl_tests/test_pipeline_search_plots.py +++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py @@ -5,8 +5,8 @@ import pytest from sklearn.model_selection import StratifiedKFold -from evalml.models.auto_base import AutoBase from 
evalml.models import AutoClassifier
+from evalml.models.auto_base import AutoBase
 from evalml.models.pipeline_search_plots import PipelineSearchPlots
 from evalml.pipelines import LogisticRegressionPipeline
 from evalml.problem_types import ProblemTypes

From 195732b22f2cf6b47d46d7b7dd5c50177db7db6f Mon Sep 17 00:00:00 2001
From: christopherbunn
Date: Mon, 9 Dec 2019 10:56:08 -0500
Subject: [PATCH 16/25] Introduced new SearchIterationPlot obj

---
 evalml/models/auto_base.py             | 19 +++++----
 evalml/models/pipeline_search_plots.py | 55 ++++++++++++++++----------
 2 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py
index 68a8363d77..e2a031c263 100644
--- a/evalml/models/auto_base.py
+++ b/evalml/models/auto_base.py
@@ -77,7 +77,7 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,

         self.plot = PipelineSearchPlots(self)

-    def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=False):
+    def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot=True):
         """Find best classifier

         Arguments:
@@ -90,7 +90,7 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa

             raise_errors (boolean): If true, raise errors and exit search if a pipeline errors during fitting

-            no_iteration_plot (boolean, False): Disables the iteration vs. score plot in Jupyter notebook.
+            show_iteration_plot (boolean, True): Shows an iteration vs. score plot in Jupyter notebook.
                 Disabled by default in non-Jupyter environments.

         Returns:
@@ -98,12 +98,11 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa
             self
         """
         # don't show iteration plot outside of a jupyter notebook
-        if no_iteration_plot is False:
+        if show_iteration_plot is True:
             try:
                 get_ipython
-                no_iteration_plot = False
             except NameError:
-                no_iteration_plot = True
+                show_iteration_plot = False

         # make everything pandas objects
         if not isinstance(X, pd.DataFrame):
@@ -140,8 +139,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, no_iteration_plot=Fa
             leaked = [str(k) for k in leaked.keys()]
             self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked))

-        if no_iteration_plot is False:
-            self.plot.best_score_by_iteration()
+        if show_iteration_plot is True:
+            self.plot.search_iteration_plot(interactive_plot=True)

         if self.max_pipelines is None:
             start = time.time()
@@ -232,9 +231,6 @@ def _do_iteration(self, X, y, pbar, raise_errors):
                                  training_time=training_time,
                                  cv_data=cv_data)

-        # Update the score for the score vs.
iteration plots - self.plot.add_iteration_score() - desc = "✔" + desc[1:] pbar.set_description_str(desc=desc, refresh=True) if self.verbose: # To force new line between progress bar iterations @@ -281,6 +277,9 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data): "cv_data": cv_data } + # Update the iteration plot to include new score + self.plot.iter_plot.update() + if self.add_result_callback: self.add_result_callback(self.results[pipeline_id], trained_pipeline) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index 0328545a1f..b7ff43c6d8 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -7,6 +7,31 @@ from evalml.problem_types import ProblemTypes +class SearchIterationPlot(): + def __init__(self, data, show_plot=True): + self.data = data + self.best_score_by_iter_fig = None + self.iteration_scores = list() + + iter_numbers = list(range(len(self.iteration_scores))) + title = 'Pipeline Search: Iteration vs. {}'.format(self.data.objective.name) + data = go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers') + layout = dict(title=title, xaxis_title='Iteration', yaxis_title='Score') + self.best_score_by_iter_fig = go.FigureWidget(data, layout) + + def update(self): + if self.data.objective.greater_is_better: + new_score = self.data.rankings['score'].max() + else: + new_score = self.data.rankings['score'].min() + self.iteration_scores.append(new_score) + + if self.best_score_by_iter_fig is not None: + trace = self.best_score_by_iter_fig.data[0] + trace.x = list(range(len(self.iteration_scores))) + trace.y = self.iteration_scores + + class PipelineSearchPlots: """Plots for the AutoClassifier/AutoRegressor class. """ @@ -18,8 +43,7 @@ def __init__(self, data): data (AutoClassifier or AutoRegressor): Automated pipeline search object """ self.data = data - self.best_score_by_iter_fig = None - self.iteration_scores = list() + self.iter_plot = SearchIterationPlot(self.data) def get_roc_data(self, pipeline_id): """Gets data that can be used to create a ROC plot. @@ -158,22 +182,13 @@ def generate_confusion_matrix(self, pipeline_id, fold_num=None): layout=layout) return figure - def add_iteration_score(self): - if self.data.objective.greater_is_better: - new_score = self.data.rankings['score'].max() - else: - new_score = self.data.rankings['score'].min() - self.iteration_scores.append(new_score) + def search_iteration_plot(self, interactive_plot=False): + """Shows a plot of the best score at each iteration using data gathered during training. - if self.best_score_by_iter_fig is not None: - trace = self.best_score_by_iter_fig.data[0] - trace.x = list(range(len(self.iteration_scores))) - trace.y = self.iteration_scores - - def best_score_by_iteration(self): - iter_numbers = list(range(len(self.iteration_scores))) - title = 'Pipeline Search: Iteration vs. 
{}'.format(self.data.objective.name) - data = go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers') - layout = dict(title=title, xaxis_title='Iteration', yaxis_title='Score') - self.best_score_by_iter_fig = go.FigureWidget(data, layout) - display(self.best_score_by_iter_fig) + Returns: + plot + """ + if interactive_plot is True: + display(self.iter_plot.best_score_by_iter_fig) + else: + return go.Figure(self.iter_plot.best_score_by_iter_fig) From 15b054a6bb3c8b56ce7fdfe7d3e242409bed2222 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 9 Dec 2019 11:09:13 -0500 Subject: [PATCH 17/25] Updated plot axis to show only int values --- evalml/models/pipeline_search_plots.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index b7ff43c6d8..aff4a6701f 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -16,7 +16,17 @@ def __init__(self, data, show_plot=True): iter_numbers = list(range(len(self.iteration_scores))) title = 'Pipeline Search: Iteration vs. {}'.format(self.data.objective.name) data = go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers') - layout = dict(title=title, xaxis_title='Iteration', yaxis_title='Score') + layout = { + 'title': title, + 'xaxis': { + 'title': 'Iteration', + 'tickformat': ',d', + 'rangemode': 'tozero' + }, + 'yaxis': { + 'title': 'Score' + } + } self.best_score_by_iter_fig = go.FigureWidget(data, layout) def update(self): From 2bb414fd28c9dcf728d2efbf94e7412bce195d23 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 9 Dec 2019 11:36:43 -0500 Subject: [PATCH 18/25] Added gray dot to indicate current iteration --- evalml/models/pipeline_search_plots.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index aff4a6701f..0c6f09a87c 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -14,8 +14,8 @@ def __init__(self, data, show_plot=True): self.iteration_scores = list() iter_numbers = list(range(len(self.iteration_scores))) - title = 'Pipeline Search: Iteration vs. {}'.format(self.data.objective.name) - data = go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers') + title = 'Pipeline Search: Iteration vs. {}
Gray marker indicates current iteration'.format(self.data.objective.name) + data = [go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers'), go.Scatter(x=[], y=[], mode='markers', marker={'color': 'gray'})] layout = { 'title': title, 'xaxis': { @@ -28,6 +28,7 @@ def __init__(self, data, show_plot=True): } } self.best_score_by_iter_fig = go.FigureWidget(data, layout) + self.best_score_by_iter_fig.update_layout(showlegend=False) def update(self): if self.data.objective.greater_is_better: @@ -36,10 +37,15 @@ def update(self): new_score = self.data.rankings['score'].min() self.iteration_scores.append(new_score) - if self.best_score_by_iter_fig is not None: - trace = self.best_score_by_iter_fig.data[0] - trace.x = list(range(len(self.iteration_scores))) - trace.y = self.iteration_scores + # Update current point in plot + trace = self.best_score_by_iter_fig.data[1] + trace.x = [len(self.iteration_scores) - 1] + trace.y = [new_score] + + # Update entire line plot + trace = self.best_score_by_iter_fig.data[0] + trace.x = list(range(len(self.iteration_scores))) + trace.y = self.iteration_scores class PipelineSearchPlots: From 38dc29d3ee1ca15343b71f65b407d56d8905b471 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 9 Dec 2019 13:03:32 -0500 Subject: [PATCH 19/25] Updated test to check monotonic --- .../automl_tests/test_pipeline_search_plots.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py index e901d8011b..e860be58d7 100644 --- a/evalml/tests/automl_tests/test_pipeline_search_plots.py +++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py @@ -169,10 +169,13 @@ def __init__(self): def test_plot_iterations(X_y): X, y = X_y - clf = AutoClassifier(multiclass=False, max_pipelines=3) - clf.fit(X, y, no_iteration_plot=True) - clf.plot.best_score_by_iteration() - - clf2 = AutoClassifier(multiclass=False, max_time=2) - clf2.fit(X, y, no_iteration_plot=False) - clf2.plot.best_score_by_iteration() + clf = AutoClassifier(max_pipelines=3) + clf.fit(X, y) + plot = clf.plot.search_iteration_plot() + plot_data = plot.data[0] + x = pd.Series(plot_data['x']) + y = pd.Series(plot_data['y']) + + assert isinstance(plot, go.Figure) + assert x.is_monotonic_increasing + assert y.is_monotonic_increasing From e33fcfa19ce0d3e8245d049e4d7533808c7b22da Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 9 Dec 2019 16:58:44 -0500 Subject: [PATCH 20/25] Added trace for current iter and restructured plot in AutoBase --- evalml/models/auto_base.py | 17 +++---- evalml/models/pipeline_search_plots.py | 49 +++++++++++-------- .../test_pipeline_search_plots.py | 18 ++++++- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index e2a031c263..587d39baaf 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -139,15 +139,14 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot= leaked = [str(k) for k in leaked.keys()] self.logger.log("WARNING: Possible label leakage: %s" % ", ".join(leaked)) - if show_iteration_plot is True: - self.plot.search_iteration_plot(interactive_plot=True) + plot = self.plot.search_iteration_plot(interactive_plot=show_iteration_plot) if self.max_pipelines is None: - start = time.time() pbar = tqdm(total=self.max_time, disable=not self.verbose, file=stdout, bar_format='{desc} | Elapsed:{elapsed}') 
pbar._instances.clear() + start = time.time() while time.time() - start <= self.max_time: - self._do_iteration(X, y, pbar, raise_errors) + self._do_iteration(X, y, pbar, raise_errors, plot) pbar.close() else: pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') @@ -159,7 +158,7 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot= pbar.close() self.logger.log("\n\nMax time elapsed. Stopping search early.") break - self._do_iteration(X, y, pbar, raise_errors) + self._do_iteration(X, y, pbar, raise_errors, plot) pbar.close() self.logger.log("\n✔ Optimization finished") @@ -173,7 +172,7 @@ def check_multiclass(self, y): if ProblemTypes.MULTICLASS not in obj.problem_types: raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(obj.name)) - def _do_iteration(self, X, y, pbar, raise_errors): + def _do_iteration(self, X, y, pbar, raise_errors, plot): # determine which pipeline to build pipeline_class = self._select_pipeline() @@ -234,6 +233,9 @@ def _do_iteration(self, X, y, pbar, raise_errors): training_time=training_time, cv_data=cv_data) + # Update plot with new score + plot.update() + desc = "✔" + desc[1:] pbar.set_description_str(desc=desc, refresh=True) if self.verbose: # To force new line between progress bar iterations @@ -277,9 +279,6 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data): "cv_data": cv_data } - # Update the iteration plot to include new score - self.plot.iter_plot.update() - if self.add_result_callback: self.add_result_callback(self.results[pipeline_id], trained_pipeline) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index 0c6f09a87c..dc48267eb4 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -11,11 +11,14 @@ class SearchIterationPlot(): def __init__(self, data, show_plot=True): self.data = data self.best_score_by_iter_fig = None - self.iteration_scores = list() - - iter_numbers = list(range(len(self.iteration_scores))) - title = 'Pipeline Search: Iteration vs. {}
Gray marker indicates current iteration'.format(self.data.objective.name) - data = [go.Scatter(x=iter_numbers, y=self.iteration_scores, mode='lines+markers'), go.Scatter(x=[], y=[], mode='markers', marker={'color': 'gray'})] + self.curr_iteration_scores = list() + self.best_iteration_scores = list() + + title = 'Pipeline Search: Iteration vs. {}
Gray marker indicates the score at current iteration'.format(self.data.objective.name) + data = [ + go.Scatter(x=[], y=[], mode='lines+markers', name='Best Score'), + go.Scatter(x=[], y=[], mode='markers', name='Iter score', marker={'color': 'gray'}) + ] layout = { 'title': title, 'xaxis': { @@ -31,21 +34,23 @@ def __init__(self, data, show_plot=True): self.best_score_by_iter_fig.update_layout(showlegend=False) def update(self): + iter_idx = self.data.rankings['id'].idxmax() + self.curr_iteration_scores.append(self.data.rankings['score'].iloc[iter_idx]) + if self.data.objective.greater_is_better: - new_score = self.data.rankings['score'].max() + iter_max_score = self.data.rankings['score'].max() else: - new_score = self.data.rankings['score'].min() - self.iteration_scores.append(new_score) - - # Update current point in plot - trace = self.best_score_by_iter_fig.data[1] - trace.x = [len(self.iteration_scores) - 1] - trace.y = [new_score] + iter_max_score = self.data.rankings['score'].min() + self.best_iteration_scores.append(iter_max_score) # Update entire line plot - trace = self.best_score_by_iter_fig.data[0] - trace.x = list(range(len(self.iteration_scores))) - trace.y = self.iteration_scores + curr_score_trace = self.best_score_by_iter_fig.data[1] + curr_score_trace.x = list(range(len(self.curr_iteration_scores))) + curr_score_trace.y = self.curr_iteration_scores + + best_score_trace = self.best_score_by_iter_fig.data[0] + best_score_trace.x = list(range(len(self.best_iteration_scores))) + best_score_trace.y = self.best_iteration_scores class PipelineSearchPlots: @@ -59,7 +64,6 @@ def __init__(self, data): data (AutoClassifier or AutoRegressor): Automated pipeline search object """ self.data = data - self.iter_plot = SearchIterationPlot(self.data) def get_roc_data(self, pipeline_id): """Gets data that can be used to create a ROC plot. 
@@ -204,7 +208,12 @@ def search_iteration_plot(self, interactive_plot=False): Returns: plot """ - if interactive_plot is True: - display(self.iter_plot.best_score_by_iter_fig) - else: + + if hasattr(self, 'iter_plot'): return go.Figure(self.iter_plot.best_score_by_iter_fig) + else: + self.iter_plot = SearchIterationPlot(self.data) + + if interactive_plot: + display(self.iter_plot.best_score_by_iter_fig) + return self.iter_plot diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py index e860be58d7..82e4d81054 100644 --- a/evalml/tests/automl_tests/test_pipeline_search_plots.py +++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py @@ -169,7 +169,7 @@ def __init__(self): def test_plot_iterations(X_y): X, y = X_y - clf = AutoClassifier(max_pipelines=3) + clf = AutoClassifier(objective="f1", max_pipelines=3) clf.fit(X, y) plot = clf.plot.search_iteration_plot() plot_data = plot.data[0] @@ -179,3 +179,19 @@ def test_plot_iterations(X_y): assert isinstance(plot, go.Figure) assert x.is_monotonic_increasing assert y.is_monotonic_increasing + assert len(x) == 3 + assert len(y) == 3 + + X, y = X_y + clf2 = AutoClassifier(objective="f1", max_time=10) + clf2.fit(X, y, show_iteration_plot=False) + plot = clf2.plot.search_iteration_plot() + plot_data = plot.data[0] + x = pd.Series(plot_data['x']) + y = pd.Series(plot_data['y']) + + assert isinstance(plot, go.Figure) + assert x.is_monotonic_increasing + assert y.is_monotonic_increasing + assert len(x) > 0 + assert len(y) > 0 From b04827f99450e202c9041219f3a822cdc3798e00 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 10 Dec 2019 14:49:03 -0500 Subject: [PATCH 21/25] Moved plotting out of _do_iteration --- evalml/models/auto_base.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 587d39baaf..826738a025 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -146,7 +146,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot= pbar._instances.clear() start = time.time() while time.time() - start <= self.max_time: - self._do_iteration(X, y, pbar, raise_errors, plot) + self._do_iteration(X, y, pbar, raise_errors) + plot.update() pbar.close() else: pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}') @@ -158,7 +159,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot= pbar.close() self.logger.log("\n\nMax time elapsed. 
Stopping search early.") break - self._do_iteration(X, y, pbar, raise_errors, plot) + self._do_iteration(X, y, pbar, raise_errors) + plot.update() pbar.close() self.logger.log("\n✔ Optimization finished") @@ -172,7 +174,7 @@ def check_multiclass(self, y): if ProblemTypes.MULTICLASS not in obj.problem_types: raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(obj.name)) - def _do_iteration(self, X, y, pbar, raise_errors, plot): + def _do_iteration(self, X, y, pbar, raise_errors): # determine which pipeline to build pipeline_class = self._select_pipeline() @@ -233,9 +235,6 @@ def _do_iteration(self, X, y, pbar, raise_errors, plot): training_time=training_time, cv_data=cv_data) - # Update plot with new score - plot.update() - desc = "✔" + desc[1:] pbar.set_description_str(desc=desc, refresh=True) if self.verbose: # To force new line between progress bar iterations From 3240805a2b8442449aa6634f7f0d9087a90e3ef1 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 10 Dec 2019 15:02:54 -0500 Subject: [PATCH 22/25] Changed update to use curr_score info from results['search_order'] --- evalml/models/pipeline_search_plots.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index 60c9254b3c..e616238086 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -23,7 +23,6 @@ def __init__(self, data, show_plot=True): 'title': title, 'xaxis': { 'title': 'Iteration', - 'tickformat': ',d', 'rangemode': 'tozero' }, 'yaxis': { @@ -34,8 +33,9 @@ def __init__(self, data, show_plot=True): self.best_score_by_iter_fig.update_layout(showlegend=False) def update(self): - iter_idx = self.data.rankings['id'].idxmax() - self.curr_iteration_scores.append(self.data.rankings['score'].iloc[iter_idx]) + iter_idx = self.data.results['search_order'] + pipeline_res = self.data.results['pipeline_results'] + iter_scores = [pipeline_res[i]['score'] for i in range(len(pipeline_res))] if self.data.objective.greater_is_better: iter_max_score = self.data.rankings['score'].max() @@ -45,8 +45,8 @@ def update(self): # Update entire line plot curr_score_trace = self.best_score_by_iter_fig.data[1] - curr_score_trace.x = list(range(len(self.curr_iteration_scores))) - curr_score_trace.y = self.curr_iteration_scores + curr_score_trace.x = iter_idx + curr_score_trace.y = iter_scores best_score_trace = self.best_score_by_iter_fig.data[0] best_score_trace.x = list(range(len(self.best_iteration_scores))) From ee3bd4592f97b56eccaa64ec466e573bab98cdd4 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Wed, 11 Dec 2019 10:05:34 -0500 Subject: [PATCH 23/25] Fixed iter score appearance bug --- evalml/models/pipeline_search_plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index e616238086..c66ff8f509 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -35,7 +35,7 @@ def __init__(self, data, show_plot=True): def update(self): iter_idx = self.data.results['search_order'] pipeline_res = self.data.results['pipeline_results'] - iter_scores = [pipeline_res[i]['score'] for i in range(len(pipeline_res))] + iter_scores = [pipeline_res[i]['score'] for i in iter_idx] if self.data.objective.greater_is_better: iter_max_score = self.data.rankings['score'].max() From 7f32c49926e802266bb65c668a28b26f91cf805d Mon Sep 
17 00:00:00 2001 From: christopherbunn Date: Thu, 12 Dec 2019 11:01:01 -0500 Subject: [PATCH 24/25] Overhauled plot object behavior to reload every iteration --- evalml/models/pipeline_search_plots.py | 65 +++++++++------- evalml/tests/automl_tests/test_autobase.py | 1 - .../tests/automl_tests/test_autoclassifier.py | 34 +++++++++ .../tests/automl_tests/test_autoregressor.py | 34 +++++++++ .../test_pipeline_search_plots.py | 75 ++++++++++++------- 5 files changed, 154 insertions(+), 55 deletions(-) diff --git a/evalml/models/pipeline_search_plots.py b/evalml/models/pipeline_search_plots.py index c66ff8f509..32671834b7 100644 --- a/evalml/models/pipeline_search_plots.py +++ b/evalml/models/pipeline_search_plots.py @@ -31,26 +31,41 @@ def __init__(self, data, show_plot=True): } self.best_score_by_iter_fig = go.FigureWidget(data, layout) self.best_score_by_iter_fig.update_layout(showlegend=False) + self.update() def update(self): - iter_idx = self.data.results['search_order'] - pipeline_res = self.data.results['pipeline_results'] - iter_scores = [pipeline_res[i]['score'] for i in iter_idx] - - if self.data.objective.greater_is_better: - iter_max_score = self.data.rankings['score'].max() - else: - iter_max_score = self.data.rankings['score'].min() - self.best_iteration_scores.append(iter_max_score) - - # Update entire line plot - curr_score_trace = self.best_score_by_iter_fig.data[1] - curr_score_trace.x = iter_idx - curr_score_trace.y = iter_scores - - best_score_trace = self.best_score_by_iter_fig.data[0] - best_score_trace.x = list(range(len(self.best_iteration_scores))) - best_score_trace.y = self.best_iteration_scores + if len(self.data.results['search_order']) > 0 and len(self.data.results['pipeline_results']) > 0: + iter_idx = self.data.results['search_order'] + pipeline_res = self.data.results['pipeline_results'] + iter_scores = [pipeline_res[i]['score'] for i in iter_idx] + + iter_score_pairs = zip(iter_idx, iter_scores) + iter_score_pairs = sorted(iter_score_pairs, key=lambda value: value[0]) + sorted_iter_idx, sorted_iter_scores = zip(*iter_score_pairs) + + # Create best score data + best_iteration_scores = list() + curr_best = None + for score in sorted_iter_scores: + if curr_best is None: + best_iteration_scores.append(score) + curr_best = score + else: + if self.data.objective.greater_is_better and score > curr_best \ + or not self.data.objective.greater_is_better and score < curr_best: + best_iteration_scores.append(score) + curr_best = score + else: + best_iteration_scores.append(curr_best) + + # Update entire line plot + best_score_trace = self.best_score_by_iter_fig.data[0] + best_score_trace.x = sorted_iter_idx + best_score_trace.y = best_iteration_scores + + curr_score_trace = self.best_score_by_iter_fig.data[1] + curr_score_trace.x = sorted_iter_idx + curr_score_trace.y = sorted_iter_scores class PipelineSearchPlots: @@ -208,12 +223,10 @@ def search_iteration_plot(self, interactive_plot=False): Returns: plot """ - - if hasattr(self, 'iter_plot'): - return go.Figure(self.iter_plot.best_score_by_iter_fig) - else: - self.iter_plot = SearchIterationPlot(self.data) - if interactive_plot: - display(self.iter_plot.best_score_by_iter_fig) - return self.iter_plot + plot_obj = SearchIterationPlot(self.data) + display(plot_obj.best_score_by_iter_fig) + return plot_obj + else: + plot_obj = SearchIterationPlot(self.data) + return go.Figure(plot_obj.best_score_by_iter_fig) diff --git a/evalml/tests/automl_tests/test_autobase.py b/evalml/tests/automl_tests/test_autobase.py index 
diff --git a/evalml/tests/automl_tests/test_autobase.py b/evalml/tests/automl_tests/test_autobase.py
index 17d569bd8d..d0e69cddbe 100644
--- a/evalml/tests/automl_tests/test_autobase.py
+++ b/evalml/tests/automl_tests/test_autobase.py
@@ -1,4 +1,3 @@
-
 import plotly.graph_objects as go
 from sklearn.model_selection import StratifiedKFold
 
diff --git a/evalml/tests/automl_tests/test_autoclassifier.py b/evalml/tests/automl_tests/test_autoclassifier.py
index e717f1011a..3e189e772d 100644
--- a/evalml/tests/automl_tests/test_autoclassifier.py
+++ b/evalml/tests/automl_tests/test_autoclassifier.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import plotly.graph_objects as go
 import pytest
 from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit
 
@@ -258,3 +259,36 @@ def test_max_time_units():
 
     with pytest.raises(TypeError, match="max_time must be a float, int, or string. Received a <class 'tuple'>."):
         AutoClassifier(objective='F1', max_time=(30, 'minutes'))
+
+
+def test_plot_iterations_max_pipelines(X_y):
+    X, y = X_y
+
+    clf = AutoClassifier(objective="f1", max_pipelines=3)
+    clf.fit(X, y)
+    plot = clf.plot.search_iteration_plot()
+    plot_data = plot.data[0]
+    x = pd.Series(plot_data['x'])
+    y = pd.Series(plot_data['y'])
+
+    assert isinstance(plot, go.Figure)
+    assert x.is_monotonic_increasing
+    assert y.is_monotonic_increasing
+    assert len(x) == 3
+    assert len(y) == 3
+
+
+def test_plot_iterations_max_time(X_y):
+    X, y = X_y
+    clf = AutoClassifier(objective="f1", max_time=10)
+    clf.fit(X, y, show_iteration_plot=False)
+    plot = clf.plot.search_iteration_plot()
+    plot_data = plot.data[0]
+    x = pd.Series(plot_data['x'])
+    y = pd.Series(plot_data['y'])
+
+    assert isinstance(plot, go.Figure)
+    assert x.is_monotonic_increasing
+    assert y.is_monotonic_increasing
+    assert len(x) > 0
+    assert len(y) > 0
diff --git a/evalml/tests/automl_tests/test_autoregressor.py b/evalml/tests/automl_tests/test_autoregressor.py
index 1352da1aff..e27603631d 100644
--- a/evalml/tests/automl_tests/test_autoregressor.py
+++ b/evalml/tests/automl_tests/test_autoregressor.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import plotly.graph_objects as go
 import pytest
 
 from evalml import AutoRegressor
@@ -81,3 +82,36 @@ def add_result_callback(results, trained_pipeline, counts=counts):
 
     assert counts["start_iteration_callback"] == max_pipelines
     assert counts["add_result_callback"] == max_pipelines
+
+
+def test_plot_iterations_max_pipelines(X_y):
+    X, y = X_y
+
+    clf = AutoRegressor(max_pipelines=3)
+    clf.fit(X, y)
+    plot = clf.plot.search_iteration_plot()
+    plot_data = plot.data[0]
+    x = pd.Series(plot_data['x'])
+    y = pd.Series(plot_data['y'])
+
+    assert isinstance(plot, go.Figure)
+    assert x.is_monotonic_increasing
+    assert y.is_monotonic_increasing
+    assert len(x) == 3
+    assert len(y) == 3
+
+
+def test_plot_iterations_max_time(X_y):
+    X, y = X_y
+    clf = AutoRegressor(max_time=10)
+    clf.fit(X, y, show_iteration_plot=False)
+    plot = clf.plot.search_iteration_plot()
+    plot_data = plot.data[0]
+    x = pd.Series(plot_data['x'])
+    y = pd.Series(plot_data['y'])
+
+    assert isinstance(plot, go.Figure)
+    assert x.is_monotonic_increasing
+    assert y.is_monotonic_increasing
+    assert len(x) > 0
+    assert len(y) > 0
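The new tests in test_autoclassifier.py and test_autoregressor.py pin down the public contract: fit() accepts show_iteration_plot to suppress the live widget, and search_iteration_plot() then returns a plain plotly Figure whose first trace (best score per iteration) is monotonic. Roughly, the intended usage looks like the sketch below; the breast-cancer dataset stands in for any binary classification problem, and fig.show() assumes plotly 4:

    from sklearn.datasets import load_breast_cancer
    from evalml import AutoClassifier

    X, y = load_breast_cancer(return_X_y=True)

    clf = AutoClassifier(objective="f1", max_pipelines=3)
    clf.fit(X, y, show_iteration_plot=False)  # no live widget during the search

    fig = clf.plot.search_iteration_plot()    # static go.Figure
    fig.show()

    # Passing interactive_plot=True instead displays and returns the
    # live-updating SearchIterationPlot wrapper (Jupyter only).
    # plot_obj = clf.plot.search_iteration_plot(interactive_plot=True)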
diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py
index 923a60649b..d4a57cbb34 100644
--- a/evalml/tests/automl_tests/test_pipeline_search_plots.py
+++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py
@@ -7,7 +7,7 @@
 from evalml.models import AutoClassifier
 from evalml.models.auto_base import AutoBase
-from evalml.models.pipeline_search_plots import PipelineSearchPlots
+from evalml.models.pipeline_search_plots import PipelineSearchPlots, SearchIterationPlot
 from evalml.pipelines import LogisticRegressionPipeline
 from evalml.problem_types import ProblemTypes
 
@@ -169,32 +169,51 @@ def __init__(self):
         search_plots.generate_confusion_matrix(0)
 
 
-def test_plot_iterations(X_y):
-    X, y = X_y
-
-    clf = AutoClassifier(objective="f1", max_pipelines=3)
-    clf.fit(X, y)
-    plot = clf.plot.search_iteration_plot()
-    plot_data = plot.data[0]
-    x = pd.Series(plot_data['x'])
-    y = pd.Series(plot_data['y'])
+def test_search_iteration_plot_class(X_y):
 
-    assert isinstance(plot, go.Figure)
-    assert x.is_monotonic_increasing
-    assert y.is_monotonic_increasing
-    assert len(x) == 3
-    assert len(y) == 3
+    class MockObjective:
+        def __init__(self):
+            self.name = 'Test Objective'
+            self.greater_is_better = True
 
-    X, y = X_y
-    clf2 = AutoClassifier(objective="f1", max_time=10)
-    clf2.fit(X, y, show_iteration_plot=False)
-    plot = clf2.plot.search_iteration_plot()
-    plot_data = plot.data[0]
-    x = pd.Series(plot_data['x'])
-    y = pd.Series(plot_data['y'])
+    class MockResults:
+        def __init__(self):
+            self.objective = MockObjective()
+            self.results = {
+                'pipeline_results': {
+                    2: {
+                        'score': 0.50
+                    },
+                    0: {
+                        'score': 0.60
+                    },
+                    1: {
+                        'score': 0.75
+                    },
+                },
+                'search_order': [1, 2, 0]
+            }
+            self.rankings = pd.DataFrame({
+                'score': [0.75, 0.60, 0.50]
+            })
 
-    assert isinstance(plot, go.Figure)
-    assert x.is_monotonic_increasing
-    assert y.is_monotonic_increasing
-    assert len(x) > 0
-    assert len(y) > 0
+    mock_data = MockResults()
+    plot = SearchIterationPlot(mock_data)
+
+    # Check best score trace
+    plot_data = plot.best_score_by_iter_fig.data[0]
+    x = list(plot_data['x'])
+    y = list(plot_data['y'])
+
+    assert isinstance(plot, SearchIterationPlot)
+    assert x == [0, 1, 2]
+    assert y == [0.60, 0.75, 0.75]
+
+    # Check current score trace
+    plot_data = plot.best_score_by_iter_fig.data[1]
+    x = list(plot_data['x'])
+    y = list(plot_data['y'])
+
+    assert isinstance(plot, SearchIterationPlot)
+    assert x == [1, 2, 0]
+    assert y == [0.75, 0.50, 0.60]

From 3eb4736edbb6cd006fd28b21d02b1316a56a8d57 Mon Sep 17 00:00:00 2001
From: christopherbunn
Date: Thu, 12 Dec 2019 11:12:26 -0500
Subject: [PATCH 25/25] Fixed lint and test errors

---
 .../tests/automl_tests/test_pipeline_search_plots.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py
index d4a57cbb34..6455f695c8 100644
--- a/evalml/tests/automl_tests/test_pipeline_search_plots.py
+++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py
@@ -5,9 +5,11 @@
 import pytest
 from sklearn.model_selection import StratifiedKFold
 
-from evalml.models import AutoClassifier
 from evalml.models.auto_base import AutoBase
-from evalml.models.pipeline_search_plots import PipelineSearchPlots, SearchIterationPlot
+from evalml.models.pipeline_search_plots import (
+    PipelineSearchPlots,
+    SearchIterationPlot
+)
 from evalml.pipelines import LogisticRegressionPipeline
 from evalml.problem_types import ProblemTypes
 
@@ -215,5 +217,5 @@ def __init__(self):
     y = list(plot_data['y'])
 
     assert isinstance(plot, SearchIterationPlot)
-    assert x == [1, 2, 0]
-    assert y == [0.75, 0.50, 0.60]
+    assert x == [0, 1, 2]
+    assert y == [0.60, 0.75, 0.50]
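With patches 24 and 25 in place, SearchIterationPlot is driven entirely by the results dict, which is what makes the mock-based unit test possible. A condensed version of that pattern, a sketch mirroring the test fixtures above (the mock scores and search order are arbitrary), is handy for exercising the plot outside a full search:

    import pandas as pd

    from evalml.models.pipeline_search_plots import SearchIterationPlot

    class MockObjective:
        name = 'Test Objective'
        greater_is_better = True

    class MockResults:
        def __init__(self):
            self.objective = MockObjective()
            self.results = {
                'pipeline_results': {0: {'score': 0.60}, 1: {'score': 0.75}, 2: {'score': 0.50}},
                'search_order': [1, 2, 0],
            }
            self.rankings = pd.DataFrame({'score': [0.75, 0.60, 0.50]})

    plot = SearchIterationPlot(MockResults())

    # data[0] is the best-score trace, data[1] the per-iteration scores;
    # since patch 24 both are sorted by iteration before plotting.
    assert list(plot.best_score_by_iter_fig.data[0].y) == [0.60, 0.75, 0.75]
    assert list(plot.best_score_by_iter_fig.data[1].y) == [0.60, 0.75, 0.50]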