Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show interactive iteration vs. score plot when using fit() #134

Merged
merged 38 commits into from Dec 12, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
1933122
Create static plot of iterations during fit()
christopherbunn Oct 16, 2019
f4ce551
Changed to interactive iter vs score plot
christopherbunn Oct 16, 2019
e0afe11
Changed to show best score in iteration
christopherbunn Oct 18, 2019
eeec53d
Support for metrics where lower is better
christopherbunn Oct 21, 2019
8d83385
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Oct 21, 2019
19e640d
Separated plotting and calculating into two separate functions
christopherbunn Oct 21, 2019
c231be7
Fixed blank figures bug
christopherbunn Oct 22, 2019
c06e56e
Created test for plotting iterations
christopherbunn Oct 22, 2019
2b5d308
Increased pipeline number in test
christopherbunn Oct 22, 2019
2b1e8df
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Oct 22, 2019
e351f79
Merge branch 'master' into perf-by-iter-plot
christopherbunn Oct 24, 2019
72c34ab
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Oct 28, 2019
a85048e
Updated changelog
christopherbunn Oct 28, 2019
70e5b86
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Nov 11, 2019
c64e3a8
Moved plotting functionality to plotly
christopherbunn Nov 12, 2019
dfeb1bc
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Nov 12, 2019
195732b
Fixed linting and test errors
christopherbunn Nov 12, 2019
d577d0b
Merge github.com:FeatureLabs/evalml into perf-by-iter-plot
christopherbunn Nov 12, 2019
12c2ca3
Created plot with only max_time
christopherbunn Nov 13, 2019
68ac474
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Dec 6, 2019
2d088d1
Merge branches 'perf-by-iter-plot' and 'master' of github.com:Feature…
christopherbunn Dec 6, 2019
4426656
Moved iteration plotting to pipeline search plots
christopherbunn Dec 6, 2019
61379c7
Fixed lint and test issues
christopherbunn Dec 6, 2019
2af8700
Merge branch 'master' into perf-by-iter-plot
christopherbunn Dec 6, 2019
3302116
Fixed other lint error
christopherbunn Dec 6, 2019
75f1b11
Introduced new SearchIterationPlot obj
christopherbunn Dec 9, 2019
15b054a
Updated plot axis to show only int values
christopherbunn Dec 9, 2019
2bb414f
Added gray dot to indicate current iteration
christopherbunn Dec 9, 2019
38dc29d
Updated test to check monotonic
christopherbunn Dec 9, 2019
e33fcfa
Added trace for current iter and restructured plot in AutoBase
christopherbunn Dec 9, 2019
7c314ca
Merge branch 'master' into perf-by-iter-plot
christopherbunn Dec 9, 2019
d8163c1
Merge branch 'master' into perf-by-iter-plot
christopherbunn Dec 10, 2019
b04827f
Moved plotting out of _do_iteration
christopherbunn Dec 10, 2019
25da4a0
Merge branch 'master' of github.com:FeatureLabs/evalml into perf-by-i…
christopherbunn Dec 10, 2019
3240805
Changed update to use curr_score info from results['search_order']
christopherbunn Dec 10, 2019
ee3bd45
Fixed iter score appearance bug
christopherbunn Dec 11, 2019
7f32c49
Overhauled plot object behavior to reload every iteration
christopherbunn Dec 12, 2019
3eb4736
Fixed lint and test errors
christopherbunn Dec 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 5 additions & 6 deletions evalml/models/auto_base.py
Expand Up @@ -150,7 +150,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot=
pbar._instances.clear()
start = time.time()
while time.time() - start <= self.max_time:
self._do_iteration(X, y, pbar, raise_errors, plot)
self._do_iteration(X, y, pbar, raise_errors)
plot.update()
pbar.close()
else:
pbar = tqdm(range(self.max_pipelines), disable=not self.verbose, file=stdout, bar_format='{desc} {percentage:3.0f}%|{bar}| Elapsed:{elapsed}')
Expand All @@ -162,7 +163,8 @@ def fit(self, X, y, feature_types=None, raise_errors=False, show_iteration_plot=
pbar.close()
self.logger.log("\n\nMax time elapsed. Stopping search early.")
break
self._do_iteration(X, y, pbar, raise_errors, plot)
self._do_iteration(X, y, pbar, raise_errors)
plot.update()
pbar.close()

self.logger.log("\n✔ Optimization finished")
Expand All @@ -176,7 +178,7 @@ def _check_multiclass(self, y):
if ProblemTypes.MULTICLASS not in obj.problem_types:
raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(obj.name))

def _do_iteration(self, X, y, pbar, raise_errors, plot):
def _do_iteration(self, X, y, pbar, raise_errors):
# determine which pipeline to build
pipeline_class = self._select_pipeline()

Expand Down Expand Up @@ -237,9 +239,6 @@ def _do_iteration(self, X, y, pbar, raise_errors, plot):
training_time=training_time,
cv_data=cv_data)

# Update plot with new score
plot.update()

desc = "✔" + desc[1:]
pbar.set_description_str(desc=desc, refresh=True)
if self.verbose: # To force new line between progress bar iterations
Expand Down
10 changes: 5 additions & 5 deletions evalml/models/pipeline_search_plots.py
Expand Up @@ -23,7 +23,6 @@ def __init__(self, data, show_plot=True):
'title': title,
'xaxis': {
'title': 'Iteration',
'tickformat': ',d',
'rangemode': 'tozero'
},
'yaxis': {
Expand All @@ -34,8 +33,9 @@ def __init__(self, data, show_plot=True):
self.best_score_by_iter_fig.update_layout(showlegend=False)

def update(self):
iter_idx = self.data.rankings['id'].idxmax()
self.curr_iteration_scores.append(self.data.rankings['score'].iloc[iter_idx])
iter_idx = self.data.results['search_order']
pipeline_res = self.data.results['pipeline_results']
iter_scores = [pipeline_res[i]['score'] for i in range(len(pipeline_res))]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure these scores are in the same order as iter_idx, shouldn't this be:

iter_scores = [pipeline_res[i]['score'] for i in iter_idx]


if self.data.objective.greater_is_better:
iter_max_score = self.data.rankings['score'].max()
Expand All @@ -45,8 +45,8 @@ def update(self):

# Update entire line plot
curr_score_trace = self.best_score_by_iter_fig.data[1]
curr_score_trace.x = list(range(len(self.curr_iteration_scores)))
curr_score_trace.y = self.curr_iteration_scores
curr_score_trace.x = iter_idx
curr_score_trace.y = iter_scores

best_score_trace = self.best_score_by_iter_fig.data[0]
best_score_trace.x = list(range(len(self.best_iteration_scores)))
Expand Down