Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve describe_pipeline by using pipeline's new describe method #190

Merged
merged 25 commits into master from improve_describe
Nov 13, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f4738db
moving describe things
angela97lin Nov 6, 2019
dda82e4
changelog~
angela97lin Nov 6, 2019
1cc2fd7
linting
angela97lin Nov 6, 2019
0024fec
updating via comments
angela97lin Nov 6, 2019
2f0dab4
Merge branch 'master' into improve_describe
angela97lin Nov 6, 2019
ef99665
Merge branch 'master' into improve_describe
angela97lin Nov 7, 2019
a4d4b76
remove dict, add to enum
angela97lin Nov 7, 2019
0a7385b
reusing dict in str fcn
angela97lin Nov 8, 2019
3cf720f
Merge branch 'master' into improve_describe
angela97lin Nov 8, 2019
3beeb16
addressing comments on describe
angela97lin Nov 8, 2019
882f065
Merge branch 'improve_describe' of github.com:FeatureLabs/evalml into…
angela97lin Nov 8, 2019
b81d793
updating notebooks
angela97lin Nov 8, 2019
6e83c4f
removing newline
angela97lin Nov 11, 2019
7752fa3
updating to fix warning
angela97lin Nov 11, 2019
5de3c30
updating changelog
angela97lin Nov 11, 2019
cb8fd96
moving typing from transformer to simpleimputer
angela97lin Nov 11, 2019
19072b3
linting
angela97lin Nov 11, 2019
97c1e5c
lint
angela97lin Nov 11, 2019
8c816e7
remove dstr
angela97lin Nov 11, 2019
16b4c86
Merge branch 'master' into improve_describe
angela97lin Nov 11, 2019
4ec77e8
linting
angela97lin Nov 11, 2019
ee36e43
Merge branch 'master' into improve_describe
angela97lin Nov 11, 2019
d1fb91e
adding num features, removing try/except, refreshing notebooks
angela97lin Nov 12, 2019
1dda3c5
Merge branch 'master' into improve_describe
angela97lin Nov 12, 2019
dffa463
deleting unused notebook
angela97lin Nov 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Changelog
* Fixes
* Changes
* Refactoring pipelines :pr:`108`
* Pulling information for describe_pipeline from pipeline's new describe method :pr:`190`
* Documentation Changes
* Testing Changes

Expand Down
19 changes: 4 additions & 15 deletions evalml/models/auto_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,22 +309,11 @@ def describe_pipeline(self, pipeline_id, return_dict=False):
pipeline = self.get_pipeline(pipeline_id)
pipeline_results = self.results[pipeline_id]

self.logger.log_title("Pipeline Description")
pipeline.describe()
self.logger.log_subtitle("\nTraining")

better_string = "lower is better"
if pipeline.objective.greater_is_better:
better_string = "greater is better"

self.logger.log("Pipeline Name: %s" % pipeline.name)
self.logger.log("Model type: %s" % pipeline.model_type)
self.logger.log("Objective: %s (%s)" % (pipeline.objective.name, better_string))
self.logger.log("Total training time (including CV): %.1f seconds\n" % pipeline_results["training_time"])

self.logger.log_subtitle("Parameters")
for item in pipeline_results["parameters"].items():
self.logger.log("• %s: %s" % item)

self.logger.log_subtitle("\nCross Validation")
self.logger.log("Total training time (including CV): %.1f seconds" % pipeline_results["training_time"])
self.logger.log_subtitle("\nCross Validation", underline="-")

if pipeline_results["high_variance_cv"]:
self.logger.log("Warning! High variance within cross validation scores. " +
Expand Down
30 changes: 21 additions & 9 deletions evalml/pipelines/pipeline_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@

from .components import Estimator, handle_component

from evalml.model_types import ModelTypes
from evalml.objectives import get_objective
from evalml.problem_types import ProblemTypes
from evalml.utils import Logger


class PipelineBase:

model_type_dict = {ModelTypes.RANDOM_FOREST: "Random Forest",
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
ModelTypes.XGBOOST: "XGBoost Classifier",
ModelTypes.LINEAR_MODEL: "Linear Model"}
problem_type_dict = {ProblemTypes.BINARY: "Binary Classification",
ProblemTypes.MULTICLASS: "Multiclass Classification",
ProblemTypes.REGRESSION: "Regression"}

def __init__(self, objective, component_list, n_jobs=-1, random_state=0):
"""Machine learning pipeline made out of transformers and a estimator.

Expand Down Expand Up @@ -76,24 +86,26 @@ def get_component(self, name):
return next((component for component in self.component_list if component.name == name), None)

def describe(self, return_dict=False):
"""Outputs pipeline details including component parameters and cross validation information

Returns:
"""Outputs pipeline details including component parameters

None
Arguments:
return_dict (bool): If True, return dictionary of information
about pipeline. Defaults to False.

Returns:
dictionary of all component parameters if return_dict is True, else None
"""
title = "Pipeline: " + self.name
self.logger.log_title(title)

self.logger.log_title(self.name)
self.logger.log("Problem Types: {}".format(', '.join([self.problem_type_dict[problem_type] for problem_type in self.problem_types])))
kmax12 marked this conversation as resolved.
Show resolved Hide resolved
self.logger.log("Model Type: {}".format(self.model_type_dict[self.model_type]))
better_string = "lower is better"
if self.objective.greater_is_better:
better_string = "greater is better"
objective_string = "Objective: {} ({})".format(self.objective.name, better_string)
objective_string = "Objective to Optimize: {} ({})".format(self.objective.name, better_string)
self.logger.log(objective_string)

# Summary of steps
self.logger.log_subtitle("Pipeline Steps")
self.logger.log_subtitle("\nPipeline Steps")
for number, component in enumerate(self.component_list, 1):
component_string = str(number) + ". " + component.name
self.logger.log(component_string)
Expand Down