Skip to content

Commit

Permalink
AutoML aggregate exception (#5631)
Browse files Browse the repository at this point in the history
* added check for aggregate exception

* Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs

Co-authored-by: Eric Erhardt <eric.erhardt@microsoft.com>

* Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs

Co-authored-by: Eric Erhardt <eric.erhardt@microsoft.com>

* pulled message out to a private variable so it's not duplicated

* Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs

Co-authored-by: Justin Ormont <justinormont@users.noreply.github.com>

Co-authored-by: Eric Erhardt <eric.erhardt@microsoft.com>
Co-authored-by: Justin Ormont <justinormont@users.noreply.github.com>
  • Loading branch information
3 people committed Mar 2, 2021
1 parent cd7c46e commit f93fa09
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions src/Microsoft.ML.AutoML/Experiment/Experiment.cs
Expand Up @@ -27,6 +27,10 @@ internal class Experiment<TRunDetail, TMetrics> where TRunDetail : RunDetail
private readonly IRunner<TRunDetail> _runner;
private readonly IList<SuggestedPipelineRunDetail> _history;
private readonly IChannel _logger;

// Format string for the warning logged when a cancellation is caught after the maximum
// experiment time was reached; {0} receives the caught exception's message.
// Note the trailing space before the concatenation so "time" and "was" do not run together.
private readonly string _operationCancelledMessage = "OperationCanceledException has been caught after maximum experiment time " +
    "was reached, and the running MLContext was stopped. Details: {0}";

private Timer _maxExperimentTimeTimer;
private Timer _mainContextCanceledTimer;
private bool _experimentTimerExpired;
Expand Down Expand Up @@ -192,10 +196,24 @@ public IList<TRunDetail> Execute()
// This exception is thrown when the IHost/MLContext of the trainer is canceled due to
// reaching maximum experiment time. Simply catch this exception and return finished
// iteration results.
_logger.Warning("OperationCanceledException has been caught after maximum experiment time" +
"was reached, and the running MLContext was stopped. Details: {0}", e.Message);
_logger.Warning(_operationCancelledMessage, e.Message);
return iterationResults;
}
catch (AggregateException e)
{
    // Trainers that run work in parallel (FastTree, for example) can raise several
    // OperationCanceledExceptions at once when the IHost/MLContext is canceled after the
    // maximum experiment time is reached. Those arrive wrapped in an AggregateException and
    // therefore miss the preceding catch block, so they are handled here.
    //
    // Flatten() unwraps nested AggregateExceptions so that cancellations raised from nested
    // parallel work are recognized as well. The Count check is needed because All() is
    // vacuously true for an empty collection, and an AggregateException without any inner
    // exceptions must not be mistaken for a cancellation.
    var innerExceptions = e.Flatten().InnerExceptions;
    if (innerExceptions.Count > 0 &&
        innerExceptions.All(exception => exception is OperationCanceledException))
    {
        // Same outcome as the single-exception case: log a warning and return the
        // iteration results gathered so far.
        _logger.Warning(_operationCancelledMessage, e.Message);
        return iterationResults;
    }

    // Anything else is a genuine failure; rethrow preserving the original stack trace.
    throw;
}
} while (_history.Count < _experimentSettings.MaxModels &&
!_experimentSettings.CancellationToken.IsCancellationRequested &&
!_experimentTimerExpired);
Expand Down

0 comments on commit f93fa09

Please sign in to comment.