Skip to content

Commit

Permalink
fix: provide better message when failed to fetch resource pool details. (determined-ai#398)
Browse files Browse the repository at this point in the history

* fix: improve error messages when launcher service failed.

* fix: provide better message when failed to fetch resource pool details.
  • Loading branch information
CanmingCobble authored and determined-ci committed Sep 7, 2022
1 parent a4634df commit cb933e5
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions master/internal/rm/dispatcher_resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,10 @@ func (m *dispatcherResourceManager) fetchHpcResourceDetails(ctx *actor.Context)
Impersonate(impersonatedUser).
Execute()
if err != nil {
ctx.Log().Errorf("Failed to launch Manifest.\n%v\n%v", response, err)
ctx.Log().Errorf("Failed to communicate with launcher due to error: "+
"{%v}, response: {%v}. Verify that the launcher service is up and reachable."+
" Try a restart the launcher service followed by a restart of the "+
"determined-master service. ", err, response)
return
}
ctx.Log().Debug(fmt.Sprintf("Launched Manifest with DispatchID %s", dispatchInfo.GetDispatchId()))
Expand Down Expand Up @@ -1187,7 +1190,9 @@ func (m *dispatcherResourceManager) sendManifestToDispatcher(
// So we can show the HTTP status code, if available.
httpStatus = fmt.Sprintf("(HTTP status %d)", response.StatusCode)
}
return "", errors.Wrapf(err, "LaunchApi.LaunchAsync() returned an error %s", httpStatus)
return "", errors.Wrapf(err, "LaunchApi.LaunchAsync() returned an error %s. "+
"Verify that the launcher service is up and reachable. Try a restart the "+
"launcher service followed by a restart of the determined-master service.", httpStatus)
}
return dispatchInfo.GetDispatchId(), nil
}
Expand Down

0 comments on commit cb933e5

Please sign in to comment.