Skip to content

Commit

Permalink
Merge #3114
Browse files Browse the repository at this point in the history
3114: Improve waiting for SSH r=townsend2010 a=ricab

Two main changes to improve the step of waiting for SSH, when starting/launching instances:

1. Increase the time allotted for each iteration of the loop to wait for SSH, to 1 second. Log each such iteration of the loop.
2. Retry only on specific exceptions, logging them (with `trace` verbosity, since this happens every second), and letting the rest through, in the hope that this may shed some light into cases where instances end up in unknown state.



Co-authored-by: Ricardo Abreu <ricardo.abreu@canonical.com>
  • Loading branch information
bors[bot] and ricab committed Jun 30, 2023
2 parents c334808 + d86eaf1 commit 50ed575
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 5 deletions.
41 changes: 41 additions & 0 deletions include/multipass/exceptions/internal_timeout_exception.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) Canonical, Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

#ifndef MULTIPASS_INTERNAL_TIMEOUT_EXCEPTION_H
#define MULTIPASS_INTERNAL_TIMEOUT_EXCEPTION_H

#include <chrono>
#include <stdexcept>
#include <string>

#include <multipass/format.h>

namespace multipass
{

class InternalTimeoutException : public std::runtime_error
{
public:
InternalTimeoutException(const std::string& action, std::chrono::milliseconds timeout)
: std::runtime_error{fmt::format("Could not {} within {}ms", action, timeout.count())}
{
}
};

} // namespace multipass

#endif // MULTIPASS_INTERNAL_TIMEOUT_EXCEPTION_H
34 changes: 34 additions & 0 deletions include/multipass/exceptions/ip_unavailable_exception.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (C) Canonical, Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

#ifndef MULTIPASS_IP_UNAVAILABLE_EXCEPTION_H
#define MULTIPASS_IP_UNAVAILABLE_EXCEPTION_H

#include <stdexcept>
#include <string>

namespace multipass
{

class IPUnavailableException : public std::runtime_error
{
using std::runtime_error::runtime_error;
};

} // namespace multipass

#endif // MULTIPASS_IP_UNAVAILABLE_EXCEPTION_H
5 changes: 3 additions & 2 deletions src/platform/backends/shared/shared_backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#ifndef MULTIPASS_SHARED_BACKEND_UTILS_H
#define MULTIPASS_SHARED_BACKEND_UTILS_H

#include <multipass/exceptions/internal_timeout_exception.h>
#include <multipass/exceptions/start_exception.h>
#include <multipass/utils.h>
#include <multipass/virtual_machine.h>
Expand Down Expand Up @@ -50,9 +51,9 @@ std::string ip_address_for(VirtualMachine* virtual_machine, Callable&& get_ip, s
}
};

auto on_timeout = [virtual_machine] {
auto on_timeout = [virtual_machine, &timeout] {
virtual_machine->state = VirtualMachine::State::unknown;
throw std::runtime_error("failed to determine IP address");
throw InternalTimeoutException{"determine IP address", timeout};
};

utils::try_action_for(on_timeout, timeout, action);
Expand Down
32 changes: 29 additions & 3 deletions src/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <multipass/constants.h>
#include <multipass/exceptions/autostart_setup_exception.h>
#include <multipass/exceptions/exitless_sshprocess_exception.h>
#include <multipass/exceptions/internal_timeout_exception.h>
#include <multipass/exceptions/ip_unavailable_exception.h>
#include <multipass/exceptions/sshfs_missing_error.h>
#include <multipass/file_ops.h>
#include <multipass/format.h>
Expand Down Expand Up @@ -75,6 +77,15 @@ QString find_autostart_target(const QString& subdir, const QString& autostart_fi

return target_path;
}

template <typename ExceptionT>
mp::utils::TimeoutAction log_and_retry(const ExceptionT& e, const mp::VirtualMachine* vm,
mpl::Level log_level = mpl::Level::trace)
{
assert(vm);
mpl::log(log_level, vm->vm_name, e.what());
return mp::utils::TimeoutAction::retry;
};
} // namespace

mp::Utils::Utils(const Singleton<Utils>::PrivatePass& pass) noexcept : Singleton<Utils>::Singleton{pass}
Expand Down Expand Up @@ -312,23 +323,38 @@ bool mp::utils::valid_mac_address(const std::string& mac)
void mp::utils::wait_until_ssh_up(VirtualMachine* virtual_machine, std::chrono::milliseconds timeout,
std::function<void()> const& ensure_vm_is_running)
{
static constexpr auto wait_step = 1s;
mpl::log(mpl::Level::debug, virtual_machine->vm_name, "Waiting for SSH to be up");

auto action = [virtual_machine, &ensure_vm_is_running] {
ensure_vm_is_running();
try
{
mp::SSHSession session{virtual_machine->ssh_hostname(1ms), virtual_machine->ssh_port()};
mp::SSHSession session{virtual_machine->ssh_hostname(wait_step), virtual_machine->ssh_port()};

std::lock_guard<decltype(virtual_machine->state_mutex)> lock{virtual_machine->state_mutex};
virtual_machine->state = VirtualMachine::State::running;
virtual_machine->update_state();
return mp::utils::TimeoutAction::done;
}
catch (const std::exception&)
catch (const InternalTimeoutException& e)
{
return mp::utils::TimeoutAction::retry;
return log_and_retry(e, virtual_machine);
}
catch (const SSHException& e)
{
return log_and_retry(e, virtual_machine);
}
catch (const IPUnavailableException& e)
{
return log_and_retry(e, virtual_machine);
}
catch (const std::runtime_error& e) // transitioning away from catching generic runtime errors
{ // TODO remove once we're confident this is an anomaly
return log_and_retry(e, virtual_machine, mpl::Level::warning);
}
};

auto on_timeout = [virtual_machine] {
std::lock_guard<decltype(virtual_machine->state_mutex)> lock{virtual_machine->state_mutex};
virtual_machine->state = VirtualMachine::State::unknown;
Expand Down

0 comments on commit 50ed575

Please sign in to comment.