Skip to content

Commit

Permalink
server: event for HA vm start (#9202)
Browse files Browse the repository at this point in the history
  • Loading branch information
shwstppr committed Jun 26, 2024
1 parent 7a8066d commit b22315d
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,22 @@
package com.cloud.network;

import java.util.List;
import java.util.Map;

import org.apache.cloudstack.api.command.admin.router.UpgradeRouterCmd;
import org.apache.cloudstack.api.command.admin.router.UpgradeRouterTemplateCmd;

import com.cloud.deploy.DeploymentPlanner;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.network.router.VirtualRouter;
import com.cloud.user.Account;
import com.cloud.utils.Pair;
import com.cloud.vm.Nic;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineProfile;

public interface VirtualNetworkApplianceService {
/**
Expand Down Expand Up @@ -62,6 +67,10 @@ public interface VirtualNetworkApplianceService {

VirtualRouter startRouter(long id) throws ResourceUnavailableException, InsufficientCapacityException, ConcurrentOperationException;

/**
 * Starts the given router VM for the HA (high availability) restart path.
 *
 * @param vm the router VM to start
 * @param params start parameters keyed by {@link VirtualMachineProfile.Param}, handed to the start operation
 * @param planner deployment planner to use for the start.
 *                NOTE(review): the HA caller passes {@code null} on its first attempt, which its own
 *                comment describes as "the original planner" -- confirm null handling in the implementation
 * @throws InsufficientCapacityException see the implementation for the exact conditions
 * @throws ResourceUnavailableException see the implementation for the exact conditions
 * @throws ConcurrentOperationException see the implementation for the exact conditions
 * @throws OperationTimedoutException see the implementation for the exact conditions
 */
void startRouterForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params, DeploymentPlanner planner)
throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException,
OperationTimedoutException;

VirtualRouter destroyRouter(long routerId, Account caller, Long callerUserId) throws ResourceUnavailableException, ConcurrentOperationException;

VirtualRouter findRouter(long routerId);
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/com/cloud/vm/UserVmService.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.cloudstack.api.command.user.vmgroup.DeleteVMGroupCmd;

import com.cloud.dc.DataCenter;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.ManagementServerException;
Expand Down Expand Up @@ -113,6 +114,10 @@ UserVm startVirtualMachine(StartVMCmd cmd) throws StorageUnavailableException, E

void startVirtualMachine(UserVm vm) throws OperationTimedoutException, ResourceUnavailableException, InsufficientCapacityException;

/**
 * Starts the given VM for the HA (high availability) restart path.
 *
 * @param vm the VM to start
 * @param params start parameters keyed by {@link VirtualMachineProfile.Param}, handed to the start operation
 * @param planner deployment planner to use for the start.
 *                NOTE(review): the HA caller passes {@code null} on its first attempt, which its own
 *                comment describes as "the original planner" -- confirm null handling in the implementation
 * @throws InsufficientCapacityException see the implementation for the exact conditions
 * @throws ResourceUnavailableException see the implementation for the exact conditions
 * @throws ConcurrentOperationException see the implementation for the exact conditions
 * @throws OperationTimedoutException see the implementation for the exact conditions
 */
void startVirtualMachineForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException,
ConcurrentOperationException, OperationTimedoutException;

UserVm rebootVirtualMachine(RebootVMCmd cmd) throws InsufficientCapacityException, ResourceUnavailableException;

UserVm updateVirtualMachine(UpdateVMCmd cmd) throws ResourceUnavailableException, InsufficientCapacityException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4802,6 +4802,18 @@ protected void HandlePowerStateReport(final String subject, final String senderA
}
}

/**
 * Picks the {@link ApiCommandResourceType} that corresponds to the type of the given VM.
 *
 * Domain routers and console proxies map to their own resource types, secondary storage
 * VMs map to {@code SystemVm}, and every other VM type maps to {@code VirtualMachine}.
 *
 * @param vm the VM whose type is inspected
 * @return the resource type matching {@code vm.getType()}
 */
private ApiCommandResourceType getApiCommandResourceTypeForVm(VirtualMachine vm) {
    final ApiCommandResourceType resourceType;
    switch (vm.getType()) {
        case DomainRouter:
            resourceType = ApiCommandResourceType.DomainRouter;
            break;
        case ConsoleProxy:
            resourceType = ApiCommandResourceType.ConsoleProxy;
            break;
        case SecondaryStorageVm:
            resourceType = ApiCommandResourceType.SystemVm;
            break;
        default:
            // Anything that is not one of the system VM types above.
            resourceType = ApiCommandResourceType.VirtualMachine;
            break;
    }
    return resourceType;
}

private void handlePowerOnReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
Host host = _hostDao.findById(vm.getHostId());
Host poweredHost = _hostDao.findById(vm.getPowerHostId());
Expand Down Expand Up @@ -4849,7 +4861,7 @@ private void handlePowerOnReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
+ " -> Running) from out-of-context transition. VM network environment may need to be reset");

ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, vm.getDomainId(),
EventTypes.EVENT_VM_START, "Out of band VM power on", vm.getId(), ApiCommandResourceType.VirtualMachine.toString());
EventTypes.EVENT_VM_START, "Out of band VM power on", vm.getId(), getApiCommandResourceTypeForVm(vm).toString());
s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor");
break;

Expand Down Expand Up @@ -4884,7 +4896,7 @@ private void handlePowerOffReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
case Running:
case Stopped:
ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,vm.getDomainId(),
EventTypes.EVENT_VM_STOP, "Out of band VM power off", vm.getId(), ApiCommandResourceType.VirtualMachine.toString());
EventTypes.EVENT_VM_STOP, "Out of band VM power off", vm.getId(), getApiCommandResourceTypeForVm(vm).toString());
case Migrating:
if (s_logger.isInfoEnabled()) {
s_logger.info(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
import javax.naming.ConfigurationException;

import org.apache.cloudstack.consoleproxy.ConsoleAccessManager;
import org.apache.log4j.Logger;

import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.security.keys.KeysManager;
import org.apache.cloudstack.framework.security.keystore.KeystoreManager;
import org.apache.log4j.Logger;

import com.cloud.agent.AgentManager;
import com.cloud.agent.api.GetVncPortAnswer;
import com.cloud.agent.api.GetVncPortCommand;
import com.cloud.agent.api.StartupProxyCommand;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.host.HostVO;
import com.cloud.host.dao.HostDao;
import com.cloud.info.ConsoleProxyInfo;
Expand All @@ -41,7 +41,9 @@
import com.cloud.vm.ConsoleProxyVO;
import com.cloud.vm.UserVmVO;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineManager;
import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.dao.ConsoleProxyDao;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.VMInstanceDao;
Expand Down Expand Up @@ -182,6 +184,11 @@ public ConsoleProxyVO startProxy(long proxyVmId, boolean ignoreRestartSetting) {
return null;
}

/**
 * HA start hook for a console proxy VM.
 *
 * This implementation has an empty body: it performs no action and returns normally
 * regardless of the arguments.
 * NOTE(review): presumably intentional for this manager variant -- confirm.
 */
@Override
public void startProxyForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
DeploymentPlanner planner) {
}

@Override
public boolean destroyProxy(long proxyVmId) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,20 @@
// under the License.
package com.cloud.consoleproxy;

import com.cloud.utils.component.Manager;
import com.cloud.vm.ConsoleProxyVO;
import java.util.Map;

import org.apache.cloudstack.framework.config.ConfigKey;

import com.cloud.deploy.DeploymentPlanner;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.utils.component.Manager;
import com.cloud.vm.ConsoleProxyVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineProfile;

public interface ConsoleProxyManager extends Manager, ConsoleProxyService {

int DEFAULT_PROXY_CAPACITY = 50;
Expand Down Expand Up @@ -53,6 +62,10 @@ public interface ConsoleProxyManager extends Manager, ConsoleProxyService {

ConsoleProxyVO startProxy(long proxyVmId, boolean ignoreRestartSetting);

/**
 * Starts the given console proxy VM for the HA (high availability) restart path.
 *
 * @param vm the console proxy VM to start
 * @param params start parameters keyed by {@link VirtualMachineProfile.Param}
 * @param planner deployment planner to use for the start.
 *                NOTE(review): the HA caller passes {@code null} on its first attempt -- confirm
 *                null handling in each implementation
 * @throws InsufficientCapacityException see the implementation for the exact conditions
 * @throws ResourceUnavailableException see the implementation for the exact conditions
 * @throws ConcurrentOperationException see the implementation for the exact conditions
 * @throws OperationTimedoutException see the implementation for the exact conditions
 */
void startProxyForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params, DeploymentPlanner planner)
throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException,
OperationTimedoutException;

boolean stopProxy(long proxyVmId);

boolean rebootProxy(long proxyVmId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@
import com.cloud.dc.dao.HostPodDao;
import com.cloud.deploy.DataCenterDeployment;
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.event.ActionEvent;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientAddressCapacityException;
import com.cloud.exception.InsufficientCapacityException;
Expand Down Expand Up @@ -493,6 +496,14 @@ public ConsoleProxyVO startProxy(long proxyVmId, boolean ignoreRestartSetting) {
return null;
}

/**
 * Starts the given console proxy VM for the HA restart path.
 *
 * Delegates to the virtual machine manager's {@code advanceStart}, passing the VM's UUID
 * together with the supplied parameters and planner unchanged. The method carries an
 * {@code @ActionEvent} annotation with event type {@code EVENT_PROXY_START}.
 *
 * @param vm the console proxy VM to start
 * @param params start parameters forwarded as-is
 * @param planner deployment planner forwarded as-is
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_PROXY_START, eventDescription = "restarting console proxy VM for HA", async = true)
public void startProxyForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
        DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException,
        ConcurrentOperationException, OperationTimedoutException {
    final String proxyUuid = vm.getUuid();
    virtualMachineManager.advanceStart(proxyUuid, params, planner);
}

public ConsoleProxyVO assignProxyFromRunningPool(long dataCenterId) {

if (s_logger.isDebugEnabled()) {
Expand Down
50 changes: 44 additions & 6 deletions server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import javax.inject.Inject;
import javax.naming.ConfigurationException;

import org.apache.cloudstack.api.ApiCommandResourceType;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreDriver;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProvider;
Expand Down Expand Up @@ -66,13 +68,14 @@
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.network.VpcVirtualNetworkApplianceService;
import com.cloud.resource.ResourceManager;
import com.cloud.server.ManagementServer;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.service.dao.ServiceOfferingDao;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.storage.StorageManager;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.storage.dao.GuestOSCategoryDao;
import com.cloud.storage.dao.GuestOSDao;
import com.cloud.storage.dao.VolumeDao;
Expand All @@ -81,6 +84,7 @@
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.vm.UserVmManager;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineManager;
Expand Down Expand Up @@ -144,6 +148,10 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
VolumeDao volumeDao;
@Inject
DataStoreProviderManager dataStoreProviderMgr;
@Inject
VpcVirtualNetworkApplianceService routerService;
@Inject
UserVmManager userVmManager;

long _serverId;

Expand Down Expand Up @@ -437,6 +445,36 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) {

}

/**
 * Starts a VM for HA by dispatching on its type to the matching manager or service.
 *
 * A call context is registered for the duration of the start, with the VM's id as the
 * event resource id, and is always unregistered afterwards. The event resource type
 * begins as {@code VirtualMachine} and is overridden for routers, console proxies and
 * secondary storage VMs before the start is delegated.
 *
 * @param vm the VM to start
 * @param params start parameters forwarded to the selected start method
 * @param planner deployment planner forwarded to the selected start method
 */
private void startVm(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
        DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException,
        ConcurrentOperationException, OperationTimedoutException {
    final CallContext haContext = CallContext.register(CallContext.current(), ApiCommandResourceType.VirtualMachine);
    haContext.setEventResourceId(vm.getId());
    try {
        switch (vm.getType()) {
            case DomainRouter:
                haContext.setEventResourceType(ApiCommandResourceType.DomainRouter);
                routerService.startRouterForHA(vm, params, planner);
                break;
            case ConsoleProxy:
                haContext.setEventResourceType(ApiCommandResourceType.ConsoleProxy);
                consoleProxyManager.startProxyForHA(vm, params, planner);
                break;
            case SecondaryStorageVm:
                haContext.setEventResourceType(ApiCommandResourceType.SystemVm);
                secondaryStorageVmManager.startSecStorageVmForHA(vm, params, planner);
                break;
            case User:
                // Keeps the VirtualMachine resource type chosen at registration.
                userVmManager.startVirtualMachineForHA(vm, params, planner);
                break;
            default:
                // Any other type is started directly through the VM manager.
                _itMgr.advanceStart(vm.getUuid(), params, planner);
                break;
        }
    } finally {
        // Runs on both success and failure so the registered context is not left behind.
        CallContext.unregister();
    }
}

protected Long restart(final HaWorkVO work) {
s_logger.debug("RESTART with HAWORK");
List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
Expand Down Expand Up @@ -628,10 +666,10 @@ protected Long restart(final HaWorkVO work) {
}
}
// First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency.
_itMgr.advanceStart(vm.getUuid(), params, null);
}catch (InsufficientCapacityException e){
startVm(vm, params, null);
} catch (InsufficientCapacityException e){
s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
_itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0));
startVm(vm, params, _haPlanners.get(0));
}

VMInstanceVO started = _instanceDao.findById(vm.getId());
Expand All @@ -653,15 +691,15 @@ protected Long restart(final HaWorkVO work) {
} catch (final ResourceUnavailableException e) {
s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
_alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " +
hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
hostDesc, "The resource is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
} catch (ConcurrentOperationException e) {
s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
_alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " +
hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
} catch (OperationTimedoutException e) {
s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
_alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " +
hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
hostDesc, "The operation timed out while trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
}
vm = _itMgr.findById(vm.getId());
work.setUpdateTime(vm.getUpdated());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
import com.cloud.dc.dao.HostPodDao;
import com.cloud.dc.dao.VlanDao;
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.domain.Domain;
import com.cloud.event.ActionEvent;
import com.cloud.event.ActionEventUtils;
Expand Down Expand Up @@ -3010,6 +3011,14 @@ public VirtualRouter startRouter(final long routerId, final boolean reprogramNet
return virtualRouter;
}

/**
 * Starts the given router VM for the HA restart path.
 *
 * Delegates to {@code _itMgr.advanceStart}, passing the VM's UUID together with the
 * supplied parameters and planner unchanged. The method carries an {@code @ActionEvent}
 * annotation with event type {@code EVENT_ROUTER_START}.
 *
 * @param vm the router VM to start
 * @param params start parameters forwarded as-is
 * @param planner deployment planner forwarded as-is
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_ROUTER_START, eventDescription = "restarting router VM for HA", async = true)
public void startRouterForHA(VirtualMachine vm, Map<Param, Object> params, DeploymentPlanner planner)
        throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException,
        OperationTimedoutException {
    final String routerUuid = vm.getUuid();
    _itMgr.advanceStart(routerUuid, params, planner);
}

@Override
public List<VirtualRouter> getRoutersForNetwork(final long networkId) {
final List<DomainRouterVO> routers = _routerDao.findByNetwork(networkId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,24 @@
package com.cloud.storage.secondary;

import java.util.List;
import java.util.Map;

import org.apache.cloudstack.framework.config.ConfigKey;

import com.cloud.agent.api.Command;
import com.cloud.agent.api.StartupCommand;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.HostVO;
import com.cloud.utils.Pair;
import com.cloud.utils.component.Manager;
import com.cloud.vm.SecondaryStorageVm;
import com.cloud.vm.SecondaryStorageVmVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineProfile;

public interface SecondaryStorageVmManager extends Manager {

Expand All @@ -47,6 +55,10 @@ public interface SecondaryStorageVmManager extends Manager {

public SecondaryStorageVmVO startSecStorageVm(long ssVmVmId);

/**
 * Starts the given secondary storage VM for the HA (high availability) restart path.
 *
 * @param vm the secondary storage VM to start
 * @param params start parameters keyed by {@link VirtualMachineProfile.Param}
 * @param planner deployment planner to use for the start.
 *                NOTE(review): the HA caller passes {@code null} on its first attempt -- confirm
 *                null handling in the implementation
 * @throws InsufficientCapacityException see the implementation for the exact conditions
 * @throws ResourceUnavailableException see the implementation for the exact conditions
 * @throws ConcurrentOperationException see the implementation for the exact conditions
 * @throws OperationTimedoutException see the implementation for the exact conditions
 */
void startSecStorageVmForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException,
ConcurrentOperationException, OperationTimedoutException;

public boolean stopSecStorageVm(long ssVmVmId);

public boolean rebootSecStorageVm(long ssVmVmId);
Expand Down
8 changes: 8 additions & 0 deletions server/src/main/java/com/cloud/vm/UserVmManagerImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -3230,6 +3230,14 @@ public void startVirtualMachine(UserVm vm) throws OperationTimedoutException, Re
_itMgr.advanceStart(vm.getUuid(), null, null);
}

/**
 * Starts the given VM for the HA restart path.
 *
 * Delegates to {@code _itMgr.advanceStart}, passing the VM's UUID together with the
 * supplied parameters and planner unchanged. The method carries an {@code @ActionEvent}
 * annotation with event type {@code EVENT_VM_START}.
 *
 * @param vm the VM to start
 * @param params start parameters forwarded as-is
 * @param planner deployment planner forwarded as-is
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_VM_START, eventDescription = "restarting VM for HA", async = true)
public void startVirtualMachineForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object> params,
        DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException,
        ConcurrentOperationException, OperationTimedoutException {
    final String vmUuid = vm.getUuid();
    _itMgr.advanceStart(vmUuid, params, planner);
}

@Override
@ActionEvent(eventType = EventTypes.EVENT_VM_REBOOT, eventDescription = "rebooting Vm", async = true)
public UserVm rebootVirtualMachine(RebootVMCmd cmd) throws InsufficientCapacityException, ResourceUnavailableException {
Expand Down
Loading

0 comments on commit b22315d

Please sign in to comment.