Skip to content

Commit

Permalink
only update powerstate is sure it is the latest
Browse files Browse the repository at this point in the history
  • Loading branch information
DaanHoogland committed Dec 5, 2019
1 parent 07d64cb commit ee718bc
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 27 deletions.
Expand Up @@ -1242,7 +1242,7 @@ protected void processRequest(final Link link, final Request request) {
if (s_logger.isDebugEnabled()) {
if (cmd instanceof PingRoutingCommand) {
logD = false;
s_logger.debug("Ping from " + hostId + "(" + hostName + ")");
s_logger.debug("Ping from Routing host " + hostId + "(" + hostName + ")");
s_logger.trace("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request);
} else if (cmd instanceof PingCommand) {
logD = false;
Expand Down
Expand Up @@ -4212,7 +4212,8 @@ protected void HandlePowerStateReport(final String subject, final String senderA
break;

case PowerOff:
case PowerReportMissing:
case PowerReportMissing: // rigorously set to Migrating? or just do nothing until...? or create a missing state?
// for now handle in line with legacy as power off
handlePowerOffReportWithNoPendingJobsOnVM(vm);
break;

Expand Down Expand Up @@ -4323,8 +4324,15 @@ private void handlePowerOffReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
case Running:
case Stopped:
case Migrating:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it");
if(vm.isHaEnabled() && vm.getState() == State.Running && HaVmRestartHostUp.value() && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) {
if (s_logger.isInfoEnabled()) {
s_logger.info(
String.format("VM %s is at %s and we received a %s report while there is no pending jobs on it"
, vm.getInstanceName(), vm.getState(), vm.getPowerState()));
}
if(vm.isHaEnabled() && vm.getState() == State.Running
&& HaVmRestartHostUp.value()
&& vm.getHypervisorType() != HypervisorType.VMware
&& vm.getHypervisorType() != HypervisorType.Hyperv) {
s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart");
if(!_haMgr.hasPendingHaWork(vm.getId())) {
_haMgr.scheduleRestart(vm, true);
Expand All @@ -4334,11 +4342,14 @@ private void handlePowerOffReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
return;
}

final VirtualMachineGuru vmGuru = getVmGuru(vm);
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
if (!sendStop(vmGuru, profile, true, true)) {
// In case StopCommand fails, don't proceed further
return;
// not when report is missing
if(PowerState.PowerOff.equals(vm.getPowerState())) {
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
if (!sendStop(vmGuru, profile, true, true)) {
// In case StopCommand fails, don't proceed further
return;
}
}

try {
Expand Down
Expand Up @@ -38,7 +38,6 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat

@Inject MessageBus _messageBus;
@Inject VMInstanceDao _instanceDao;
@Inject VirtualMachineManager _vmMgr;
@Inject ManagementServiceConfiguration mgmtServiceConf;

public VirtualMachinePowerStateSyncImpl() {
Expand Down Expand Up @@ -69,25 +68,30 @@ public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateRep

private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo) {

if (s_logger.isDebugEnabled())
if (s_logger.isDebugEnabled()) {
s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size());
}

for (Map.Entry<Long, VirtualMachine.PowerState> entry : translatedInfo.entrySet()) {

if (s_logger.isDebugEnabled())
s_logger.debug("VM state report. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue());

if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue())) {
if (s_logger.isDebugEnabled())
s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue());
if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue(), DateUtil.currentGMTTime())) {
if (s_logger.isInfoEnabled()) {
s_logger.info("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue());
}

_messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, entry.getKey());
} else {
if (s_logger.isDebugEnabled())
s_logger.debug("VM power state does not change, skip DB writing. vm id: " + entry.getKey());
if (s_logger.isTraceEnabled()) {
s_logger.trace("VM power state does not change, skip DB writing. vm id: " + entry.getKey());
}
}
}

// any state outdates should be checked against the time before this list was retrieved
Date startTime = DateUtil.currentGMTTime();
// for all running/stopping VMs, we provide monitoring of missing report
List<VMInstanceVO> vmsThatAreMissingReport = _instanceDao.findByHostInStates(hostId, VirtualMachine.State.Running,
VirtualMachine.State.Stopping, VirtualMachine.State.Starting);
Expand All @@ -98,10 +102,12 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra
it.remove();
}

// here we need to be wary of out of band migration as opposed to other, more unexpected state changes
if (vmsThatAreMissingReport.size() > 0) {
Date currentTime = DateUtil.currentGMTTime();
if (s_logger.isDebugEnabled())
if (s_logger.isDebugEnabled()) {
s_logger.debug("Run missing VM report. current time: " + currentTime.getTime());
}

// 2 times of sync-update interval for graceful period
long milliSecondsGracefullPeriod = mgmtServiceConf.getPingInterval() * 2000L;
Expand Down Expand Up @@ -130,23 +136,34 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra
}
}

if (s_logger.isDebugEnabled())
s_logger.debug("Detected missing VM. host: " + hostId + ", vm id: " + instance.getId() +
", power state: PowerReportMissing, last state update: " + vmStateUpdateTime.getTime());
if (s_logger.isInfoEnabled()) {
s_logger.info(
String.format("Detected missing VM. host: %l, vm id: %l(%s), power state: %s, last state update: %l"
, hostId
, instance.getId()
, instance.getUuid()
, VirtualMachine.PowerState.PowerReportMissing
, vmStateUpdateTime.getTime()));
}

long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime();

if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period");

if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing)) {
if (s_logger.isDebugEnabled())
// this is were a race condition might have happened if we don't re-fetch the instance;
// between the startime of this job and the currentTime of this missing-branch
// an update might have occurred that we should not override in case of out of band migration
if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing, startTime)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + instance.getId() + ", power state: PowerReportMissing ");
}

_messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, instance.getId());
} else {
if (s_logger.isDebugEnabled())
if (s_logger.isDebugEnabled()) {
s_logger.debug("VM power state does not change, skip DB writing. vm id: " + instance.getId());
}
}
} else {
s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has not passed graceful period yet");
Expand Down
Expand Up @@ -139,7 +139,7 @@ public interface VMInstanceDao extends GenericDao<VMInstanceVO, Long>, StateDao<

List<VMInstanceVO> listStartingWithNoHostId();

boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState);
boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState, Date wisdomEra);

void resetVmPowerStateTracking(long instanceId);

Expand Down
Expand Up @@ -828,13 +828,14 @@ public List<VMInstanceVO> listStartingWithNoHostId() {
}

@Override
public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState) {
public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState, Date wisdomEra) {
return Transaction.execute(new TransactionCallback<Boolean>() {
@Override
public Boolean doInTransaction(TransactionStatus status) {
boolean needToUpdate = false;
VMInstanceVO instance = findById(instanceId);
if (instance != null) {
if (instance != null
&& instance.getPowerStateUpdateTime().before(wisdomEra)) {
Long savedPowerHostId = instance.getPowerHostId();
if (instance.getPowerState() != powerState || savedPowerHostId == null
|| savedPowerHostId.longValue() != powerHostId) {
Expand Down
Expand Up @@ -3817,7 +3817,7 @@ protected Answer execute(StopCommand cmd) {
}
} else {

String msg = "VM " + cmd.getVmName() + " is no longer in vSphere";
String msg = "VM " + cmd.getVmName() + " is no longer on the expected host in vSphere";
s_logger.info(msg);
return new StopAnswer(cmd, msg, true);
}
Expand Down

0 comments on commit ee718bc

Please sign in to comment.