Skip to content

Commit

Permalink
Fix: PE: Correctly handle resources that recover before we operate on…
Browse files Browse the repository at this point in the history
… them

I.e., a recurring monitor reports a failure and then a success.

This mostly happens only with on-fail=block.
  • Loading branch information
beekhof committed May 10, 2013
1 parent 2e10db8 commit d87de1b
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 13 deletions.
12 changes: 9 additions & 3 deletions lib/pengine/unpack.c
Expand Up @@ -2078,7 +2078,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
if (rsc->failure_timeout > 0) {
int last_run = 0;

if (crm_element_value_int(xml_op, "last-rc-change", &last_run) == 0) {
if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
time_t now = get_effective_time(data_set);

if (now > (last_run + rsc->failure_timeout)) {
Expand Down Expand Up @@ -2343,6 +2343,12 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
if (actual_rc_i == PCMK_EXECRA_NOT_RUNNING) {
clear_past_failure = TRUE;

} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
clear_past_failure = TRUE;
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}

} else if (safe_str_eq(task, CRMD_ACTION_START)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
Expand Down Expand Up @@ -2450,15 +2456,14 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
}

} else if (rsc->role < RSC_ROLE_STARTED) {
/* start, migrate_to and migrate_from will land here */
/* migrate_to and migrate_from will land here */
pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
set_active(rsc);
}

/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
case action_fail_block:
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
Expand All @@ -2467,6 +2472,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
rsc->id, fail2text(*on_fail));
break;

case action_fail_block:
case action_fail_ignore:
case action_fail_recover:
*on_fail = action_fail_ignore;
Expand Down
1 change: 1 addition & 0 deletions pengine/regression.core.sh.in
Expand Up @@ -312,5 +312,6 @@ else
failed=.single
> $failed
do_test $single_test "Single shot" $*
cat $failed
exit $?
fi
4 changes: 2 additions & 2 deletions pengine/test10/monitor-onfail-restart.xml
Expand Up @@ -30,7 +30,7 @@
<lrm id="fc16-builder">
<lrm_resources>
<lrm_resource id="A" type="Dummy" class="ocf" provider="pacemaker">
<lrm_rsc_op id="A_monitor_20000" operation_key="A_monitor_20000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="5:2:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" transition-magic="0:0;5:2:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" call-id="7" rc-code="0" op-status="0" interval="20000" last-rc-change="1334242313" exec-time="10" queue-time="0" op-digest="02a5bcf940fc8d3239701acb11438d6a"/>
<lrm_rsc_op id="A_monitor_20000" operation_key="A_monitor_20000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="5:2:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" transition-magic="0:0;5:2:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" call-id="4" rc-code="0" op-status="0" interval="20000" last-rc-change="1334242312" exec-time="10" queue-time="0" op-digest="02a5bcf940fc8d3239701acb11438d6a"/>
<lrm_rsc_op id="A_last_failure_0" operation_key="A_monitor_20000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="6:0:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" transition-magic="0:7;6:0:0:3fda26dc-7465-44d0-8b73-35bd2847ee00" call-id="4" rc-code="7" op-status="0" interval="20000" last-rc-change="1334242313" exec-time="10" queue-time="0" op-digest="02a5bcf940fc8d3239701acb11438d6a"/>
</lrm_resource>
</lrm_resources>
Expand All @@ -45,4 +45,4 @@
</node_state>
<node_state id="fc16-builder2" uname="fc16-builder2" ha="dead" in_ccm="true" crmd="offline" join="down" crm-debug-origin="do_state_transition"/>
</status>
</cib>
</cib>
12 changes: 6 additions & 6 deletions pengine/test10/whitebox-fail2.exp
Expand Up @@ -8,13 +8,13 @@
</action_set>
<inputs>
<trigger>
<rsc_op id="6" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
<rsc_op id="5" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
</trigger>
</inputs>
</synapse>
<synapse id="1">
<action_set>
<rsc_op id="6" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2">
<rsc_op id="5" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2">
<primitive id="container1" class="ocf" provider="heartbeat" type="VirtualDomain"/>
<attributes CRM_meta_timeout="20000" config="/home/dvossel/virtual_machines/lxc/lxc1.xml" crm_feature_set="3.0.7" force_stop="true" hypervisor="lxc:///"/>
</rsc_op>
Expand Down Expand Up @@ -68,7 +68,7 @@
</action_set>
<inputs>
<trigger>
<rsc_op id="6" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
<rsc_op id="5" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
</trigger>
<trigger>
<pseudo_event id="34" operation="stop" operation_key="M-clone_stop_0"/>
Expand Down Expand Up @@ -165,7 +165,7 @@
</action_set>
<inputs>
<trigger>
<rsc_op id="6" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
<rsc_op id="5" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
</trigger>
</inputs>
</synapse>
Expand Down Expand Up @@ -196,7 +196,7 @@
</synapse>
<synapse id="14">
<action_set>
<rsc_op id="5" operation="monitor" operation_key="lxc1_monitor_30000" on_node="18node2" on_node_uuid="2">
<rsc_op id="6" operation="monitor" operation_key="lxc1_monitor_30000" on_node="18node2" on_node_uuid="2">
<primitive id="lxc1" class="ocf" provider="pacemaker" type="remote"/>
<attributes CRM_meta_interval="30000" CRM_meta_name="monitor" CRM_meta_timeout="30000" crm_feature_set="3.0.7"/>
</rsc_op>
Expand All @@ -215,7 +215,7 @@
</action_set>
<inputs>
<trigger>
<rsc_op id="6" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
<rsc_op id="5" operation="stop" operation_key="container1_stop_0" on_node="18node2" on_node_uuid="2"/>
</trigger>
<trigger>
<pseudo_event id="29" operation="stop" operation_key="M_stop_0" internal_operation_key="M:4_stop_0"/>
Expand Down
4 changes: 2 additions & 2 deletions pengine/test10/whitebox-fail2.xml
Expand Up @@ -146,13 +146,13 @@
<lrm_rsc_op id="M_monitor_10000" operation_key="M_monitor_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7" transition-key="23:9:0:de2c4c7e-59bb-41af-80a0-c9110f04e18f" transition-magic="0:0;23:9:0:de2c4c7e-59bb-41af-80a0-c9110f04e18f" call-id="31" rc-code="0" op-status="0" interval="10000" last-rc-change="0" exec-time="12" queue-time="1" op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
</lrm_resource>
<lrm_resource id="container1" type="VirtualDomain" class="ocf" provider="heartbeat">
<lrm_rsc_op id="container1_last_0" operation_key="container1_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="16:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" transition-magic="0:0;16:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" call-id="62" rc-code="0" op-status="0" interval="0" last-run="1361562571" last-rc-change="0" exec-time="398" queue-time="0" op-digest="cb566b7e956afa7b41c4c8c4f119ff13"/>
<lrm_rsc_op id="container1_last_0" operation_key="container1_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="16:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" transition-magic="0:0;16:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" call-id="30" rc-code="0" op-status="0" interval="0" last-run="1361562571" last-rc-change="0" exec-time="398" queue-time="0" op-digest="cb566b7e956afa7b41c4c8c4f119ff13"/>
<lrm_rsc_op id="container1_last_failure_0" operation_key="container1_asyncmon_0" operation="asyncmon" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="40:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:1;40:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" call-id="40" rc-code="1" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="lxc1" type="remote" class="ocf" provider="pacemaker">
<lrm_rsc_op id="lxc1_last_0" operation_key="lxc1_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="46:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" transition-magic="0:0;46:6:0:6bd4d441-8951-48b5-8ad9-f63502349363" call-id="8" rc-code="0" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="lxc1_monitor_30000" operation_key="lxc1_monitor_30000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="44:7:0:6bd4d441-8951-48b5-8ad9-f63502349363" transition-magic="0:0;44:7:0:6bd4d441-8951-48b5-8ad9-f63502349363" call-id="9" rc-code="0" op-status="0" interval="30000" op-digest="02a5bcf940fc8d3239701acb11438d6a"/>
<lrm_rsc_op id="lxc1_last_failure_0" operation_key="lxc1_asyncmon_0" operation="asyncmon" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="9:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:1;9:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" call-id="9" rc-code="1" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="lxc1_last_failure_0" operation_key="lxc1_asyncmon_0" operation="asyncmon" crm-debug-origin="do_update_resource" crm_feature_set="3.0.7" transition-key="9:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:1;9:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" call-id="10" rc-code="1" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="lxc2" type="remote" class="ocf" provider="pacemaker">
<lrm_rsc_op id="lxc2_last_0" operation_key="lxc2_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7" transition-key="14:7:0:de2c4c7e-59bb-41af-80a0-c9110f04e18f" transition-magic="0:0;14:7:0:de2c4c7e-59bb-41af-80a0-c9110f04e18f" call-id="3" rc-code="0" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
Expand Down

0 comments on commit d87de1b

Please sign in to comment.