forked from ClusterLabs/pacemaker
/
join_dc.c
717 lines (592 loc) · 24.9 KB
/
join_dc.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include "tengine.h"
/* Not referenced anywhere else in the visible part of this file */
char *max_epoch = NULL;
/* Heap copy of the name of the node that supplied max_generation_xml;
 * set in do_dc_join_filter_offer() and released in initialize_join()
 */
char *max_generation_from = NULL;
/* Copy of the best CIB generation XML seen so far among join requests;
 * set in do_dc_join_filter_offer() and released in initialize_join()
 */
xmlNode *max_generation_xml = NULL;
/* Forward declarations for functions defined later in this file */
void initialize_join(gboolean before);
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Identifier of the current join round; incremented by do_dc_join_offer_all() */
static int current_join_id = 0;
/* Value of crm_peer_seq recorded when join offers were last made
 * (see join_make_offer()); compared against crm_peer_seq in check_join_state()
 */
unsigned long long saved_ccm_membership_id = 0;
/*
 * Record a peer's progress through the DC join sequence.
 *
 * source: label for the caller, used only in log messages
 * node:   peer to update; NULL is logged as an error and otherwise ignored
 * phase:  phase the peer should move to
 *
 * Peers flagged crm_remote_node are left untouched.  A request for the
 * phase the peer is already in is only traced.  The new phase is stored
 * when it is at or below crm_join_none, or exactly one step above the
 * peer's current phase; any other request is logged as an error and the
 * stored phase is left unchanged.
 */
void
crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
{
    enum crm_join_phase last = 0;

    if(node == NULL) {
        /* The arguments must be listed in the same order as the conversions
         * in the format string: join-%u, source=%s, phase=%d
         */
        crm_err("Could not update join because node not specified" CRM_XS
                " join-%u source=%s phase=%d", current_join_id, source, phase);
        return;
    }

    /* Remote nodes do not participate in joins */
    if (is_set(node->flags, crm_remote_node)) {
        return;
    }

    last = node->join;

    if(phase == last) {
        crm_trace("%s: Node %s[%u] - join-%u phase still %u",
                  source, node->uname, node->id, current_join_id, last);

    } else if (phase <= crm_join_none || phase == last + 1) {
        /* Either a reset (none or below) or the next step in sequence */
        node->join = phase;
        crm_info("%s: Node %s[%u] - join-%u phase %u -> %u",
                 source, node->uname, node->id, current_join_id, last, phase);

    } else {
        /* Skipping ahead or moving backwards between positive phases */
        crm_err("Could not update join for node %s because phase transition invalid "
                CRM_XS " join-%u source=%s node_id=%u last=%u new=%u",
                node->uname, current_join_id, source, node->id, last, phase);
    }
}
/*
 * Reset join bookkeeping.
 *
 * Every entry in crm_peer_cache is moved back to crm_join_none.  When
 * 'before' is true, the remembered best CIB generation (source node name
 * and XML) is also discarded and the R_HAVE_CIB / R_CIB_ASKED bits are
 * cleared in fsa_input_register.
 */
void
initialize_join(gboolean before)
{
    crm_node_t *node = NULL;
    GHashTableIter peers;

    crm_debug("join-%d: Initializing join data (flag=%s)",
              current_join_id, before ? "true" : "false");

    g_hash_table_iter_init(&peers, crm_peer_cache);
    while (g_hash_table_iter_next(&peers, NULL, (gpointer *) &node)) {
        crm_update_peer_join(__FUNCTION__, node, crm_join_none);
    }

    if (!before) {
        return;
    }

    /* free(NULL) is a no-op, so no guard is needed for the name */
    free(max_generation_from);
    max_generation_from = NULL;

    if (max_generation_xml != NULL) {
        free_xml(max_generation_xml);
        max_generation_xml = NULL;
    }

    clear_bit(fsa_input_register, R_HAVE_CIB);
    clear_bit(fsa_input_register, R_CIB_ASKED);
}
/*
 * Send a join offer to one peer.
 *
 * The signature matches what g_hash_table_foreach() expects, and the
 * function is also called directly with key == NULL.
 *
 * key:       unused
 * value:     the crm_node_t to make the offer to (must not be NULL)
 * user_data: only tested for NULL; when non-NULL, peers whose join phase
 *            is already above crm_join_none are skipped
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = (crm_node_t *) value;
    xmlNode *welcome_msg = NULL;

    CRM_ASSERT(node != NULL);

    if (crm_is_peer_active(node) == FALSE) {
        crm_info("Not making an offer to %s: not active (%s)", node->uname, node->state);

        if (node->expected == NULL && safe_str_eq(node->state, CRM_NODE_LOST)) {
            /* This looks unsafe, but marking the node as expected-down,
             * together with an active resource, is what gets it fenced.
             *
             * The consequence is that a node which dies at the same time
             * as the old DC, and which is not (still) running a resource,
             * will not be fenced.  Nobody is happy about that.
             */
            crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
        }
        return;
    }

    if (node->uname == NULL) {
        crm_info("No recipient for welcome message.(Node uuid:%s)", node->uuid);
        return;
    }

    /* Remember which membership the offers are based on */
    if (saved_ccm_membership_id != crm_peer_seq) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    if (user_data != NULL && node->join > crm_join_none) {
        crm_info("Skipping %s: already known %d", node->uname, node->join);
        return;
    }

    /* Start this peer from scratch before welcoming it */
    crm_update_peer_join(__FUNCTION__, node, crm_join_none);

    welcome_msg = create_request(CRM_OP_JOIN_OFFER, NULL, node->uname,
                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(welcome_msg, F_CRM_JOIN_ID, current_join_id);

    crm_info("join-%d: Sending offer to %s", current_join_id, node->uname);
    send_cluster_message(node, crm_msg_crmd, welcome_msg, TRUE);
    free_xml(welcome_msg);

    crm_update_peer_join(__FUNCTION__, node, crm_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL
 *
 * Begin a new join round: bump the join ID, reset all join bookkeeping
 * and send an offer to every entry in crm_peer_cache.
 */
void
do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* Everyone's join state is reset below.  Nodes that are active in the
     * CIB but absent from the CCM list will be seen as offline by the PE
     * anyway.
     */
    current_join_id += 1;
    initialize_join(TRUE);

    update_dc(NULL);

    if (cause == C_HA_MESSAGE) {
        if (current_input == I_NODE_JOIN) {
            crm_info("A new node joined the cluster");
        }
    }

    /* NULL user_data: offer to every peer regardless of its join phase */
    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);

    /* The PE is not invoked yet; wait for the replies first */
    crm_info("join-%d: Waiting on %d outstanding join acks",
             current_join_id, crmd_join_phase_count(crm_join_welcomed));
}
/* A_DC_JOIN_OFFER_ONE
 *
 * Make a join offer in response to a single request.  Without message
 * data, offers go to every peer that has not progressed beyond
 * crm_join_none.  Otherwise an offer is sent to the sender of the message
 * and to ourselves, and the current transition is aborted.
 */
void
do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_node_t *member;
    ha_msg_input_t *welcome = NULL;
    const char *join_to = NULL;
    const char *op = NULL;

    if (msg_data->data == NULL) {
        crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
        /* &member only serves as a non-NULL user_data marker here:
         * join_make_offer() tests it for NULL and never dereferences it
         */
        g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
        check_join_state(cur_state, __FUNCTION__);
        return;
    }

    welcome = fsa_typed_data(fsa_dt_ha_msg);
    if (welcome == NULL) {
        crm_err("Attempt to send welcome message without a message to reply to!");
        return;
    }

    join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
    if (join_to == NULL) {
        crm_err("Attempt to send welcome message without a host to reply to!");
        return;
    }

    member = crm_get_peer(0, join_to);
    op = crm_element_value(welcome->msg, F_CRM_TASK);

    if (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN) {
        /* The node may have been sick or still starting when the original
         * offer went out.  It will either re-announce itself in due
         * course, or the original offer can be re-stored on the client.
         */
        crm_trace("(Re-)offering membership to %s...", join_to);
    }

    crm_info("join-%d: Processing %s request from %s in state %s",
             current_join_id, op, join_to, fsa_state2string(cur_state));

    crm_update_peer_join(__FUNCTION__, member, crm_join_none);
    join_make_offer(NULL, member, NULL);

    /* Always offer to the DC (ourselves) as well, which ensures the
     * correct value for max_generation_from
     */
    member = crm_get_peer(0, fsa_our_uname);
    join_make_offer(NULL, member, NULL);

    /* This was a genuine join request: cancel any existing transition */
    abort_transition(INFINITY, tg_restart, "Node join", NULL);

    /* The PE is not invoked yet; wait for the replies first */
    crm_debug("Waiting on %d outstanding join acks for join-%d",
              crmd_join_phase_count(crm_join_welcomed), current_join_id);
}
/*
 * Compare one integer-valued attribute of two XML nodes.
 *
 * Both attribute values are converted with crm_int_helper() and stored
 * as int before being compared.
 *
 * Returns -1 if left's value is smaller, 1 if it is larger, 0 if equal.
 */
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
    const char *lhs_text = crm_element_value(left, field);
    const char *rhs_text = crm_element_value(right, field);
    int lhs = crm_int_helper(lhs_text, NULL);
    int rhs = crm_int_helper(rhs_text, NULL);

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
/* A_DC_JOIN_PROCESS_REQ
 *
 * Handle a peer's reply to a join offer.  Replies carrying a join ID
 * other than the current one are dropped.  Otherwise the peer is either
 * integrated or NACKed, and the best CIB generation seen so far (and the
 * node it came from) is updated when the reply carries a better one.
 */
void
do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
                        enum crmd_fsa_state cur_state,
                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *request = fsa_typed_data(fsa_dt_ha_msg);
    const char *join_from = crm_element_value(request->msg, F_CRM_HOST_FROM);
    const char *ref = crm_element_value(request->msg, F_CRM_REFERENCE);
    crm_node_t *join_node = crm_get_peer(0, join_from);
    xmlNode *generation = NULL;
    gboolean accepted = TRUE;
    int join_id = -1;
    int cmp = 0;

    crm_debug("Processing req from %s", join_from);

    generation = request->xml;
    crm_element_value_int(request->msg, F_CRM_JOIN_ID, &join_id);

    if (max_generation_xml != NULL && generation != NULL) {
        /* Compared in this order; the first difference decides */
        const char *version_fields[] = {
            XML_ATTR_GENERATION_ADMIN,
            XML_ATTR_GENERATION,
            XML_ATTR_NUMUPDATES,
        };
        int idx = 0;

        while (cmp == 0 && idx < DIMOF(version_fields)) {
            cmp = compare_int_fields(max_generation_xml, generation, version_fields[idx]);
            idx++;
        }
    }

    if (join_id != current_join_id) {
        crm_debug("Invalid response from %s: join-%d vs. join-%d",
                  join_from, join_id, current_join_id);
        check_join_state(cur_state, __FUNCTION__);
        return;
    }

    if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
        crm_err("Node %s is not a member", join_from);
        accepted = FALSE;

    } else if (generation == NULL) {
        crm_err("Generation was NULL");
        accepted = FALSE;

    } else if (max_generation_xml == NULL) {
        /* First usable generation of this round */
        max_generation_xml = copy_xml(generation);
        max_generation_from = strdup(join_from);

    } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
        /* cmp < 0: the stored maximum is older than the sender's.
         * On a tie, our own copy is preferred.
         */
        crm_debug("%s has a better generation number than"
                  " the current max %s", join_from, max_generation_from);
        /* max_generation_xml cannot be NULL here: the previous branch
         * handles that case
         */
        crm_log_xml_debug(max_generation_xml, "Max generation");
        crm_log_xml_debug(generation, "Their generation");

        free(max_generation_from);
        free_xml(max_generation_xml);

        max_generation_from = strdup(join_from);
        max_generation_xml = copy_xml(generation);
    }

    if (accepted) {
        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
        crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);

    } else {
        /* NACK this client */
        crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
        crm_err("Rejecting cluster join request from %s " CRM_XS
                " NACK join-%d ref=%s", join_from, join_id, ref);
    }

    crm_update_peer_expected(__FUNCTION__, join_node,
                             accepted ? CRMD_JOINSTATE_MEMBER : CRMD_JOINSTATE_NACK);

    crm_debug("%u nodes have been integrated into join-%d",
              crmd_join_phase_count(crm_join_integrated), join_id);

    if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
        /* The PE is not invoked yet; more replies are expected */
        crm_debug("join-%d: Still waiting on %d outstanding offers",
                  join_id, crmd_join_phase_count(crm_join_welcomed));
    }
}
/* A_DC_JOIN_FINALIZE
 *
 * Once no peer is left in the welcomed phase and at least one has been
 * integrated, ask the CIB to sync from the node holding the best
 * generation (or from ourselves) and register finalize_sync_callback()
 * for the result.
 */
void
do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    char *sync_from = NULL;
    int rc = pcmk_ok;

    /* This can be done straight away, which avoids clients timing us out
     * while the latest CIB is computed
     */
    crm_debug("Finializing join-%d for %d clients",
              current_join_id, crmd_join_phase_count(crm_join_integrated));

    crmd_join_phase_log(LOG_INFO);

    if (crmd_join_phase_count(crm_join_welcomed) != 0) {
        crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
        return;
    }

    if (crmd_join_phase_count(crm_join_integrated) == 0) {
        /* Nothing to do */
        check_join_state(fsa_state, __FUNCTION__);
        return;
    }

    /* We hold the CIB only if nobody (or we ourselves) supplied the best one */
    clear_bit(fsa_input_register, R_HAVE_CIB);
    if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) {
        set_bit(fsa_input_register, R_HAVE_CIB);
    }

    if (is_set(fsa_input_register, R_IN_TRANSITION)) {
        crm_warn("Delaying response to cluster join offer while transition in progress "
                 CRM_XS " join-%d", current_join_id);
        crmd_fsa_stall(FALSE);
        return;
    }

    if (max_generation_from == NULL || is_set(fsa_input_register, R_HAVE_CIB) != FALSE) {
        /* Send _our_ CIB out to everyone */
        sync_from = strdup(fsa_our_uname);
        crm_info("join-%d: Syncing our CIB to the rest of the cluster",
                 current_join_id);
        crm_log_xml_debug(max_generation_xml, "Requested version");

    } else {
        /* Ask for the agreed best CIB */
        sync_from = strdup(max_generation_from);
        set_bit(fsa_input_register, R_CIB_ASKED);
        crm_notice("Syncing the Cluster Information Base from %s to rest of cluster "
                   CRM_XS " join-%d", sync_from, current_join_id);
        crm_log_xml_notice(max_generation_xml, "Requested version");
    }

    /* sync_from is passed to the callback as its user_data and is not
     * freed in this function.
     * NOTE(review): confirm who releases it once the callback has run
     */
    rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
    fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
}
/*
 * CIB callback for the sync requested by do_dc_join_finalize().
 *
 * rc:        result of the sync
 * user_data: name of the node the sync was requested from (used for
 *            logging only)
 *
 * A failed sync restarts the join process via I_ELECTION_DC.  On success,
 * and only while we are still the DC in S_FINALIZE_JOIN, R_HAVE_CIB is
 * set and every integrated peer is sent its join result.
 */
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    CRM_LOG_ASSERT(-EPERM != rc);
    clear_bit(fsa_input_register, R_CIB_ASKED);

    if (rc != pcmk_ok) {
        /* Stale data is logged as a warning, anything else as an error */
        do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
                   "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));

        /* Restart the whole join process */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
        return;
    }

    if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
        set_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);

        /* Only notify peers if the join is not already complete */
        if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
            crm_debug("Notifying %d clients of join-%d results",
                      crmd_join_phase_count(crm_join_integrated), current_join_id);
            g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
        }

    } else {
        crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
                  AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state));
    }
}
/*
 * CIB callback registered by do_dc_join_ack() for the status update of a
 * confirmed node.  Success re-evaluates the join state; failure raises
 * I_ERROR.
 */
static void
join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    /* Not referenced directly below.
     * NOTE(review): presumably picked up by name inside register_fsa_error();
     * confirm before removing or renaming
     */
    fsa_data_t *msg_data = NULL;

    if (rc != pcmk_ok) {
        crm_err("Join update %d failed", call_id);
        crm_log_xml_debug(msg, "failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    crm_debug("Join update %d complete", call_id);
    check_join_state(fsa_state, __FUNCTION__);
}
/* A_DC_JOIN_PROCESS_ACK
 *
 * Handle a peer's confirmation of its join result.  The peer is marked
 * confirmed and its LRM status is written to the CIB: for ourselves a
 * fresh do_lrm_query() result is preferred, otherwise the XML carried by
 * the confirmation message is used.
 */
void
do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    int call_id = 0;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
    xmlNode *local_lrm = NULL;  /* owned here when non-NULL */
    xmlNode *status = NULL;     /* what gets written to the CIB */

    const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    crm_node_t *peer = crm_get_peer(0, join_from);

    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
        crm_debug("Ignoring op=%s message from %s", op, join_from);
        return;
    }

    crm_trace("Processing ack from %s", join_from);
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    if (peer->join != crm_join_finalized) {
        crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
                 join_id, join_from, peer->join);
        return;
    }

    if (join_id != current_join_id) {
        crm_err("Invalid response from %s: join-%d vs. join-%d",
                join_from, join_id, current_join_id);
        crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
        return;
    }

    crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);

    crm_info("join-%d: Updating node state to %s for %s",
             join_id, CRMD_JOINSTATE_MEMBER, join_from);

    /* Replace the node's LRM status in the CIB.  The TE does not need to
     * be told about these updates; a transition will be started in due
     * time.
     */
    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);

    if (safe_str_eq(join_from, fsa_our_uname)) {
        local_lrm = do_lrm_query(TRUE, fsa_our_uname);

        if (local_lrm != NULL) {
            crm_debug("LRM state is updated from do_lrm_query.(%s)", join_from);
            status = local_lrm;

        } else {
            crm_warn("Could not get our LRM state. LRM state is updated from join_ack->xml.(%s)", join_from);
            status = join_ack->xml;
        }

    } else {
        crm_debug("LRM state is updated from join_ack->xml.(%s)", join_from);
        status = join_ack->xml;
    }

    fsa_cib_update(XML_CIB_TAG_STATUS, status,
                   cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);

    if (local_lrm != NULL) {
        free_xml(local_lrm);
    }

    fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
    crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
}
/*
 * Send the join result to one integrated peer.
 *
 * The signature matches what g_hash_table_foreach() expects.
 *
 * key:       unused
 * value:     the crm_node_t to finalize
 * user_data: unused
 *
 * Peers that are not in the crm_join_integrated phase are skipped.  A
 * node entry is written to the CIB for every other peer; the ACK itself
 * is sent only if the peer is still active.
 */
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *join_node = value;
    const char *join_to = join_node->uname;
    xmlNode *node_entry = NULL;
    xmlNode *reply = NULL;

    if (join_node->join != crm_join_integrated) {
        crm_trace("Skipping %s in state %d", join_to, join_node->join);
        return;
    }

    /* Make sure a node entry exists for the new node */
    crm_trace("Creating node entry for %s", join_to);

    node_entry = create_xml_node(NULL, XML_CIB_TAG_NODE);
    set_uuid(node_entry, XML_ATTR_UUID, join_node);
    crm_xml_add(node_entry, XML_ATTR_UNAME, join_to);

    fsa_cib_anon_update(XML_CIB_TAG_NODES, node_entry,
                        cib_scope_local | cib_quorum_override | cib_can_create);
    free_xml(node_entry);

    join_node = crm_get_peer(0, join_to);
    if (crm_is_peer_active(join_node) == FALSE) {
        /* NACKing nodes that the membership layer does not know about yet
         * only creates more churn.  It is better to leave them waiting and
         * let the join restart when the new membership event arrives.
         *
         * All other NACKs (due to versions etc.) should still be processed.
         */
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING);
        return;
    }

    /* Build and send the ack to the node */
    reply = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
                           CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(reply, F_CRM_JOIN_ID, current_join_id);

    crm_debug("join-%d: ACK'ing join request from %s",
              current_join_id, join_to);
    crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);

    crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized);
    crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);

    send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, reply, TRUE);
    free_xml(reply);
}
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);
/*
 * Decide whether the current join phase can advance and, if so, register
 * the matching FSA input.
 *
 * cur_state: FSA state to evaluate
 * source:    label for the caller, used only in log messages
 *
 * Returns TRUE when I_INTEGRATED or I_FINALIZED was registered, and also
 * when finalization is being delayed because R_HAVE_CIB is not set.
 * Returns FALSE otherwise, including when the membership has changed
 * since offers were made (in which case I_NODE_JOIN is registered, at
 * most once per membership sequence number).
 */
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
    /* Highest membership sequence for which I_NODE_JOIN was already raised */
    static unsigned long long highest_seq = 0;

    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));

    if (saved_ccm_membership_id != crm_peer_seq) {
        crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
                  source, saved_ccm_membership_id, crm_peer_seq, highest_seq);

        if (crm_peer_seq > highest_seq) {
            /* Don't spam the FSA with duplicates */
            highest_seq = crm_peer_seq;
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }
        return FALSE;
    }

    switch (cur_state) {
        case S_INTEGRATION:
            if (crmd_join_phase_count(crm_join_welcomed) == 0) {
                crm_debug("join-%d: Integration of %d peers complete: %s",
                          current_join_id, crmd_join_phase_count(crm_join_integrated), source);
                register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
                return TRUE;
            }
            break;

        case S_FINALIZE_JOIN:
            if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
                crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
                return TRUE;
            }

            /* Report the earliest phase that still has peers in it */
            if (crmd_join_phase_count(crm_join_welcomed) != 0) {
                crm_debug("join-%d: Still waiting on %d welcomed nodes",
                          current_join_id, crmd_join_phase_count(crm_join_welcomed));
                crmd_join_phase_log(LOG_DEBUG);

            } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
                crm_debug("join-%d: Still waiting on %d integrated nodes",
                          current_join_id, crmd_join_phase_count(crm_join_integrated));
                crmd_join_phase_log(LOG_DEBUG);

            } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
                crm_debug("join-%d: Still waiting on %d finalized nodes",
                          current_join_id, crmd_join_phase_count(crm_join_finalized));
                crmd_join_phase_log(LOG_DEBUG);

            } else {
                crm_debug("join-%d complete: %s", current_join_id, source);
                register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
                return TRUE;
            }
            break;

        default:
            /* Nothing to decide in any other state */
            break;
    }

    return FALSE;
}
/*
 * FSA action run at the end of a join: calls update_attrd() with no
 * attribute arguments, then crm_update_quorum() with the current quorum
 * state.
 */
void
do_dc_join_final(long long action, enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");

    update_attrd(NULL, NULL, NULL, NULL, FALSE);
    crm_update_quorum(crm_have_quorum, TRUE);
}
int crmd_join_phase_count(enum crm_join_phase phase)
{
int count = 0;
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if(peer->join == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
const char *state = "unknown";
switch(peer->join) {
case crm_join_nack:
state = "nack";
break;
case crm_join_none:
state = "none";
break;
case crm_join_welcomed:
state = "welcomed";
break;
case crm_join_integrated:
state = "integrated";
break;
case crm_join_finalized:
state = "finalized";
break;
case crm_join_confirmed:
state = "confirmed";
break;
}
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, state);
}
}