-
Notifications
You must be signed in to change notification settings - Fork 112
/
fsm_transition.c
1520 lines (1329 loc) · 43.2 KB
/
fsm_transition.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* src/bin/pg_autoctl/fsm_transition.c
* Implementation of transitions in the keeper state machine
*
* To move from a current state to a goal state, the pg_autoctl state machine
* will call the functions defined in this file, which are referenced from
* fsm.c
*
* Every transition must be idempotent such that it can safely be repeated
* until it succeeds.
*
* As the keeper could fail or be interrupted in-flight, it's important that
* every transition can be tried again (is idempotent). When interrupted (by
* a bug or a signal, user interrupt or system reboot), the current and
* assigned roles have not changed and on the next keeper's start the FSM
 * will kick in and call the transition that failed again. The transition might
* have successfully implemented the first parts of its duties... and we must
* not fail because of that. Idempotency is achieved by only calling
* idempotent subroutines or checking whether the goal of the subroutine
* (e.g. "postgres is promoted") has been achieved already.
*
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the PostgreSQL License.
*
*/
#include <inttypes.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "defaults.h"
#include "env_utils.h"
#include "pgctl.h"
#include "fsm.h"
#include "keeper.h"
#include "keeper_pg_init.h"
#include "log.h"
#include "monitor.h"
#include "pghba.h"
#include "primary_standby.h"
#include "state.h"
static bool fsm_init_standby_from_upstream(Keeper *keeper);
/*
 * fsm_init_primary initializes the postgres server as primary.
 *
 * This function actually covers the transition from INIT to SINGLE.
 *
 *    pg_ctl initdb (if necessary)
 * && create database + create extension (if necessary)
 * && start_postgres
 * && promote_standby (if applicable)
 * && add_default_settings
 * && create_monitor_user
 * && create_replication_user
 */
bool
fsm_init_primary(Keeper *keeper)
{
	KeeperConfig *config = &(keeper->config);
	LocalPostgresServer *postgres = &(keeper->postgres);
	PGSQL *pgsql = &(postgres->sqlClient);
	bool inRecovery = false;

	KeeperStateInit *initState = &(keeper->initState);
	PostgresSetup *pgSetup = &(postgres->postgresSetup);
	bool postgresInstanceExists = pg_setup_pgdata_exists(pgSetup);

	log_info("Initialising postgres as a primary");

	/*
	 * When initializing the local node on-top of an empty (or non-existing)
	 * PGDATA directory, now is the time to `pg_ctl initdb`.
	 */
	if (!keeper_init_state_read(initState, config->pathnames.init))
	{
		log_error("Failed to read init state file \"%s\", which is required "
				  "for the transition from INIT to SINGLE.",
				  config->pathnames.init);
		return false;
	}

	/*
	 * When initState is PRE_INIT_STATE_RUNNING, double check that Postgres is
	 * still running. After all the end-user could just stop Postgres and then
	 * give the install to us. We ought to support that.
	 */
	if (initState->pgInitState >= PRE_INIT_STATE_RUNNING)
	{
		if (!keeper_init_state_discover(initState,
										pgSetup,
										keeper->config.pathnames.init))
		{
			/* errors have already been logged */
			return false;
		}

		/* did the user try again after having stopped Postgres maybe? */
		if (initState->pgInitState < PRE_INIT_STATE_RUNNING)
		{
			log_info("PostgreSQL state has changed since registration time: %s",
					 PreInitPostgreInstanceStateToString(initState->pgInitState));
		}
	}

	/* "ours" means: empty or non-running PGDATA at registration time */
	bool pgInstanceIsOurs =
		initState->pgInitState == PRE_INIT_STATE_EMPTY ||
		initState->pgInitState == PRE_INIT_STATE_EXISTS;

	if (initState->pgInitState == PRE_INIT_STATE_EMPTY &&
		!postgresInstanceExists)
	{
		Monitor *monitor = &(keeper->monitor);
		PostgresSetup newPgSetup = { 0 };
		bool missingPgdataIsOk = false;
		bool postgresNotRunningIsOk = true;

		if (!pg_ctl_initdb(pgSetup->pg_ctl, pgSetup->pgdata))
		{
			log_fatal("Failed to initialize a PostgreSQL instance at \"%s\""
					  ", see above for details", pgSetup->pgdata);
			return false;
		}

		/* re-discover the setup now that initdb has created PGDATA */
		if (!pg_setup_init(&newPgSetup,
						   pgSetup,
						   missingPgdataIsOk,
						   postgresNotRunningIsOk))
		{
			/* errors have already been logged */
			log_error("pg_setup_wait_until_is_ready: pg_setup_init is false");
			return false;
		}

		*pgSetup = newPgSetup;

		/*
		 * We managed to initdb, refresh our configuration file location with
		 * the realpath(3) from pg_setup_update_config_with_absolute_pgdata:
		 * we might have been given a relative pathname.
		 */
		if (!keeper_config_update_with_absolute_pgdata(&(keeper->config)))
		{
			/* errors have already been logged */
			return false;
		}

		if (!config->monitorDisabled)
		{
			/*
			 * We have a new system_identifier, we need to publish it now.
			 */
			if (!monitor_set_node_system_identifier(
					monitor,
					keeper->state.current_node_id,
					pgSetup->control.system_identifier))
			{
				log_error("Failed to update the new node system_identifier");
				return false;
			}
		}
	}
	else if (initState->pgInitState >= PRE_INIT_STATE_RUNNING)
	{
		log_error("PostgreSQL is already running at \"%s\", refusing to "
				  "initialize a new cluster on-top of the current one.",
				  pgSetup->pgdata);

		return false;
	}

	/*
	 * When the PostgreSQL instance either did not exist, or did exist but was
	 * not running when creating the pg_autoctl node the first time, then we
	 * can restart the instance without fear of disturbing the service.
	 */
	if (pgInstanceIsOurs)
	{
		/* create the target database and install our extension there */
		if (!create_database_and_extension(keeper))
		{
			/* errors have already been logged */
			return false;
		}
	}

	/*
	 * Now is the time to make sure Postgres is running, as our next steps to
	 * prepare a SINGLE from INIT are depending on being able to connect to the
	 * local Postgres service.
	 */
	if (!ensure_postgres_service_is_running(postgres))
	{
		log_error("Failed to initialize postgres as primary because "
				  "starting postgres failed, see above for details");
		return false;
	}

	/*
	 * When dealing with a pg_autoctl create postgres command with a
	 * pre-existing PGDATA directory, make sure we can start the cluster
	 * without being in sync-rep already. The target state here is SINGLE
	 * after all.
	 */
	if (!fsm_disable_replication(keeper))
	{
		log_error("Failed to disable synchronous replication in order to "
				  "initialize as a primary, see above for details");
		return false;
	}

	/*
	 * FIXME: In the current FSM, I am not sure this can happen anymore. That
	 * said we might want to remain compatible with initializing a SINGLE from
	 * a pre-existing standby. I wonder why/how it would come to that though.
	 */
	if (pgsql_is_in_recovery(pgsql, &inRecovery) && inRecovery)
	{
		log_info("Initialising a postgres server in recovery mode as the primary, "
				 "promoting");

		if (!standby_promote(postgres))
		{
			log_error("Failed to initialize postgres as primary because promoting "
					  "postgres failed, see above for details");
			return false;
		}
	}

	/*
	 * We just created the local Postgres cluster, make sure it has our minimum
	 * configuration deployed.
	 *
	 * When --ssl-self-signed has been used, now is the time to build a
	 * self-signed certificate for the server. We place the certificate and
	 * private key in $PGDATA/server.key and $PGDATA/server.crt
	 */
	if (!keeper_create_self_signed_cert(keeper))
	{
		/* errors have already been logged */
		return false;
	}

	if (!postgres_add_default_settings(postgres, config->hostname))
	{
		log_error("Failed to initialize postgres as primary because "
				  "adding default settings failed, see above for details");
		return false;
	}

	/*
	 * Now add the role and HBA entries necessary for the monitor to run health
	 * checks on the local Postgres node.
	 */
	if (!config->monitorDisabled)
	{
		char monitorHostname[_POSIX_HOST_NAME_MAX];
		int monitorPort = 0;
		int connlimit = 1;

		if (!hostname_from_uri(config->monitor_pguri,
							   monitorHostname, _POSIX_HOST_NAME_MAX,
							   &monitorPort))
		{
			/* developer error, this should never happen */
			log_fatal("BUG: monitor_pguri should be validated before calling "
					  "fsm_init_primary");
			return false;
		}

		/*
		 * We need to add the monitor host:port in the HBA settings for the
		 * node to enable the health checks.
		 *
		 * Note that we forcibly use the authentication method "trust" for the
		 * pgautofailover_monitor user, which from the monitor also uses the
		 * hard-coded password PG_AUTOCTL_HEALTH_PASSWORD. The idea is to avoid
		 * leaking information from the passfile, environment variable, or
		 * other places.
		 */
		if (!primary_create_user_with_hba(postgres,
										  PG_AUTOCTL_HEALTH_USERNAME,
										  PG_AUTOCTL_HEALTH_PASSWORD,
										  monitorHostname,
										  "trust",
										  pgSetup->hbaLevel,
										  connlimit))
		{
			log_error(
				"Failed to initialise postgres as primary because "
				"creating the database user that the pg_auto_failover monitor "
				"uses for health checks failed, see above for details");
			return false;
		}
	}

	/*
	 * This node is intended to be used as a primary later in the setup, when
	 * we have a standby node to register, so prepare the replication user now.
	 */
	if (!primary_create_replication_user(postgres, PG_AUTOCTL_REPLICA_USERNAME,
										 config->replication_password))
	{
		log_error("Failed to initialize postgres as primary because creating the "
				  "replication user for the standby failed, see above for details");
		return false;
	}

	/*
	 * What remains to be done is either opening the HBA for a test setup, or
	 * when we are initializing pg_auto_failover on an existing PostgreSQL
	 * primary server instance, making sure that the parameters are all set.
	 */
	if (pgInstanceIsOurs)
	{
		if (env_found_empty("PG_REGRESS_SOCK_DIR"))
		{
			/*
			 * In test environments allow nodes from the same network to
			 * connect. The network is discovered automatically.
			 */
			if (!pghba_enable_lan_cidr(&keeper->postgres.sqlClient,
									   keeper->config.pgSetup.ssl.active,
									   HBA_DATABASE_ALL, NULL,
									   keeper->config.hostname,
									   NULL,
									   DEFAULT_AUTH_METHOD,
									   HBA_EDIT_MINIMAL,
									   NULL))
			{
				log_error("Failed to grant local network connections in HBA");
				return false;
			}
		}
	}
	else
	{
		/*
		 * As we are registering a previously existing PostgreSQL
		 * instance, we now check that our minimum configuration
		 * requirements for pg_auto_failover are in place. If not, tell
		 * the user they must restart PostgreSQL at their next
		 * maintenance window to fully enable pg_auto_failover.
		 */
		bool settings_are_ok = false;

		if (!check_postgresql_settings(&(keeper->postgres),
									   &settings_are_ok))
		{
			log_fatal("Failed to check local PostgreSQL settings "
					  "compliance with pg_auto_failover, "
					  "see above for details");
			return false;
		}
		else if (!settings_are_ok)
		{
			log_fatal("Current PostgreSQL settings are not compliant "
					  "with pg_auto_failover requirements, "
					  "please restart PostgreSQL at the next "
					  "opportunity to enable pg_auto_failover changes, "
					  "and redo `pg_autoctl create`");
			return false;
		}
	}

	/* and we're done with this connection. */
	pgsql_finish(pgsql);

	return true;
}
/*
 * fsm_disable_replication is used when the other node was forcibly removed,
 * and this node is now SINGLE.
 *
 *    disable_synchronous_replication
 * && keeper_create_and_drop_replication_slots
 *
 * Returns true on success, false on any failure (details are logged).
 *
 * TODO: We currently use a separate session for each step. We should use
 * a single connection.
 */
bool
fsm_disable_replication(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);

	if (!ensure_postgres_service_is_running(postgres))
	{
		/* errors have already been logged */
		return false;
	}

	if (!primary_disable_synchronous_replication(postgres))
	{
		log_error("Failed to disable replication because disabling synchronous "
				  "replication failed, see above for details");
		return false;
	}

	/*
	 * Cache invalidation in case we're doing WAIT_PRIMARY to SINGLE: clear
	 * the cached standby target LSN. Use standard memset(3) rather than the
	 * legacy bzero(3), which was removed from POSIX.1-2008.
	 */
	memset(postgres->standbyTargetLSN, 0, PG_LSN_MAXLENGTH);

	/* when a standby has been removed, remove its replication slot */
	return keeper_create_and_drop_replication_slots(keeper);
}
/*
 * fsm_resume_as_primary is used when the local node was demoted after a
 * failure, but the standby was forcibly removed.
 *
 *    start_postgres
 * && disable_synchronous_replication
 * && keeper_create_and_drop_replication_slots
 *
 * The implementation delegates to fsm_disable_replication(), which performs
 * exactly those steps, instead of duplicating the same code here. If this
 * transition ever needs to diverge from fsm_disable_replication(), the shared
 * steps would then have to be copied into their own code path.
 */
bool
fsm_resume_as_primary(Keeper *keeper)
{
	bool replicationDisabled = fsm_disable_replication(keeper);

	if (!replicationDisabled)
	{
		log_error("Failed to disable synchronous replication in order to "
				  "resume as a primary, see above for details");
	}

	return replicationDisabled;
}
/*
 * fsm_prepare_replication is used when a new standby was added.
 *
 *    add_standby_to_hba && create_replication_slot
 *
 * Those operations are now performed eagerly, ahead of time, rather than just
 * in time. Everything has already been taken care of by the time this
 * transition runs, so there is nothing left to do here.
 */
bool
fsm_prepare_replication(Keeper *keeper)
{
	/* intentionally a no-op; kept so the FSM transition table has an entry */
	return true;
}
/*
 * fsm_stop_replication is used to forcefully stop replication, in case the
 * primary is on the other side of a network split.
 *
 * We can't control whether clients are still sending writes to the PostgreSQL
 * primary instance. To avoid a split-brain situation, several measures are
 * taken:
 *
 * - default_transaction_read_only is set to 'on' on this server (a standby
 *   being promoted), so that it can't yet be the target of connection
 *   strings that require target_session_attrs=read-write
 *
 * - the replication stream is shut down (by promoting the replica, see
 *   fsm_promote_standby)
 *
 * - the primary server is left to realize it's alone on the network: it can
 *   reach neither the monitor (which triggered the failover) nor the standby
 *   (now absent from pg_stat_replication)
 *
 * When the keeper on the primary realizes it is alone in the dark, it goes
 * to DEMOTE state on its own and shuts down PostgreSQL, protecting against
 * split brain.
 */
bool
fsm_stop_replication(Keeper *keeper)
{
	PGSQL *sqlClient = &(keeper->postgres.sqlClient);

	log_info("Prevent writes to the promoted standby while the primary "
			 "is not demoted yet, by making the service incompatible with "
			 "target_session_attrs = read-write");

	if (!pgsql_set_default_transaction_mode_read_only(sqlClient))
	{
		log_error("Failed to switch to read-only mode");
		return false;
	}

	/* promoting the replica also shuts the replication stream down */
	return fsm_promote_standby(keeper);
}
/*
 * fsm_disable_sync_rep is used when the standby became unhealthy: stop
 * requiring synchronous replication on the local (primary) node.
 */
bool
fsm_disable_sync_rep(Keeper *keeper)
{
	return primary_disable_synchronous_replication(&(keeper->postgres));
}
/*
 * fsm_promote_standby_to_primary is used when the standby should become the
 * new primary. It also prepares for the old primary to become the new standby.
 *
 * The promotion of the standby already happened in a previous transition:
 *
 *  1.         secondary ➜ prepare_promotion : block writes
 *  2. prepare_promotion ➜  stop_replication : promote
 *  3.  stop_replication ➜      wait_primary : resume writes
 *
 * Writes are resumed by setting default_transaction_read_only back to off,
 * thus allowing libpq to establish connections when target_session_attrs is
 * read-write.
 */
bool
fsm_promote_standby_to_primary(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);
	PGSQL *sqlClient = &(postgres->sqlClient);

	/* step 3 above: resume writes on the local node */
	if (!pgsql_set_default_transaction_mode_read_write(sqlClient))
	{
		log_error("Failed to set default_transaction_read_only to off "
				  "which is needed to accept libpq connections with "
				  "target_session_attrs read-write");
		return false;
	}

	/* now is a good time to make sure we invalidate other nodes cache */
	bool forceCacheInvalidation = true;

	if (!keeper_refresh_other_nodes(keeper, forceCacheInvalidation))
	{
		log_error("Failed to update HBA rules after resuming writes");
		return false;
	}

	return true;
}
/*
 * fsm_enable_sync_rep is used when a healthy standby appeared.
 *
 * Applies the monitor's synchronous_standby_names setting locally, then, when
 * sync rep is actually in effect, waits until at least one standby has caught
 * up to a cached target LSN before reporting success.
 */
bool
fsm_enable_sync_rep(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);
	PostgresSetup *pgSetup = &(postgres->postgresSetup);
	PGSQL *pgsql = &(postgres->sqlClient);

	/*
	 * First, we need to fetch and apply the synchronous_standby_names setting
	 * value from the monitor...
	 */
	if (!fsm_apply_settings(keeper))
	{
		/* errors have already been logged */
		return false;
	}

	/*
	 * If we don't have any standby with replication-quorum true, then we don't
	 * actually enable sync rep here. In that case don't bother making sure the
	 * standbys have reached a meaningful LSN target before continuing.
	 */
	if (streq(postgres->synchronousStandbyNames, ""))
	{
		return true;
	}

	/*
	 * First time in that state: fetch the most recent metadata. The cached
	 * standbyTargetLSN survives repeated (idempotent) calls of this
	 * transition, so the target is only computed once.
	 */
	if (IS_EMPTY_STRING_BUFFER(postgres->standbyTargetLSN))
	{
		if (!pgsql_get_postgres_metadata(pgsql,
										 &pgSetup->is_in_recovery,
										 postgres->pgsrSyncState,
										 postgres->currentLSN,
										 &(postgres->postgresSetup.control)))
		{
			log_error("Failed to update the local Postgres metadata");
			return false;
		}

		/*
		 * Our standbyTargetLSN needs to be set once we have at least one
		 * standby that's known to participate in the synchronous replication
		 * quorum (pg_stat_replication sync_state "quorum" or "sync").
		 */
		if (!(streq(postgres->pgsrSyncState, "quorum") ||
			  streq(postgres->pgsrSyncState, "sync")))
		{
			/* it's an expected situation here, don't fill-up the logs */
			log_warn("Failed to set the standby Target LSN because we don't "
					 "have a quorum candidate yet");
			return false;
		}

		strlcpy(postgres->standbyTargetLSN,
				postgres->currentLSN,
				PG_LSN_MAXLENGTH);

		log_info("Waiting until standby node has caught-up to LSN %s",
				 postgres->standbyTargetLSN);
	}

	/*
	 * Now, we have set synchronous_standby_names and have one standby that's
	 * expected to be caught-up. Make sure that is the case by checking the LSN
	 * positions in much the same way as Postgres does when committing a
	 * transaction on the primary: get the current LSN, and wait until the
	 * reported LSN from the secondary has advanced past the current point.
	 */
	return primary_standby_has_caught_up(postgres);
}
/*
 * fsm_apply_settings is used when a pg_auto_failover setting has changed,
 * such as number_sync_standbys or node priorities and replication quorum
 * properties.
 *
 * We fetch the current synchronous_standby_names setting value from the
 * monitor (or default to '*' when running without a monitor) and apply it
 * (reload) to the current node.
 */
bool
fsm_apply_settings(Keeper *keeper)
{
	Monitor *monitor = &(keeper->monitor);
	KeeperConfig *config = &(keeper->config);
	LocalPostgresServer *postgres = &(keeper->postgres);

	if (config->monitorDisabled)
	{
		/* no monitor: use the generic value '*' */
		strlcpy(postgres->synchronousStandbyNames, "*",
				sizeof(postgres->synchronousStandbyNames));
	}
	else if (!monitor_synchronous_standby_names(
				 monitor,
				 config->formation,
				 keeper->state.current_group,
				 postgres->synchronousStandbyNames,
				 sizeof(postgres->synchronousStandbyNames)))
	{
		log_error("Failed to enable synchronous replication because "
				  "we failed to get the synchronous_standby_names value "
				  "from the monitor, see above for details");
		return false;
	}

	/* apply (reload) the setting on the local Postgres node */
	return primary_set_synchronous_standby_names(postgres);
}
/*
 * fsm_start_postgres is used when we detected a network partition, but the
 * monitor didn't do a failover: start the local Postgres instance again and
 * re-apply the monitor's replication settings.
 */
bool
fsm_start_postgres(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);

	if (!ensure_postgres_service_is_running(postgres))
	{
		log_error("Failed to promote postgres because the server could not "
				  "be started before promotion, see above for details");
		return false;
	}

	/* fetch synchronous_standby_names setting from the monitor */
	return fsm_apply_settings(keeper);
}
/*
 * fsm_stop_postgres is used when the local node was demoted and needs to be
 * dead now: make sure the local Postgres service is stopped.
 */
bool
fsm_stop_postgres(Keeper *keeper)
{
	return ensure_postgres_service_is_stopped(&(keeper->postgres));
}
/*
 * fsm_stop_postgres_for_primary_maintenance is used when pg_autoctl enable
 * maintenance has been used on the primary server. We issue a couple of
 * CHECKPOINTs before stopping Postgres to ensure a smooth transition.
 */
bool
fsm_stop_postgres_for_primary_maintenance(Keeper *keeper)
{
	/* same controlled shutdown sequence as any planned Postgres stop */
	return fsm_checkpoint_and_stop_postgres(keeper);
}
/*
 * fsm_stop_postgres_and_setup_standby is used when the primary is put to
 * maintenance. Not only do we stop Postgres, we also prepare a setup as a
 * secondary, so that the node rejoins as a standby after maintenance.
 */
bool
fsm_stop_postgres_and_setup_standby(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);
	ReplicationSource *upstream = &(postgres->replicationSource);
	PostgresSetup *pgSetup = &(postgres->postgresSetup);
	KeeperConfig *config = &(keeper->config);
	NodeAddress upstreamNode = { 0 };

	if (!ensure_postgres_service_is_stopped(postgres))
	{
		/* errors have already been logged */
		return false;
	}

	/* Move the Postgres controller out of the way */
	if (!local_postgres_unlink_status_file(postgres))
	{
		/* highly unexpected */
		log_error("Failed to remove our Postgres status file "
				  "see above for details");
		return false;
	}

	/*
	 * Prepare a standby setup. Note that upstreamNode is a zeroed-out
	 * placeholder here: the actual upstream is resolved later, when the node
	 * comes back from maintenance.
	 */
	if (!standby_init_replication_source(postgres,
										 &upstreamNode,
										 PG_AUTOCTL_REPLICA_USERNAME,
										 config->replication_password,
										 config->replication_slot_name,
										 config->maximum_backup_rate,
										 config->backupDirectory,
										 NULL, /* no targetLSN */
										 config->pgSetup.ssl,
										 keeper->state.current_node_id))
	{
		/* can't happen at the moment */
		return false;
	}

	/* make the Postgres setup for a standby node before reaching maintenance */
	if (!pg_setup_standby_mode(pgSetup->control.pg_control_version,
							   pgSetup->pgdata,
							   pgSetup->pg_ctl,
							   upstream))
	{
		log_error("Failed to setup Postgres as a standby to go to maintenance");
		return false;
	}

	return true;
}
/*
 * fsm_checkpoint_and_stop_postgres is used when shutting down Postgres as part
 * of some FSM step when we have a controlled situation. We issue a couple of
 * CHECKPOINTs before stopping Postgres to ensure a smooth transition.
 */
bool
fsm_checkpoint_and_stop_postgres(Keeper *keeper)
{
	LocalPostgresServer *postgres = &(keeper->postgres);
	PostgresSetup *pgSetup = &(postgres->postgresSetup);
	PGSQL *pgsql = &(postgres->sqlClient);

	if (pg_setup_is_running(pgSetup))
	{
		/*
		 * Starting with Postgres 12, pg_basebackup sets the recovery
		 * configuration parameters in the postgresql.auto.conf file. We need
		 * to make sure to RESET this value so that our own configuration
		 * setting takes effect.
		 */
		if (pgSetup->control.pg_control_version >= 1200)
		{
			if (!pgsql_reset_primary_conninfo(pgsql))
			{
				log_error("Failed to RESET primary_conninfo");
				return false;
			}
		}

		/*
		 * The PostgreSQL shutdown sequence includes a CHECKPOINT, that is
		 * issued by the checkpointer process once every query backend has
		 * stopped already. During this final CHECKPOINT no work can be done,
		 * so it's best to reduce the amount of work needed there. To reduce
		 * the checkpointer shutdown activity, we perform a manual CHECKPOINT
		 * while still having concurrent activity.
		 *
		 * The first checkpoint writes all the in-memory buffers, the second
		 * checkpoint writes everything that was added during the first one.
		 */
		log_info("Preparing Postgres shutdown: CHECKPOINT;");

		for (int i = 0; i < 2; i++)
		{
			/* a failed checkpoint only slows shutdown down, don't abort */
			if (!pgsql_checkpoint(pgsql))
			{
				log_warn("Failed to checkpoint before stopping Postgres");
			}
		}
	}

	log_info("Stopping Postgres at \"%s\"", pgSetup->pgdata);

	return ensure_postgres_service_is_stopped(postgres);
}
/*
 * fsm_init_standby_from_upstream is the work horse for both fsm_init_standby
 * and fsm_init_from_standby. The replication source must have been setup
 * already (see standby_init_replication_source in the callers).
 */
static bool
fsm_init_standby_from_upstream(Keeper *keeper)
{
	KeeperConfig *config = &(keeper->config);
	Monitor *monitor = &(keeper->monitor);
	LocalPostgresServer *postgres = &(keeper->postgres);

	/*
	 * At pg_autoctl create time when PGDATA already exists and we were
	 * successful in registering the node, then we can proceed without a
	 * pg_basebackup: we already have a copy of PGDATA on-disk.
	 *
	 * The existence of PGDATA at pg_autoctl create time is tracked in our init
	 * state as the PRE_INIT_STATE_EXISTS enum value. Once init is finished, we
	 * remove our init file: then we need to pg_basebackup again to init a
	 * standby.
	 */
	bool skipBaseBackup = file_exists(keeper->config.pathnames.init) &&
						  keeper->initState.pgInitState == PRE_INIT_STATE_EXISTS;

	if (!standby_init_database(postgres, config->hostname, skipBaseBackup))
	{
		log_error("Failed to initialize standby server, see above for details");
		return false;
	}

	if (!skipBaseBackup)
	{
		bool forceCacheInvalidation = true;

		/* write our own HBA rules, pg_basebackup copies pg_hba.conf too */
		if (!keeper_refresh_other_nodes(keeper, forceCacheInvalidation))
		{
			log_error("Failed to update HBA rules after a base backup");
			return false;
		}
	}

	/*
	 * Publish our possibly new system_identifier now.
	 */
	if (!config->monitorDisabled)
	{
		if (!monitor_set_node_system_identifier(
				monitor,
				keeper->state.current_node_id,
				postgres->postgresSetup.control.system_identifier))
		{
			log_error("Failed to update the new node system_identifier");
			return false;
		}
	}

	/* ensure the SSL setup is synced with the keeper config */
	if (!keeper_create_self_signed_cert(keeper))
	{
		/* errors have already been logged */
		return false;
	}

	/* now, in case we have an init state file around, remove it */
	return unlink_file(config->pathnames.init);
}
/*
 * fsm_init_standby is used when the primary is now ready to accept a standby,
 * and we're the standby.
 */
bool
fsm_init_standby(Keeper *keeper)
{
	KeeperConfig *config = &(keeper->config);
	LocalPostgresServer *postgres = &(keeper->postgres);
	NodeAddress *primaryNode = NULL;

	/* get the primary node to follow */
	if (!keeper_get_primary(keeper, &(postgres->replicationSource.primaryNode)))
	{
		log_error("Failed to initialize standby for lack of a primary node, "
				  "see above for details");
		return false;
	}

	/*
	 * NOTE(review): primaryNode is deliberately NULL here; keeper_get_primary
	 * already filled postgres->replicationSource.primaryNode in place.
	 * Presumably standby_init_replication_source keeps the already-set
	 * primary node when its upstream argument is NULL — confirm against its
	 * definition in primary_standby.c.
	 */
	if (!standby_init_replication_source(postgres,
										 primaryNode,
										 PG_AUTOCTL_REPLICA_USERNAME,
										 config->replication_password,
										 config->replication_slot_name,
										 config->maximum_backup_rate,
										 config->backupDirectory,
										 NULL, /* no targetLSN */
										 config->pgSetup.ssl,
										 keeper->state.current_node_id))
	{
		/* can't happen at the moment */
		return false;
	}

	return fsm_init_standby_from_upstream(keeper);
}
/*
* fsm_rewind_or_init is used when a new primary is available. First, try to
* rewind. If that fails, do a pg_basebackup.
*/
bool
fsm_rewind_or_init(Keeper *keeper)
{
KeeperConfig *config = &(keeper->config);
LocalPostgresServer *postgres = &(keeper->postgres);
ReplicationSource *upstream = &(postgres->replicationSource);
NodeAddress *primaryNode = NULL;
/* get the primary node to follow */
if (!keeper_get_primary(keeper, &(postgres->replicationSource.primaryNode)))
{
log_error("Failed to initialize standby for lack of a primary node, "
"see above for details");
return false;
}
if (!standby_init_replication_source(postgres,
primaryNode,
PG_AUTOCTL_REPLICA_USERNAME,
config->replication_password,
config->replication_slot_name,
config->maximum_backup_rate,
config->backupDirectory,
NULL, /* no targetLSN */
config->pgSetup.ssl,
keeper->state.current_node_id))
{
/* can't happen at the moment */
return false;
}
/* first, make sure we can connect with "replication" */
if (!pgctl_identify_system(upstream))
{
log_error("Failed to connect to the primary node " NODE_FORMAT
"with a replication connection string. "
"See above for details",
upstream->primaryNode.nodeId,
upstream->primaryNode.name,
upstream->primaryNode.host,
upstream->primaryNode.port);
return false;
}
if (!primary_rewind_to_standby(postgres))
{