#!/bin/bash
set -u
set -o pipefail
set -o errtrace
#
# Script to synchronise databases via a storage backend
#
# Options:
# f) configfile
# Optional argument and alternative way to pass configuration.
# Template can be found in govuk_env_sync/templates/govuk_env_sync_conf.erb
#
# a) action
# One of 'push', 'pull' or 's3_sync'. 'push' and 'pull' are relative to the
# storage backend (e.g. push to S3): push means dump/backup and upload, pull
# means download and restore. 's3_sync' copies objects between the
# source_bucket and destination_bucket set in the config file.
#
# D) dbms
# Database management system / data source (one of: mongo, documentdb,
# elasticsearch, postgresql, mysql, files)
# This is used to construct the names of the functions called, e.g. dump_mongo
# If dbms is elasticsearch, storagebackend must be elasticsearch.
#
# S) storagebackend
# Storage backend (one of: s3, elasticsearch)
# This is used to construct the names of the functions called, e.g. push_s3
# If storagebackend is elasticsearch, dbms must be elasticsearch.
#
# T) temppath
# Path to create temporary directory in. Directory will be created if
# sufficient rights are granted to the govuk-backup user.
#
# d) database
# Name of the database to be copied/synced. If dbms is "files", this is the path
# to the directory to copy/sync; if dbms is "elasticsearch", this is the hostname
# of the domain.
#
# H) database_hostname
# Hostname of the database server. This is required for PostgreSQL/MySQL.
#
# u) url
# URL of the storage backend: the bucket name in the case of S3, or the snapshot
# repository name in the case of elasticsearch.
#
# p) path
# Path to use on the storage backend (the key prefix in the case of S3).
#
# s) transformation_sql_file
# Optional path to a file containing additional SQL statements to
# run within the transaction when restoring a Postgres or MySQL
# database, after the data has been inserted. Intended for data
# scrubbing / anonymisation when restoring to the Integration
# environment.
#
# F) pre_dump_transformation_sql_file
# Optional path to a file containing additional SQL statements to run
# before dumping a MySQL database. Intended for data scrubbing /
# anonymisation when dumping the Whitehall database from Staging. Unlike
# for Postgres, the script is *not* wrapped in a transaction.
#
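# e) excluded_tables
# Optional comma-separated list of tables to skip when dumping a MySQL
# database (see dump_mysql and the illustrative expansion further down); may
# also be set as the excluded_tables variable in the config file.
#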
# t) timestamp
# Optionally provide a specific timestamp to restore.
#
#
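# Example invocations (illustrative only: the database name, hostname, bucket,
# prefix and temp path below are hypothetical placeholders, not real
# configuration):
#
#   govuk_env_sync.sh -a push -D mysql -S s3 -T /tmp/env_sync \
#     -d example_production -H mysql-primary \
#     -u example-database-backups -p example-mysql
#
#   govuk_env_sync.sh -a pull -D mysql -S s3 -T /tmp/env_sync \
#     -d example_production -H mysql-primary \
#     -u example-database-backups -p example-mysql \
#     -t 2021-01-01T03:00:01
#
# A config file passed with -f is sourced by this script, so it is a plain
# shell fragment assigning the same variables, e.g. (hypothetical values):
#
#   action='push'
#   dbms='mysql'
#   storagebackend='s3'
#   temppath='/tmp/env_sync'
#   database='example_production'
#   database_hostname='mysql-primary'
#   url='example-database-backups'
#   path='example-mysql'
#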
# Store provided arguments for debugging (error log) output.
#
args=("$@")
# Get local ip addr, avoiding using puppet templating of this script.
ip_address=$(ip addr show dev eth0 | grep -Eo 'inet ?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*')
# LOCAL_DOMAIN as defined in /etc/govuk_env_sync/env.d
# No, it is not a typo
# shellcheck disable=SC2153
local_domain="${LOCAL_DOMAIN}"
ORIGINAL_DOMAIN="publishing.service.gov.uk"
# Don't spawn `less`, even when there is a tty. Avoids one-off runs getting
# stuck waiting on user input.
PSQL='psql -P pager=off'
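# Illustrative use of log (the message text is a made-up example); the second
# argument is the syslog priority and defaults to user.info:
#   log "Restore completed" "user.notice"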
function log {
echo -ne "$(date +%Y-%m-%dT%H:%M:%S): $1\\n" | tee --append "/var/log/govuk_env_sync/govuk_env_sync.log"
logger --priority "${2:-"user.info"}" --tag "$(basename "$0")" "$1"
}
function filter_pg_stderr {
# pg_restore produces spurious warnings relating to plugins and the postgres user, which are not accessible on RDS instances.
# This function filters out the known errors and exits with status 1 if it encounters a different error.
#
# This reads the postgres stderr errors identified by the "Command was:" string into an array
IFS="#" read -r -a pg_errors <<< "$( echo "${pg_stderr}" | grep -B 1 "Command was:" | tr -d "\\n" | sed s/--/#/g)"
if ! [ -z "${pg_stderr:-}" ]; then
for pg_error in "${pg_errors[@]}"; do
# The removal of the newlines in the grep output above leaves empty array elements; filter those out
if [ "${pg_error}" != "" ]; then
# Calculate the checksum rather than rely on exact replication of the error string in this script
# If you need to add more error messages to be ignored, run something like the line below and add the hash to the cases.
# pg_restore ... | grep -B 1 "Command was: <COMMAND CAUSING SPURIOUS ERROR>" | tr -d "\n" | sha256sum | awk '{ print $1 }'
case $(echo "${pg_error}" | sha256sum | awk '{ print $1 }') in
a1d79e0711e23f137373425d704b68116005439c59502dac4d68a616bec9ef46);;
b863dfa39e930334d9163a9a3e4269b18bfd363cf25e894e0b35d512d98581c4);;
a5b0047fcfeb0e0a57fd8750ebc28808f4ed407bae59c4644e8c8614b5d3a079);;
a6028cd4e6e01ccda0bb415e2a66b7a2ca5cef8c469ca0ef4b3de0bdb954dde1);;
a24d3736ce830147868c0cea77f0935dc5d7b8137ac16396a63ad96b44b16520);;
54df370bdbba3aa3badc2b0841b106267ec38c69d89eb600aeee6aa391369571);;
178ca929da5a769f1d9d7af5466866db23fd6f9b632cf907594824cb022a943b);;
*)
log "Error running \"$0 ${args[*]:-''}\" in function ${FUNCNAME[1]} on line $1 executing \"${BASH_COMMAND}\", error hash: $(echo "${pg_error}" | sha256sum | awk '{ print $1 }')" "user.err"
log "${pg_error}" "user.err"
exit 1
;;
esac
fi
done
# Empty pg_stderr to route following errors through the standard error handler
unset pg_stderr
fi
}
function report_error {
if [ -n "${pg_stderr:-""}" ]; then
# Ignore spurious warnings of PG (see above for more detail)
filter_pg_stderr "$@"
else
log "Error running \"$0 ${args[*]:-''}\" in function ${FUNCNAME[1]} on line $1 executing \"${BASH_COMMAND}\"" "user.err"
exit 1
fi
}
function nagios_passive {
# We need to map the monitored services to the configuration files/govuk_env_sync::tasks
if [ -n "${configfile:-""}" ]; then
local nagios_service_description
nagios_service_description="GOV.UK environment sync $(basename "${configfile%.cfg}")"
local max_retries=4
local retries_count=0
local send_failed=""
while [ $retries_count -lt $max_retries ]; do
printf "%s\\t%s\\t%s\\t%s\\n" "${ip_address}" "${nagios_service_description}" "${nagios_code}" "${nagios_message}" | /usr/sbin/send_nsca -H alert.cluster >/dev/null || send_failed=$?
if [ -z "${send_failed:-}" ]; then
break
fi
log "nagios_passive failed: $send_failed"
retries_count=$((retries_count+1))
sleep 20
done
fi
# If arguments are provided manually, do not report to nagios/icinga
}
# Trap all errors and log them
#
trap 'report_error $LINENO' ERR
function create_timestamp {
timestamp="$(date +%Y-%m-%dT%H:%M:%S)"
}
function create_tempdir {
mkdir -p "${temppath}" || { echo "Could not access ${temppath}"; exit 1; }
tempdir="$(mktemp --directory -p "${temppath}")"
}
function remove_tempdir {
if [ ! -z "${tempdir:-}" ]; then
rm -rf "${tempdir}"
fi
}
function on_exit {
remove_tempdir
nagios_passive
}
trap on_exit EXIT
function set_filename {
filename="${timestamp}-${database}.gz"
}
function is_writable_mongo {
mongo --quiet --eval "print(db.isMaster()[\"ismaster\"]);" "localhost/$database"
}
function normalize_documentdb_database_name {
echo "${database//-/_}" | awk '{ print toupper($0) }'
}
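# The normalisation above turns a hypothetical database name such as
# "licence-finder" into "LICENCE_FINDER"; setup_documentdb_credentials below
# uses that to look up the LICENCE_FINDER_DOCUMENTDB_HOST and
# LICENCE_FINDER_DOCUMENTDB_PASSWD environment variables.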
function setup_documentdb_credentials {
database_normalized=$(normalize_documentdb_database_name)
local documentdb_host_env_var_name="${database_normalized}_DOCUMENTDB_HOST"
DOCUMENTDB_HOST="${!documentdb_host_env_var_name}"
local documentdb_passwd_env_var_name="${database_normalized}_DOCUMENTDB_PASSWD"
DOCUMENTDB_PASSWD="${!documentdb_passwd_env_var_name}"
}
function is_writable_documentdb {
setup_documentdb_credentials
database_normalized=$(normalize_documentdb_database_name)
mongo --quiet \
--host "${DOCUMENTDB_HOST}" \
--username "master" \
--password "${DOCUMENTDB_PASSWD}" \
"$database" \
--eval "print(db.isMaster()[\"ismaster\"]);"
}
function is_writable_elasticsearch {
echo "true"
}
function is_writable_postgresql {
# db-admin is always writable
echo "true"
}
function is_writable_mysql {
# db-admin is always writable
echo "true"
}
function dump_mongo {
readarray -t collections < \
<(mongo --quiet --eval 'rs.slaveOk(); printjson(db.getCollectionNames());' "localhost/$database" | jq -r '.[]')
for collection in "${collections[@]}"; do
mongodump \
--db "${database}" \
--collection "${collection}" \
--out "${tempdir}"
done
cd "${tempdir}" || exit 1
tar --create --gzip --force-local --file "${filename}" "${database}"
}
function restore_mongo {
cd "${tempdir}" || exit 1
tar --extract --gzip --force-local --file "${filename}"
mongorestore --drop \
--db "${database}" \
"${tempdir}/${database}"
}
function dump_documentdb {
setup_documentdb_credentials
database_normalized=$(normalize_documentdb_database_name)
readarray -t collections < \
<(mongo --quiet \
--host "${DOCUMENTDB_HOST}" \
--username "master" \
--password "${DOCUMENTDB_PASSWD}" \
"$database" \
--eval 'rs.slaveOk(); printjson(db.getCollectionNames());' | jq -r '.[]')
for collection in "${collections[@]}"; do
mongodump \
--host "${DOCUMENTDB_HOST}" \
--username "master" \
--password "${DOCUMENTDB_PASSWD}" \
--db "${database}" \
--collection "${collection}" \
--out "${tempdir}"
done
cd "${tempdir}" || exit 1
tar --create --gzip --force-local --file "${filename}" "${database}"
}
function restore_documentdb {
cd "${tempdir}" || exit 1
tar --extract --gzip --force-local --file "${filename}"
setup_documentdb_credentials
database_normalized=$(normalize_documentdb_database_name)
for bson_file_path in ${tempdir}/${database}/*.bson; do
filename=$(basename -- "$bson_file_path")
collection_name="${filename%.*}"
if [ "${collection_name}" == "system.profile" ]; then
continue
fi
max_retries=5
retries_count=0
while [ $retries_count -lt $max_retries ]; do
mongorestore --drop \
--host "${DOCUMENTDB_HOST}" \
--username "master" \
--password "${DOCUMENTDB_PASSWD}" \
--db "${database}" \
--collection "${collection_name}" \
"${tempdir}/${database}/${collection_name}.bson" || restore_failed=$?
if [ -z "${restore_failed:-}" ]; then
break
fi
retries_count=$((retries_count+1))
sleep $((retries_count * 2))
done
done
}
function dump_files {
tar --create --gzip --force-local --file "${tempdir}/$filename" "${database}"
}
function restore_files {
mkdir -p "${database}"
cd "${database}" || exit 1
tar --extract --gzip --force-local --file "${tempdir}/${filename}"
}
function dump_elasticsearch {
snapshot_name="$(echo "$filename" | sed 's/.gz//' | tr "[:upper:]" "[:lower:]")"
# attempting to start multiple snapshots at once (which happens
# because this script runs on three machines at the same time)
# throws an error - so unconditionally ignore curl errors, but check
# that there is a snapshot being created.
/usr/bin/curl --connect-timeout 10 -sSf -XPUT "http://${database}/_snapshot/${url}/${snapshot_name}" || true
/usr/bin/curl "http://${database}/_snapshot/${url}/_all" | grep -q "IN_PROGRESS"
}
function restore_elasticsearch {
snapshot_name="${filename//.gz/}"
curl -XDELETE "http://${database}/_all"
/usr/bin/curl --connect-timeout 10 -sSf -XPOST "http://${database}/_snapshot/${url}/${snapshot_name}/_restore" || true
/bin/sleep 1
/usr/bin/curl --connect-timeout 10 -sSf -XGET "http://${database}/_cat/recovery" | grep -q "${snapshot_name}"
}
function dump_postgresql {
# shellcheck disable=SC2086
pg_server_version=$(sudo $PSQL -U aws_db_admin -h "${database_hostname}" --no-password postgres -c 'show server_version;' --no-align --tuples-only)
if [[ $pg_server_version =~ ^[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then
# We're using docker to run pg_dump because different versions of Postgres need different versions of pg_dump.
# For example, pg_dump for Postgres 9 doesn't work for Postgres 13 databases.
# Docker lets us run whichever version we want, without having to build packages for our Ubuntu version.
sudo docker run --rm --net=host \
-v "${tempdir}:/tmp/" \
-v "/root/.pgpass:/tmp/.pgpass" -e PGPASSFILE=/tmp/.pgpass \
"postgres:$pg_server_version" \
pg_dump -U aws_db_admin -h "${database_hostname}" --no-password -F c "${database}" -f "/tmp/${filename}"
else
echo "$pg_server_version doesn't look like a proper Postgresql version"
exit 1
fi
}
function output_restore_sql {
PG_RESTORE_VERSION='' # this gets set in the case statement below
DUMPFILE_VERSION=$(file "${dumpfile}" | awk '{print $NF}')
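# `file` describes a custom-format dump with a trailing format version, e.g.
# "PostgreSQL custom database dump - v1.13-0" (illustrative output), so the
# awk above keeps the final field, which is matched in the case statement below.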
case $DUMPFILE_VERSION in
v1.13-0)
PG_RESTORE_VERSION='9.6.22'
;;
v1.14-0)
PG_RESTORE_VERSION='13.4'
;;
*)
>&2 echo "${DUMPFILE_VERSION} is not a supported dump file version"
exit 1
;;
esac
sudo docker run --rm --net=host \
-v "${tempdir}:/tmp/" \
-v "/root/.pgpass:/tmp/.pgpass" \
-e PGPASSFILE=/tmp/.pgpass \
"postgres:$PG_RESTORE_VERSION" \
pg_restore -j 2 -f "/tmp/sed_pipe" "/tmp/${filename}" &
sed -r "${sed_cmds}" < "${tempdir}/sed_pipe" > "${tempdir}/output_pipe"
if [ "${transformation_sql_file:-}" ]; then
# pg_dump/pg_restore sets search_path to ''. Reset it to the default so
# that the transform script doesn't need to prefix table names with
# 'public.'. The string "$user" is intentionally output verbatim.
# shellcheck disable=SC2016
echo 'SET search_path="$user",public;' >> "${tempdir}/output_pipe"
cat "${transformation_sql_file}" >> "${tempdir}/output_pipe"
fi
}
# Translate the binary dump file into text (SQL DDL/DML), filter out extension
# comments (which would cause the restore to fail), fix up references to the
# `postgres` user (which differs between actual Postgres and RDS), then pipe
# the output into psql to do the actual restore.
#
# If transformation_sql_file (from config) is non-empty then the content of
# that file is appended to the data which is sent to psql.
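#
# Sketch of the data flow through the named pipes created in restore_postgresql
# (a summary of the code below, not additional behaviour):
#
#   pg_restore (in docker) -> /tmp/sed_pipe -> sed (filters/fix-ups)
#     -> output_pipe (+ optional transformation_sql_file) -> psql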
function filtered_postgresql_restore {
dumpfile="${tempdir}/${filename}"
sed_cmds='/^COMMENT ON EXTENSION/d'
sed_cmds+='; s/(SCHEMA public (TO|FROM)) postgres/\1 aws_db_admin/g'
local single_transaction='-1'
if [ "${database}" == 'ckan_production' ]; then
single_transaction=''
fi
output_restore_sql &
# shellcheck disable=SC2086
sudo $PSQL -U aws_db_admin -h "${database_hostname}" "${single_transaction}" \
--no-password -d "${database}" -f "${tempdir}/output_pipe" 2>&1
}
function restore_postgresql {
# Drop the target database if it already exists.
DB_OWNER=''
# shellcheck disable=SC2086
if sudo $PSQL -U aws_db_admin -h "${database_hostname}" --no-password --list --quiet --tuples-only | awk '{print $1}' | grep -v "|" | grep -qw "${database}"; then
log "Database ${database} exists, we will drop it before continuing"
log "Disconnect existing connections to database"
# shellcheck disable=SC2086
sudo $PSQL -U aws_db_admin -h "${database_hostname}" -c "ALTER DATABASE \"${database}\" CONNECTION LIMIT 0;" postgres
# shellcheck disable=SC2086
sudo $PSQL -U aws_db_admin -h "${database_hostname}" -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${database}';" postgres
# shellcheck disable=SC2086
DB_OWNER=$(sudo $PSQL -U aws_db_admin -h "${database_hostname}" --no-password --list --quiet --tuples-only | awk '{print $1 " " $3}'| grep -v "|" | grep -w "${database}" | awk '{print $2}')
sudo dropdb -U aws_db_admin -h "${database_hostname}" --no-password "${database}"
fi
sudo createdb -U aws_db_admin -h "${database_hostname}" --no-password "${database}"
mkfifo "${tempdir}/sed_pipe"
mkfifo "${tempdir}/output_pipe"
pg_stderr=$(filtered_postgresql_restore)
if [ "$DB_OWNER" != '' ] ; then
# shellcheck disable=SC2086
echo "GRANT ALL ON DATABASE \"$database\" TO \"$DB_OWNER\"" | sudo $PSQL -U aws_db_admin -h "${database_hostname}" --no-password "${database}"
# shellcheck disable=SC2086
echo "ALTER DATABASE \"$database\" OWNER TO \"$DB_OWNER\"" | sudo $PSQL -U aws_db_admin -h "${database_hostname}" --no-password "${database}"
fi
}
function dump_mysql {
# If the AWS_ACCESS_KEY_ID is unset, then we're running in
# AWS. Otherwise, we're running in Carrenza.
if [ -z "${AWS_ACCESS_KEY_ID:-}" ] ; then
DB_USER='aws_db_admin'
else
DB_USER='root'
fi
if [ "${pre_dump_transformation_sql_file:-}" ]; then
log "Running pre-dump SQL script ${pre_dump_transformation_sql_file}..."
# We can read the file without being root, but shellcheck doesn't know it.
# shellcheck disable=SC2024
sudo -H mysql "${database}" < "${pre_dump_transformation_sql_file}"
log "completed."
fi
# --single-transaction --quick is recommended for dumping large tables
# without holding locks for the duration of the dump.
# https://dev.mysql.com/doc/refman/5.6/en/mysqldump.html#option_mysqldump_single-transaction
log "Running mysqldump..."
if [ -z "${excluded_tables:-}" ] ; then
sudo -H mysqldump -u "$DB_USER" --single-transaction --quick "${database}" | gzip > "${tempdir}/${filename}"
else
log "excluded tables specified: ${excluded_tables}"
IFS=',' read -r -a excluded_tables_array <<< "${excluded_tables}"
ignored_tables_string=''
for table in "${excluded_tables_array[@]}" ; do
ignored_tables_string+=" --ignore-table=${database}.${table}"
done
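# Illustrative expansion (hypothetical table names): excluded_tables="sessions,versions"
# yields " --ignore-table=${database}.sessions --ignore-table=${database}.versions",
# which is appended to the mysqldump invocation below.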
# shellcheck disable=SC2086
sudo -H mysqldump -u "$DB_USER" --single-transaction --quick "${database}" ${ignored_tables_string} | gzip > "${tempdir}/${filename}"
fi
log "mysqldump completed."
}
function restore_mysql {
gunzip < "${tempdir}/${filename}" | sudo -H mysql -h "${database_hostname}" "${database}"
if [ "${transformation_sql_file:-}" ]; then
# shellcheck disable=SC2024
sudo -H mysql -h "${database_hostname}" "${database}" < "${transformation_sql_file}"
fi
}
function push_s3 {
log "Upload to s3://${url}/${path}/${filename}..."
aws s3 cp "${tempdir}/${filename}" "s3://${url}/${path}/${filename}" --sse AES256
log "completed."
}
function pull_s3 {
log "Download from s3://${url}/${path}/${filename}..."
aws s3 cp "s3://${url}/${path}/${filename}" "${tempdir}/${filename}" --sse AES256
log "completed."
}
function get_timestamp_s3 {
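# Dumps uploaded by push_s3 are named "<timestamp>-<database>.gz", e.g. a
# hypothetical "2021-01-01T03:00:01-example_production.gz", so the grep/tail
# below pick out the most recent timestamp for this database.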
timestamp="$(aws s3 ls "s3://${url}/${path}/" \
| grep "\\-${database}\." | tail -1 \
| grep -o '[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}')"
}
function push_elasticsearch {
# there is no file to push
true
}
function pull_elasticsearch {
# there is no file to pull
true
}
function get_timestamp_elasticsearch {
timestamp="$(/usr/bin/curl -XGET "http://${database}/_snapshot/${url}/_all" | \
/usr/bin/jq -r '.snapshots | .[] | .snapshot' | \
grep "\\-${database}" | \
sort | \
tail -1 | \
sed "s/-${database}$//")"
}
function postprocess_mysl_cmd_signon_production {
source_domain="${1}"
new_domain="${2}"
update_home_uri_query="UPDATE oauth_applications\
SET home_uri = REPLACE(home_uri, '${source_domain}', '${new_domain}')\
WHERE home_uri LIKE '%${source_domain}%'"
echo "${update_home_uri_query}" | sudo -H mysql -h "${database_hostname}" --database=signon_production
update_redirect_uri_query="UPDATE oauth_applications\
SET redirect_uri = REPLACE(redirect_uri, '${source_domain}', '${new_domain}')\
WHERE redirect_uri LIKE '%${source_domain}%'"
echo "${update_redirect_uri_query}" | sudo -H mysql -h "${database_hostname}" --database=signon_production
}
function postprocess_signon_production {
log "Starting the postprocessing for Signon..."
aws_environment="$(get_aws_environment)"
if [ "${aws_environment}" == "production" ] || [ "${aws_environment}" == "integration" ] || [ "${aws_environment}" == "" ] ; then
# For production, we don't want any processing as the URLs are already the originals
# For integration, we don't want any processing as integration has its own signon database which is not derived
# from production or staging
log "No postprocessing for Signon because we are in AWS Production, AWS Integration or Carrenza"
return
fi
postprocess_mysl_cmd_signon_production "publishing.service.gov.uk" "staging.publishing.service.gov.uk"
postprocess_mysl_cmd_signon_production "performance.service.gov.uk" "staging.performance.service.gov.uk"
postprocess_mysl_cmd_signon_production "-production.cloudapps.digital" "-staging.cloudapps.digital"
postprocess_mysl_cmd_signon_production "-production.london.cloudapps.digital" "-staging.london.cloudapps.digital"
postprocess_mysl_cmd_signon_production "admin.forms.service.gov.uk" "admin.staging.forms.service.gov.uk"
log "Completed the postprocessing for Signon"
}
function get_aws_environment {
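# The facts file contains a single "key=value" line, e.g. (illustrative)
# "aws_environment=staging"; the cut below returns the part after the "=".
# In Carrenza the file does not exist, so an empty string is returned.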
aws_environment=""
if [ -e "/etc/facter/facts.d/aws_environment.txt" ]; then
aws_environment=$(cut -d'=' -f2 < /etc/facter/facts.d/aws_environment.txt)
fi
echo "${aws_environment}"
}
## function: mongo_backend_domain_manipulator
## Parameters:
## 1. backend_id for which the domain will be replaced
## 2. new domain to be applied for the given backend_id
## Dependencies:
## 1. external variables: database, local_domain, ORIGINAL_DOMAIN
## 2. external database: mongo
function mongo_backend_domain_manipulator {
if [ $# != 2 ]; then
echo "number of parameters must be 2 for mongo_backend_domain_manipulator: got $# parmeters"
exit 1
fi
log "starting mongo manipulation backend domain $1 manipulation..."
domain_to_replace="${local_domain}"
aws_environment="$(get_aws_environment)"
if [ "${aws_environment}" = "integration" ]; then
domain_to_replace="staging.${ORIGINAL_DOMAIN}"
else
domain_to_replace="${ORIGINAL_DOMAIN}"
fi
mongo --quiet --eval \
"db = db.getSiblingDB(\"${database}\"); \
db.backends.find( { \"backend_id\": \"$1\" } ).forEach( \
function(b) { b.backend_url = b.backend_url.replace(\".${domain_to_replace}\", \".$2\"); \
db.backends.save(b); } );"
echo "successful finished mongo manipulation backend domain $1 manipulation"
}
function postprocess_router {
static_domain=$(mongo --quiet --eval \
"db = db.getSiblingDB(\"${database}\"); \
db.backends.distinct( \"backend_url\", { \"backend_id\": \"static\" });" \
| sed s#https://##g | tr -d '/')
# router and draft-router hostnames differ - snip off up to first dot.
source_domain="${static_domain#*.}"
unmigrated_source_domain="${ORIGINAL_DOMAIN}"
aws_environment="$(get_aws_environment)"
if [ "${aws_environment}" = "integration" ] || [ "${aws_environment}" = "staging" ]; then
unmigrated_source_domain="${aws_environment}.${ORIGINAL_DOMAIN}"
fi
# local_domain comes from env.d/LOCAL_DOMAIN (see above).
mongo --quiet --eval \
"db = db.getSiblingDB(\"${database}\"); \
db.backends.find().forEach( \
function(b) { b.backend_url = b.backend_url.replace(\".${source_domain}\", \".${local_domain}\"); \
db.backends.save(b); } ); "
# licensify has been migrated in only integration and staging so far,
# remove this once production is migrated too.
if [ "${aws_environment}" == "integration" ] || [ "${aws_environment}" == "staging" ]; then
licensify_domain="${local_domain}"
fi
mongo_backend_domain_manipulator "licensify" "${licensify_domain}"
# whitehall has been migrated in only integration and staging so far
if [ "${aws_environment}" == "integration" ] || [ "${aws_environment}" == "staging" ]; then
whitehall_domain="${local_domain}"
else
whitehall_domain="${unmigrated_source_domain}"
fi
mongo_backend_domain_manipulator "whitehall-frontend" "${whitehall_domain}"
mongo_backend_domain_manipulator "whitehall" "${whitehall_domain}"
# spotlight has been migrated in only integration so far
if [ "${aws_environment}" == "integration" ]; then
spotlight_proxy_domain="${local_domain}"
else
spotlight_proxy_domain="${unmigrated_source_domain}"
fi
mongo_backend_domain_manipulator "spotlight-proxy" "${spotlight_proxy_domain}"
}
function postprocess_govuk_assets_production {
setup_documentdb_credentials
mongo \
--host "${DOCUMENTDB_HOST}" \
--username "master" \
--password "${DOCUMENTDB_PASSWD}" \
--quiet \
--eval \
"db = db.getSiblingDB(\"${database}\"); \
db.assets.find({ access_limited: { \$exists: true, \$nin: [[], false] }, legacy_url_path: { \$exists: true } }) \
.forEach(function(asset) { \
splitPath = asset.legacy_url_path.split('/'); \
splitPath[splitPath.length - 1] = 'redacted.pdf'; \
asset.legacy_url_path = splitPath.join('/'); \
db.assets.save(asset); \
});"
}
function postprocess_database {
case "${database}" in
router) postprocess_router;;
# re-using postprocess_router below is not a typo - the script checks $database to determine where to apply changes.
draft_router) postprocess_router;;
signon_production) postprocess_signon_production;;
govuk_assets_production) postprocess_govuk_assets_production;;
*) log "No post processing needed for ${database}" ;;
esac
}
function s3_sync {
if "${delete}"; then
aws s3 sync --acl bucket-owner-full-control --delete --only-show-errors s3://"${source_bucket}" s3://"${destination_bucket}"
else
aws s3 sync --acl bucket-owner-full-control --only-show-errors s3://"${source_bucket}" s3://"${destination_bucket}"
fi
}
usage() {
printf "Usage: %s [-f configfile | -a action -D DBMS -S storagebackend -T temppath -d db_name -H db_hostname -u storage_url -p storage_path] [-t timestamp_to_restore]\\n" "$(basename "$0")"
exit 0
}
while getopts "f:a:D:S:T:d:H:u:p:s:F:t:h" opt
do
case "$opt" in
f) configfile="$OPTARG";
# shellcheck disable=SC1090
source "${configfile}" ;;
a) action="$OPTARG" ;;
D) dbms="$OPTARG" ;;
S) storagebackend="$OPTARG" ;;
T) temppath="$OPTARG" ;;
d) database="$OPTARG" ;;
H) database_hostname="$OPTARG" ;;
u) url="$OPTARG" ;;
p) path="$OPTARG" ;;
s) transformation_sql_file="$OPTARG" ;;
F) pre_dump_transformation_sql_file="$OPTARG" ;;
e) excluded_tables="$OPTARG" ;;
t) timestamp="$OPTARG" ;;
*) usage ;;
esac
done
: "${action?"No action specified (pass -a option)"}"
if [ "${action}" == "s3_sync" ]; then
: "${source_bucket?"No source S3 bucket specified (set in config file)"}"
: "${destination_bucket?"No destination S3 bucket specified (set in config file)"}"
else
: "${dbms?"No DBMS specified (pass -D option)"}"
: "${storagebackend?"No storagebackend specified (pass -S option)"}"
: "${temppath?"No temppath specified (pass -T option)"}"
: "${database?"No database name specified (pass -d option)"}"
: "${url?"No storage url specified (pass -u option)"}"
: "${path?"No storage path specified (pass -p option)"}"
if [ -z "${database_hostname:-}" ] && ([ "$dbms" == "mysql" ] || [ "$dbms" == "postgresql" ]); then
echo "$dbms usage requires a database hostname argument"
exit 1
fi
if [[ "$dbms" == "elasticsearch" ]] && [[ "$storagebackend" != "elasticsearch" ]]; then
echo "$dbms is only compatible with the elasticsearch storage backend"
exit 1
fi
if [[ "$storagebackend" == "elasticsearch" ]] && [[ "$dbms" != "elasticsearch" ]]; then
echo "$dbms is not compatible with the $storagebackend storage backend"
exit 1
fi
fi
# Let syslog know we are here
log "Starting \"$0 ${args[*]:-''}\""
# Setting default nagios response to failed
if [ "${action}" == "s3_sync" ]; then
nagios_message="CRITICAL: govuk_env_sync.sh ${action} of ${destination_bucket} from ${source_bucket}"
else
nagios_message="CRITICAL: govuk_env_sync.sh ${action} ${database}: ${storagebackend}://${url}/${path}/ <-> $dbms"
fi
nagios_code=2
case ${action} in
push)
if [ "${dbms}" == "mongo" ] && [ "$(is_writable_mongo)" != "true" ]; then
log "This machine is not a mongo master. Skipping."
else
create_tempdir
create_timestamp
set_filename
"dump_${dbms}"
"push_${storagebackend}"
remove_tempdir
fi
;;
pull)
if [ "$("is_writable_${dbms}")" != "true" ]; then
log "${dbms} is not writeable. Skipping."
else
create_tempdir
"get_timestamp_${storagebackend}"
set_filename
"pull_${storagebackend}"
"restore_${dbms}"
remove_tempdir
postprocess_database
fi
;;
s3_sync)
s3_sync
;;
esac
# The script arrived here without a detour through report_error/exit
if [ "${action}" == "s3_sync" ]; then
nagios_message="OK: govuk_env_sync.sh ${action} of ${destination_bucket} from ${source_bucket}"
else
nagios_message="OK: govuk_env_sync.sh ${action} ${database}: ${storagebackend}://${url}/${path}/ <-> $dbms"
fi
nagios_code=0
log "Completed \"$0 ${args[*]:-''}\""