Change multiple exit points in archive_func to a simple try ... except block

Note that this changes the archiver's behavior: instead of ignoring hosts for which both the rsync of the archive file and the copy of it to the queue failed, it now exits on the first failure and lets PostgreSQL retry the whole operation (for every host) later. That is probably fine, because the former behavior could lose archive segments when archiving failed for one of the slaves but succeeded for subsequent ones; the new code is free from this defect.
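
The structure introduced here boils down to a single exception-based exit path: any failure raises a custom exception, and one handler at the end of archive_func turns it into a nonzero exit status so PostgreSQL keeps the segment and retries. Below is a minimal sketch of that shape only; ship_to_host() and queue_for_host() are hypothetical placeholders for the rsync and queue transfers the real script performs.

from sys import exit

class ArchiveFailure(Exception):
    """ Class to propagate archiving failures """
    pass

def ship_to_host(host):
    # placeholder for the rsync transfer to a slave; True on success
    return True

def queue_for_host(host):
    # placeholder for copying the segment into the local queue
    return True

def archive_func(slaves):
    try:
        for host in slaves:
            # if we can neither ship the segment nor queue it, give up
            if not ship_to_host(host) and not queue_for_host(host):
                raise ArchiveFailure
    except ArchiveFailure:
        exit(1)    # nonzero exit: PostgreSQL retries this segment later
    except Exception, e:
        print "ERROR: %s" % (e,)
        exit(2)
    else:
        exit(0)    # segment shipped or queued for every host

archive_func(['slave1', 'slave2'])

Compared with the old per-host exit_with bookkeeping, the first failure now aborts the whole run, so a segment is never reported as archived while one slave silently missed it.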
commit f72a36ed26cafcdfa1366c789b626d78021fa61c (1 parent: 537f1f0)
alexeyklyukin authored; soulhunter committed
Showing with 78 additions and 78 deletions.
  1. +78 −78 cmd_archiver
@@ -78,6 +78,10 @@ pgdata = config.defaults()['pgdata']
rsync_version = config.defaults()['rsync_version']
ssh_debug = get_conf(config, 'ssh_debug', 'off')
+class ArchiveFailure(Exception):
+ """ Class to propagate archiving failures """
+ pass
+
"""
If we are not online, exit immediately
"""
@@ -261,94 +265,90 @@ def archive_func():
"""
if debug == 'on':
print "NOTICE: archive_func()"
-
- # First we send the queue files (if any). If we can't we exit
- queue = send_queue_func()
- if queue:
- if debug == 'on':
- print "NOTICE: queue = " + str(queue)
- print "ERROR: Unable to send queued archived files, queueing"
- system("%s" % (str(notify_warning)))
- slaves = generate_slave_list_func()
- if debug == 'on':
- print "NOTICE: slaves = generate_slave_list_func() " + str(slaves)
- for host in slaves:
+ try:
+ # First we send the queue files (if any). If we can't we exit
+ queue = send_queue_func()
+ if queue:
if debug == 'on':
- print "NOTICE: " + host + " in " + str(slaves)
+ print "NOTICE: queue = " + str(queue)
+ print "ERROR: Unable to send queued archived files, queueing"
+ system("%s" % (str(notify_warning)))
+ slaves = generate_slave_list_func()
+ if debug == 'on':
+ print "NOTICE: slaves = generate_slave_list_func() " + str(slaves)
+ for host in slaves:
+ if debug == 'on':
+ print "NOTICE: " + host + " in " + str(slaves)
- # If the host returned is in the list, we automatically
- # archive to the queue.
+ # If the host returned is in the list, we automatically
+ # archive to the queue.
- # Olek's fix
- #if host == queue:
- if host in queue:
- if debug == 'on':
- print "NOTICE: Saving archives to queue"
+ if host in queue:
+ if debug == 'on':
+ print "NOTICE: Saving archives to queue"
+ queue_dir = l_archivedir + "/" + str(host)
+ queue_transfer = """%s %s %s""" % (str(rsync_bin), str(archivefile), str(queue_dir))
+ retval = system(queue_transfer)
+ if retval:
+ system("%s %d" % (str(notify_critical), retval))
+ raise ArchiveFailure
+ else:
+ if debug == 'on':
+ print "NOTICE: Sending OK alert"
+ system("%s %d" % (str(notify_ok), retval))
+
+ # If the host returned is not in the list, we attempt to
+ # archive normally. If we can not, we archive to the queue. If we
+ # can not archive to the queue, we exit critical.
+
+ # You may end up with files out of order on the slave if the
+ # slave comes online after the queue check but before the current
+ # transfer. This is not a problem because pg_standby will only restore
+ # files in order, so on the next queue check the slave will receive
+ # the missing files and pg_standby will correctly restore them.
+ if debug == 'on':
+ print "NOTICE: Entering single file archive transfer"
+ for host in generate_slave_list_func():
+ if debug == 'on':
+ print "NOTICE: Archiving for: " + str(host)
+ if flush:
+ rsync_transfer = """%s %s %s/pg_xlog/* -e "ssh %s" %s@%s:%s""" % (str(rsync_bin), str(rsync_flags), str(pgdata), str(ssh_flags), str(user), str(host), str(r_archivedir))
+ flush_check_func()
+ check = check_pgpid_func()
+ if check == 0:
+ print "ERROR: Can not enter flush mode if PG is already running"
+ raise ArchiveFailure
+ else:
+ rsync_transfer = """%s %s -q -e "ssh %s" %s %s@%s:%s""" % (str(rsync_bin), str(rsync_flags), str(ssh_flags), str(archivefile), str(user), str(host), str(r_archivedir))
+ if debug == 'on':
+ print "NOTICE: Shipping archive to: " + str(host)
+ print "NOTICE: Using: " + rsync_transfer
+ retval = system("%s" % (rsync_transfer))
+ if retval:
+ print "NOTICE: no luck shipping archive"
queue_dir = l_archivedir + "/" + str(host)
queue_transfer = """%s %s %s""" % (str(rsync_bin), str(archivefile), str(queue_dir))
retval = system(queue_transfer)
if retval:
+ print "FATAL: Unabled to rsync_transfer or queue_transfer"
system("%s %d" % (str(notify_critical), retval))
- exit(1)
+ raise ArchiveFailure
else:
- if debug == 'on':
- print "NOTICE: Sending OK alert"
- system("%s %d" % (str(notify_ok), retval))
-
- # If the host returned is not in the list, we attempt to
- # archive normally. If we can not, we archive to the queue. If we
- # can not archive to the queue, we exit critical.
-
- # You may end up with files out of order on the slave if the
- # slave comes online after the queue check but before the current
- # transfer. This is not a problem because pg_standby will only restore
- # files in order, so on the next queue check the slave will receive
- # the missing files and pg_standby will correctly restore them.
-
- exit_with = None
-
- if debug == 'on':
- print "NOTICE: Entering single file archive transfer"
- for host in generate_slave_list_func():
- if debug == 'on':
- print "NOTICE: Archiving for: " + str(host)
- if flush:
- rsync_transfer = """%s %s %s/pg_xlog/* -e "ssh %s" %s@%s:%s""" % (str(rsync_bin), str(rsync_flags), str(pgdata), str(ssh_flags), str(user), str(host), str(r_archivedir))
- flush_check_func()
- check = check_pgpid_func()
- if check == 0:
- print "ERROR: Can not enter flush mode if PG is already running"
- exit(1)
- else:
- rsync_transfer = """%s %s -q -e "ssh %s" %s %s@%s:%s""" % (str(rsync_bin), str(rsync_flags), str(ssh_flags), str(archivefile), str(user), str(host), str(r_archivedir))
- if debug == 'on':
- print "NOTICE: Shipping archive to: " + str(host)
- print "NOTICE: Using: " + rsync_transfer
- retval = system("%s" % (rsync_transfer))
- if retval:
- # Olek's fix
- print "NOTICE: no luck shipping archive"
- queue_dir = l_archivedir + "/" + str(host)
- queue_transfer = """%s %s %s""" % (str(rsync_bin), str(archivefile), str(queue_dir))
- retval = system(queue_transfer)
- if retval:
- print "FATAL: Unabled to rsync_transfer or queue_transfer"
- system("%s %d" % (str(notify_critical), retval))
- # Olek's fix
- #exit(1)
- exit_with = 1
+ retval = system("%s %d" % (str(notify_warning), retval))
else:
- retval = system("%s %d" % (str(notify_warning), retval))
- # Olek's fix
- #exit(0)
- exit_with = 0
- else:
- if debug == 'on':
- print "NOTICE: Sending OK alert"
- system("%s %d" % (str(notify_ok), retval))
- # Olek's fix
- if exit_with:
- exit(exit_with)
+ if debug == 'on':
+ print "NOTICE: Sending OK alert"
+ system("%s %d" % (str(notify_ok), retval))
+ except ArchiveFailure:
+ # archiver process was unable to archive the wal segment
+ exit(1)
+ except Exception, e:
+ # generic exception
+ print "ERROR: %s" % (e,)
+ exit(2)
+ else:
+ # WAL segment successfully archived or queued
+ exit(0)
# set up our transfer commands
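
For context on the exit codes: cmd_archiver is run as PostgreSQL's archive_command, and PostgreSQL treats any nonzero exit status as a failed attempt, keeping the WAL segment and re-running the command later, while exit(0) tells it the segment is safely archived (or queued). The line below only illustrates how such a script is typically wired up; the -C/-F flags and path are assumptions, not taken from this repository's documentation.

# postgresql.conf (illustrative; check the script's own usage for the real options)
archive_command = 'cmd_archiver -C /path/to/cmd_archiver.ini -F %p'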