From 11d984585f60796ba2c0cc6f7c5fe9efde93bdf0 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Sat, 28 Apr 2018 16:51:26 +0200 Subject: [PATCH 01/13] chunked_device: fix inflight counter --- src/stored/backends/chunked_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stored/backends/chunked_device.c b/src/stored/backends/chunked_device.c index d9205ed5afa..4c56a22c572 100644 --- a/src/stored/backends/chunked_device.c +++ b/src/stored/backends/chunked_device.c @@ -301,7 +301,7 @@ void chunked_device::clear_inflight_chunk(chunk_io_request *request) } P(mutex); - m_inflight_chunks++; + m_inflight_chunks--; V(mutex); } From 5b6f33e394eb6b8de682ba185e5cb169cb4c6728 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Tue, 10 Apr 2018 18:54:24 +0200 Subject: [PATCH 02/13] bareos-storage-droplet: improve error handling The previous version did not always return an error if data could not be written. In particular, load_chunk ignored EIO errors, probably because of a typo. As the droplet_device in iothread mode relies on asynchronous write-backs, the new device method flush() has been introduced. If a droplet_device is configured to use iothreads and unlimited retries, this will do busy waiting until all data is written to the droplet backend. In case of connection problems to the droplet_device, this will wait forever. Note that a bconsole "status storage=..." command will inform about "Pending IO flush requests". 
Fixes #892: bareos-storage-droplet: if configured with unreachable S3 system, backup will terminate with OK --- src/stored/acquire.c | 20 ++++- src/stored/backends/Makefile.in | 10 +-- src/stored/backends/chunked_device.c | 116 +++++++++++++++++++++++++-- src/stored/backends/chunked_device.h | 11 +++ src/stored/backends/droplet_device.c | 96 +++++++++++++++++++++- src/stored/backends/droplet_device.h | 5 +- src/stored/dev.c | 2 +- src/stored/dev.h | 4 +- 8 files changed, 246 insertions(+), 18 deletions(-) diff --git a/src/stored/acquire.c b/src/stored/acquire.c index b0cb8a993bd..9e787a33643 100644 --- a/src/stored/acquire.c +++ b/src/stored/acquire.c @@ -3,7 +3,7 @@ Copyright (C) 2002-2013 Free Software Foundation Europe e.V. Copyright (C) 2011-2012 Planets Communications B.V. - Copyright (C) 2013-2013 Bareos GmbH & Co. KG + Copyright (C) 2013-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -496,6 +496,24 @@ bool release_device(DCR *dcr) now = (utime_t)time(NULL); update_job_statistics(jcr, now); + /* + * Some devices do cache write operations (e.g. droplet_device). + * Therefore flushing the cache is required to determine + * if a job have been written successfully. + * As a flush operation can take quite a long time, + * this must be done before acquiring locks. + * A previous implementation did the flush inside dev->close(), + * which resulted in various locking problems. 
+ */ + if (!job_canceled(jcr)) { + Jmsg(jcr, M_INFO, 0, "Flushing device %s.\n", dev->print_name()); + if (!dev->flush(dcr)) { + Jmsg(jcr, M_FATAL, 0, "Failed to flush device %s.\n", dev->print_name()); + } else { + Jmsg(jcr, M_INFO, 0, "Device %s flushed.\n", dev->print_name()); + } + } + dev->Lock(); if (!dev->is_blocked()) { block_device(dev, BST_RELEASING); diff --git a/src/stored/backends/Makefile.in b/src/stored/backends/Makefile.in index d714a0517bb..03d085c2f8c 100644 --- a/src/stored/backends/Makefile.in +++ b/src/stored/backends/Makefile.in @@ -80,27 +80,27 @@ STORED_RESTYPES = autochanger device director ndmp messages storage $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(DINCLUDE) $(CXXFLAGS) $< if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. $(CONF_EXTRA_DIR)/.; fi -$(CHEPHFS_LOBJS): +$(CHEPHFS_LOBJS): $(CHEPHFS_SRCS) @echo "Compiling $(@:.lo=.c)" $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(CEPHFS_INC) $(DINCLUDE) $(CXXFLAGS) $(@:.lo=.c) if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. $(CONF_EXTRA_DIR)/.; fi -$(DROPLET_LOBJS): +$(DROPLET_LOBJS): $(DROPLET_SRCS) @echo "Compiling $(@:.lo=.c)" $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(DROPLET_INC) $(DINCLUDE) $(CXXFLAGS) $(@:.lo=.c) if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. $(CONF_EXTRA_DIR)/.; fi -$(ELASTO_LOBJS): +$(ELASTO_LOBJS): $(ELASTO_SRCS) @echo "Compiling $(@:.lo=.c)" $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(ELASTO_INC) $(DINCLUDE) $(CXXFLAGS) $(@:.lo=.c) if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. 
$(CONF_EXTRA_DIR)/.; fi -$(GFAPI_LOBJS): +$(GFAPI_LOBJS): $(GFAPI_SRCS) @echo "Compiling $(@:.lo=.c)" $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(GLUSTER_INC) $(DINCLUDE) $(CXXFLAGS) $(@:.lo=.c) if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. $(CONF_EXTRA_DIR)/.; fi -$(RADOS_LOBJS): +$(RADOS_LOBJS): $(RADOS_SRCS) @echo "Compiling $(@:.lo=.c)" $(NO_ECHO)$(LIBTOOL_COMPILE) $(CXX) $(DEFS) $(DEBUG) -c $(WCFLAGS) $(CPPFLAGS) $(INCLUDES) $(RADOS_INC) $(DINCLUDE) $(CXXFLAGS) $(@:.lo=.c) if [ -d "$(@:.lo=.d)" ]; then $(MKDIR) $(CONF_EXTRA_DIR); $(CP) -r $(@:.lo=.d)/. $(CONF_EXTRA_DIR)/.; fi diff --git a/src/stored/backends/chunked_device.c b/src/stored/backends/chunked_device.c index 4c56a22c572..a2bc9baa1eb 100644 --- a/src/stored/backends/chunked_device.c +++ b/src/stored/backends/chunked_device.c @@ -2,6 +2,7 @@ BAREOS® - Backup Archiving REcovery Open Sourced Copyright (C) 2015-2017 Planets Communications B.V. + Copyright (C) 2017-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -71,7 +72,7 @@ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; * * flush_remote_chunk() - Flush a chunk to the remote backing store. * read_remote_chunk() - Read a chunk from the remote backing store. - * chunked_remote_volume_size - Return the current size of a volume. + * chunked_remote_volume_size() - Return the current size of a volume. * truncate_remote_chunked_volume() - Truncate a chunked volume on the * remote backing store. */ @@ -657,16 +658,26 @@ bool chunked_device::read_chunk() /* * Setup a chunked volume for reading or writing. 
+ * return: + * -1: failure + * 0: success */ int chunked_device::setup_chunk(const char *pathname, int flags, int mode) { + int retval = -1; /* * If device is (re)opened and we are put into readonly mode because * of problems flushing chunks to the backing store we return EROFS * to the upper layers. */ if ((flags & O_RDWR) && m_readonly) { - dev_errno = EROFS; + dev_errno = EROFS; /**< Read-only file system */ + return -1; + } + + if (!check_remote()) { + Dmsg0(100, "setup_chunk failed, as remote device is not available\n"); + dev_errno = EIO; /**< I/O error */ return -1; } @@ -698,7 +709,6 @@ int chunked_device::setup_chunk(const char *pathname, int flags, int mode) m_current_chunk->writing = true; } - m_current_chunk->opened = true; m_current_chunk->chunk_setup = false; /* @@ -727,7 +737,23 @@ int chunked_device::setup_chunk(const char *pathname, int flags, int mode) m_current_volname = bstrdup(getVolCatName()); - return 0; + /* + * in principle it is not required to load_chunk(), + * but we need a secure way to determine, + * if the chunk already exists. + */ + if (load_chunk()) { + m_current_chunk->opened = true; + retval = 0; + } else if (flags & O_CREAT) { + /* create a chunk */ + if (flush_chunk(false /* release */, false /* move_to_next_chunk */)) { + m_current_chunk->opened = true; + retval = 0; + } + } + + return retval; } /* @@ -1013,6 +1039,7 @@ int chunked_device::close_chunk() m_current_chunk->buflen = 0; m_current_chunk->start_offset = -1; m_current_chunk->end_offset = -1; + } else { errno = EBADF; } @@ -1151,6 +1178,62 @@ ssize_t chunked_device::chunked_volume_size() return chunked_remote_volume_size(); } + +bool chunked_device::is_written() +{ + /* + * See if we are using io-threads or not and the ordered circbuf is created. 
+ * We try to make sure that nothing of the volume being requested is still inflight as then + * the chunked_remote_volume_size() method will fail to determine the size of the data as + * its not fully stored on the backing store yet. + */ + + /* + * Make sure there is also nothing inflight to the backing store anymore. + */ + if (nr_inflight_chunks() > 0) { + Dmsg0(100, "is_written = false, as there are inflight chunks\n"); + return false; + } + + if (m_io_threads > 0 && m_cb) { + + if (!m_cb->empty()) { + + chunk_io_request *request; + + /* + * Peek on the ordered circular queue if there are any pending IO-requests + * for this volume. If there are use that as the indication of the size of + * the volume and don't contact the remote storage as there is still data + * inflight and as such we need to look at the last chunk that is still not + * uploaded of the volume. + */ + request = (chunk_io_request *)m_cb->peek(PEEK_FIRST, m_current_volname, compare_volume_name); + if (request) { + free(request); + Dmsg0(100, "is_written = false, as there are queued write requests\n"); + return false; + } + } + } + + return true; +} + + +/* + * Busy waits until write buffer is empty. 
+ */ +bool chunked_device::wait_until_chunks_written() +{ + while (!is_written()) { + bmicrosleep(DEFAULT_RECHECK_INTERVAL_WRITE_BUFFER, 0); + } + return true; +} + + static int clone_io_request(void *item1, void *item2) { chunk_io_request *src = (chunk_io_request *)item1; @@ -1271,6 +1354,7 @@ bool chunked_device::load_chunk() if (m_current_chunk->writing) { m_current_chunk->end_offset = start_offset + (m_current_chunk->chunk_size - 1); } + return false; break; default: return false; @@ -1290,7 +1374,7 @@ static int list_io_request(void *request, void *data) bsdDevStatTrig *dst = (bsdDevStatTrig *)data; POOL_MEM status(PM_MESSAGE); - status.bsprintf(" /%s/%04d - %ld\n", io_request->volname, io_request->chunk, io_request->wbuflen); + status.bsprintf(" /%s/%04d - %ld (try=%d)\n", io_request->volname, io_request->chunk, io_request->wbuflen, io_request->tries); dst->status_length = pm_strcat(dst->status, status.c_str()); return 0; @@ -1304,20 +1388,36 @@ bool chunked_device::device_status(bsdDevStatTrig *dst) /* * See if we are using io-threads or not and the ordered circbuf is created and not empty. */ + bool pending = false; + POOL_MEM inflights(PM_MESSAGE); + dst->status_length = 0; + if (check_remote()) { + dst->status_length = pm_strcpy(dst->status, _("Backend connection is working.\n")); + } else { + dst->status_length = pm_strcpy(dst->status, _("Backend connection is not working.\n")); + } if (m_io_threads > 0 && m_cb) { + if (nr_inflight_chunks() > 0) { + pending = true; + inflights.bsprintf("Inflight chunks: %d\n", nr_inflight_chunks()); + dst->status_length = pm_strcat(dst->status, inflights.c_str()); + } if (!m_cb->empty()) { - dst->status_length = pm_strcpy(dst->status, _("Pending IO flush requests:\n")); + pending = true; + dst->status_length = pm_strcat(dst->status, _("Pending IO flush requests:\n")); /* * Peek on the ordered circular queue and list all pending requests. 
*/ m_cb->peek(PEEK_LIST, dst, list_io_request); - } else { - dst->status_length = pm_strcpy(dst->status, _("No Pending IO flush requests\n")); } } + if (!pending) { + dst->status_length += pm_strcat(dst->status, _("No Pending IO flush requests.\n")); + } + return (dst->status_length > 0); } diff --git a/src/stored/backends/chunked_device.h b/src/stored/backends/chunked_device.h index 957106790a3..0329b7e8d2c 100644 --- a/src/stored/backends/chunked_device.h +++ b/src/stored/backends/chunked_device.h @@ -2,6 +2,7 @@ BAREOS® - Backup Archiving REcovery Open Sourced Copyright (C) 2015-2017 Planets Communications B.V. + Copyright (C) 2018-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -32,6 +33,12 @@ */ #define DEFAULT_RECHECK_INTERVAL 300 +/* + * Recheck interval when waiting that buffer gets written + * (write buffer is empty). + */ +#define DEFAULT_RECHECK_INTERVAL_WRITE_BUFFER 10 + /* * Chunk the volume into chunks of this size. * This is the lower limit used the exact chunksize is @@ -112,6 +119,7 @@ class chunked_device: public DEVICE { bool enqueue_chunk(chunk_io_request *request); bool flush_chunk(bool release_chunk, bool move_to_next_chunk); bool read_chunk(); + bool is_written(); protected: /* @@ -138,10 +146,13 @@ class chunked_device: public DEVICE { bool truncate_chunked_volume(DCR *dcr); ssize_t chunked_volume_size(); bool load_chunk(); + bool wait_until_chunks_written(); /* * Methods implemented by inheriting class. 
*/ + virtual bool check_remote() = 0; + virtual bool remote_chunked_volume_exists() = 0; virtual bool flush_remote_chunk(chunk_io_request *request) = 0; virtual bool read_remote_chunk(chunk_io_request *request) = 0; virtual ssize_t chunked_remote_volume_size() = 0; diff --git a/src/stored/backends/droplet_device.c b/src/stored/backends/droplet_device.c index b6a3faa9ef0..f05f18282e9 100644 --- a/src/stored/backends/droplet_device.c +++ b/src/stored/backends/droplet_device.c @@ -2,7 +2,7 @@ BAREOS® - Backup Archiving REcovery Open Sourced Copyright (C) 2014-2017 Planets Communications B.V. - Copyright (C) 2014-2014 Bareos GmbH & Co. KG + Copyright (C) 2014-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -140,6 +140,9 @@ static inline int droplet_errno_to_system_errno(dpl_status_t status) case DPL_EPERM: errno = EPERM; break; + case DPL_FAILURE: /**< General failure */ + errno = EIO; + break; default: errno = EINVAL; break; @@ -270,6 +273,91 @@ static bool walk_dpl_directory(dpl_ctx_t *ctx, const char *dirname, t_call_back return true; } + +/* + * + * Checks is connection to backend storage system is possible. + * + * Returns true - if connection can be established + * false - otherwise + * + * FIXME: currently, check_remote() returns true, + * after an initial connection could be made, + * even if the system is now no more reachable. + * Seams to be some caching effect. 
+ */ +bool droplet_device::check_remote() +{ + bool retval = false; + dpl_status_t status; + dpl_sysmd_t *sysmd = NULL; + + if (!m_ctx) { + if (!initialize()) { + return false; + } + } + + sysmd = dpl_sysmd_dup(&m_sysmd); + status = dpl_getattr(m_ctx, /* context */ + "", /* locator */ + NULL, /* metadata */ + sysmd); /* sysmd */ + + switch (status) { + case DPL_SUCCESS: + Dmsg0(100, "check_remote: ok\n"); + retval = true; + break; + case DPL_ENOENT: + case DPL_FAILURE: + default: + Dmsg0(100, "check_remote: failed\n"); + break; + } + + return retval; +} + + + +bool droplet_device::remote_chunked_volume_exists() +{ + bool retval = false; + dpl_status_t status; + dpl_sysmd_t *sysmd = NULL; + POOL_MEM chunk_dir(PM_FNAME); + + if (!check_remote()) { + return false; + } + + Mmsg(chunk_dir, "/%s", getVolCatName()); + + Dmsg1(100, "checking remote_chunked_volume_exists %s\n", chunk_dir.c_str()); + + sysmd = dpl_sysmd_dup(&m_sysmd); + status = dpl_getattr(m_ctx, /* context */ + chunk_dir.c_str(), /* locator */ + NULL, /* metadata */ + sysmd); /* sysmd */ + + switch (status) { + case DPL_SUCCESS: + Dmsg1(100, "remote_chunked_volume %s exists\n", chunk_dir.c_str()); + retval = true; + break; + case DPL_ENOENT: + case DPL_FAILURE: + default: + Dmsg1(100, "remote_chunked_volume %s does not exists\n", chunk_dir.c_str()); + break; + } + + return retval; +} + + /* * Internal method for flushing a chunk to the backing store. * This does the real work either by being called from a @@ -514,6 +602,12 @@ bool droplet_device::truncate_remote_chunked_volume(DCR *dcr) return true; } + +bool droplet_device::d_flush(DCR *dcr) +{ + return wait_until_chunks_written(); +}; + /* * Initialize backend. 
*/ diff --git a/src/stored/backends/droplet_device.h b/src/stored/backends/droplet_device.h index 7e1b25df3a0..1ecb15e17ad 100644 --- a/src/stored/backends/droplet_device.h +++ b/src/stored/backends/droplet_device.h @@ -2,7 +2,7 @@ BAREOS® - Backup Archiving REcovery Open Sourced Copyright (C) 2014-2017 Planets Communications B.V. - Copyright (C) 2014-2014 Bareos GmbH & Co. KG + Copyright (C) 2014-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -53,6 +53,8 @@ class droplet_device: public chunked_device { /* * Interface from chunked_device */ + bool check_remote(); + bool remote_chunked_volume_exists(); bool flush_remote_chunk(chunk_io_request *request); bool read_remote_chunk(chunk_io_request *request); ssize_t chunked_remote_volume_size(); @@ -75,5 +77,6 @@ class droplet_device: public chunked_device { ssize_t d_read(int fd, void *buffer, size_t count); ssize_t d_write(int fd, const void *buffer, size_t count); bool d_truncate(DCR *dcr); + bool d_flush(DCR *dcr); }; #endif /* OBJECTSTORE_DEVICE_H */ diff --git a/src/stored/dev.c b/src/stored/dev.c index 141e8d4f80c..cb67e29375b 100644 --- a/src/stored/dev.c +++ b/src/stored/dev.c @@ -704,7 +704,7 @@ bool DEVICE::rewind(DCR *dcr) if (lseek(dcr, (boffset_t)0, SEEK_SET) < 0) { berrno be; dev_errno = errno; - Mmsg2(errmsg, _("lseek error on %s. ERR=%s.\n"), print_name(), be.bstrerror()); + Mmsg2(errmsg, _("lseek error on %s. ERR=%s"), print_name(), be.bstrerror()); return false; } diff --git a/src/stored/dev.h b/src/stored/dev.h index 4a87b9f6203..940c25635bf 100644 --- a/src/stored/dev.h +++ b/src/stored/dev.h @@ -3,7 +3,7 @@ Copyright (C) 2000-2012 Free Software Foundation Europe e.V. Copyright (C) 2011-2012 Planets Communications B.V. - Copyright (C) 2013-2017 Bareos GmbH & Co. KG + Copyright (C) 2013-2018 Bareos GmbH & Co. 
KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -517,6 +517,7 @@ class DEVICE: public SMARTALLOC { virtual bool device_status(bsdDevStatTrig *dst) { return false; }; boffset_t lseek(DCR *dcr, boffset_t offset, int whence) { return d_lseek(dcr, offset, whence); }; bool truncate(DCR *dcr) { return d_truncate(dcr); }; + bool flush(DCR *dcr) { return d_flush(dcr); }; /* * Low level operations @@ -528,6 +529,7 @@ class DEVICE: public SMARTALLOC { virtual ssize_t d_write(int fd, const void *buffer, size_t count) = 0; virtual boffset_t d_lseek(DCR *dcr, boffset_t offset, int whence) = 0; virtual bool d_truncate(DCR *dcr) = 0; + virtual bool d_flush(DCR *dcr) { return true; }; /* * Locking and blocking calls From 703e87759e5f1785d8f8f808dfd16d114d8ff932 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Sat, 28 Apr 2018 16:46:05 +0200 Subject: [PATCH 03/13] generate fatal error instead of normal error if connection between director and storage daemons fails this results to failed jobs, instead of terminated with warnings. --- src/dird/msgchan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dird/msgchan.c b/src/dird/msgchan.c index 7fa1d3206d9..acbc435e06a 100644 --- a/src/dird/msgchan.c +++ b/src/dird/msgchan.c @@ -471,7 +471,8 @@ extern "C" void *msg_thread(void *arg) * This probably should be M_FATAL, but I am not 100% sure * that this return *always* corresponds to a dropped line. 
*/ - Qmsg(jcr, M_ERROR, 0, _("Director's comm line to SD dropped.\n")); + //Qmsg(jcr, M_ERROR, 0, _("Director's comm line to SD dropped.\n")); + Qmsg(jcr, M_FATAL, 0, _("Director's comm line to SD dropped.\n")); } if (is_bnet_error(sd)) { jcr->SDJobStatus = JS_ErrorTerminated; From bb2741942be1ee036bbbbfa21aa47c97e7ec74e8 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Sat, 28 Apr 2018 16:48:25 +0200 Subject: [PATCH 04/13] calculate job time after releasing a device This results in a more accurate time period. --- src/stored/append.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/stored/append.c b/src/stored/append.c index ade450a4cf1..2bd63a08e12 100644 --- a/src/stored/append.c +++ b/src/stored/append.c @@ -2,6 +2,7 @@ BAREOS® - Backup Archiving REcovery Open Sourced Copyright (C) 2000-2012 Free Software Foundation Europe e.V. + Copyright (C) 2018-2018 Bareos GmbH & Co. KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -314,6 +315,11 @@ bool do_append_data(JCR *jcr, BSOCK *bs, const char *what) commit_data_spool(dcr); } + /* + * Release the device -- and send final Vol info to DIR and unlock it. + */ + release_device(dcr); + /* * Don't use time_t for job_elapsed as time_t can be 32 or 64 bits, * and the subsequent Jmsg() editing will break @@ -323,15 +329,10 @@ bool do_append_data(JCR *jcr, BSOCK *bs, const char *what) job_elapsed = 1; } - Jmsg(dcr->jcr, M_INFO, 0, _("Elapsed time=%02d:%02d:%02d, Transfer rate=%s Bytes/second\n"), + Jmsg(jcr, M_INFO, 0, _("Elapsed time=%02d:%02d:%02d, Transfer rate=%s Bytes/second\n"), job_elapsed / 3600, job_elapsed % 3600 / 60, job_elapsed % 60, edit_uint64_with_suffix(jcr->JobBytes / job_elapsed, ec)); - /* - * Release the device -- and send final Vol info to DIR and unlock it. 
- */ - release_device(dcr); - if ((!ok || jcr->is_job_canceled()) && !jcr->is_JobStatus(JS_Incomplete)) { discard_attribute_spool(jcr); } else { From 346907ff228e1d9ac9fc30947d493cb01704aee0 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Sat, 28 Apr 2018 16:52:36 +0200 Subject: [PATCH 05/13] write_block_to_dev: don't retry on EIO, only on EBUSY EIO (io error) is normally permanent, so a retry will not help. When doing a retry on a EIO on the droplet_device, this results in lost data (because of the chunked_device caching). This could be fixed, however this is the quicker solution. --- src/stored/block.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/stored/block.c b/src/stored/block.c index 98777c73de4..14ca5582309 100644 --- a/src/stored/block.c +++ b/src/stored/block.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2012 Free Software Foundation Europe e.V. Copyright (C) 2011-2012 Planets Communications B.V. - Copyright (C) 2013-2013 Bareos GmbH & Co. KG + Copyright (C) 2013-2018 Bareos GmbH & Co. 
KG This program is Free Software; you can redistribute it and/or modify it under the terms of version three of the GNU Affero General Public @@ -593,7 +593,8 @@ bool DCR::write_block_to_dev() } status = dev->write(block->buf, (size_t)wlen); - } while (status == -1 && (errno == EBUSY || errno == EIO) && retry++ < 3); + //} while (status == -1 && (errno == EBUSY || errno == EIO) && retry++ < 3); + } while (status == -1 && (errno == EBUSY) && retry++ < 3); if (debug_block_checksum) { uint32_t achecksum = ser_block_header(block, dev->do_checksum()); From a9f890ce1225c8b62a4af90c04b747a6c4a178c8 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Sun, 29 Apr 2018 16:55:57 +0200 Subject: [PATCH 06/13] update droplet readme --- README.droplet | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.droplet b/README.droplet index 1e0050b68a2..6bd8ac53013 100644 --- a/README.droplet +++ b/README.droplet @@ -25,11 +25,12 @@ Device { # profile= - Droplet profile to use either absolute PATH or logical name (e.g. ~/.droplet/.profile # location= - AWS location (e.g. us-east etc.) # acl= - Canned ACL - # storageclass - Storage Class to use. + # storageclass= - Storage Class to use. # bucket= - Bucket to store objects in. # chunksize= - Size of Volume Chunks (default = 10 Mb) - # iothreads= - Number of IO-threads to use for upload (use blocking uploads if not defined.) - # ioslots= - Number of IO-slots per IO-thread (default 10) + # iothreads= - Number of IO-threads to use for upload (use blocking uploads if not defined) + # ioslots= - Number of IO-slots per IO-thread (0-255, default 10) + # retries= - Number of retires if a write fails (0-255, default = 0, which means unlimited retries) # mmap - Use mmap to allocate Chunk memory instead of malloc(). 
# Device Options = "profile=/etc/bareos/bareos-sd.d/.droplet/droplet.profile,bucket=backup-bareos,iothreads=3,ioslots=3,chunksize=100M" From 77792e67be1bbebee9a52e861780a4b23705bf90 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Thu, 3 May 2018 17:37:19 +0200 Subject: [PATCH 07/13] droplet readme: added troubleshooting section --- README.droplet | 84 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/README.droplet b/README.droplet index 6bd8ac53013..ec791af911f 100644 --- a/README.droplet +++ b/README.droplet @@ -2,24 +2,22 @@ Using droplet S3 as a backingstore for backups. The droplet S3 storage backend writes chunks of data in an S3 bucket. -For this you need to install the libdroplet-devel and the storage-droplet packages which contains -the libbareossd-chunked*.so and libbareossd-droplet*.so shared objects and the droplet storage backend which implements a dynamic loaded -storage backend. +For this you need to install the the bareos-storage-droplet packages which contains +the libbareossd-chunked*.so and libbareossd-droplet*.so shared objects and the droplet storage backend which implements a dynamic loaded storage backend. In the following example all the backup data is placed in the "bareos-backup" bucket on the defined S3 storage. -A Volume is a sub-directory in the defined bucket, and every chunk is placed in the Volume directory withe the filename 0000-9999 and a size -that is defined in the chunksize. +A volume is a sub-directory in the defined bucket, and every chunk is placed in the Volume directory withe the filename 0000-9999 and a size that is defined in the chunksize. The droplet S3 can only be used with virtual-hosted-style buckets like http://./object Path-style buckets are not supported when using the droplet S3. On the Storage Daemon the following configuration is needed. 
-Example bareos-sd.d/device file: +Example bareos-sd.d/device/S3_1-00.conf file: Device { Name = "S3_1-00" Media Type = "S3_File_1" - Archive Device = Object S3 Storage + Archive Device = "Object S3 Storage" # # Config options: # profile= - Droplet profile to use either absolute PATH or logical name (e.g. ~/.droplet/.profile @@ -33,22 +31,22 @@ Device { # retries= - Number of retires if a write fails (0-255, default = 0, which means unlimited retries) # mmap - Use mmap to allocate Chunk memory instead of malloc(). # - Device Options = "profile=/etc/bareos/bareos-sd.d/.droplet/droplet.profile,bucket=backup-bareos,iothreads=3,ioslots=3,chunksize=100M" + Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=backup-bareos,chunksize=100M" Device Type = droplet LabelMedia = yes # lets Bareos label unlabeled media Random Access = yes AutomaticMount = yes # when device opened, read it RemovableMedia = no AlwaysOpen = no - Description = "Object S3 device. A connecting Director must have the same Name and MediaType." - Maximum File Size = 500M # 500 MB (Allows for seeking to small portions of the Volume) + Description = "S3 device" + Maximum File Size = 500M # 500 MB (allows for seeking to small portions of the Volume) Maximum Concurrent Jobs = 1 Maximum Spool Size = 15000M } The droplet.profile file holds the credentials for S3 storage -Example /etc/bareos/bareos-sd.d/.droplet/droplet.profile file: +Example /etc/bareos/bareos-sd.d/droplet/droplet.profile file: Make sure the file is only readable for bareos, credentials for S3 are listed here. @@ -70,12 +68,72 @@ The parameter "aws_auth_sign_version = 2" is for the connection to a CEPH AWS co For use with AWS S3 the aws_auth_sign_version, must be set to "4". 
On the Director you connect to the Storage Device with the following configuration -Example bareos-dir.d/storage file: +Example bareos-dir.d/storage/S3_1-00.conf file: Storage { Name = S3_1-00 Address = "Replace this by the Bareos Storage Daemon FQDN or IP address" Password = "Replace this by the Bareos Storage Daemon director password" - Device = S3_ObjectStorage + Device = S3_1-00 Media Type = S3_File_1 } + + +Troubleshooting +=============== + +S3 Backend Unreachable +---------------------- + +The droplet device can run in two modes: + * direct writing (iothreads = 0) + * cached writing (iothreads >= 1) + +If iothreads >= 1, retries = 0 (unlimited retries) and the droplet backend (e.g. S3 storage) is not available, a job will continue running until the backend problem is fixed. +If this is the case and the job is canceled, it will only be canceled on the Director. It continues running on the Storage Daemon, until the S3 backend is available again or the Storage Daemon itself is restarted. + +If iothreads >= 1, retries != 0 and the droplet backend (e.g. S3 storage) is not available, write operation will be silently discarded after the specified number of retries. +*Don't use this combination of options*. + +Caching when S3 backend is not available: +This behaviour have not changed, but I fear problems can arise, if the backend is not available and all write operations are stored in memory. + +The status of the cache can be determined with the "status storage=..." command. + + +Pending IO chunks (and inflight chunks): +``` +... +Device "S3_ObjectStorage" (S3) is mounted with: + Volume: Full-0085 + Pool: Full + Media type: S3_Object1 +Backend connection is working. +Inflight chunks: 2 +Pending IO flush requests: + /Full-0085/0002 - 10485760 (try=0) + /Full-0085/0003 - 10485760 (try=0) + /Full-0085/0004 - 10485760 (try=0) +... +Attached Jobs: 175 +... +``` + +If try > 0, problems did already occur. The system will continue retrying. 
+ + +Status without pending IO chunks: +``` +Device "S3_ObjectStorage" (S3) is mounted with: + Volume: Full-0084 + Pool: Full + Media type: S3_Object1 +Backend connection is working. +No Pending IO flush requests. +Configured device capabilities: + EOF BSR BSF FSR FSF EOM !REM RACCESS AUTOMOUNT LABEL !ANONVOLS !ALWAYSOPEN +Device state: + OPENED !TAPE LABEL !MALLOC APPEND !READ EOT !WEOT !EOF !NEXTVOL !SHORT MOUNTED + num_writers=0 reserves=0 block=8 +Attached Jobs: +``` From df75779019be92d98f214913caf7be37f572c485 Mon Sep 17 00:00:00 2001 From: Frank Bergkemper Date: Thu, 24 Aug 2017 16:24:50 +0200 Subject: [PATCH 08/13] Reset JobStatus to previous JobStatus in status SD and FD loops We need to reset the JobStatus value to its previous after each storage or client status call, otherwise if we receive an JS_Error for one storage or client, all the following status calls will fail as well. This could happen for example if a client or storage is offline but more status calls to other clients and storages will follow in the loop. 
(cherry picked from commit 751787aa81c60b1003b3c75c1a0fc6d38eb967ee) --- src/dird/ua_status.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/dird/ua_status.c b/src/dird/ua_status.c index 90d648d8409..046f4c5d378 100644 --- a/src/dird/ua_status.c +++ b/src/dird/ua_status.c @@ -251,6 +251,7 @@ static void do_all_status(UAContext *ua) CLIENTRES *client, **unique_client; int i, j; bool found; + int32_t previous_JobStatus = 0; do_director_status(ua); @@ -282,9 +283,12 @@ static void do_all_status(UAContext *ua) } UnlockRes(); + previous_JobStatus = ua->jcr->JobStatus; + /* Call each unique Storage daemon */ for (j = 0; j < i; j++) { storage_status(ua, unique_store[j], NULL); + ua->jcr->JobStatus = previous_JobStatus; } free(unique_store); @@ -316,9 +320,12 @@ static void do_all_status(UAContext *ua) } UnlockRes(); + previous_JobStatus = ua->jcr->JobStatus; + /* Call each unique File daemon */ for (j = 0; j < i; j++) { client_status(ua, unique_client[j], NULL); + ua->jcr->JobStatus = previous_JobStatus; } free(unique_client); From b29bd509dad6b882a89f12015b6b4905ca6115e6 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Thu, 14 Jun 2018 16:57:25 +0200 Subject: [PATCH 09/13] droplet: update example --- README.droplet | 38 +++++++++++-------- .../device/S3_ObjectStorage.conf.example | 27 ++++++++----- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/README.droplet b/README.droplet index ec791af911f..f865abfcde6 100644 --- a/README.droplet +++ b/README.droplet @@ -12,16 +12,17 @@ The droplet S3 can only be used with virtual-hosted-style buckets like http://.profile - # location= - AWS location (e.g. us-east etc.) + # Device Options: + # profile= - Droplet profile path, e.g. /etc/bareos/bareos-sd.d/droplet/droplet.profile + # location= - AWS location (e.g. us-east etc.). Optional. # acl= - Canned ACL # storageclass= - Storage Class to use. # bucket= - Bucket to store objects in. 
@@ -31,7 +32,13 @@ Device { # retries= - Number of retires if a write fails (0-255, default = 0, which means unlimited retries) # mmap - Use mmap to allocate Chunk memory instead of malloc(). # - Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=backup-bareos,chunksize=100M" + + # testing: + Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=bareos-bucket,chunksize=100M,iothreads=0,retries=1" + + # performance: + #Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=bareos-bucket,chunksize=100M" + Device Type = droplet LabelMedia = yes # lets Bareos label unlabeled media Random Access = yes @@ -44,7 +51,6 @@ Device { Maximum Spool Size = 15000M } - The droplet.profile file holds the credentials for S3 storage Example /etc/bareos/bareos-sd.d/droplet/droplet.profile file: @@ -60,22 +66,22 @@ pricing_dir = "" backend = s3 aws_auth_sign_version = 2 -If the pricing_dir is not empty, it will create an /droplet.csv file wich -will record all S3 operations. -See the 'libdroplet/src/pricing.c' code for an explanation. +If the pricing_dir is not empty, +it will create an /droplet.csv file which will record all S3 operations. +See the code at https://github.com/bareos/Droplet/blob/bareos-master/libdroplet/src/pricing.c for an explanation. -The parameter "aws_auth_sign_version = 2" is for the connection to a CEPH AWS connection. +The parameter "aws_auth_sign_version = 2" is for the connection to a CEPH S3 gateway. For use with AWS S3 the aws_auth_sign_version, must be set to "4". 
On the Director you connect to the Storage Device with the following configuration Example bareos-dir.d/storage/S3_1-00.conf file: Storage { - Name = S3_1-00 + Name = S3_Object Address = "Replace this by the Bareos Storage Daemon FQDN or IP address" Password = "Replace this by the Bareos Storage Daemon director password" - Device = S3_1-00 - Media Type = S3_File_1 + Device = S3_ObjectStorage + Media Type = S3_Object1 } diff --git a/src/stored/backends/droplet_device.d/bareos-sd.d/device/S3_ObjectStorage.conf.example b/src/stored/backends/droplet_device.d/bareos-sd.d/device/S3_ObjectStorage.conf.example index 1b94a0537da..3eb765cb166 100644 --- a/src/stored/backends/droplet_device.d/bareos-sd.d/device/S3_ObjectStorage.conf.example +++ b/src/stored/backends/droplet_device.d/bareos-sd.d/device/S3_ObjectStorage.conf.example @@ -2,26 +2,35 @@ Device { Name = S3_ObjectStorage Media Type = S3_Object1 Archive Device = S3 Object Storage + # - # Config options: - # profile= - Droplet profile to use either absolute PATH or logical name (e.g. ~/.droplet/.profile - # location= - AWS location (e.g. us-east etc.) + # Device Options: + # profile= - Droplet profile path, e.g. /etc/bareos/bareos-sd.d/droplet/droplet.profile + # location= - AWS location (e.g. us-east etc.). Optional. # acl= - Canned ACL # storageclass= - Storage Class to use. # bucket= - Bucket to store objects in. # chunksize= - Size of Volume Chunks (default = 10 Mb) - # iothreads= - Number of IO-threads to use for uploads (use blocking uploads if not set.) - # ioslots= - Number of IO-slots per IO-thread (default 10) + # iothreads= - Number of IO-threads to use for upload (use blocking uploads if not defined) + # ioslots= - Number of IO-slots per IO-thread (0-255, default 10) + # retries= - Number of retries if a write fails (0-255, default = 0, which means unlimited retries) # mmap - Use mmap to allocate Chunk memory instead of malloc(). 
# - Device Options = "profile=/etc/bareos/bareos-sd.d/device/droplet/droplet.profile,bucket=bareos,iothreads=2" + + # testing: + Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=bareos-bucket,chunksize=100M,iothreads=0,retries=1" + + # performance: + #Device Options = "profile=/etc/bareos/bareos-sd.d/droplet/droplet.profile,bucket=bareos-bucket,chunksize=100M" + Device Type = droplet LabelMedia = yes # lets Bareos label unlabeled media Random Access = yes AutomaticMount = yes # when device opened, read it RemovableMedia = no AlwaysOpen = no - Description = "S3 Object device. A connecting Director must have the same Name and MediaType." - Maximum File Size = 200000000 # 200 MB (Allows for seeking to small portions of the Volume) + Description = "S3 device" + Maximum File Size = 500M # 500 MB (allows for seeking to small portions of the Volume) + Maximum Concurrent Jobs = 1 + Maximum Spool Size = 15000M } - From aa34bddacc6c5242b85c84e6a895451aaf591a14 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Thu, 5 Jul 2018 17:01:23 +0200 Subject: [PATCH 10/13] added comment about changed behavior --- src/dird/msgchan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dird/msgchan.c b/src/dird/msgchan.c index acbc435e06a..5cd39fd30db 100644 --- a/src/dird/msgchan.c +++ b/src/dird/msgchan.c @@ -468,10 +468,11 @@ extern "C" void *msg_thread(void *arg) } if (n == BNET_HARDEOF) { /* - * This probably should be M_FATAL, but I am not 100% sure - * that this return *always* corresponds to a dropped line. + * A lost connection to the storage daemon is FATAL. + * This is required, as otherwise + * the job could fail to write data + * but still end as JS_Warnings (OK -- with warnings). 
*/ - //Qmsg(jcr, M_ERROR, 0, _("Director's comm line to SD dropped.\n")); Qmsg(jcr, M_FATAL, 0, _("Director's comm line to SD dropped.\n")); } if (is_bnet_error(sd)) { From c533f28be9c3c96e73d422059803e08407ac0afb Mon Sep 17 00:00:00 2001 From: Stephan Duehr Date: Fri, 6 Jul 2018 15:08:32 +0200 Subject: [PATCH 11/13] Build: Bump version number. --- configure | 18 +++++++++--------- src/include/version.h | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/configure b/configure index 6f4d542dfb6..f2550522ebb 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for bareos 16.2.7. +# Generated by GNU Autoconf 2.69 for bareos 16.2.8. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -587,8 +587,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='bareos' PACKAGE_TARNAME='bareos' -PACKAGE_VERSION='16.2.7' -PACKAGE_STRING='bareos 16.2.7' +PACKAGE_VERSION='16.2.8' +PACKAGE_STRING='bareos 16.2.8' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1652,7 +1652,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures bareos 16.2.7 to adapt to many kinds of systems. +\`configure' configures bareos 16.2.8 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1721,7 +1721,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of bareos 16.2.7:";; + short | recursive ) echo "Configuration of bareos 16.2.8:";; esac cat <<\_ACEOF @@ -1956,7 +1956,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -bareos configure 16.2.7 +bareos configure 16.2.8 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. 
@@ -2911,7 +2911,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by bareos $as_me 16.2.7, which was +It was created by bareos $as_me 16.2.8, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -34357,7 +34357,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by bareos $as_me 16.2.7, which was +This file was extended by bareos $as_me 16.2.8, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -34423,7 +34423,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -bareos config.status 16.2.7 +bareos config.status 16.2.8 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/src/include/version.h b/src/include/version.h index 4647e1aa02f..10a1d73c454 100644 --- a/src/include/version.h +++ b/src/include/version.h @@ -1,12 +1,12 @@ #undef VERSION -#define VERSION "16.2.7" -#define BDATE "09 October 2017" -#define LSMDATE "09Oct17" +#define VERSION "16.2.8" +#define BDATE "06 July 2018" +#define LSMDATE "06Jul18" #define PROG_COPYRIGHT "Copyright (C) %d-2012 Free Software Foundation Europe e.V.\n" \ "Copyright (C) 2010-2017 Planets Communications B.V.\n" \ - "Copyright (C) 2013-2017 Bareos GmbH & Co. KG\n" -#define BYEAR "2017" /* year for copyright messages in programs */ + "Copyright (C) 2013-2018 Bareos GmbH & Co. 
KG\n" +#define BYEAR "2018" /* year for copyright messages in programs */ /* BAREOS® - Backup Archiving REcovery Open Sourced From 4bd389cbbc848fc9861a23c9e46ff0cdaa4769b8 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Mon, 9 Jul 2018 12:49:02 +0200 Subject: [PATCH 12/13] modify job messages when releasing device --- src/stored/acquire.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/stored/acquire.c b/src/stored/acquire.c index 9e787a33643..4fad7aca571 100644 --- a/src/stored/acquire.c +++ b/src/stored/acquire.c @@ -490,6 +490,8 @@ bool release_device(DCR *dcr) char tbuf[100]; int was_blocked = BST_NOT_BLOCKED; + Jmsg(jcr, M_INFO, 0, "Releasing device %s.\n", dev->print_name()); + /* * Capture job statistics now that we are done using this device. */ @@ -506,11 +508,8 @@ bool release_device(DCR *dcr) * which resulted in various locking problems. */ if (!job_canceled(jcr)) { - Jmsg(jcr, M_INFO, 0, "Flushing device %s.\n", dev->print_name()); if (!dev->flush(dcr)) { Jmsg(jcr, M_FATAL, 0, "Failed to flush device %s.\n", dev->print_name()); - } else { - Jmsg(jcr, M_INFO, 0, "Device %s flushed.\n", dev->print_name()); } } @@ -522,7 +521,7 @@ bool release_device(DCR *dcr) dev->set_blocked(BST_RELEASING); } lock_volumes(); - Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape() ? 
"tape" : "disk"); + Dmsg1(100, "releasing device %s\n", dev->print_name()); /* * If device is reserved, job never started, so release the reserve here From fa8f15e529ae870014178e24d887075fb89428f9 Mon Sep 17 00:00:00 2001 From: Joerg Steffens Date: Mon, 9 Jul 2018 12:52:34 +0200 Subject: [PATCH 13/13] write_block_to_dev: don't retry on EIO, only on EBUSY: cleanup --- src/stored/block.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/stored/block.c b/src/stored/block.c index 14ca5582309..4249f32304c 100644 --- a/src/stored/block.c +++ b/src/stored/block.c @@ -577,8 +577,9 @@ bool DCR::write_block_to_dev() #endif /* - * Do write here, make a somewhat feeble attempt to recover from - * I/O errors, or from the OS telling us it is busy. + * Do write here, + * make a somewhat feeble attempt to recover + * from the OS telling us it is busy. */ int retry = 0; errno = 0; @@ -592,8 +593,6 @@ bool DCR::write_block_to_dev() dev->clrerror(-1); } status = dev->write(block->buf, (size_t)wlen); - - //} while (status == -1 && (errno == EBUSY || errno == EIO) && retry++ < 3); } while (status == -1 && (errno == EBUSY) && retry++ < 3); if (debug_block_checksum) {