Skip to content

Commit

Permalink
Merge pull request #1732
Browse files Browse the repository at this point in the history
stored: fix authentication race condition / deadlock
  • Loading branch information
BareosBot committed Mar 18, 2024
2 parents 258f0b6 + f229099 commit 61febc7
Show file tree
Hide file tree
Showing 33 changed files with 314 additions and 326 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -66,6 +66,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- plugins: fix cancel handling crash [PR #1595]
- Fix bareos_tasks plugin for pgsql [PR #1659]
- core: Fix compile errors on GCC 14 [PR #1687]
- stored: fix authentication race condition / deadlock [PR #1732]

[PR #1538]: https://github.com/bareos/bareos/pull/1538
[PR #1581]: https://github.com/bareos/bareos/pull/1581
Expand Down Expand Up @@ -119,4 +120,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[PR #1718]: https://github.com/bareos/bareos/pull/1718
[PR #1719]: https://github.com/bareos/bareos/pull/1719
[PR #1728]: https://github.com/bareos/bareos/pull/1728
[PR #1732]: https://github.com/bareos/bareos/pull/1732
[unreleased]: https://github.com/bareos/bareos/tree/master
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-dir.in
Expand Up @@ -55,7 +55,8 @@ case "$1" in
stop)
if [ -x "${BAREOS_DIRECTOR_BINARY}" ]; then
printf "Stopping the Bareos Director daemon: "
killproc "${BAREOS_DIRECTOR_BINARY}" "${BAREOS_DIRECTOR_PORT}"
shift # shift away the stop
killproc "${BAREOS_DIRECTOR_BINARY}" "${BAREOS_DIRECTOR_PORT}" "${@}"
fi
;;

Expand Down
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-fd.in
Expand Up @@ -54,7 +54,8 @@ case "$1" in
stop)
if [ -x ${BAREOS_FILEDAEMON_BINARY} ]; then
printf "Stopping the Bareos File daemon: "
killproc ${BAREOS_FILEDAEMON_BINARY} ${BAREOS_FD_PORT}
shift # shift away the stop
killproc ${BAREOS_FILEDAEMON_BINARY} ${BAREOS_FD_PORT} "${@}"
fi
;;

Expand Down
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-sd.in
Expand Up @@ -53,7 +53,8 @@ case "$1" in
stop)
if [ -x ${BAREOS_STORAGEDAEMON_BINARY} ]; then
printf "Stopping the Bareos Storage daemon: "
killproc ${BAREOS_STORAGEDAEMON_BINARY} ${BAREOS_SD_PORT}
shift # shift away the stop
killproc ${BAREOS_STORAGEDAEMON_BINARY} ${BAREOS_SD_PORT} "${@}"
fi
;;

Expand Down
7 changes: 4 additions & 3 deletions core/src/dird/director_jcr_impl.h
Expand Up @@ -3,7 +3,7 @@
Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
Copyright (C) 2011-2012 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand All @@ -28,6 +28,8 @@
#include "dird/client_connection_handshake_mode.h"
#include "dird/job_trigger.h"

#include <condition_variable>

typedef struct s_tree_root TREE_ROOT;

class ConfigResourcesContainer;
Expand Down Expand Up @@ -105,7 +107,7 @@ struct DirectorJcrImpl {
std::shared_ptr<ConfigResourcesContainer> job_config_resources_container_;
pthread_t SD_msg_chan{}; /**< Message channel thread id */
bool SD_msg_chan_started{}; /**< Message channel thread started */
pthread_cond_t term_wait = PTHREAD_COND_INITIALIZER; /**< Wait for job termination */
std::condition_variable term_wait{}; /**< Wait for job termination */
pthread_cond_t nextrun_ready = PTHREAD_COND_INITIALIZER; /**< Wait for job next run to become ready */
Resources res; /**< Resources assigned */
TREE_ROOT* restore_tree_root{}; /**< Selected files to restore (some protocols need this info) */
Expand Down Expand Up @@ -151,7 +153,6 @@ struct DirectorJcrImpl {
int32_t max_concurrent_jobs{}; /**< Maximum concurrent jobs */
bool spool_data{}; /**< Spool data in SD */
bool acquired_resource_locks{}; /**< Set if resource locks acquired */
bool term_wait_inited{}; /**< Set when cond var inited */
bool nextrun_ready_inited{}; /**< Set when cond var inited */
bool fn_printed{}; /**< Printed filename */
bool needs_sd{}; /**< Set if SD needed by Job */
Expand Down
57 changes: 21 additions & 36 deletions core/src/dird/job.cc
Expand Up @@ -140,39 +140,30 @@ bool SetupJob(JobControlRecord* jcr, bool suppress_output)
{
int errstat;

jcr->lock();
{
std::unique_lock l(jcr->mutex_guard());

// See if we should suppress all output.
if (!suppress_output) {
InitMsg(jcr, jcr->dir_impl->res.messages, job_code_callback_director);
} else {
jcr->suppress_output = true;
}
// See if we should suppress all output.
if (!suppress_output) {
InitMsg(jcr, jcr->dir_impl->res.messages, job_code_callback_director);
} else {
jcr->suppress_output = true;
}

// Initialize termination condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->term_wait, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0, T_("Unable to init job cond variable: ERR=%s\n"),
be.bstrerror(errstat));
jcr->unlock();
goto bail_out;
}
jcr->dir_impl->term_wait_inited = true;
// Initialize nextrun ready condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->nextrun_ready, NULL))
!= 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0,
T_("Unable to init job nextrun cond variable: ERR=%s\n"),
be.bstrerror(errstat));
goto bail_out;
}
jcr->dir_impl->nextrun_ready_inited = true;

// Initialize nextrun ready condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->nextrun_ready, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0,
T_("Unable to init job nextrun cond variable: ERR=%s\n"),
be.bstrerror(errstat));
jcr->unlock();
goto bail_out;
CreateUniqueJobName(jcr, jcr->dir_impl->res.job->resource_name_);
jcr->setJobStatusWithPriorityCheck(JS_Created);
}
jcr->dir_impl->nextrun_ready_inited = true;

CreateUniqueJobName(jcr, jcr->dir_impl->res.job->resource_name_);
jcr->setJobStatusWithPriorityCheck(JS_Created);
jcr->unlock();

// Open database
Dmsg0(100, "Open database\n");
Expand Down Expand Up @@ -654,13 +645,12 @@ static void* job_thread(void* arg)

void SdMsgThreadSendSignal(JobControlRecord* jcr, int sig)
{
jcr->lock();
std::unique_lock l(jcr->mutex_guard());
if (!jcr->dir_impl->sd_msg_thread_done && jcr->dir_impl->SD_msg_chan_started
&& !pthread_equal(jcr->dir_impl->SD_msg_chan, pthread_self())) {
Dmsg1(800, "Send kill to SD msg chan jid=%d\n", jcr->JobId);
pthread_kill(jcr->dir_impl->SD_msg_chan, sig);
}
jcr->unlock();
}

/**
Expand Down Expand Up @@ -1528,11 +1518,6 @@ void DirdFreeJcr(JobControlRecord* jcr)

DirdFreeJcrPointers(jcr);

if (jcr->dir_impl->term_wait_inited) {
pthread_cond_destroy(&jcr->dir_impl->term_wait);
jcr->dir_impl->term_wait_inited = false;
}

if (jcr->dir_impl->nextrun_ready_inited) {
pthread_cond_destroy(&jcr->dir_impl->nextrun_ready);
jcr->dir_impl->nextrun_ready_inited = false;
Expand Down
13 changes: 1 addition & 12 deletions core/src/dird/jobq.cc
Expand Up @@ -3,7 +3,7 @@
Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
Copyright (C) 2011-2012 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -206,17 +206,6 @@ int JobqAdd(jobq_t* jq, JobControlRecord* jcr)
pthread_t id;
wait_pkt* sched_pkt;

if (!jcr->dir_impl->term_wait_inited) {
// Initialize termination condition variable
if ((status = pthread_cond_init(&jcr->dir_impl->term_wait, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0, T_("Unable to init job cond variable: ERR=%s\n"),
be.bstrerror(status));
return status;
}
jcr->dir_impl->term_wait_inited = true;
}

Dmsg3(2300, "JobqAdd jobid=%d jcr=0x%x UseCount=%d\n", jcr->JobId, jcr,
jcr->UseCount());
if (jq->valid != JOBQ_VALID) {
Expand Down
43 changes: 20 additions & 23 deletions core/src/dird/msgchan.cc
Expand Up @@ -3,7 +3,7 @@
Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
Copyright (C) 2011-2016 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -50,8 +50,6 @@

namespace directordaemon {

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Commands sent to Storage daemon */
static char jobcmd[]
= "JobId=%s job=%s job_name=%s client_name=%s "
Expand Down Expand Up @@ -436,14 +434,17 @@ extern "C" void MsgThreadCleanup(void* arg)
JobControlRecord* jcr = (JobControlRecord*)arg;

jcr->db->EndTransaction(jcr); /* Terminate any open transaction */
jcr->lock();
jcr->dir_impl->sd_msg_thread_done = true;
jcr->dir_impl->SD_msg_chan_started = false;
jcr->unlock();
pthread_cond_broadcast(
&jcr->dir_impl->nextrun_ready); /* wakeup any waiting threads */

{
std::unique_lock l(jcr->mutex_guard());

jcr->dir_impl->sd_msg_thread_done = true;
jcr->dir_impl->SD_msg_chan_started = false;
}

pthread_cond_broadcast(
&jcr->dir_impl->term_wait); /* wakeup any waiting threads */
&jcr->dir_impl->nextrun_ready); /* wakeup any waiting threads */
jcr->dir_impl->term_wait.notify_all(); /* wakeup any waiting threads */
Dmsg2(100, "=== End msg_thread. JobId=%d usecnt=%d\n", jcr->JobId,
jcr->UseCount());
jcr->db->ThreadCleanup(); /* remove thread specific data */
Expand Down Expand Up @@ -519,29 +520,25 @@ void WaitForStorageDaemonTermination(JobControlRecord* jcr)
{
int cancel_count = 0;
/* Now wait for Storage daemon to Terminate our message thread */
while (!jcr->dir_impl->sd_msg_thread_done) {
struct timeval tv;
struct timezone tz;
struct timespec timeout;
std::unique_lock l(jcr->mutex_guard());

gettimeofday(&tv, &tz);
timeout.tv_nsec = 0;
timeout.tv_sec = tv.tv_sec + 5; /* wait 5 seconds */
/* Give SD 30 seconds to clean up after cancel */
auto timeout = std::chrono::system_clock::now() + std::chrono::seconds(30);
while (!jcr->dir_impl->sd_msg_thread_done) {
Dmsg0(400, "I'm waiting for message thread termination.\n");
lock_mutex(mutex);
pthread_cond_timedwait(&jcr->dir_impl->term_wait, &mutex, &timeout);
unlock_mutex(mutex);
if (jcr->IsJobCanceled()) {
if (jcr->dir_impl->term_wait.wait_until(l, timeout)
== std::cv_status::timeout) {
break;
} else if (jcr->IsJobCanceled()) {
if (jcr->dir_impl->SD_msg_chan_started) {
jcr->store_bsock->SetTimedOut();
jcr->store_bsock->SetTerminated();
SdMsgThreadSendSignal(jcr, TIMEOUT_SIGNAL);
}
cancel_count++;
}
/* Give SD 30 seconds to clean up after cancel */
if (cancel_count == 6) { break; }
}

jcr->setJobStatusWithPriorityCheck(JS_Terminated);
}

Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/ndmp_fhdb_common.cc
@@ -1,7 +1,7 @@
/*
BAREOS® - Backup Archiving REcovery Open Sourced
Copyright (C) 2015-2023 Bareos GmbH & Co. KG
Copyright (C) 2015-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -41,9 +41,10 @@ extern "C" int BndmpFhdbAddFile(struct ndmlog* ixlog,
{
NIS* nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int8_t FileType = 0;
Expand Down
7 changes: 4 additions & 3 deletions core/src/dird/ndmp_fhdb_lmdb.cc
Expand Up @@ -159,9 +159,10 @@ extern "C" int bndmp_fhdb_lmdb_add_node(struct ndmlog* ixlog,
NIS* nis;
nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int result;
Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/ndmp_fhdb_mem.cc
Expand Up @@ -2,7 +2,7 @@
BAREOS® - Backup Archiving REcovery Open Sourced
Copyright (C) 2015-2015 Planets Communications B.V.
Copyright (C) 2015-2023 Bareos GmbH & Co. KG
Copyright (C) 2015-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -576,9 +576,10 @@ extern "C" int bndmp_fhdb_mem_add_node(struct ndmlog* ixlog,
{
NIS* nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int attr_size;
Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/next_vol.cc
Expand Up @@ -3,7 +3,7 @@
Copyright (C) 2001-2012 Free Software Foundation Europe e.V.
Copyright (C) 2011-2016 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -357,7 +357,8 @@ void CheckIfVolumeValidOrRecyclable(JobControlRecord* jcr,
}
}

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
// Only one thread at a time can pull from the scratch pool
static pthread_mutex_t scratch_volume_mutex = PTHREAD_MUTEX_INITIALIZER;

bool GetScratchVolume(JobControlRecord* jcr,
bool InChanger,
Expand All @@ -370,7 +371,7 @@ bool GetScratchVolume(JobControlRecord* jcr,
bool found = false;

// Only one thread at a time can pull from the scratch pool
lock_mutex(mutex);
lock_mutex(scratch_volume_mutex);

/* Get Pool record for Scratch Pool
* choose between ScratchPoolId and Scratch
Expand Down Expand Up @@ -447,7 +448,7 @@ bool GetScratchVolume(JobControlRecord* jcr,
}

bail_out:
unlock_mutex(mutex);
unlock_mutex(scratch_volume_mutex);
return ok;
}
} /* namespace directordaemon */
11 changes: 6 additions & 5 deletions core/src/filed/backup.cc
Expand Up @@ -1597,11 +1597,12 @@ bool EncodeAndSendAttributes(JobControlRecord* jcr,
Dmsg3(300, "File %s\nattribs=%s\nattribsEx=%s\n", ff_pkt->fname,
attribs.c_str(), attribsEx);

jcr->lock();
jcr->JobFiles++; /* increment number of files sent */
ff_pkt->FileIndex = jcr->JobFiles; /* return FileIndex */
PmStrcpy(jcr->fd_impl->last_fname, ff_pkt->fname);
jcr->unlock();
{
std::unique_lock l(jcr->mutex_guard());
jcr->JobFiles++; /* increment number of files sent */
ff_pkt->FileIndex = jcr->JobFiles; /* return FileIndex */
PmStrcpy(jcr->fd_impl->last_fname, ff_pkt->fname);
}

// Debug code: check if we must hangup
if (hangup && (jcr->JobFiles > (uint32_t)hangup)) {
Expand Down

0 comments on commit 61febc7

Please sign in to comment.